diff --git a/engine/includes/resources/texture.h b/engine/includes/resources/texture.h index 41ffccf5d..27f67f0f4 100644 --- a/engine/includes/resources/texture.h +++ b/engine/includes/resources/texture.h @@ -15,13 +15,13 @@ class ENGINE_EXPORT Texture : public Resource { A_PROPERTY(int, height, Texture::height, Texture::setHeight), A_PROPERTY(int, depth, Texture::depth, Texture::setDepth), A_PROPERTY(int, format, Texture::format, Texture::setFormat), + A_PROPERTY(int, compress, Texture::compress, Texture::setCompress), A_PROPERTY(int, wrap, Texture::wrap, Texture::setWrap), A_PROPERTY(int, filtering, Texture::filtering, Texture::setFiltering) ) A_METHODS( A_METHOD(int, Texture::getPixel), - A_METHOD(bool, Texture::isCompressed), A_METHOD(bool, Texture::isCubemap), A_METHOD(void, Texture::setDirty), A_METHOD(void, Texture::resize) @@ -61,9 +61,13 @@ class ENGINE_EXPORT Texture : public Resource { enum CompressionType { Uncompressed, - DXT1, - DXT5, - ETC2 + BC1, + BC3, + BC7, + ASTC, + ETC1, + ETC2, + PVRTC }; enum FilteringType { @@ -104,7 +108,6 @@ class ENGINE_EXPORT Texture : public Resource { bool isRender() const; bool isFeedback() const; - bool isCompressed() const; bool isCubemap() const; bool isArray() const; @@ -119,6 +122,9 @@ class ENGINE_EXPORT Texture : public Resource { int format() const; void setFormat(int type); + int compress() const; + void setCompress(int method); + int wrap() const; void setWrap(int type); @@ -149,8 +155,6 @@ class ENGINE_EXPORT Texture : public Resource { void switchState(Resource::State state) override; bool isUnloadable() override; - int32_t size(int32_t width, int32_t height, int32_t depth) const; - int32_t sizeDXTc(int32_t width, int32_t height, int32_t depth) const; int32_t sizeRGB(int32_t width, int32_t height, int32_t depth) const; bool isDwordAligned(); diff --git a/engine/src/editor/projectsettings.cpp b/engine/src/editor/projectsettings.cpp index a5b5910be..3da5c0e4a 100644 --- 
a/engine/src/editor/projectsettings.cpp +++ b/engine/src/editor/projectsettings.cpp @@ -12,9 +12,6 @@ #include "config.h" -#include "components/actor.h" -#include "resources/map.h" - #include "editor/assetmanager.h" #include "editor/codebuilder.h" #include "editor/editorplatform.h" @@ -110,7 +107,6 @@ void ProjectSettings::loadSettings() { m_projectId = QUuid::createUuid().toString().toStdString(); - const MetaObject *meta = metaObject(); for(const auto &it : object) { TString name = it.first; name.remove('_'); @@ -122,7 +118,7 @@ void ProjectSettings::loadSettings() { auto it = object.find(gPlatforms); if(it != object.end()) { m_platforms.clear(); - for(auto platform : it->second.toList()) { + for(auto &platform : it->second.toList()) { m_platforms.push_back(platform.toString()); } } @@ -130,7 +126,7 @@ void ProjectSettings::loadSettings() { { auto it = object.find(gModules); if(it != object.end()) { - for(auto module : it->second.toList()) { + for(auto &module : it->second.toList()) { m_modules.insert(module.toString()); } } @@ -139,7 +135,7 @@ void ProjectSettings::loadSettings() { auto it = object.find(gPlugins); if(it != object.end()) { VariantMap plugins = it->second.toMap(); - for(auto plugin : plugins) { + for(auto &plugin : plugins) { m_plugins[plugin.first] = plugin.second.toBool(); } } @@ -200,7 +196,7 @@ void ProjectSettings::saveSettings() { object[gModules] = modules; if(!m_plugins.empty()) { VariantMap plugins; - for(auto it : m_plugins) { + for(auto &it : m_plugins) { plugins[it.first] = it.second; } object[gPlugins] = plugins; @@ -345,7 +341,7 @@ StringList ProjectSettings::modules() const { StringList ProjectSettings::platforms() const { StringList list; - for(auto it : m_supportedPlatforms) { + for(auto &it : m_supportedPlatforms) { list.push_back(it.first.data()); } return (m_platforms.empty()) ? 
list : m_platforms; diff --git a/engine/src/resources/texture.cpp b/engine/src/resources/texture.cpp index b04629a8f..7c19e5c5a 100644 --- a/engine/src/resources/texture.cpp +++ b/engine/src/resources/texture.cpp @@ -5,8 +5,7 @@ #include namespace { - const char *gHeader = "Header"; - const char *gData = "Data"; + const char *gData("Data"); } uint32_t Texture::s_maxTextureSize = 1024; @@ -89,31 +88,16 @@ Texture::~Texture() { void Texture::loadUserData(const VariantMap &data) { clear(); - { - auto it = data.find(gData); - if(it != data.end()) { - const VariantList &surfaces = (*it).second.value(); - for(auto &s : surfaces) { - Surface img; - int32_t w = m_width; - int32_t h = m_height; - int32_t d = m_depth; - const VariantList &lods = s.value(); - for(auto &l : lods) { - ByteArray bits = l.toByteArray(); - uint32_t s = size(w, h, d); - if(s && !bits.empty()) { - ByteArray pixels; - pixels.resize(s); - memcpy(&pixels[0], &bits[0], s); - img.push_back(pixels); - } - w = MAX(w / 2, 1); - h = MAX(h / 2, 1); - d = MAX(d / 2, 1); - } - addSurface(img); + auto it = data.find(gData); + if(it != data.end()) { + const VariantList &surfaces = (*it).second.value(); + for(auto &s : surfaces) { + Surface surface; + + for(auto &l : s.value()) { + surface.push_back(l.toByteArray()); } + addSurface(surface); } } } @@ -246,9 +230,8 @@ void Texture::resize(int width, int height) { if(!(m_flags & Flags::Render) || (m_flags & Flags::Feedback)) { clear(); - int32_t length = size(m_width, m_height, m_depth); ByteArray pixels; - pixels.resize(length); + pixels.resize(sizeRGB(m_width, m_height, m_depth)); addSurface({pixels}); } @@ -342,18 +325,23 @@ void Texture::setFlags(int flags) { m_flags = flags; if(isFeedback() && sides() == 0) { - int32_t length = size(m_width, m_height, m_depth); ByteArray pixels; - pixels.resize(length); + pixels.resize(sizeRGB(m_width, m_height, m_depth)); addSurface({pixels}); } } /*! 
- Returns true if texture uses one of the compression formats; otherwise returns false. + Returns compression method. +*/ +int Texture::compress() const { + return m_compress; +} +/*! + Set the compression \a method. */ -bool Texture::isCompressed() const { - return m_compress != Uncompressed; +void Texture::setCompress(int method) { + m_compress = method; } /*! Returns true if the texture is a cube map; otherwise returns false. @@ -426,22 +414,7 @@ void Texture::setMaxCubemapSize(uint32_t size) { /*! \internal */ -int32_t Texture::size(int32_t width, int32_t height, int32_t depth) const { - int32_t (Texture::*sizefunc)(int32_t, int32_t, int32_t) const; - sizefunc = (isCompressed() ? &Texture::sizeDXTc : &Texture::sizeRGB); - - return (this->*sizefunc)(width, height, depth); -} -/*! - \internal -*/ -inline int32_t Texture::sizeDXTc(int32_t width, int32_t height, int32_t depth) const { - return ((width + 3) / 4) * ((height + 3) / 4) * ((depth + 3) / 4) * (m_compress == DXT1 ? 8 : 16); -} -/*! 
- \internal -*/ -inline int32_t Texture::sizeRGB(int32_t width, int32_t height, int32_t depth) const { +int32_t Texture::sizeRGB(int32_t width, int32_t height, int32_t depth) const { int32_t s = 1; switch(m_format) { case RGBA32Float: s = 4; break; @@ -455,7 +428,7 @@ inline int32_t Texture::sizeRGB(int32_t width, int32_t height, int32_t depth) co */ bool Texture::isDwordAligned() { int dwordLineSize = dwordAlignedLineSize(width(), components() * 8); - int curLineSize = width() * components(); + int curLineSize = width() * components(); return (dwordLineSize == curLineSize); } diff --git a/modules/editor/texturetools/converter/textureconverter.cpp b/modules/editor/texturetools/converter/textureconverter.cpp index d3b902593..e3a774732 100644 --- a/modules/editor/texturetools/converter/textureconverter.cpp +++ b/modules/editor/texturetools/converter/textureconverter.cpp @@ -1,8 +1,10 @@ #include "textureconverter.h" -#include +#define STB_IMAGE_IMPLEMENTATION +#include -#include +#define STB_IMAGE_RESIZE_IMPLEMENTATION +#include #include #include @@ -15,30 +17,17 @@ #include #include -#define FORMAT_VERSION 9 - -void copyData(uint8_t *dst, const uchar *src, uint32_t size, uint8_t channels) { - if(channels == 3) { - uint32_t m = 0; - for(uint32_t i = 0; i < size; i++) { - dst[i] = src[m]; +#include - if(i % channels == 2) { - m++; - } - m++; - } - } else { - memcpy(dst, src, size); - } -} +#define FORMAT_VERSION 9 TextureImportSettings::TextureImportSettings() : m_assetType(AssetType::Texture2D), m_filtering(FilteringType::None), m_wrap(WrapType::Repeat), m_pixels(100), - m_lod(false) { + m_lod(false), + m_compressed(false) { setVersion(FORMAT_VERSION); } @@ -85,6 +74,16 @@ void TextureImportSettings::setLod(bool lod) { } } +bool TextureImportSettings::compressed() const { + return m_compressed; +} +void TextureImportSettings::setCompressed(bool compressed) { + if(m_compressed != compressed) { + m_compressed = compressed; + setModified(); + } +} + int 
TextureImportSettings::pixels() const { return m_pixels; } @@ -271,132 +270,187 @@ AssetConverter::ReturnCode TextureConverter::convertFile(AssetConverterSettings } void TextureConverter::convertTexture(Texture *texture, TextureImportSettings *settings) { - uint8_t channels = 4; - QImage src(settings->source().data()); - QImage img = src.convertToFormat(QImage::Format_RGBA8888); - - texture->clear(); - - texture->setFormat(Texture::RGBA8); - texture->setFiltering(Texture::FilteringType(settings->filtering())); - texture->setWrap(Texture::WrapType(settings->wrap())); - - QList sides; - if(settings->assetType() == TextureImportSettings::AssetType::Cubemap) { - QList positions; - float ratio = (float)img.width() / (float)img.height(); - texture->resize(img.width(), img.height()); - if(ratio == 6.0f / 1.0f) { // Row - texture->resize(img.width() / 6, img.height()); - for(int i = 0; i < 6; i++) { - positions.push_back(QPoint(i * texture->width(), 0)); + int32_t width = 1; + int32_t height = 1; + int32_t channels = 4; + + uint8_t *sourceData = stbi_load(settings->source().data(), &width, &height, &channels, 0); + + if(sourceData) { + texture->clear(); + + int format = Texture::RGBA8; + if(channels == 3) { + format = Texture::RGB8; + } + + texture->setFormat(format); + texture->setFiltering(Texture::FilteringType(settings->filtering())); + texture->setWrap(Texture::WrapType(settings->wrap())); + + std::list sides; + if(settings->assetType() == TextureImportSettings::AssetType::Cubemap) { + std::list positions; + float ratio = (float)width / (float)height; + texture->resize(width, height); + if(ratio == 6.0f / 1.0f) { // Row + texture->resize(width / 6, height); + for(int i = 0; i < 6; i++) { + positions.push_back(Vector2(i * texture->width(), 0)); + } + } else if(ratio == 1.0f / 6.0f) { // Column + texture->resize(width, height / 6); + for(int i = 0; i < 6; i++) { + positions.push_back(Vector2(0, i * texture->height())); + } + } else if(ratio == 4.0f / 3.0f) { // 
Horizontal cross + texture->resize(width / 4, height / 3); + positions.push_back(Vector2(2 * texture->width(), 1 * texture->height())); + positions.push_back(Vector2(0 * texture->width(), 1 * texture->height())); + positions.push_back(Vector2(1 * texture->width(), 0 * texture->height())); + positions.push_back(Vector2(1 * texture->width(), 2 * texture->height())); + positions.push_back(Vector2(1 * texture->width(), 1 * texture->height())); + positions.push_back(Vector2(3 * texture->width(), 1 * texture->height())); + } else if(ratio == 3.0f / 4.0f) { // Vertical cross + texture->resize(width / 3, height / 4); + positions.push_back(Vector2(1 * texture->width(), 1 * texture->height())); + positions.push_back(Vector2(1 * texture->width(), 3 * texture->height())); + positions.push_back(Vector2(1 * texture->width(), 0 * texture->height())); + positions.push_back(Vector2(1 * texture->width(), 2 * texture->height())); + positions.push_back(Vector2(0 * texture->width(), 1 * texture->height())); + positions.push_back(Vector2(2 * texture->width(), 1 * texture->height())); } - } else if(ratio == 1.0f / 6.0f) { // Column - texture->resize(img.width(), img.height() / 6); - for(int i = 0; i < 6; i++) { - positions.push_back(QPoint(0, i * texture->height())); + + ByteArray result; + result.resize(width * height * channels); + + for(const Vector2 &it : positions) { + copyRegion(sourceData, Vector2(width, height), channels, result, it, Vector2(texture->width(), texture->height())); + + sides.push_back(result); } - } else if(ratio == 4.0f / 3.0f) { // Horizontal cross - texture->resize(img.width() / 4, img.height() / 3); - positions.push_back(QPoint(2 * texture->width(), 1 * texture->height())); - positions.push_back(QPoint(0 * texture->width(), 1 * texture->height())); - positions.push_back(QPoint(1 * texture->width(), 0 * texture->height())); - positions.push_back(QPoint(1 * texture->width(), 2 * texture->height())); - positions.push_back(QPoint(1 * texture->width(), 1 * 
texture->height())); - positions.push_back(QPoint(3 * texture->width(), 1 * texture->height())); - } else if(ratio == 3.0f / 4.0f) { // Vertical cross - texture->resize(img.width() / 3, img.height() / 4); - positions.push_back(QPoint(1 * texture->width(), 1 * texture->height())); - positions.push_back(QPoint(1 * texture->width(), 3 * texture->height())); - positions.push_back(QPoint(1 * texture->width(), 0 * texture->height())); - positions.push_back(QPoint(1 * texture->width(), 2 * texture->height())); - positions.push_back(QPoint(0 * texture->width(), 1 * texture->height())); - positions.push_back(QPoint(2 * texture->width(), 1 * texture->height())); - } + } else if(settings->assetType() == TextureImportSettings::AssetType::Texture3D) { + float ratio = (float)width / (float)height; + + ByteArray result; + + if(ratio > 1.0f) { // Row + texture->resize(height, height); + int32_t depth = width / height; + texture->setDepth(depth); + + result.resize(height * height * depth * channels); - QRect sub; - sub.setSize(QSize(texture->width(), texture->height())); - foreach(const QPoint &it, positions) { - sub.moveTo(it); - sides.push_back(img.copy(sub)); + for(int d = 0; d < texture->depth(); d++) { + copyRegion(sourceData, Vector2(width, height), channels, result, + Vector2(texture->width() * d, 0), Vector2(texture->width(), texture->height())); + } + } else { // Column + texture->resize(width, width); + int32_t depth = height / width; + texture->setDepth(depth); + + result.resize(width * width * depth * channels); + + for(int d = 0; d < texture->depth(); d++) { + copyRegion(sourceData, Vector2(width, height), channels, result, + Vector2(0, texture->height() * d), Vector2(texture->width(), texture->height())); + } + } + + sides.push_back(result); + } else { + texture->resize(width, height); + + ByteArray result; + result.resize(width * height * channels); + + copyRegion(sourceData, Vector2(width, height), channels, result, Vector2(), Vector2(width, height), true); + + 
sides.push_back(result); } - } else if(settings->assetType() == TextureImportSettings::AssetType::Texture3D) { - float ratio = (float)img.width() / (float)img.height(); - if(ratio > 1.0f) { // Row - texture->resize(img.height(), img.height()); - texture->setDepth(img.width() / img.height()); - - QImage result(texture->width(), texture->height() * texture->depth(), QImage::Format_RGBA8888); - - for(int d = 0; d < texture->depth(); d++) { - for(int h = 0; h < texture->height(); h++) { - for(int w = 0; w < texture->width(); w++) { - result.setPixelColor(w, texture->height() * d + h, img.pixelColor(texture->width() * d + w, h)); - } + + texture->clear(); + + if(settings->compressed()) { + int method = Texture::BC7; // Desktop + + TString platform = ProjectSettings::instance()->currentPlatformName(); + if(platform.contains("webgl")) { + method = Texture::BC3; + if(channels == 3) { + method = Texture::BC1; } + } else if(platform.contains("android")) { + method = Texture::ETC2; + if(channels == 3) { + method = Texture::ETC1; + } + } else if(platform.contains("ios")) { + method = Texture::PVRTC; + } else if(platform.contains("tvos")) { + method = Texture::ASTC; } - img = result; - } else { // Column - texture->resize(img.width(), img.width()); - texture->setDepth(img.height() / img.width()); + texture->setCompress(method); } - sides.push_back(img); - } else { - texture->resize(img.width(), img.height()); - sides.push_back(img.mirrored()); - } + for(const ByteArray &side : sides) { + Texture::Surface surface; - texture->clear(); + surface.push_back(side); - int i = 0; - foreach(const QImage &it, sides) { - Texture::Surface surface; + // Mip map creation + if(settings->lod()) { + int32_t w = texture->width(); + int32_t h = texture->height(); + int32_t d = texture->depth(); - VariantList lods; + ByteArray origin = side; + while(w > 1 && h > 1 ) { + int32_t originW = w; + int32_t originH = h; + int32_t originD = d; - int w = texture->width(); - int h = texture->height(); - int 
d = texture->depth(); + w = MAX(originW / 2, 1); + h = MAX(originH / 2, 1); + d = MAX(originD / 2, 1); - ByteArray data; - uint32_t size = w * h * d * channels; - if(size) { - data.resize(size); - copyData(data.data(), it.constBits(), size, channels); - } - surface.push_back(data); - - if(settings->lod()) { - QImage mip = it; - while(w > 1 && h > 1 ) { - w = MAX(w / 2, 1); - h = MAX(h / 2, 1); - d = MAX(d / 2, 1); - - mip = mip.scaled(w, h, Qt::IgnoreAspectRatio); - size = w * h * d * channels; - if(size) { - data.resize(size); - copyData(&data[0], mip.constBits(), size, channels); + ByteArray mip; + mip.resize(w * h * d * channels); + + stbir_resize_uint8_linear(origin.data(), originW, originH, 0, + mip.data(), w, h, 0, static_cast(channels)); + origin = mip; + surface.push_back(mip); } - surface.push_back(data); } + + texture->addSurface(surface); } - texture->addSurface(surface); - i++; - } + if(texture->compress() != Texture::Uncompressed) { + compress(texture); + } + + texture->setDirty(); - texture->setDirty(); + stbi_image_free(sourceData); + } } -uint32_t TextureConverter::toMeta(int type) { - if(type == TextureImportSettings::AssetType::Sprite) { - return MetaType::type(); +void TextureConverter::copyRegion(const uint8_t *sourcedata, const Vector2 &sourceSize, int channels, ByteArray &data, const Vector2 &pos, const Vector2 &size, bool mirror) { + for(int y = 0; y < size.y; y++) { + for(int x = 0; x < size.x; x++) { + int srcY = pos.y + ((mirror) ? 
sourceSize.y - y - 1 : y); + int srcIndex = (srcY * sourceSize.x + (pos.x + x)) * channels; + int dstIndex = (y * size.x + x) * channels; + + for(int c = 0; c < channels; c++) { + data[dstIndex + c] = sourcedata[srcIndex + c]; + } + } } - return MetaType::type(); } void TextureConverter::convertSprite(Sprite *sprite, TextureImportSettings *settings) { diff --git a/modules/editor/texturetools/converter/textureconverter.h b/modules/editor/texturetools/converter/textureconverter.h index 31d742421..cfa02349e 100644 --- a/modules/editor/texturetools/converter/textureconverter.h +++ b/modules/editor/texturetools/converter/textureconverter.h @@ -13,6 +13,7 @@ class TextureImportSettings : public AssetConverterSettings { A_PROPERTYEX(AssetType, type, TextureImportSettings::assetType, TextureImportSettings::setAssetType, "enum=AssetType"), A_PROPERTYEX(WrapType, wrap, TextureImportSettings::wrap, TextureImportSettings::setWrap, "enum=WrapType"), A_PROPERTY(bool, mipMaping, TextureImportSettings::lod, TextureImportSettings::setLod), + A_PROPERTY(bool, compressed, TextureImportSettings::compressed, TextureImportSettings::setCompressed), A_PROPERTYEX(FilteringType, filtering, TextureImportSettings::filtering, TextureImportSettings::setFiltering, "enum=FilteringType"), A_PROPERTY(int, pixelsPerUnit, TextureImportSettings::pixels, TextureImportSettings::setPixels) ) @@ -78,6 +79,9 @@ class TextureImportSettings : public AssetConverterSettings { bool lod() const; void setLod(bool lod); + bool compressed() const; + void setCompressed(bool compressed); + int pixels() const; void setPixels(int pixels); @@ -111,6 +115,8 @@ class TextureImportSettings : public AssetConverterSettings { bool m_lod; + bool m_compressed; + }; class TextureConverter : public AssetConverter { @@ -118,11 +124,13 @@ class TextureConverter : public AssetConverter { void convertTexture(Texture *texture, TextureImportSettings *settings); void convertSprite(Sprite *sheet, TextureImportSettings *settings); - 
static uint32_t toMeta(int type); - private: void init() override; + bool compress(Texture *texture); + + void copyRegion(const uint8_t *sourcedata, const Vector2 &sourceSize, int channels, ByteArray &data, const Vector2 &pos, const Vector2 &size, bool mirror = false); + StringList suffixes() const override { return {"bmp", "dds", "jpg", "jpeg", "png", "tga", "ico", "tif"}; } ReturnCode convertFile(AssetConverterSettings *settings) override; diff --git a/modules/editor/texturetools/converter/textureencoder.cpp b/modules/editor/texturetools/converter/textureencoder.cpp new file mode 100644 index 000000000..4ea777713 --- /dev/null +++ b/modules/editor/texturetools/converter/textureencoder.cpp @@ -0,0 +1,110 @@ +#include + +#include + +#include "textureconverter.h" + +uint32_t estimateTranscodedSize(uint32_t width, uint32_t height, basist::transcoder_texture_format format) { + switch(format) { + case basist::transcoder_texture_format::cTFBC1_RGB: + return ((width + 3) / 4) * ((height + 3) / 4) * 8; + case basist::transcoder_texture_format::cTFBC3_RGBA: + case basist::transcoder_texture_format::cTFBC7_RGBA: + return ((width + 3) / 4) * ((height + 3) / 4) * 16; + case basist::transcoder_texture_format::cTFASTC_4x4_RGBA: + return ((width + 3) / 4) * ((height + 3) / 4) * 16; + case basist::transcoder_texture_format::cTFETC1_RGB: + case basist::transcoder_texture_format::cTFETC2_RGBA: + return ((width + 3) / 4) * ((height + 3) / 4) * 8; + case basist::transcoder_texture_format::cTFPVRTC1_4_RGB: + case basist::transcoder_texture_format::cTFPVRTC1_4_RGBA: + return std::max(width * height * 2 / 8, 32U); + default: break; + } + + return width * height * 4; +} + +bool TextureConverter::compress(Texture *texture) { + bool result = basisu::basisu_encoder_init(); + + if(result) { + basisu::job_pool jpool(std::thread::hardware_concurrency()); + + basisu::basis_compressor_params params; + params.m_multithreading = true; + params.m_pJob_pool = &jpool; + + params.m_uastc = true; + 
params.m_pack_uastc_ldr_4x4_flags = basisu::cPackUASTCLevelDefault; + params.m_rdo_uastc_ldr_4x4_quality_scalar = 1.0f; + + if(texture->isCubemap()) { + params.m_tex_type = basist::cBASISTexTypeCubemapArray; + } + + if(texture->depth() > 1) { + params.m_tex_type = basist::cBASISTexTypeVolume; + } + + uint32_t w = texture->width(); + uint32_t h = texture->height(); + + uint32_t channels = 4; + if(texture->format() == Texture::RGB8) { + channels = 3; + } + + basist::basisu_transcoder transcoder; + + result = false; + for(uint32_t side = 0; side < texture->sides(); side++) { + Texture::Surface &surface = texture->surface(side); + + for(uint32_t lod = 0; lod < surface.size(); lod++) { + params.m_source_images.push_back(basisu::image(surface[lod].data(), (w >> lod), (h >> lod), channels)); + + basisu::basis_compressor comp; + if(comp.init(params)) { + if(comp.process() == basisu::basis_compressor::cECSuccess) { + const basisu::uint8_vec &data = comp.get_output_basis_file(); + + result = transcoder.start_transcoding(data.data(), data.size()); + + basist::basisu_file_info fileInfo; + if(!transcoder.get_file_info(data.data(), data.size(), fileInfo)) { + return false; + } + + basist::basisu_image_info levelInfo; + if(!transcoder.get_image_info(data.data(), data.size(), levelInfo, lod)) { + continue; + } + + basist::transcoder_texture_format format = basist::transcoder_texture_format::cTFRGBA32; + switch(texture->compress()) { + case Texture::BC1: format = basist::transcoder_texture_format::cTFBC1_RGB; break; + case Texture::BC3: format = basist::transcoder_texture_format::cTFBC3_RGBA; break; + case Texture::BC7: format = basist::transcoder_texture_format::cTFBC7_RGBA; break; + case Texture::ASTC: format = basist::transcoder_texture_format::cTFASTC_4x4_RGBA; break; + case Texture::ETC1: format = basist::transcoder_texture_format::cTFETC1_RGB; break; + case Texture::ETC2: format = basist::transcoder_texture_format::cTFETC2_RGBA; break; + case Texture::PVRTC: format = 
basist::transcoder_texture_format::cTFPVRTC1_4_RGBA; break; + default: break; + } + + surface[lod].resize(estimateTranscodedSize((w >> lod), (h >> lod), format)); + + result = transcoder.transcode_image_level(data.data(), data.size(), side, lod, surface[lod].data(), surface[lod].size(), format); + + transcoder.stop_transcoding(); + } + } + } + } + + basisu::basisu_encoder_deinit(); + } + + return result; +} diff --git a/modules/editor/texturetools/texturetools.qbs b/modules/editor/texturetools/texturetools.qbs index 0a12b904c..b8fe61408 100644 --- a/modules/editor/texturetools/texturetools.qbs +++ b/modules/editor/texturetools/texturetools.qbs @@ -21,7 +21,9 @@ Project { "../../../engine/includes/editor", "../../../thirdparty/next/inc", "../../../thirdparty/next/inc/math", - "../../../thirdparty/next/inc/core" + "../../../thirdparty/next/inc/core", + "../../../thirdparty/stb", + "../../../thirdparty/basisu" ] DynamicLibrary { @@ -32,6 +34,7 @@ Project { Depends { name: "bundle" } Depends { name: "next-editor" } Depends { name: "engine-editor" } + Depends { name: "basisu" } Depends { name: "Qt"; submodules: ["core", "gui", "widgets"]; } bundle.isBundle: false diff --git a/modules/renders/rendergl/src/renderglsystem.cpp b/modules/renders/rendergl/src/renderglsystem.cpp index fe603f6bf..3c188ad8b 100644 --- a/modules/renders/rendergl/src/renderglsystem.cpp +++ b/modules/renders/rendergl/src/renderglsystem.cpp @@ -97,6 +97,7 @@ bool RenderGLSystem::init() { } CheckGLError(); #endif + int32_t texture; glGetIntegerv(GL_MAX_TEXTURE_SIZE, &texture); CheckGLError(); diff --git a/modules/renders/rendergl/src/resources/texturegl.cpp b/modules/renders/rendergl/src/resources/texturegl.cpp index 8444cc276..55f2e5cdb 100644 --- a/modules/renders/rendergl/src/resources/texturegl.cpp +++ b/modules/renders/rendergl/src/resources/texturegl.cpp @@ -1,6 +1,6 @@ #include "resources/texturegl.h" -#include +//#include #include "agl.h" #include "commandbuffergl.h" @@ -32,8 +32,8 @@ void 
TextureGL::readPixels(int x, int y, int width, int height) { bool depth = (format() == Depth); glReadPixels(x, y, width, height, - (depth) ? GL_DEPTH_COMPONENT : GL_RGBA, - (depth) ? GL_FLOAT : GL_UNSIGNED_BYTE, dst[0].data()); + (depth) ? GL_DEPTH_COMPONENT : GL_RGBA, + (depth) ? GL_FLOAT : GL_UNSIGNED_BYTE, dst[0].data()); CheckGLError(); } } @@ -88,45 +88,62 @@ void TextureGL::updateTexture() { uint32_t glformat = GL_RGBA; uint32_t type = GL_UNSIGNED_BYTE; - switch(format()) { - case R8: { - internal = GL_R8; - glformat = GL_RED; - } break; - case RGB8: { - internal = GL_RGB8; - glformat = GL_RGB; - } break; - case RGB10A2: { - #ifndef THUNDER_MOBILE - internal = GL_RGB10_A2; - type = GL_UNSIGNED_INT_10_10_10_2; - #else - internal = GL_RGB10_A2; - type = GL_UNSIGNED_INT_2_10_10_10_REV; - #endif - } break; - case R11G11B10Float: { - internal = GL_R11F_G11F_B10F; - glformat = GL_RGB; - type = GL_FLOAT; - } break; - case RGBA32Float: { - internal = GL_RGBA32F; - glformat = GL_RGBA; - type = GL_FLOAT; - } break; - case RGBA16Float: { - internal = GL_RGBA16F; - glformat = GL_RGBA; - type = GL_FLOAT; - } break; - case Depth: { - internal = (m_depthBits == 16) ? 
GL_DEPTH_COMPONENT16 : GL_DEPTH_COMPONENT24; - glformat = GL_DEPTH_COMPONENT; - type = GL_UNSIGNED_INT; - } break; - default: break; + if(m_compress == Uncompressed) { + switch(m_format) { + case R8: { + internal = GL_R8; + glformat = GL_RED; + } break; + case RGB8: { + internal = GL_RGB8; + glformat = GL_RGB; + } break; + case RGB10A2: { + #ifndef THUNDER_MOBILE + internal = GL_RGB10_A2; + type = GL_UNSIGNED_INT_10_10_10_2; + #else + internal = GL_RGB10_A2; + type = GL_UNSIGNED_INT_2_10_10_10_REV; + #endif + } break; + case R11G11B10Float: { + internal = GL_R11F_G11F_B10F; + glformat = GL_RGB; + type = GL_FLOAT; + } break; + case RGBA32Float: { + internal = GL_RGBA32F; + glformat = GL_RGBA; + type = GL_FLOAT; + } break; + case RGBA16Float: { + internal = GL_RGBA16F; + glformat = GL_RGBA; + type = GL_FLOAT; + } break; + case Depth: { + internal = (m_depthBits == 16) ? GL_DEPTH_COMPONENT16 : GL_DEPTH_COMPONENT24; + glformat = GL_DEPTH_COMPONENT; + type = GL_UNSIGNED_INT; + } break; + default: break; + } + } else { + switch(m_compress) { +#ifndef THUNDER_MOBILE + case BC1: internal = GL_COMPRESSED_RGB_S3TC_DXT1_EXT; break; + case BC3: internal = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; break; + case BC7: internal = GL_COMPRESSED_RGBA_BPTC_UNORM_ARB; break; + case ASTC: internal = GL_COMPRESSED_RGBA_ASTC_4x4_KHR; break; +#endif +#ifdef GLFM_INCLUDE_ES32 + case ASTC: internal = GL_COMPRESSED_RGBA_ASTC_4x4; break; +#endif + case ETC1: internal = GL_COMPRESSED_RGB8_ETC2; break; + case ETC2: internal = GL_COMPRESSED_RGBA8_ETC2_EAC; break; + default: break; + } } if(target == GL_TEXTURE_CUBE_MAP) { @@ -155,19 +172,14 @@ void TextureGL::destroyTexture() { } bool TextureGL::uploadTexture(uint32_t imageIndex, uint32_t target, uint32_t internal, uint32_t format, uint32_t type) { - int32_t w = m_width; - int32_t h = m_height; - int32_t d = m_depth; - if(isRender()) { - glTexImage2D(target, 0, internal, w, h, 0, format, type, nullptr); + glTexImage2D(target, 0, internal, m_width, 
m_height, 0, format, type, nullptr); } else { const Surface &image = surface(imageIndex); - if(isCompressed()) { + if(m_compress != Uncompressed) { // load all mipmaps for(uint32_t i = 0; i < image.size(); i++) { - const uint8_t *data = image[i].data(); - glCompressedTexImage2D(target, i, internal, (w >> i), (h >> i), 0, size((w >> i), (h >> i), (d >> i)), data); + glCompressedTexImage2D(target, i, internal, (m_width >> i), (m_height >> i), 0, image[i].size(), image[i].data()); CheckGLError(); } } else { @@ -181,11 +193,10 @@ bool TextureGL::uploadTexture(uint32_t imageIndex, uint32_t target, uint32_t int // load all mipmaps for(uint32_t i = 0; i < image.size(); i++) { - const uint8_t *data = image[i].data(); if(m_depth > 1) { - glTexImage3D(target, i, internal, (w >> i), (h >> i), (d >> i), 0, format, type, data); + glTexImage3D(target, i, internal, (m_width >> i), (m_height >> i), (m_depth >> i), 0, format, type, image[i].data()); } else { - glTexImage2D(target, i, internal, (w >> i), (h >> i), 0, format, type, data); + glTexImage2D(target, i, internal, (m_width >> i), (m_height >> i), 0, format, type, image[i].data()); } CheckGLError(); } diff --git a/modules/renders/rendermt/src/resources/texturemt.cpp b/modules/renders/rendermt/src/resources/texturemt.cpp index dc1e6be2a..05b393727 100644 --- a/modules/renders/rendermt/src/resources/texturemt.cpp +++ b/modules/renders/rendermt/src/resources/texturemt.cpp @@ -54,7 +54,7 @@ void TextureMt::readPixels(int x, int y, int width, int height) { MTL::Origin readOrigin(x, y, 0); MTL::Size readSize(width, height, 1); - int textSize = size(m_width, m_height, 1); + int textSize = sizeRGB(m_width, m_height, 1); int rowSize = textSize / m_height; encoder->copyFromTexture(m_native, 0, 0, readOrigin, readSize, m_buffer, 0, rowSize, rowSize * m_height); @@ -146,7 +146,7 @@ void TextureMt::updateTexture() { m_buffer->release(); } - m_buffer = WrapperMt::device()->newBuffer(size(m_width, m_height, 1), 
MTL::ResourceStorageModeShared); + m_buffer = WrapperMt::device()->newBuffer(sizeRGB(m_width, m_height, 1), MTL::ResourceStorageModeShared); } } @@ -156,22 +156,48 @@ void TextureMt::uploadTexture(uint32_t slice) { bool cube = isCubemap(); for(uint32_t i = 0; i < image.size(); i++) { - int32_t w = (m_width >> i); - int32_t h = (m_height >> i); - int32_t d = cube ? (m_depth >> i) : 1; - m_native->replaceRegion(MTL::Region(0, 0, 0, w, h, d), i, slice, image[i].data(), size(w, h, d) / h, image[i].size()); + uint32_t w = (m_width >> i); + uint32_t h = (m_height >> i); + uint32_t d = cube ? (m_depth >> i) : 1; + + int rowSize = w * components(); + switch(m_compress) { + case Texture::BC1: rowSize = ((w + 3) / 4) * 8; break; + case Texture::BC3: + case Texture::BC7: rowSize = ((w + 3) / 4) * 16; break; + case Texture::ASTC: rowSize = ((w + 3) / 4) * 16; break; + case Texture::ETC1: rowSize = ((w + 3) / 4) * 8; break; + case Texture::ETC2: rowSize = ((w + 3) / 4) * 16; break; + case Texture::PVRTC: rowSize = ((std::max(w, 8U) + 3) / 4) * 8; break; + default: break; + } + + m_native->replaceRegion(MTL::Region(0, 0, 0, w, h, d), i, slice, image[i].data(), rowSize, image[i].size()); } } MTL::PixelFormat TextureMt::pixelFormat() { - switch(m_format) { - case R8: return MTL::PixelFormatR8Unorm; - case RGB10A2: return MTL::PixelFormatRGB10A2Unorm; - case R11G11B10Float: return MTL::PixelFormatRG11B10Float; - case RGBA32Float: return MTL::PixelFormatRGBA32Float; - case RGBA16Float: return MTL::PixelFormatRGBA16Float; - case Depth: return MTL::PixelFormatDepth16Unorm; - default: break; + if(m_compress) { + switch(m_compress) { + case Texture::BC1: return MTL::PixelFormatBC1_RGBA; + case Texture::BC3: return MTL::PixelFormatBC3_RGBA; + case Texture::BC7: return MTL::PixelFormatBC7_RGBAUnorm; + case Texture::ASTC: return MTL::PixelFormatASTC_4x4_LDR; + case Texture::ETC1: return MTL::PixelFormatETC2_RGB8; + case Texture::ETC2: return MTL::PixelFormatETC2_RGB8A1; + case 
Texture::PVRTC: return MTL::PixelFormatPVRTC_RGBA_4BPP; + default: break; + } + } else { + switch(m_format) { + case R8: return MTL::PixelFormatR8Unorm; + case RGB10A2: return MTL::PixelFormatRGB10A2Unorm; + case R11G11B10Float: return MTL::PixelFormatRG11B10Float; + case RGBA32Float: return MTL::PixelFormatRGBA32Float; + case RGBA16Float: return MTL::PixelFormatRGBA16Float; + case Depth: return MTL::PixelFormatDepth16Unorm; + default: break; + } } return MTL::PixelFormatRGBA8Unorm; diff --git a/modules/renders/rendervk/src/resources/texturevk.cpp b/modules/renders/rendervk/src/resources/texturevk.cpp index 109073b29..6d73795ae 100644 --- a/modules/renders/rendervk/src/resources/texturevk.cpp +++ b/modules/renders/rendervk/src/resources/texturevk.cpp @@ -4,7 +4,6 @@ #include "wrappervk.h" -#include "commandbuffervk.h" #include "resources/materialvk.h" #include "resources/rendertarget.h" @@ -57,26 +56,28 @@ void TextureVk::attributes(VkDescriptorImageInfo &imageinfo) { VkFormat TextureVk::vkFormat() const { VkFormat result = VK_FORMAT_R8G8B8A8_UNORM; - switch(format()) { - case R8: { - result = VK_FORMAT_R8_UNORM; - } break; - case RGB10A2: { - result = VK_FORMAT_A2R10G10B10_UNORM_PACK32; - } break; - case RGBA32Float: { - result = VK_FORMAT_R32G32B32A32_SFLOAT; - } break; - case RGBA16Float: { - result = VK_FORMAT_R16G16B16A16_SFLOAT; - } break; - case R11G11B10Float: { - result = VK_FORMAT_B10G11R11_UFLOAT_PACK32; - } break; - case Depth: { - result = (depthBits() == 16) ? VK_FORMAT_D16_UNORM_S8_UINT : VK_FORMAT_D24_UNORM_S8_UINT; - } break; + + if(m_compress == Uncompressed) { + switch(format()) { + case R8: result = VK_FORMAT_R8_UNORM; break; + case RGB10A2: result = VK_FORMAT_A2R10G10B10_UNORM_PACK32; break; + case RGBA32Float: result = VK_FORMAT_R32G32B32A32_SFLOAT; break; + case RGBA16Float: result = VK_FORMAT_R16G16B16A16_SFLOAT; break; + case R11G11B10Float: result = VK_FORMAT_B10G11R11_UFLOAT_PACK32; break; + case Depth: result = (depthBits() == 16) ? 
VK_FORMAT_D16_UNORM_S8_UINT : VK_FORMAT_D24_UNORM_S8_UINT; break; default: break; + } + } else { + switch(m_compress) { + case BC1: result = VK_FORMAT_BC1_RGB_UNORM_BLOCK; break; + case BC3: result = VK_FORMAT_BC3_UNORM_BLOCK; break; + case BC7: result = VK_FORMAT_BC7_UNORM_BLOCK; break; + case ASTC: result = VK_FORMAT_ASTC_4x4_UNORM_BLOCK; break; + case ETC1: result = VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK; break; + case ETC2: result = VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK; break; + case PVRTC: result = VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG; break; + default: break; + } } return result; @@ -98,7 +99,7 @@ void TextureVk::readPixels(int x, int y, int width, int height) { region.imageOffset = {x, y, 0}; region.imageExtent = {(uint32_t)width, (uint32_t)height, 1}; - VkDeviceSize textureSize = size(width, height, 1); + VkDeviceSize textureSize = sizeRGB(width, height, 1); VkBuffer stagingBuffer; VkDeviceMemory stagingMemory; @@ -167,9 +168,7 @@ void TextureVk::updateTexture() { VkDeviceSize textureSize = 0; for(uint32_t mip = 0; mip < src.size(); mip++) { - textureSize += size(mipLevel(m_width, mip), - mipLevel(m_height, mip), - mipLevel(m_depth, mip)); + textureSize += src[mip].size(); } VkBuffer stagingBuffer; @@ -179,9 +178,7 @@ void TextureVk::updateTexture() { uint8_t *dst = nullptr; vkMapMemory(device, stagingMemory, 0, textureSize, 0, reinterpret_cast(&dst)); for(uint32_t mip = 0; mip < src.size(); mip++) { - uint32_t mipSize = size(mipLevel(m_width, mip), - mipLevel(m_height, mip), - mipLevel(m_depth, mip)); + uint32_t mipSize = src[mip].size(); memcpy(dst, src[mip].data(), mipSize); dst += mipSize; } @@ -195,7 +192,7 @@ void TextureVk::updateTexture() { uint32_t h = mipLevel(m_height, mip); uint32_t d = mipLevel(m_depth, mip); - VkDeviceSize mipSize = size(w, h, d); + VkDeviceSize mipSize = src[mip].size(); VkBufferImageCopy region = {}; region.bufferOffset = offset; diff --git a/thirdparty/basisu/basisu.qbs b/thirdparty/basisu/basisu.qbs new file mode 100644 index 
000000000..2770186b7 --- /dev/null +++ b/thirdparty/basisu/basisu.qbs @@ -0,0 +1,29 @@ +import qbs + +Project { + id: basisu + property stringList srcFiles: [ + "encoder/**/*.cpp", + "transcoder/**/*.cpp", + "zstd/zstd.c" + ] + + property stringList incPaths: [ + "encoder" + ] + + StaticLibrary { + name: "basisu" + condition: basisu.desktop + + files: basisu.srcFiles + Depends { name: "cpp" } + Depends { name: "bundle" } + bundle.isBundle: false + + cpp.defines: [ ] + cpp.includePaths: basisu.incPaths + cpp.cxxLanguageVersion: basisu.languageVersion + cpp.cxxStandardLibrary: basisu.standardLibrary + } +} diff --git a/thirdparty/basisu/encoder/3rdparty/android_astc_decomp.cpp b/thirdparty/basisu/encoder/3rdparty/android_astc_decomp.cpp new file mode 100644 index 000000000..a667d0d63 --- /dev/null +++ b/thirdparty/basisu/encoder/3rdparty/android_astc_decomp.cpp @@ -0,0 +1,2060 @@ +// File: android_astc_decomp.cpp + +/*------------------------------------------------------------------------- + * drawElements Quality Program Tester Core + * ---------------------------------------- + * + * Copyright 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * rg: Removed external dependencies, minor fix to decompress() so it converts non-sRGB + * output to 8-bits correctly. I've compared this decoder's output + * vs. astc-codec with random inputs. + * + *//*! + * \file + * \brief ASTC Utilities. 
+ *//*--------------------------------------------------------------------*/ +#include "android_astc_decomp.h" +#include +#include +#include +#include + +#define DE_LENGTH_OF_ARRAY(x) (sizeof(x)/sizeof(x[0])) +#define DE_UNREF(x) (void)x + +typedef uint8_t deUint8; +typedef int8_t deInt8; +typedef uint32_t deUint32; +typedef int32_t deInt32; +typedef uint16_t deUint16; +typedef int16_t deInt16; +typedef int64_t deInt64; +typedef uint64_t deUint64; + +#define DE_ASSERT assert + +#ifdef _MSC_VER +#pragma warning (disable:4505) // unreferenced local function has been removed +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +namespace basisu_astc +{ + template inline S maximum(S a, S b) { return (a > b) ? a : b; } + template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } + template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } + + static bool inBounds(int v, int l, int h) + { + return (v >= l) && (v < h); + } + + static bool inRange(int v, int l, int h) + { + return (v >= l) && (v <= h); + } + + template + static inline T max(T a, T b) + { + return (a > b) ? a : b; + } + + template + static inline T min(T a, T b) + { + return (a < b) ? 
a : b; + } + + template + static inline T clamp(T a, T l, T h) + { + if (a < l) + return l; + else if (a > h) + return h; + return a; + } + + struct UVec4 + { + uint32_t m_c[4]; + + UVec4() + { + m_c[0] = 0; + m_c[1] = 0; + m_c[2] = 0; + m_c[3] = 0; + } + + UVec4(uint32_t x, uint32_t y, uint32_t z, uint32_t w) + { + m_c[0] = x; + m_c[1] = y; + m_c[2] = z; + m_c[3] = w; + } + + uint32_t x() const { return m_c[0]; } + uint32_t y() const { return m_c[1]; } + uint32_t z() const { return m_c[2]; } + uint32_t w() const { return m_c[3]; } + + uint32_t& x() { return m_c[0]; } + uint32_t& y() { return m_c[1]; } + uint32_t& z() { return m_c[2]; } + uint32_t& w() { return m_c[3]; } + + uint32_t operator[] (uint32_t idx) const { assert(idx < 4); return m_c[idx]; } + uint32_t& operator[] (uint32_t idx) { assert(idx < 4); return m_c[idx]; } + }; + + struct IVec4 + { + int32_t m_c[4]; + + IVec4() + { + m_c[0] = 0; + m_c[1] = 0; + m_c[2] = 0; + m_c[3] = 0; + } + + IVec4(int32_t x, int32_t y, int32_t z, int32_t w) + { + m_c[0] = x; + m_c[1] = y; + m_c[2] = z; + m_c[3] = w; + } + + int32_t x() const { return m_c[0]; } + int32_t y() const { return m_c[1]; } + int32_t z() const { return m_c[2]; } + int32_t w() const { return m_c[3]; } + + int32_t& x() { return m_c[0]; } + int32_t& y() { return m_c[1]; } + int32_t& z() { return m_c[2]; } + int32_t& w() { return m_c[3]; } + + UVec4 asUint() const + { + return UVec4(maximum(0, m_c[0]), maximum(0, m_c[1]), maximum(0, m_c[2]), maximum(0, m_c[3])); + } + + int32_t operator[] (uint32_t idx) const { assert(idx < 4); return m_c[idx]; } + int32_t& operator[] (uint32_t idx) { assert(idx < 4); return m_c[idx]; } + }; + + struct IVec3 + { + int32_t m_c[3]; + + IVec3() + { + m_c[0] = 0; + m_c[1] = 0; + m_c[2] = 0; + } + + IVec3(int32_t x, int32_t y, int32_t z) + { + m_c[0] = x; + m_c[1] = y; + m_c[2] = z; + } + + int32_t x() const { return m_c[0]; } + int32_t y() const { return m_c[1]; } + int32_t z() const { return m_c[2]; } + + int32_t& x() { 
return m_c[0]; } + int32_t& y() { return m_c[1]; } + int32_t& z() { return m_c[2]; } + + int32_t operator[] (uint32_t idx) const { assert(idx < 3); return m_c[idx]; } + int32_t& operator[] (uint32_t idx) { assert(idx < 3); return m_c[idx]; } + }; + + static uint32_t deDivRoundUp32(uint32_t a, uint32_t b) + { + return (a + b - 1) / b; + } + + static bool deInBounds32(uint32_t v, uint32_t l, uint32_t h) + { + return (v >= l) && (v < h); + } + +namespace astc +{ + +using std::vector; + +namespace +{ + +// Common utilities +enum +{ + MAX_BLOCK_WIDTH = 12, + MAX_BLOCK_HEIGHT = 12 +}; + +inline deUint32 getBit (deUint32 src, int ndx) +{ + DE_ASSERT(basisu_astc::inBounds(ndx, 0, 32)); + return (src >> ndx) & 1; +} + +inline deUint32 getBits (deUint32 src, int low, int high) +{ + const int numBits = (high-low) + 1; + DE_ASSERT(basisu_astc::inRange(numBits, 1, 32)); + + if (numBits < 32) + return (deUint32)((src >> low) & ((1u<> low) & 0xFFFFFFFFu); +} + +inline bool isBitSet (deUint32 src, int ndx) +{ + return getBit(src, ndx) != 0; +} + +inline deUint32 reverseBits (deUint32 src, int numBits) +{ + DE_ASSERT(basisu_astc::inRange(numBits, 0, 32)); + + deUint32 result = 0; + for (int i = 0; i < numBits; i++) + result |= ((src >> i) & 1) << (numBits-1-i); + + return result; +} + +inline deUint32 bitReplicationScale (deUint32 src, int numSrcBits, int numDstBits) +{ + DE_ASSERT(numSrcBits <= numDstBits); + DE_ASSERT((src & ((1< -numSrcBits; shift -= numSrcBits) + dst |= (shift >= 0) ? (src << shift) : (src >> -shift); + + return dst; +} + +inline deInt32 signExtend (deInt32 src, int numSrcBits) +{ + DE_ASSERT(basisu_astc::inRange(numSrcBits, 2, 31)); + + const bool negative = (src & (1 << (numSrcBits-1))) != 0; + return src | (negative ? 
~((1 << numSrcBits) - 1) : 0); +} + +typedef uint16_t deFloat16; + +inline bool isFloat16InfOrNan (deFloat16 v) +{ + return getBits(v, 10, 14) == 31; +} + +float deFloat16To32(deFloat16 val16) +{ + deUint32 sign; + deUint32 expotent; + deUint32 mantissa; + + union + { + float f; + deUint32 u; + } x; + + x.u = 0u; + + sign = ((deUint32)val16 >> 15u) & 0x00000001u; + expotent = ((deUint32)val16 >> 10u) & 0x0000001fu; + mantissa = (deUint32)val16 & 0x000003ffu; + + if (expotent == 0u) + { + if (mantissa == 0u) + { + /* +/- 0 */ + x.u = sign << 31u; + return x.f; + } + else + { + /* Denormalized, normalize it. */ + + while (!(mantissa & 0x00000400u)) + { + mantissa <<= 1u; + expotent -= 1u; + } + + expotent += 1u; + mantissa &= ~0x00000400u; + } + } + else if (expotent == 31u) + { + if (mantissa == 0u) + { + /* +/- InF */ + x.u = (sign << 31u) | 0x7f800000u; + return x.f; + } + else + { + /* +/- NaN */ + x.u = (sign << 31u) | 0x7f800000u | (mantissa << 13u); + return x.f; + } + } + + expotent = expotent + (127u - 15u); + mantissa = mantissa << 13u; + + x.u = (sign << 31u) | (expotent << 23u) | mantissa; + return x.f; +} + +enum ISEMode +{ + ISEMODE_TRIT = 0, + ISEMODE_QUINT, + ISEMODE_PLAIN_BIT, + ISEMODE_LAST +}; + +struct ISEParams +{ + ISEMode mode; + int numBits; + ISEParams (ISEMode mode_, int numBits_) : mode(mode_), numBits(numBits_) {} +}; + +inline int computeNumRequiredBits (const ISEParams& iseParams, int numValues) +{ + switch (iseParams.mode) + { + case ISEMODE_TRIT: return deDivRoundUp32(numValues*8, 5) + numValues*iseParams.numBits; + case ISEMODE_QUINT: return deDivRoundUp32(numValues*7, 3) + numValues*iseParams.numBits; + case ISEMODE_PLAIN_BIT: return numValues*iseParams.numBits; + default: + DE_ASSERT(false); + return -1; + } +} + +ISEParams computeMaximumRangeISEParams (int numAvailableBits, int numValuesInSequence) +{ + int curBitsForTritMode = 6; + int curBitsForQuintMode = 5; + int curBitsForPlainBitMode = 8; + + while (true) + { + 
DE_ASSERT(curBitsForTritMode > 0 || curBitsForQuintMode > 0 || curBitsForPlainBitMode > 0); + const int tritRange = (curBitsForTritMode > 0) ? (3 << curBitsForTritMode) - 1 : -1; + const int quintRange = (curBitsForQuintMode > 0) ? (5 << curBitsForQuintMode) - 1 : -1; + const int plainBitRange = (curBitsForPlainBitMode > 0) ? (1 << curBitsForPlainBitMode) - 1 : -1; + const int maxRange = basisu_astc::max(basisu_astc::max(tritRange, quintRange), plainBitRange); + + if (maxRange == tritRange) + { + const ISEParams params(ISEMODE_TRIT, curBitsForTritMode); + + if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits) + return ISEParams(ISEMODE_TRIT, curBitsForTritMode); + + curBitsForTritMode--; + } + else if (maxRange == quintRange) + { + const ISEParams params(ISEMODE_QUINT, curBitsForQuintMode); + + if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits) + return ISEParams(ISEMODE_QUINT, curBitsForQuintMode); + + curBitsForQuintMode--; + } + else + { + const ISEParams params(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode); + DE_ASSERT(maxRange == plainBitRange); + + if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits) + return ISEParams(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode); + + curBitsForPlainBitMode--; + } + } +} + +inline int computeNumColorEndpointValues (deUint32 endpointMode) +{ + DE_ASSERT(endpointMode < 16); + return (endpointMode/4 + 1) * 2; +} + +// Decompression utilities +enum DecompressResult +{ + DECOMPRESS_RESULT_VALID_BLOCK = 0, //!< Decompressed valid block + DECOMPRESS_RESULT_ERROR, //!< Encountered error while decompressing, error color written + DECOMPRESS_RESULT_LAST +}; + +// A helper for getting bits from a 128-bit block. 
+class Block128 +{ +private: + typedef deUint64 Word; + + enum + { + WORD_BYTES = sizeof(Word), + WORD_BITS = 8*WORD_BYTES, + NUM_WORDS = 128 / WORD_BITS + }; + //DE_STATIC_ASSERT(128 % WORD_BITS == 0); + +public: + Block128 (const deUint8* src) + { + for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++) + { + m_words[wordNdx] = 0; + for (int byteNdx = 0; byteNdx < WORD_BYTES; byteNdx++) + m_words[wordNdx] |= (Word)src[wordNdx*WORD_BYTES + byteNdx] << (8*byteNdx); + } + } + + deUint32 getBit (int ndx) const + { + DE_ASSERT(basisu_astc::inBounds(ndx, 0, 128)); + return (m_words[ndx / WORD_BITS] >> (ndx % WORD_BITS)) & 1; + } + + deUint32 getBits (int low, int high) const + { + DE_ASSERT(basisu_astc::inBounds(low, 0, 128)); + DE_ASSERT(basisu_astc::inBounds(high, 0, 128)); + DE_ASSERT(basisu_astc::inRange(high-low+1, 0, 32)); + + if (high-low+1 == 0) + return 0; + + const int word0Ndx = low / WORD_BITS; + const int word1Ndx = high / WORD_BITS; + // \note "foo << bar << 1" done instead of "foo << (bar+1)" to avoid overflow, i.e. shift amount being too big. + if (word0Ndx == word1Ndx) + return (deUint32)((m_words[word0Ndx] & ((((Word)1 << high%WORD_BITS << 1) - 1))) >> ((Word)low % WORD_BITS)); + else + { + DE_ASSERT(word1Ndx == word0Ndx + 1); + return (deUint32)(m_words[word0Ndx] >> (low%WORD_BITS)) | + (deUint32)((m_words[word1Ndx] & (((Word)1 << high%WORD_BITS << 1) - 1)) << (high-low - high%WORD_BITS)); + } + } + + bool isBitSet (int ndx) const + { + DE_ASSERT(basisu_astc::inBounds(ndx, 0, 128)); + return getBit(ndx) != 0; + } + +private: + Word m_words[NUM_WORDS]; +}; + +// A helper for sequential access into a Block128. +class BitAccessStream +{ +public: + BitAccessStream (const Block128& src, int startNdxInSrc, int length, bool forward) + : m_src (src) + , m_startNdxInSrc (startNdxInSrc) + , m_length (length) + , m_forward (forward) + , m_ndx (0) + { + } + + // Get the next num bits. Bits at positions greater than or equal to m_length are zeros. 
+ deUint32 getNext (int num) + { + if (num == 0 || m_ndx >= m_length) + return 0; + const int end = m_ndx + num; + const int numBitsFromSrc = basisu_astc::max(0, basisu_astc::min(m_length, end) - m_ndx); + const int low = m_ndx; + const int high = m_ndx + numBitsFromSrc - 1; + + m_ndx += num; + + return m_forward ? m_src.getBits(m_startNdxInSrc + low, m_startNdxInSrc + high) + : reverseBits(m_src.getBits(m_startNdxInSrc - high, m_startNdxInSrc - low), numBitsFromSrc); + } + +private: + const Block128& m_src; + const int m_startNdxInSrc; + const int m_length; + const bool m_forward; + int m_ndx; +}; + +struct ISEDecodedResult +{ + deUint32 m; + deUint32 tq; //!< Trit or quint value, depending on ISE mode. + deUint32 v; +}; + +// Data from an ASTC block's "block mode" part (i.e. bits [0,10]). +struct ASTCBlockMode +{ + bool isError; + // \note Following fields only relevant if !isError. + bool isVoidExtent; + // \note Following fields only relevant if !isVoidExtent. + bool isDualPlane; + int weightGridWidth; + int weightGridHeight; + ISEParams weightISEParams; + + ASTCBlockMode (void) + : isError (true) + , isVoidExtent (true) + , isDualPlane (true) + , weightGridWidth (-1) + , weightGridHeight (-1) + , weightISEParams (ISEMODE_LAST, -1) + { + } +}; + +inline int computeNumWeights (const ASTCBlockMode& mode) +{ + return mode.weightGridWidth * mode.weightGridHeight * (mode.isDualPlane ? 2 : 1); +} + +struct ColorEndpointPair +{ + UVec4 e0; + UVec4 e1; +}; + +struct TexelWeightPair +{ + deUint32 w[2]; +}; + +ASTCBlockMode getASTCBlockMode (deUint32 blockModeData) +{ + ASTCBlockMode blockMode; + blockMode.isError = true; // \note Set to false later, if not error. + blockMode.isVoidExtent = getBits(blockModeData, 0, 8) == 0x1fc; + if (!blockMode.isVoidExtent) + { + if ((getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 6, 8) == 7) || getBits(blockModeData, 0, 3) == 0) + return blockMode; // Invalid ("reserved"). 
+ + deUint32 r = (deUint32)-1; // \note Set in the following branches. + + if (getBits(blockModeData, 0, 1) == 0) + { + const deUint32 r0 = getBit(blockModeData, 4); + const deUint32 r1 = getBit(blockModeData, 2); + const deUint32 r2 = getBit(blockModeData, 3); + const deUint32 i78 = getBits(blockModeData, 7, 8); + + r = (r2 << 2) | (r1 << 1) | (r0 << 0); + + if (i78 == 3) + { + const bool i5 = isBitSet(blockModeData, 5); + blockMode.weightGridWidth = i5 ? 10 : 6; + blockMode.weightGridHeight = i5 ? 6 : 10; + } + else + { + const deUint32 a = getBits(blockModeData, 5, 6); + + switch (i78) + { + case 0: blockMode.weightGridWidth = 12; blockMode.weightGridHeight = a + 2; break; + case 1: blockMode.weightGridWidth = a + 2; blockMode.weightGridHeight = 12; break; + case 2: blockMode.weightGridWidth = a + 6; blockMode.weightGridHeight = getBits(blockModeData, 9, 10) + 6; break; + default: DE_ASSERT(false); + } + } + } + else + { + const deUint32 r0 = getBit(blockModeData, 4); + const deUint32 r1 = getBit(blockModeData, 0); + const deUint32 r2 = getBit(blockModeData, 1); + const deUint32 i23 = getBits(blockModeData, 2, 3); + const deUint32 a = getBits(blockModeData, 5, 6); + + r = (r2 << 2) | (r1 << 1) | (r0 << 0); + if (i23 == 3) + { + const deUint32 b = getBit(blockModeData, 7); + const bool i8 = isBitSet(blockModeData, 8); + blockMode.weightGridWidth = i8 ? b+2 : a+2; + blockMode.weightGridHeight = i8 ? a+2 : b+6; + } + else + { + const deUint32 b = getBits(blockModeData, 7, 8); + switch (i23) + { + case 0: blockMode.weightGridWidth = b + 4; blockMode.weightGridHeight = a + 2; break; + case 1: blockMode.weightGridWidth = b + 8; blockMode.weightGridHeight = a + 2; break; + case 2: blockMode.weightGridWidth = a + 2; blockMode.weightGridHeight = b + 8; break; + default: DE_ASSERT(false); + } + } + } + + const bool zeroDH = getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 7, 8) == 2; + const bool h = zeroDH ? 
0 : isBitSet(blockModeData, 9); + blockMode.isDualPlane = zeroDH ? 0 : isBitSet(blockModeData, 10); + + { + ISEMode& m = blockMode.weightISEParams.mode; + int& b = blockMode.weightISEParams.numBits; + m = ISEMODE_PLAIN_BIT; + b = 0; + if (h) + { + switch (r) + { + case 2: m = ISEMODE_QUINT; b = 1; break; + case 3: m = ISEMODE_TRIT; b = 2; break; + case 4: b = 4; break; + case 5: m = ISEMODE_QUINT; b = 2; break; + case 6: m = ISEMODE_TRIT; b = 3; break; + case 7: b = 5; break; + default: DE_ASSERT(false); + } + } + else + { + switch (r) + { + case 2: b = 1; break; + case 3: m = ISEMODE_TRIT; break; + case 4: b = 2; break; + case 5: m = ISEMODE_QUINT; break; + case 6: m = ISEMODE_TRIT; b = 1; break; + case 7: b = 3; break; + default: DE_ASSERT(false); + } + } + } + } + + blockMode.isError = false; + return blockMode; +} + +inline void setASTCErrorColorBlock (void* dst, int blockWidth, int blockHeight, bool isSRGB) +{ + if (isSRGB) + { + deUint8* const dstU = (deUint8*)dst; + for (int i = 0; i < blockWidth*blockHeight; i++) + { + dstU[4*i + 0] = 0xff; + dstU[4*i + 1] = 0; + dstU[4*i + 2] = 0xff; + dstU[4*i + 3] = 0xff; + } + } + else + { + float* const dstF = (float*)dst; + for (int i = 0; i < blockWidth*blockHeight; i++) + { + dstF[4*i + 0] = 1.0f; + dstF[4*i + 1] = 0.0f; + dstF[4*i + 2] = 1.0f; + dstF[4*i + 3] = 1.0f; + } + } +} + +DecompressResult decodeVoidExtentBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode) +{ + const deUint32 minSExtent = blockData.getBits(12, 24); + const deUint32 maxSExtent = blockData.getBits(25, 37); + const deUint32 minTExtent = blockData.getBits(38, 50); + const deUint32 maxTExtent = blockData.getBits(51, 63); + const bool allExtentsAllOnes = (minSExtent == 0x1fff) && (maxSExtent == 0x1fff) && (minTExtent == 0x1fff) && (maxTExtent == 0x1fff); + const bool isHDRBlock = blockData.isBitSet(9); + + if ((isLDRMode && isHDRBlock) || (!allExtentsAllOnes && (minSExtent >= maxSExtent || 
minTExtent >= maxTExtent))) + { + setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB); + return DECOMPRESS_RESULT_ERROR; + } + + const deUint32 rgba[4] = + { + blockData.getBits(64, 79), + blockData.getBits(80, 95), + blockData.getBits(96, 111), + blockData.getBits(112, 127) + }; + + if (isSRGB) + { + deUint8* const dstU = (deUint8*)dst; + for (int i = 0; i < blockWidth * blockHeight; i++) + { + for (int c = 0; c < 4; c++) + dstU[i * 4 + c] = (deUint8)((rgba[c] & 0xff00) >> 8); + } + } + else + { + float* const dstF = (float*)dst; + + if (isHDRBlock) + { + for (int c = 0; c < 4; c++) + { + if (isFloat16InfOrNan((deFloat16)rgba[c])) + { + //throw InternalError("Infinity or NaN color component in HDR void extent block in ASTC texture (behavior undefined by ASTC specification)"); + setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB); + return DECOMPRESS_RESULT_ERROR; + } + } + + for (int i = 0; i < blockWidth * blockHeight; i++) + { + for (int c = 0; c < 4; c++) + dstF[i * 4 + c] = deFloat16To32((deFloat16)rgba[c]); + } + } + else + { + for (int i = 0; i < blockWidth * blockHeight; i++) + { + for (int c = 0; c < 4; c++) + dstF[i * 4 + c] = (rgba[c] == 65535) ? 1.0f : ((float)rgba[c] / 65536.0f); + } + } + } + + return DECOMPRESS_RESULT_VALID_BLOCK; +} + +void decodeColorEndpointModes (deUint32* endpointModesDst, const Block128& blockData, int numPartitions, int extraCemBitsStart) +{ + if (numPartitions == 1) + endpointModesDst[0] = blockData.getBits(13, 16); + else + { + const deUint32 highLevelSelector = blockData.getBits(23, 24); + + if (highLevelSelector == 0) + { + const deUint32 mode = blockData.getBits(25, 28); + + for (int i = 0; i < numPartitions; i++) + endpointModesDst[i] = mode; + } + else + { + for (int partNdx = 0; partNdx < numPartitions; partNdx++) + { + const deUint32 cemClass = highLevelSelector - (blockData.isBitSet(25 + partNdx) ? 
0 : 1); + const deUint32 lowBit0Ndx = numPartitions + 2*partNdx; + const deUint32 lowBit1Ndx = numPartitions + 2*partNdx + 1; + const deUint32 lowBit0 = blockData.getBit(lowBit0Ndx < 4 ? 25+lowBit0Ndx : extraCemBitsStart+lowBit0Ndx-4); + const deUint32 lowBit1 = blockData.getBit(lowBit1Ndx < 4 ? 25+lowBit1Ndx : extraCemBitsStart+lowBit1Ndx-4); + + endpointModesDst[partNdx] = (cemClass << 2) | (lowBit1 << 1) | lowBit0; + } + } + } +} + +int computeNumColorEndpointValues (const deUint32* endpointModes, int numPartitions) +{ + int result = 0; + + for (int i = 0; i < numPartitions; i++) + result += computeNumColorEndpointValues(endpointModes[i]); + + return result; +} + +void decodeISETritBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits) +{ + DE_ASSERT(basisu_astc::inRange(numValues, 1, 5)); + + deUint32 m[5]; + m[0] = data.getNext(numBits); + deUint32 T01 = data.getNext(2); + m[1] = data.getNext(numBits); + deUint32 T23 = data.getNext(2); + m[2] = data.getNext(numBits); + deUint32 T4 = data.getNext(1); + m[3] = data.getNext(numBits); + deUint32 T56 = data.getNext(2); + m[4] = data.getNext(numBits); + deUint32 T7 = data.getNext(1); + +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wimplicit-fallthrough=" +#endif +#endif + switch (numValues) + { + // \note Fall-throughs. 
+ case 1: T23 = 0; + case 2: T4 = 0; + case 3: T56 = 0; + case 4: T7 = 0; + case 5: break; + default: + DE_ASSERT(false); + } +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif + + const deUint32 T = (T7 << 7) | (T56 << 5) | (T4 << 4) | (T23 << 2) | (T01 << 0); + + static const deUint32 tritsFromT[256][5] = + { + { 0,0,0,0,0 }, { 1,0,0,0,0 }, { 2,0,0,0,0 }, { 0,0,2,0,0 }, { 0,1,0,0,0 }, { 1,1,0,0,0 }, { 2,1,0,0,0 }, { 1,0,2,0,0 }, { 0,2,0,0,0 }, { 1,2,0,0,0 }, { 2,2,0,0,0 }, { 2,0,2,0,0 }, { 0,2,2,0,0 }, { 1,2,2,0,0 }, { 2,2,2,0,0 }, { 2,0,2,0,0 }, + { 0,0,1,0,0 }, { 1,0,1,0,0 }, { 2,0,1,0,0 }, { 0,1,2,0,0 }, { 0,1,1,0,0 }, { 1,1,1,0,0 }, { 2,1,1,0,0 }, { 1,1,2,0,0 }, { 0,2,1,0,0 }, { 1,2,1,0,0 }, { 2,2,1,0,0 }, { 2,1,2,0,0 }, { 0,0,0,2,2 }, { 1,0,0,2,2 }, { 2,0,0,2,2 }, { 0,0,2,2,2 }, + { 0,0,0,1,0 }, { 1,0,0,1,0 }, { 2,0,0,1,0 }, { 0,0,2,1,0 }, { 0,1,0,1,0 }, { 1,1,0,1,0 }, { 2,1,0,1,0 }, { 1,0,2,1,0 }, { 0,2,0,1,0 }, { 1,2,0,1,0 }, { 2,2,0,1,0 }, { 2,0,2,1,0 }, { 0,2,2,1,0 }, { 1,2,2,1,0 }, { 2,2,2,1,0 }, { 2,0,2,1,0 }, + { 0,0,1,1,0 }, { 1,0,1,1,0 }, { 2,0,1,1,0 }, { 0,1,2,1,0 }, { 0,1,1,1,0 }, { 1,1,1,1,0 }, { 2,1,1,1,0 }, { 1,1,2,1,0 }, { 0,2,1,1,0 }, { 1,2,1,1,0 }, { 2,2,1,1,0 }, { 2,1,2,1,0 }, { 0,1,0,2,2 }, { 1,1,0,2,2 }, { 2,1,0,2,2 }, { 1,0,2,2,2 }, + { 0,0,0,2,0 }, { 1,0,0,2,0 }, { 2,0,0,2,0 }, { 0,0,2,2,0 }, { 0,1,0,2,0 }, { 1,1,0,2,0 }, { 2,1,0,2,0 }, { 1,0,2,2,0 }, { 0,2,0,2,0 }, { 1,2,0,2,0 }, { 2,2,0,2,0 }, { 2,0,2,2,0 }, { 0,2,2,2,0 }, { 1,2,2,2,0 }, { 2,2,2,2,0 }, { 2,0,2,2,0 }, + { 0,0,1,2,0 }, { 1,0,1,2,0 }, { 2,0,1,2,0 }, { 0,1,2,2,0 }, { 0,1,1,2,0 }, { 1,1,1,2,0 }, { 2,1,1,2,0 }, { 1,1,2,2,0 }, { 0,2,1,2,0 }, { 1,2,1,2,0 }, { 2,2,1,2,0 }, { 2,1,2,2,0 }, { 0,2,0,2,2 }, { 1,2,0,2,2 }, { 2,2,0,2,2 }, { 2,0,2,2,2 }, + { 0,0,0,0,2 }, { 1,0,0,0,2 }, { 2,0,0,0,2 }, { 0,0,2,0,2 }, { 0,1,0,0,2 }, { 1,1,0,0,2 }, { 2,1,0,0,2 }, { 1,0,2,0,2 }, { 0,2,0,0,2 }, { 1,2,0,0,2 }, { 2,2,0,0,2 }, { 2,0,2,0,2 }, { 0,2,2,0,2 }, { 
1,2,2,0,2 }, { 2,2,2,0,2 }, { 2,0,2,0,2 }, + { 0,0,1,0,2 }, { 1,0,1,0,2 }, { 2,0,1,0,2 }, { 0,1,2,0,2 }, { 0,1,1,0,2 }, { 1,1,1,0,2 }, { 2,1,1,0,2 }, { 1,1,2,0,2 }, { 0,2,1,0,2 }, { 1,2,1,0,2 }, { 2,2,1,0,2 }, { 2,1,2,0,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,0,2,2,2 }, + { 0,0,0,0,1 }, { 1,0,0,0,1 }, { 2,0,0,0,1 }, { 0,0,2,0,1 }, { 0,1,0,0,1 }, { 1,1,0,0,1 }, { 2,1,0,0,1 }, { 1,0,2,0,1 }, { 0,2,0,0,1 }, { 1,2,0,0,1 }, { 2,2,0,0,1 }, { 2,0,2,0,1 }, { 0,2,2,0,1 }, { 1,2,2,0,1 }, { 2,2,2,0,1 }, { 2,0,2,0,1 }, + { 0,0,1,0,1 }, { 1,0,1,0,1 }, { 2,0,1,0,1 }, { 0,1,2,0,1 }, { 0,1,1,0,1 }, { 1,1,1,0,1 }, { 2,1,1,0,1 }, { 1,1,2,0,1 }, { 0,2,1,0,1 }, { 1,2,1,0,1 }, { 2,2,1,0,1 }, { 2,1,2,0,1 }, { 0,0,1,2,2 }, { 1,0,1,2,2 }, { 2,0,1,2,2 }, { 0,1,2,2,2 }, + { 0,0,0,1,1 }, { 1,0,0,1,1 }, { 2,0,0,1,1 }, { 0,0,2,1,1 }, { 0,1,0,1,1 }, { 1,1,0,1,1 }, { 2,1,0,1,1 }, { 1,0,2,1,1 }, { 0,2,0,1,1 }, { 1,2,0,1,1 }, { 2,2,0,1,1 }, { 2,0,2,1,1 }, { 0,2,2,1,1 }, { 1,2,2,1,1 }, { 2,2,2,1,1 }, { 2,0,2,1,1 }, + { 0,0,1,1,1 }, { 1,0,1,1,1 }, { 2,0,1,1,1 }, { 0,1,2,1,1 }, { 0,1,1,1,1 }, { 1,1,1,1,1 }, { 2,1,1,1,1 }, { 1,1,2,1,1 }, { 0,2,1,1,1 }, { 1,2,1,1,1 }, { 2,2,1,1,1 }, { 2,1,2,1,1 }, { 0,1,1,2,2 }, { 1,1,1,2,2 }, { 2,1,1,2,2 }, { 1,1,2,2,2 }, + { 0,0,0,2,1 }, { 1,0,0,2,1 }, { 2,0,0,2,1 }, { 0,0,2,2,1 }, { 0,1,0,2,1 }, { 1,1,0,2,1 }, { 2,1,0,2,1 }, { 1,0,2,2,1 }, { 0,2,0,2,1 }, { 1,2,0,2,1 }, { 2,2,0,2,1 }, { 2,0,2,2,1 }, { 0,2,2,2,1 }, { 1,2,2,2,1 }, { 2,2,2,2,1 }, { 2,0,2,2,1 }, + { 0,0,1,2,1 }, { 1,0,1,2,1 }, { 2,0,1,2,1 }, { 0,1,2,2,1 }, { 0,1,1,2,1 }, { 1,1,1,2,1 }, { 2,1,1,2,1 }, { 1,1,2,2,1 }, { 0,2,1,2,1 }, { 1,2,1,2,1 }, { 2,2,1,2,1 }, { 2,1,2,2,1 }, { 0,2,1,2,2 }, { 1,2,1,2,2 }, { 2,2,1,2,2 }, { 2,1,2,2,2 }, + { 0,0,0,1,2 }, { 1,0,0,1,2 }, { 2,0,0,1,2 }, { 0,0,2,1,2 }, { 0,1,0,1,2 }, { 1,1,0,1,2 }, { 2,1,0,1,2 }, { 1,0,2,1,2 }, { 0,2,0,1,2 }, { 1,2,0,1,2 }, { 2,2,0,1,2 }, { 2,0,2,1,2 }, { 0,2,2,1,2 }, { 1,2,2,1,2 }, { 2,2,2,1,2 }, { 2,0,2,1,2 }, + { 0,0,1,1,2 }, { 
1,0,1,1,2 }, { 2,0,1,1,2 }, { 0,1,2,1,2 }, { 0,1,1,1,2 }, { 1,1,1,1,2 }, { 2,1,1,1,2 }, { 1,1,2,1,2 }, { 0,2,1,1,2 }, { 1,2,1,1,2 }, { 2,2,1,1,2 }, { 2,1,2,1,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,1,2,2,2 } + }; + + const deUint32 (& trits)[5] = tritsFromT[T]; + for (int i = 0; i < numValues; i++) + { + dst[i].m = m[i]; + dst[i].tq = trits[i]; + dst[i].v = (trits[i] << numBits) + m[i]; + } +} + +void decodeISEQuintBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits) +{ + DE_ASSERT(basisu_astc::inRange(numValues, 1, 3)); + + deUint32 m[3]; + m[0] = data.getNext(numBits); + deUint32 Q012 = data.getNext(3); + m[1] = data.getNext(numBits); + deUint32 Q34 = data.getNext(2); + m[2] = data.getNext(numBits); + deUint32 Q56 = data.getNext(2); + +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wimplicit-fallthrough=" +#endif +#endif + switch (numValues) + { + // \note Fall-throughs. + case 1: Q34 = 0; + case 2: Q56 = 0; + case 3: break; + default: + DE_ASSERT(false); + } +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif + + const deUint32 Q = (Q56 << 5) | (Q34 << 3) | (Q012 << 0); + + static const deUint32 quintsFromQ[256][3] = + { + { 0,0,0 }, { 1,0,0 }, { 2,0,0 }, { 3,0,0 }, { 4,0,0 }, { 0,4,0 }, { 4,4,0 }, { 4,4,4 }, { 0,1,0 }, { 1,1,0 }, { 2,1,0 }, { 3,1,0 }, { 4,1,0 }, { 1,4,0 }, { 4,4,1 }, { 4,4,4 }, + { 0,2,0 }, { 1,2,0 }, { 2,2,0 }, { 3,2,0 }, { 4,2,0 }, { 2,4,0 }, { 4,4,2 }, { 4,4,4 }, { 0,3,0 }, { 1,3,0 }, { 2,3,0 }, { 3,3,0 }, { 4,3,0 }, { 3,4,0 }, { 4,4,3 }, { 4,4,4 }, + { 0,0,1 }, { 1,0,1 }, { 2,0,1 }, { 3,0,1 }, { 4,0,1 }, { 0,4,1 }, { 4,0,4 }, { 0,4,4 }, { 0,1,1 }, { 1,1,1 }, { 2,1,1 }, { 3,1,1 }, { 4,1,1 }, { 1,4,1 }, { 4,1,4 }, { 1,4,4 }, + { 0,2,1 }, { 1,2,1 }, { 2,2,1 }, { 3,2,1 }, { 4,2,1 }, { 2,4,1 }, { 4,2,4 }, { 2,4,4 }, { 0,3,1 }, { 1,3,1 }, { 2,3,1 }, { 3,3,1 }, { 4,3,1 }, { 3,4,1 }, { 4,3,4 }, { 3,4,4 }, + { 0,0,2 }, { 
1,0,2 }, { 2,0,2 }, { 3,0,2 }, { 4,0,2 }, { 0,4,2 }, { 2,0,4 }, { 3,0,4 }, { 0,1,2 }, { 1,1,2 }, { 2,1,2 }, { 3,1,2 }, { 4,1,2 }, { 1,4,2 }, { 2,1,4 }, { 3,1,4 }, + { 0,2,2 }, { 1,2,2 }, { 2,2,2 }, { 3,2,2 }, { 4,2,2 }, { 2,4,2 }, { 2,2,4 }, { 3,2,4 }, { 0,3,2 }, { 1,3,2 }, { 2,3,2 }, { 3,3,2 }, { 4,3,2 }, { 3,4,2 }, { 2,3,4 }, { 3,3,4 }, + { 0,0,3 }, { 1,0,3 }, { 2,0,3 }, { 3,0,3 }, { 4,0,3 }, { 0,4,3 }, { 0,0,4 }, { 1,0,4 }, { 0,1,3 }, { 1,1,3 }, { 2,1,3 }, { 3,1,3 }, { 4,1,3 }, { 1,4,3 }, { 0,1,4 }, { 1,1,4 }, + { 0,2,3 }, { 1,2,3 }, { 2,2,3 }, { 3,2,3 }, { 4,2,3 }, { 2,4,3 }, { 0,2,4 }, { 1,2,4 }, { 0,3,3 }, { 1,3,3 }, { 2,3,3 }, { 3,3,3 }, { 4,3,3 }, { 3,4,3 }, { 0,3,4 }, { 1,3,4 } + }; + + const deUint32 (& quints)[3] = quintsFromQ[Q]; + for (int i = 0; i < numValues; i++) + { + dst[i].m = m[i]; + dst[i].tq = quints[i]; + dst[i].v = (quints[i] << numBits) + m[i]; + } +} + +inline void decodeISEBitBlock (ISEDecodedResult* dst, BitAccessStream& data, int numBits) +{ + dst[0].m = data.getNext(numBits); + dst[0].v = dst[0].m; +} + +void decodeISE (ISEDecodedResult* dst, int numValues, BitAccessStream& data, const ISEParams& params) +{ + if (params.mode == ISEMODE_TRIT) + { + const int numBlocks = deDivRoundUp32(numValues, 5); + for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++) + { + const int numValuesInBlock = blockNdx == numBlocks-1 ? numValues - 5*(numBlocks-1) : 5; + decodeISETritBlock(&dst[5*blockNdx], numValuesInBlock, data, params.numBits); + } + } + else if (params.mode == ISEMODE_QUINT) + { + const int numBlocks = deDivRoundUp32(numValues, 3); + for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++) + { + const int numValuesInBlock = blockNdx == numBlocks-1 ? 
numValues - 3*(numBlocks-1) : 3; + decodeISEQuintBlock(&dst[3*blockNdx], numValuesInBlock, data, params.numBits); + } + } + else + { + DE_ASSERT(params.mode == ISEMODE_PLAIN_BIT); + for (int i = 0; i < numValues; i++) + decodeISEBitBlock(&dst[i], data, params.numBits); + } +} + +void unquantizeColorEndpoints (deUint32* dst, const ISEDecodedResult* iseResults, int numEndpoints, const ISEParams& iseParams) +{ + if ((iseParams.mode == ISEMODE_TRIT) || (iseParams.mode == ISEMODE_QUINT)) + { + const int rangeCase = iseParams.numBits*2 - (iseParams.mode == ISEMODE_TRIT ? 2 : 1); + DE_ASSERT(basisu_astc::inRange(rangeCase, 0, 10)); + + static const deUint32 Ca[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 }; + const deUint32 C = Ca[rangeCase]; + + for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++) + { + const deUint32 a = getBit(iseResults[endpointNdx].m, 0); + const deUint32 b = getBit(iseResults[endpointNdx].m, 1); + const deUint32 c = getBit(iseResults[endpointNdx].m, 2); + const deUint32 d = getBit(iseResults[endpointNdx].m, 3); + const deUint32 e = getBit(iseResults[endpointNdx].m, 4); + const deUint32 f = getBit(iseResults[endpointNdx].m, 5); + const deUint32 A = (a == 0) ? 0 : (1<<9)-1; + + const deUint32 B = (rangeCase == 0) ? 0 + : (rangeCase == 1) ? 0 + : (rangeCase == 2) ? ((b << 8) | (b << 4) | (b << 2) | (b << 1)) + : (rangeCase == 3) ? ((b << 8) | (b << 3) | (b << 2)) + : (rangeCase == 4) ? ((c << 8) | (b << 7) | (c << 3) | (b << 2) | (c << 1) | (b << 0)) + : (rangeCase == 5) ? ((c << 8) | (b << 7) | (c << 2) | (b << 1) | (c << 0)) + : (rangeCase == 6) ? ((d << 8) | (c << 7) | (b << 6) | (d << 2) | (c << 1) | (b << 0)) + : (rangeCase == 7) ? ((d << 8) | (c << 7) | (b << 6) | (d << 1) | (c << 0)) + : (rangeCase == 8) ? ((e << 8) | (d << 7) | (c << 6) | (b << 5) | (e << 1) | (d << 0)) + : (rangeCase == 9) ? ((e << 8) | (d << 7) | (c << 6) | (b << 5) | (e << 0)) + : (rangeCase == 10) ? 
((f << 8) | (e << 7) | (d << 6) | (c << 5) | (b << 4) | (f << 0)) + : (deUint32)-1; + + DE_ASSERT(B != (deUint32)-1); + dst[endpointNdx] = (((iseResults[endpointNdx].tq*C + B) ^ A) >> 2) | (A & 0x80); + } + } + else + { + DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT); + for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++) + dst[endpointNdx] = bitReplicationScale(iseResults[endpointNdx].v, iseParams.numBits, 8); + } +} + +inline void bitTransferSigned (deInt32& a, deInt32& b) +{ + b >>= 1; + b |= a & 0x80; + a >>= 1; + a &= 0x3f; + if (isBitSet(a, 5)) + a -= 0x40; +} + +inline UVec4 clampedRGBA (const IVec4& rgba) +{ + return UVec4(basisu_astc::clamp(rgba.x(), 0, 0xff), + basisu_astc::clamp(rgba.y(), 0, 0xff), + basisu_astc::clamp(rgba.z(), 0, 0xff), + basisu_astc::clamp(rgba.w(), 0, 0xff)); +} + +inline IVec4 blueContract (int r, int g, int b, int a) +{ + return IVec4((r+b)>>1, (g+b)>>1, b, a); +} + +inline bool isColorEndpointModeHDR (deUint32 mode) +{ + return (mode == 2) || + (mode == 3) || + (mode == 7) || + (mode == 11) || + (mode == 14) || + (mode == 15); +} + +void decodeHDREndpointMode7 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3) +{ + const deUint32 m10 = getBit(v1, 7) | (getBit(v2, 7) << 1); + const deUint32 m23 = getBits(v0, 6, 7); + + const deUint32 majComp = (m10 != 3) ? m10 + : (m23 != 3) ? m23 + : 0; + + const deUint32 mode = (m10 != 3) ? m23 + : (m23 != 3) ? 
4 + : 5; + + deInt32 red = (deInt32)getBits(v0, 0, 5); + deInt32 green = (deInt32)getBits(v1, 0, 4); + deInt32 blue = (deInt32)getBits(v2, 0, 4); + deInt32 scale = (deInt32)getBits(v3, 0, 4); + + { +#define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT) +#define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5, V6,S6) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); SHOR(V6,S6,x6); } while (false) + + const deUint32 x0 = getBit(v1, 6); + const deUint32 x1 = getBit(v1, 5); + const deUint32 x2 = getBit(v2, 6); + const deUint32 x3 = getBit(v2, 5); + const deUint32 x4 = getBit(v3, 7); + const deUint32 x5 = getBit(v3, 6); + const deUint32 x6 = getBit(v3, 5); + + deInt32& R = red; + deInt32& G = green; + deInt32& B = blue; + deInt32& S = scale; + + switch (mode) + { + case 0: ASSIGN_X_BITS(R,9, R,8, R,7, R,10, R,6, S,6, S,5); break; + case 1: ASSIGN_X_BITS(R,8, G,5, R,7, B,5, R,6, R,10, R,9); break; + case 2: ASSIGN_X_BITS(R,9, R,8, R,7, R,6, S,7, S,6, S,5); break; + case 3: ASSIGN_X_BITS(R,8, G,5, R,7, B,5, R,6, S,6, S,5); break; + case 4: ASSIGN_X_BITS(G,6, G,5, B,6, B,5, R,6, R,7, S,5); break; + case 5: ASSIGN_X_BITS(G,6, G,5, B,6, B,5, R,6, S,6, S,5); break; + default: + DE_ASSERT(false); + } +#undef ASSIGN_X_BITS +#undef SHOR + } + + static const int shiftAmounts[] = { 1, 1, 2, 3, 4, 5 }; + DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(shiftAmounts)); + + red <<= shiftAmounts[mode]; + green <<= shiftAmounts[mode]; + blue <<= shiftAmounts[mode]; + scale <<= shiftAmounts[mode]; + + if (mode != 5) + { + green = red - green; + blue = red - blue; + } + + if (majComp == 1) + std::swap(red, green); + else if (majComp == 2) + std::swap(red, blue); + + e0 = UVec4(basisu_astc::clamp(red - scale, 0, 0xfff), + basisu_astc::clamp(green - scale, 0, 0xfff), + basisu_astc::clamp(blue - scale, 0, 0xfff), + 0x780); + + e1 = UVec4(basisu_astc::clamp(red, 0, 0xfff), + basisu_astc::clamp(green, 0, 0xfff), + 
basisu_astc::clamp(blue, 0, 0xfff), + 0x780); +} + +void decodeHDREndpointMode11 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5) +{ + const deUint32 major = (getBit(v5, 7) << 1) | getBit(v4, 7); + + if (major == 3) + { + e0 = UVec4(v0<<4, v2<<4, getBits(v4,0,6)<<5, 0x780); + e1 = UVec4(v1<<4, v3<<4, getBits(v5,0,6)<<5, 0x780); + } + else + { + const deUint32 mode = (getBit(v3, 7) << 2) | (getBit(v2, 7) << 1) | getBit(v1, 7); + + deInt32 a = (deInt32)((getBit(v1, 6) << 8) | v0); + deInt32 c = (deInt32)(getBits(v1, 0, 5)); + deInt32 b0 = (deInt32)(getBits(v2, 0, 5)); + deInt32 b1 = (deInt32)(getBits(v3, 0, 5)); + deInt32 d0 = (deInt32)(getBits(v4, 0, 4)); + deInt32 d1 = (deInt32)(getBits(v5, 0, 4)); + + { +#define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT) +#define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); } while (false) + const deUint32 x0 = getBit(v2, 6); + const deUint32 x1 = getBit(v3, 6); + const deUint32 x2 = getBit(v4, 6); + const deUint32 x3 = getBit(v5, 6); + const deUint32 x4 = getBit(v4, 5); + const deUint32 x5 = getBit(v5, 5); + + switch (mode) + { + case 0: ASSIGN_X_BITS(b0,6, b1,6, d0,6, d1,6, d0,5, d1,5); break; + case 1: ASSIGN_X_BITS(b0,6, b1,6, b0,7, b1,7, d0,5, d1,5); break; + case 2: ASSIGN_X_BITS(a,9, c,6, d0,6, d1,6, d0,5, d1,5); break; + case 3: ASSIGN_X_BITS(b0,6, b1,6, a,9, c,6, d0,5, d1,5); break; + case 4: ASSIGN_X_BITS(b0,6, b1,6, b0,7, b1,7, a,9, a,10); break; + case 5: ASSIGN_X_BITS(a,9, a,10, c,7, c,6, d0,5, d1,5); break; + case 6: ASSIGN_X_BITS(b0,6, b1,6, a,11, c,6, a,9, a,10); break; + case 7: ASSIGN_X_BITS(a,9, a,10, a,11, c,6, d0,5, d1,5); break; + default: + DE_ASSERT(false); + } +#undef ASSIGN_X_BITS +#undef SHOR + } + + static const int numDBits[] = { 7, 6, 7, 6, 5, 6, 5, 6 }; + DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(numDBits)); + d0 = 
signExtend(d0, numDBits[mode]); + d1 = signExtend(d1, numDBits[mode]); + + const int shiftAmount = (mode >> 1) ^ 3; + a = (uint32_t)a << shiftAmount; + c = (uint32_t)c << shiftAmount; + b0 = (uint32_t)b0 << shiftAmount; + b1 = (uint32_t)b1 << shiftAmount; + d0 = (uint32_t)d0 << shiftAmount; + d1 = (uint32_t)d1 << shiftAmount; + + e0 = UVec4(basisu_astc::clamp(a-c, 0, 0xfff), basisu_astc::clamp(a-b0-c-d0, 0, 0xfff), basisu_astc::clamp(a-b1-c-d1, 0, 0xfff), 0x780); + e1 = UVec4(basisu_astc::clamp(a, 0, 0xfff), basisu_astc::clamp(a-b0, 0, 0xfff), basisu_astc::clamp(a-b1, 0, 0xfff), 0x780); + + if (major == 1) + { + std::swap(e0.x(), e0.y()); + std::swap(e1.x(), e1.y()); + } + else if (major == 2) + { + std::swap(e0.x(), e0.z()); + std::swap(e1.x(), e1.z()); + } + } +} + +void decodeHDREndpointMode15(UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5, deUint32 v6In, deUint32 v7In) +{ + decodeHDREndpointMode11(e0, e1, v0, v1, v2, v3, v4, v5); + + const deUint32 mode = (getBit(v7In, 7) << 1) | getBit(v6In, 7); + deInt32 v6 = (deInt32)getBits(v6In, 0, 6); + deInt32 v7 = (deInt32)getBits(v7In, 0, 6); + + if (mode == 3) + { + e0.w() = v6 << 5; + e1.w() = v7 << 5; + } + else + { + v6 |= (v7 << (mode+1)) & 0x780; + v7 &= (0x3f >> mode); + v7 ^= 0x20 >> mode; + v7 -= 0x20 >> mode; + v6 <<= 4-mode; + v7 <<= 4-mode; + v7 += v6; + v7 = basisu_astc::clamp(v7, 0, 0xfff); + e0.w() = v6; + e1.w() = v7; + } +} + +void decodeColorEndpoints (ColorEndpointPair* dst, const deUint32* unquantizedEndpoints, const deUint32* endpointModes, int numPartitions) +{ + int unquantizedNdx = 0; + + for (int partitionNdx = 0; partitionNdx < numPartitions; partitionNdx++) + { + const deUint32 endpointMode = endpointModes[partitionNdx]; + const deUint32* v = &unquantizedEndpoints[unquantizedNdx]; + + UVec4& e0 = dst[partitionNdx].e0; + UVec4& e1 = dst[partitionNdx].e1; + unquantizedNdx += computeNumColorEndpointValues(endpointMode); + + switch 
(endpointMode) + { + case 0: + { + e0 = UVec4(v[0], v[0], v[0], 0xff); + e1 = UVec4(v[1], v[1], v[1], 0xff); + break; + } + case 1: + { + const deUint32 L0 = (v[0] >> 2) | (getBits(v[1], 6, 7) << 6); + const deUint32 L1 = basisu_astc::min(0xffu, L0 + getBits(v[1], 0, 5)); + e0 = UVec4(L0, L0, L0, 0xff); + e1 = UVec4(L1, L1, L1, 0xff); + break; + } + case 2: + { + const deUint32 v1Gr = v[1] >= v[0]; + const deUint32 y0 = v1Gr ? v[0]<<4 : (v[1]<<4) + 8; + const deUint32 y1 = v1Gr ? v[1]<<4 : (v[0]<<4) - 8; + e0 = UVec4(y0, y0, y0, 0x780); + e1 = UVec4(y1, y1, y1, 0x780); + break; + } + case 3: + { + const bool m = isBitSet(v[0], 7); + const deUint32 y0 = m ? (getBits(v[1], 5, 7) << 9) | (getBits(v[0], 0, 6) << 2) + : (getBits(v[1], 4, 7) << 8) | (getBits(v[0], 0, 6) << 1); + const deUint32 d = m ? getBits(v[1], 0, 4) << 2 + : getBits(v[1], 0, 3) << 1; + const deUint32 y1 = basisu_astc::min(0xfffu, y0+d); + e0 = UVec4(y0, y0, y0, 0x780); + e1 = UVec4(y1, y1, y1, 0x780); + break; + } + case 4: + { + e0 = UVec4(v[0], v[0], v[0], v[2]); + e1 = UVec4(v[1], v[1], v[1], v[3]); + break; + } + case 5: + { + deInt32 v0 = (deInt32)v[0]; + deInt32 v1 = (deInt32)v[1]; + deInt32 v2 = (deInt32)v[2]; + deInt32 v3 = (deInt32)v[3]; + bitTransferSigned(v1, v0); + bitTransferSigned(v3, v2); + e0 = clampedRGBA(IVec4(v0, v0, v0, v2)); + e1 = clampedRGBA(IVec4(v0+v1, v0+v1, v0+v1, v2+v3)); + break; + } + case 6: + e0 = UVec4((v[0]*v[3]) >> 8, (v[1]*v[3]) >> 8, (v[2]*v[3]) >> 8, 0xff); + e1 = UVec4(v[0], v[1], v[2], 0xff); + break; + case 7: + decodeHDREndpointMode7(e0, e1, v[0], v[1], v[2], v[3]); + break; + case 8: + { + if (v[1]+v[3]+v[5] >= v[0]+v[2]+v[4]) + { + e0 = UVec4(v[0], v[2], v[4], 0xff); + e1 = UVec4(v[1], v[3], v[5], 0xff); + } + else + { + e0 = blueContract(v[1], v[3], v[5], 0xff).asUint(); + e1 = blueContract(v[0], v[2], v[4], 0xff).asUint(); + } + break; + } + case 9: + { + deInt32 v0 = (deInt32)v[0]; + deInt32 v1 = (deInt32)v[1]; + deInt32 v2 = (deInt32)v[2]; + deInt32 v3 
= (deInt32)v[3]; + deInt32 v4 = (deInt32)v[4]; + deInt32 v5 = (deInt32)v[5]; + bitTransferSigned(v1, v0); + bitTransferSigned(v3, v2); + bitTransferSigned(v5, v4); + if (v1+v3+v5 >= 0) + { + e0 = clampedRGBA(IVec4(v0, v2, v4, 0xff)); + e1 = clampedRGBA(IVec4(v0+v1, v2+v3, v4+v5, 0xff)); + } + else + { + e0 = clampedRGBA(blueContract(v0+v1, v2+v3, v4+v5, 0xff)); + e1 = clampedRGBA(blueContract(v0, v2, v4, 0xff)); + } + break; + } + case 10: + { + e0 = UVec4((v[0]*v[3]) >> 8, (v[1]*v[3]) >> 8, (v[2]*v[3]) >> 8, v[4]); + e1 = UVec4(v[0], v[1], v[2], v[5]); + break; + } + case 11: + { + decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]); + break; + } + case 12: + { + if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) + { + e0 = UVec4(v[0], v[2], v[4], v[6]); + e1 = UVec4(v[1], v[3], v[5], v[7]); + } + else + { + e0 = clampedRGBA(blueContract(v[1], v[3], v[5], v[7])); + e1 = clampedRGBA(blueContract(v[0], v[2], v[4], v[6])); + } + break; + } + case 13: + { + deInt32 v0 = (deInt32)v[0]; + deInt32 v1 = (deInt32)v[1]; + deInt32 v2 = (deInt32)v[2]; + deInt32 v3 = (deInt32)v[3]; + deInt32 v4 = (deInt32)v[4]; + deInt32 v5 = (deInt32)v[5]; + deInt32 v6 = (deInt32)v[6]; + deInt32 v7 = (deInt32)v[7]; + bitTransferSigned(v1, v0); + bitTransferSigned(v3, v2); + bitTransferSigned(v5, v4); + bitTransferSigned(v7, v6); + if (v1+v3+v5 >= 0) + { + e0 = clampedRGBA(IVec4(v0, v2, v4, v6)); + e1 = clampedRGBA(IVec4(v0+v1, v2+v3, v4+v5, v6+v7)); + } + else + { + e0 = clampedRGBA(blueContract(v0+v1, v2+v3, v4+v5, v6+v7)); + e1 = clampedRGBA(blueContract(v0, v2, v4, v6)); + } + break; + } + case 14: + decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]); + e0.w() = v[6]; + e1.w() = v[7]; + break; + case 15: + { + decodeHDREndpointMode15(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]); + break; + } + default: + DE_ASSERT(false); + } + } +} + +void computeColorEndpoints (ColorEndpointPair* dst, const Block128& blockData, const deUint32* endpointModes, int 
numPartitions, int numColorEndpointValues, const ISEParams& iseParams, int numBitsAvailable) +{ + const int colorEndpointDataStart = (numPartitions == 1) ? 17 : 29; + ISEDecodedResult colorEndpointData[18]; + + { + BitAccessStream dataStream(blockData, colorEndpointDataStart, numBitsAvailable, true); + decodeISE(&colorEndpointData[0], numColorEndpointValues, dataStream, iseParams); + } + + { + deUint32 unquantizedEndpoints[18]; + unquantizeColorEndpoints(&unquantizedEndpoints[0], &colorEndpointData[0], numColorEndpointValues, iseParams); + decodeColorEndpoints(dst, &unquantizedEndpoints[0], &endpointModes[0], numPartitions); + } +} + +void unquantizeWeights (deUint32 dst[64], const ISEDecodedResult* weightGrid, const ASTCBlockMode& blockMode) +{ + const int numWeights = computeNumWeights(blockMode); + const ISEParams& iseParams = blockMode.weightISEParams; + + if ((iseParams.mode == ISEMODE_TRIT) || (iseParams.mode == ISEMODE_QUINT)) + { + const int rangeCase = iseParams.numBits*2 + (iseParams.mode == ISEMODE_QUINT ? 1 : 0); + + if ((rangeCase == 0) || (rangeCase == 1)) + { + static const deUint32 map0[3] = { 0, 32, 63 }; + static const deUint32 map1[5] = { 0, 16, 32, 47, 63 }; + const deUint32* const map = (rangeCase == 0) ? &map0[0] : &map1[0]; + + for (int i = 0; i < numWeights; i++) + { + DE_ASSERT(weightGrid[i].v < (rangeCase == 0 ? 3u : 5u)); + dst[i] = map[weightGrid[i].v]; + } + } + else + { + DE_ASSERT(rangeCase <= 6); + static const deUint32 Ca[5] = { 50, 28, 23, 13, 11 }; + const deUint32 C = Ca[rangeCase-2]; + + for (int weightNdx = 0; weightNdx < numWeights; weightNdx++) + { + const deUint32 a = getBit(weightGrid[weightNdx].m, 0); + const deUint32 b = getBit(weightGrid[weightNdx].m, 1); + const deUint32 c = getBit(weightGrid[weightNdx].m, 2); + + const deUint32 A = (a == 0) ? 0 : (1<<7)-1; + const deUint32 B = (rangeCase == 2) ? 0 + : (rangeCase == 3) ? 0 + : (rangeCase == 4) ? (b << 6) | (b << 2) | (b << 0) + : (rangeCase == 5) ? 
(b << 6) | (b << 1) + : (rangeCase == 6) ? (c << 6) | (b << 5) | (c << 1) | (b << 0) + : (deUint32)-1; + + dst[weightNdx] = (((weightGrid[weightNdx].tq*C + B) ^ A) >> 2) | (A & 0x20); + } + } + } + else + { + DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT); + for (int weightNdx = 0; weightNdx < numWeights; weightNdx++) + dst[weightNdx] = bitReplicationScale(weightGrid[weightNdx].v, iseParams.numBits, 6); + } + + for (int weightNdx = 0; weightNdx < numWeights; weightNdx++) + dst[weightNdx] += dst[weightNdx] > 32 ? 1 : 0; + + // Initialize nonexistent weights to poison values + for (int weightNdx = numWeights; weightNdx < 64; weightNdx++) + dst[weightNdx] = ~0u; +} + +void interpolateWeights (TexelWeightPair* dst, const deUint32 (&unquantizedWeights) [64], int blockWidth, int blockHeight, const ASTCBlockMode& blockMode) +{ + const int numWeightsPerTexel = blockMode.isDualPlane ? 2 : 1; + const deUint32 scaleX = (1024 + blockWidth/2) / (blockWidth-1); + const deUint32 scaleY = (1024 + blockHeight/2) / (blockHeight-1); + DE_ASSERT(blockMode.weightGridWidth*blockMode.weightGridHeight*numWeightsPerTexel <= (int)DE_LENGTH_OF_ARRAY(unquantizedWeights)); + + for (int texelY = 0; texelY < blockHeight; texelY++) + { + for (int texelX = 0; texelX < blockWidth; texelX++) + { + const deUint32 gX = (scaleX*texelX*(blockMode.weightGridWidth-1) + 32) >> 6; + const deUint32 gY = (scaleY*texelY*(blockMode.weightGridHeight-1) + 32) >> 6; + const deUint32 jX = gX >> 4; + const deUint32 jY = gY >> 4; + const deUint32 fX = gX & 0xf; + const deUint32 fY = gY & 0xf; + const deUint32 w11 = (fX*fY + 8) >> 4; + const deUint32 w10 = fY - w11; + const deUint32 w01 = fX - w11; + const deUint32 w00 = 16 - fX - fY + w11; + const deUint32 i00 = jY*blockMode.weightGridWidth + jX; + const deUint32 i01 = i00 + 1; + const deUint32 i10 = i00 + blockMode.weightGridWidth; + const deUint32 i11 = i00 + blockMode.weightGridWidth + 1; + + // These addresses can be out of bounds, but respective weights will be 
0 then. + DE_ASSERT(deInBounds32(i00, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w00 == 0); + DE_ASSERT(deInBounds32(i01, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w01 == 0); + DE_ASSERT(deInBounds32(i10, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w10 == 0); + DE_ASSERT(deInBounds32(i11, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w11 == 0); + + for (int texelWeightNdx = 0; texelWeightNdx < numWeightsPerTexel; texelWeightNdx++) + { + // & 0x3f clamps address to bounds of unquantizedWeights + const deUint32 p00 = unquantizedWeights[(i00 * numWeightsPerTexel + texelWeightNdx) & 0x3f]; + const deUint32 p01 = unquantizedWeights[(i01 * numWeightsPerTexel + texelWeightNdx) & 0x3f]; + const deUint32 p10 = unquantizedWeights[(i10 * numWeightsPerTexel + texelWeightNdx) & 0x3f]; + const deUint32 p11 = unquantizedWeights[(i11 * numWeightsPerTexel + texelWeightNdx) & 0x3f]; + + dst[texelY*blockWidth + texelX].w[texelWeightNdx] = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; + } + } + } +} + +void computeTexelWeights (TexelWeightPair* dst, const Block128& blockData, int blockWidth, int blockHeight, const ASTCBlockMode& blockMode) +{ + ISEDecodedResult weightGrid[64]; + + { + BitAccessStream dataStream(blockData, 127, computeNumRequiredBits(blockMode.weightISEParams, computeNumWeights(blockMode)), false); + decodeISE(&weightGrid[0], computeNumWeights(blockMode), dataStream, blockMode.weightISEParams); + } + + { + deUint32 unquantizedWeights[64]; + unquantizeWeights(&unquantizedWeights[0], &weightGrid[0], blockMode); + + interpolateWeights(dst, unquantizedWeights, blockWidth, blockHeight, blockMode); + } +} + +inline deUint32 hash52 (deUint32 v) +{ + deUint32 p = v; + p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4; + p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3; + p ^= p << 6; p ^= p >> 17; + return p; +} + +int computeTexelPartition (deUint32 seedIn, deUint32 xIn, deUint32 yIn, deUint32 zIn, int 
numPartitions, bool smallBlock) +{ + DE_ASSERT(zIn == 0); + + const deUint32 x = smallBlock ? xIn << 1 : xIn; + const deUint32 y = smallBlock ? yIn << 1 : yIn; + const deUint32 z = smallBlock ? zIn << 1 : zIn; + const deUint32 seed = seedIn + 1024*(numPartitions-1); + const deUint32 rnum = hash52(seed); + + deUint8 seed1 = (deUint8)( rnum & 0xf); + deUint8 seed2 = (deUint8)((rnum >> 4) & 0xf); + deUint8 seed3 = (deUint8)((rnum >> 8) & 0xf); + deUint8 seed4 = (deUint8)((rnum >> 12) & 0xf); + deUint8 seed5 = (deUint8)((rnum >> 16) & 0xf); + deUint8 seed6 = (deUint8)((rnum >> 20) & 0xf); + deUint8 seed7 = (deUint8)((rnum >> 24) & 0xf); + deUint8 seed8 = (deUint8)((rnum >> 28) & 0xf); + deUint8 seed9 = (deUint8)((rnum >> 18) & 0xf); + deUint8 seed10 = (deUint8)((rnum >> 22) & 0xf); + deUint8 seed11 = (deUint8)((rnum >> 26) & 0xf); + deUint8 seed12 = (deUint8)(((rnum >> 30) | (rnum << 2)) & 0xf); + + seed1 = (deUint8)(seed1 * seed1 ); + seed2 = (deUint8)(seed2 * seed2 ); + seed3 = (deUint8)(seed3 * seed3 ); + seed4 = (deUint8)(seed4 * seed4 ); + seed5 = (deUint8)(seed5 * seed5 ); + seed6 = (deUint8)(seed6 * seed6 ); + seed7 = (deUint8)(seed7 * seed7 ); + seed8 = (deUint8)(seed8 * seed8 ); + seed9 = (deUint8)(seed9 * seed9 ); + seed10 = (deUint8)(seed10 * seed10); + seed11 = (deUint8)(seed11 * seed11); + seed12 = (deUint8)(seed12 * seed12); + + const int shA = (seed & 2) != 0 ? 4 : 5; + const int shB = numPartitions == 3 ? 6 : 5; + const int sh1 = (seed & 1) != 0 ? shA : shB; + const int sh2 = (seed & 1) != 0 ? shB : shA; + const int sh3 = (seed & 0x10) != 0 ? 
sh1 : sh2; + + seed1 = (deUint8)(seed1 >> sh1); + seed2 = (deUint8)(seed2 >> sh2); + seed3 = (deUint8)(seed3 >> sh1); + seed4 = (deUint8)(seed4 >> sh2); + seed5 = (deUint8)(seed5 >> sh1); + seed6 = (deUint8)(seed6 >> sh2); + seed7 = (deUint8)(seed7 >> sh1); + seed8 = (deUint8)(seed8 >> sh2); + seed9 = (deUint8)(seed9 >> sh3); + seed10 = (deUint8)(seed10 >> sh3); + seed11 = (deUint8)(seed11 >> sh3); + seed12 = (deUint8)(seed12 >> sh3); + + const int a = 0x3f & (seed1*x + seed2*y + seed11*z + (rnum >> 14)); + const int b = 0x3f & (seed3*x + seed4*y + seed12*z + (rnum >> 10)); + const int c = (numPartitions >= 3) ? 0x3f & (seed5*x + seed6*y + seed9*z + (rnum >> 6)) : 0; + const int d = (numPartitions >= 4) ? 0x3f & (seed7*x + seed8*y + seed10*z + (rnum >> 2)) : 0; + + return (a >= b && a >= c && a >= d) ? 0 + : (b >= c && b >= d) ? 1 + : (c >= d) ? 2 + : 3; +} + +DecompressResult setTexelColors (void* dst, ColorEndpointPair* colorEndpoints, TexelWeightPair* texelWeights, int ccs, deUint32 partitionIndexSeed, + int numPartitions, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode, const deUint32* colorEndpointModes) +{ + const bool smallBlock = blockWidth*blockHeight < 31; + DecompressResult result = DECOMPRESS_RESULT_VALID_BLOCK; + bool isHDREndpoint[4]; + + for (int i = 0; i < numPartitions; i++) + { + isHDREndpoint[i] = isColorEndpointModeHDR(colorEndpointModes[i]); + } + + for (int texelY = 0; texelY < blockHeight; texelY++) + { + for (int texelX = 0; texelX < blockWidth; texelX++) + { + const int texelNdx = texelY * blockWidth + texelX; + const int colorEndpointNdx = (numPartitions == 1) ? 
0 : computeTexelPartition(partitionIndexSeed, texelX, texelY, 0, numPartitions, smallBlock); + + DE_ASSERT(colorEndpointNdx < numPartitions); + const UVec4& e0 = colorEndpoints[colorEndpointNdx].e0; + const UVec4& e1 = colorEndpoints[colorEndpointNdx].e1; + const TexelWeightPair& weight = texelWeights[texelNdx]; + + if (isLDRMode && isHDREndpoint[colorEndpointNdx]) + { + if (isSRGB) + { + ((deUint8*)dst)[texelNdx * 4 + 0] = 0xff; + ((deUint8*)dst)[texelNdx * 4 + 1] = 0; + ((deUint8*)dst)[texelNdx * 4 + 2] = 0xff; + ((deUint8*)dst)[texelNdx * 4 + 3] = 0xff; + } + else + { + ((float*)dst)[texelNdx * 4 + 0] = 1.0f; + ((float*)dst)[texelNdx * 4 + 1] = 0; + ((float*)dst)[texelNdx * 4 + 2] = 1.0f; + ((float*)dst)[texelNdx * 4 + 3] = 1.0f; + } + result = DECOMPRESS_RESULT_ERROR; + } + else + { + for (int channelNdx = 0; channelNdx < 4; channelNdx++) + { + if (!isHDREndpoint[colorEndpointNdx] || (channelNdx == 3 && colorEndpointModes[colorEndpointNdx] == 14)) // \note Alpha for mode 14 is treated the same as LDR. + { + const deUint32 c0 = (e0[channelNdx] << 8) | (isSRGB ? 0x80 : e0[channelNdx]); + const deUint32 c1 = (e1[channelNdx] << 8) | (isSRGB ? 0x80 : e1[channelNdx]); + const deUint32 w = weight.w[ccs == channelNdx ? 1 : 0]; + const deUint32 c = (c0 * (64 - w) + c1 * w + 32) / 64; + + if (isSRGB) + ((deUint8*)dst)[texelNdx * 4 + channelNdx] = (deUint8)((c & 0xff00) >> 8); + else + ((float*)dst)[texelNdx * 4 + channelNdx] = (c == 65535) ? 1.0f : (float)c / 65536.0f; + } + else + { + DE_ASSERT(!isSRGB); + //DE_STATIC_ASSERT((basisu_astc::meta::TypesSame::Value)); + + const deUint32 c0 = e0[channelNdx] << 4; + const deUint32 c1 = e1[channelNdx] << 4; + const deUint32 w = weight.w[(ccs == channelNdx) ? 1 : 0]; + const deUint32 c = (c0 * (64 - w) + c1 * w + 32) / 64; + const deUint32 e = getBits(c, 11, 15); + const deUint32 m = getBits(c, 0, 10); + const deUint32 mt = (m < 512) ? (3 * m) + : (m >= 1536) ? 
(5 * m - 2048) + : (4 * m - 512); + + const deFloat16 cf = (deFloat16)((e << 10) + (mt >> 3)); + + ((float*)dst)[texelNdx * 4 + channelNdx] = deFloat16To32(isFloat16InfOrNan(cf) ? 0x7bff : cf); + } + + } // channelNdx + } + } // texelX + } // texelY + + return result; +} + +DecompressResult decompressBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDR) +{ + DE_ASSERT(isLDR || !isSRGB); + + // Decode block mode. + const ASTCBlockMode blockMode = getASTCBlockMode(blockData.getBits(0, 10)); + + // Check for block mode errors. + if (blockMode.isError) + { + setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB); + return DECOMPRESS_RESULT_ERROR; + } + + // Separate path for void-extent. + if (blockMode.isVoidExtent) + return decodeVoidExtentBlock(dst, blockData, blockWidth, blockHeight, isSRGB, isLDR); + + // Compute weight grid values. + const int numWeights = computeNumWeights(blockMode); + const int numWeightDataBits = computeNumRequiredBits(blockMode.weightISEParams, numWeights); + const int numPartitions = (int)blockData.getBits(11, 12) + 1; + + // Check for errors in weight grid, partition and dual-plane parameters. + if ((numWeights > 64) || + (numWeightDataBits > 96) || + (numWeightDataBits < 24) || + (blockMode.weightGridWidth > blockWidth) || + (blockMode.weightGridHeight > blockHeight) || + ((numPartitions == 4) && blockMode.isDualPlane)) + { + setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB); + return DECOMPRESS_RESULT_ERROR; + } + + // Compute number of bits available for color endpoint data. + const bool isSingleUniqueCem = (numPartitions == 1) || (blockData.getBits(23, 24) == 0); + + const int numConfigDataBits = ((numPartitions == 1) ? 17 : isSingleUniqueCem ? 29 : 25 + 3*numPartitions) + + (blockMode.isDualPlane ? 2 : 0); + + const int numBitsForColorEndpoints = 128 - numWeightDataBits - numConfigDataBits; + + const int extraCemBitsStart = 127 - numWeightDataBits - (isSingleUniqueCem ? 
-1 + : (numPartitions == 4) ? 7 + : (numPartitions == 3) ? 4 + : (numPartitions == 2) ? 1 + : 0); + + // Decode color endpoint modes. + deUint32 colorEndpointModes[4]; + decodeColorEndpointModes(&colorEndpointModes[0], blockData, numPartitions, extraCemBitsStart); + const int numColorEndpointValues = computeNumColorEndpointValues(colorEndpointModes, numPartitions); + + // Check for errors in color endpoint value count. + if ((numColorEndpointValues > 18) || (numBitsForColorEndpoints < (int)deDivRoundUp32(13*numColorEndpointValues, 5))) + { + setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB); + return DECOMPRESS_RESULT_ERROR; + } + + // Compute color endpoints. + ColorEndpointPair colorEndpoints[4]; + computeColorEndpoints(&colorEndpoints[0], blockData, &colorEndpointModes[0], numPartitions, numColorEndpointValues, + computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues), numBitsForColorEndpoints); + + // Compute texel weights. + TexelWeightPair texelWeights[MAX_BLOCK_WIDTH*MAX_BLOCK_HEIGHT]; + computeTexelWeights(&texelWeights[0], blockData, blockWidth, blockHeight, blockMode); + + // Set texel colors. + const int ccs = blockMode.isDualPlane ? (int)blockData.getBits(extraCemBitsStart-2, extraCemBitsStart-1) : -1; + const deUint32 partitionIndexSeed = (numPartitions > 1) ? blockData.getBits(13, 22) : (deUint32)-1; + + return setTexelColors(dst, &colorEndpoints[0], &texelWeights[0], ccs, partitionIndexSeed, numPartitions, blockWidth, blockHeight, isSRGB, isLDR, &colorEndpointModes[0]); +} + +// Returns -1 on error, 0 if LDR, 1 if HDR +int isHDR(const Block128& blockData, int blockWidth, int blockHeight) +{ + // Decode block mode. + const ASTCBlockMode blockMode = getASTCBlockMode(blockData.getBits(0, 10)); + + // Check for block mode errors. + if (blockMode.isError) + return -1; + + // Separate path for void-extent. + if (blockMode.isVoidExtent) + { + const bool isHDRBlock = blockData.isBitSet(9); + return isHDRBlock ? 
1 : 0; + } + + // Compute weight grid values. + const int numWeights = computeNumWeights(blockMode); + const int numWeightDataBits = computeNumRequiredBits(blockMode.weightISEParams, numWeights); + const int numPartitions = (int)blockData.getBits(11, 12) + 1; + + // Check for errors in weight grid, partition and dual-plane parameters. + if ((numWeights > 64) || + (numWeightDataBits > 96) || + (numWeightDataBits < 24) || + (blockMode.weightGridWidth > blockWidth) || + (blockMode.weightGridHeight > blockHeight) || + ((numPartitions == 4) && blockMode.isDualPlane)) + { + return -1; + } + + // Compute number of bits available for color endpoint data. + const bool isSingleUniqueCem = (numPartitions == 1) || (blockData.getBits(23, 24) == 0); + + const int extraCemBitsStart = 127 - numWeightDataBits - (isSingleUniqueCem ? -1 + : (numPartitions == 4) ? 7 + : (numPartitions == 3) ? 4 + : (numPartitions == 2) ? 1 + : 0); + + // Decode color endpoint modes. + deUint32 colorEndpointModes[4]; + decodeColorEndpointModes(&colorEndpointModes[0], blockData, numPartitions, extraCemBitsStart); + + for (int i = 0; i < numPartitions; i++) + { + if (isColorEndpointModeHDR(colorEndpointModes[i])) + return 1; + } + + return 0; +} + +typedef uint16_t half_float; + +half_float float_to_half(float val, bool toward_zero) +{ + union { float f; int32_t i; uint32_t u; } fi = { val }; + const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1; + int s = flt_s, e = 0, m = 0; + + // inf/NaN + if (flt_e == 0xff) + { + e = 31; + if (flt_m != 0) // NaN + m = 1; + } + // not zero or denormal + else if (flt_e != 0) + { + int new_exp = flt_e - 127; + if (new_exp > 15) + e = 31; + else if (new_exp < -14) + { + if (toward_zero) + m = (int)truncf((1 << 24) * fabsf(fi.f)); + else + m = lrintf((1 << 24) * fabsf(fi.f)); + } + else + { + e = new_exp + 15; + if (toward_zero) + m = (int)truncf((float)flt_m * (1.0f / (float)(1 << 13))); + else + m = lrintf((float)flt_m * (1.0f / 
(float)(1 << 13))); + } + } + + assert((0 <= m) && (m <= 1024)); + if (m == 1024) + { + e++; + m = 0; + } + + assert((s >= 0) && (s <= 1)); + assert((e >= 0) && (e <= 31)); + assert((m >= 0) && (m <= 1023)); + + half_float result = (half_float)((s << 15) | (e << 10) | m); + return result; +} + +float half_to_float(half_float hval) +{ + union { float f; uint32_t u; } x = { 0 }; + + uint32_t s = ((uint32_t)hval >> 15) & 1; + uint32_t e = ((uint32_t)hval >> 10) & 0x1F; + uint32_t m = (uint32_t)hval & 0x3FF; + + if (!e) + { + if (!m) + { + // +- 0 + x.u = s << 31; + return x.f; + } + else + { + // denormalized + while (!(m & 0x00000400)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x00000400; + } + } + else if (e == 31) + { + if (m == 0) + { + // +/- INF + x.u = (s << 31) | 0x7f800000; + return x.f; + } + else + { + // +/- NaN + x.u = (s << 31) | 0x7f800000 | (m << 13); + return x.f; + } + } + + e = e + (127 - 15); + m = m << 13; + + assert(s <= 1); + assert(m <= 0x7FFFFF); + assert(e <= 255); + + x.u = m | (e << 23) | (s << 31); + return x.f; +} + +} // anonymous + +// See https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.inline.html#_hdr_endpoint_decoding +static void convert_to_half_prec(uint32_t n, float* pVals) +{ +#if 0 + const int prev_dir = fesetround(FE_TOWARDZERO); + + for (uint32_t i = 0; i < n; i++) + pVals[i] = half_to_float(float_to_half(pVals[i])); + + fesetround(prev_dir); + + for (uint32_t i = 0; i < n; i++) + { + assert(pVals[i] == half_to_float(float_to_half(pVals[i], true))); + } +#else + // This ensures the values are rounded towards zero as half floats. + for (uint32_t i = 0; i < n; i++) + { + pVals[i] = half_to_float(float_to_half(pVals[i], true)); + } +#endif +} + +bool decompress_ldr(uint8_t *pDst, const uint8_t * data, bool isSRGB, int blockWidth, int blockHeight) +{ + float linear[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4]; + + const Block128 blockData(data); + + // isSRGB is true, this writes uint8_t's. Otherwise it writes floats. 
+ if (decompressBlock(isSRGB ? (void*)pDst : (void*)&linear[0], blockData, blockWidth, blockHeight, isSRGB, true) != DECOMPRESS_RESULT_VALID_BLOCK) + { + return false; + } + + if (!isSRGB) + { + // Convert the floats to 8-bits with rounding. + int pix = 0; + for (int i = 0; i < blockHeight; i++) + { + for (int j = 0; j < blockWidth; j++, pix++) + { + pDst[4 * pix + 0] = (uint8_t)(basisu_astc::clamp((int)(linear[pix * 4 + 0] * 65536.0f + .5f), 0, 65535) >> 8); + pDst[4 * pix + 1] = (uint8_t)(basisu_astc::clamp((int)(linear[pix * 4 + 1] * 65536.0f + .5f), 0, 65535) >> 8); + pDst[4 * pix + 2] = (uint8_t)(basisu_astc::clamp((int)(linear[pix * 4 + 2] * 65536.0f + .5f), 0, 65535) >> 8); + pDst[4 * pix + 3] = (uint8_t)(basisu_astc::clamp((int)(linear[pix * 4 + 3] * 65536.0f + .5f), 0, 65535) >> 8); + } + } + } + + return true; +} + +bool decompress_hdr(float* pDstRGBA, const uint8_t* data, int blockWidth, int blockHeight) +{ + const Block128 blockData(data); + + if (decompressBlock(pDstRGBA, blockData, blockWidth, blockHeight, false, false) != DECOMPRESS_RESULT_VALID_BLOCK) + { + return false; + } + + convert_to_half_prec(blockWidth * blockHeight * 4, pDstRGBA); + + return true; +} + +bool is_hdr(const uint8_t* data, int blockWidth, int blockHeight, bool &is_hdr) +{ + is_hdr = false; + + const Block128 blockData(data); + + int status = isHDR(blockData, blockWidth, blockHeight); + if (status < 0) + { + return false; + } + + is_hdr = (status == 1); + + return true; +} + +} // astc + +} // basisu_astc + +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif diff --git a/thirdparty/basisu/encoder/3rdparty/android_astc_decomp.h b/thirdparty/basisu/encoder/3rdparty/android_astc_decomp.h new file mode 100644 index 000000000..ad13093a6 --- /dev/null +++ b/thirdparty/basisu/encoder/3rdparty/android_astc_decomp.h @@ -0,0 +1,45 @@ +// File: android_astc_decomp.h +#ifndef _TCUASTCUTIL_HPP +#define _TCUASTCUTIL_HPP 
+/*------------------------------------------------------------------------- + * drawElements Quality Program Tester Core + * ---------------------------------------- + * + * Copyright 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + *//*! + * \file + * \brief ASTC Utilities. + *//*--------------------------------------------------------------------*/ + +#include +#include + +namespace basisu_astc +{ +namespace astc +{ + +// Unpacks a single ASTC block to pDst +// If isSRGB is true, the spec requires the decoder to scale the LDR 8-bit endpoints to 16-bit before interpolation slightly differently, +// which will lead to different outputs. So be sure to set it correctly (ideally it should match whatever the encoder did). 
+bool decompress_ldr(uint8_t* pDst, const uint8_t* data, bool isSRGB, int blockWidth, int blockHeight); +bool decompress_hdr(float* pDstRGBA, const uint8_t* data, int blockWidth, int blockHeight); +bool is_hdr(const uint8_t* data, int blockWidth, int blockHeight, bool& is_hdr); + +} // astc +} // basisu + +#endif diff --git a/thirdparty/basisu/encoder/3rdparty/qoi.h b/thirdparty/basisu/encoder/3rdparty/qoi.h new file mode 100644 index 000000000..be8a2d53c --- /dev/null +++ b/thirdparty/basisu/encoder/3rdparty/qoi.h @@ -0,0 +1,659 @@ +/* + +Copyright (c) 2021, Dominic Szablewski - https://phoboslab.org +SPDX-License-Identifier: MIT + + +QOI - The "Quite OK Image" format for fast, lossless image compression + +-- About + +QOI encodes and decodes images in a lossless format. Compared to stb_image and +stb_image_write QOI offers 20x-50x faster encoding, 3x-4x faster decoding and +20% better compression. + + +-- Synopsis + +// Define `QOI_IMPLEMENTATION` in *one* C/C++ file before including this +// library to create the implementation. + +#define QOI_IMPLEMENTATION +#include "qoi.h" + +// Encode and store an RGBA buffer to the file system. The qoi_desc describes +// the input pixel data. +qoi_write("image_new.qoi", rgba_pixels, &(qoi_desc){ + .width = 1920, + .height = 1080, + .channels = 4, + .colorspace = QOI_SRGB +}); + +// Load and decode a QOI image from the file system into a 32bbp RGBA buffer. +// The qoi_desc struct will be filled with the width, height, number of channels +// and colorspace read from the file header. +qoi_desc desc; +void *rgba_pixels = qoi_read("image.qoi", &desc, 4); + + + +-- Documentation + +This library provides the following functions; +- qoi_read -- read and decode a QOI file +- qoi_decode -- decode the raw bytes of a QOI image from memory +- qoi_write -- encode and write a QOI file +- qoi_encode -- encode an rgba buffer into a QOI image in memory + +See the function declaration below for the signature and more information. 
+ +If you don't want/need the qoi_read and qoi_write functions, you can define +QOI_NO_STDIO before including this library. + +This library uses malloc() and free(). To supply your own malloc implementation +you can define QOI_MALLOC and QOI_FREE before including this library. + +This library uses memset() to zero-initialize the index. To supply your own +implementation you can define QOI_ZEROARR before including this library. + + +-- Data Format + +A QOI file has a 14 byte header, followed by any number of data "chunks" and an +8-byte end marker. + +struct qoi_header_t { + char magic[4]; // magic bytes "qoif" + uint32_t width; // image width in pixels (BE) + uint32_t height; // image height in pixels (BE) + uint8_t channels; // 3 = RGB, 4 = RGBA + uint8_t colorspace; // 0 = sRGB with linear alpha, 1 = all channels linear +}; + +Images are encoded row by row, left to right, top to bottom. The decoder and +encoder start with {r: 0, g: 0, b: 0, a: 255} as the previous pixel value. An +image is complete when all pixels specified by width * height have been covered. + +Pixels are encoded as + - a run of the previous pixel + - an index into an array of previously seen pixels + - a difference to the previous pixel value in r,g,b + - full r,g,b or r,g,b,a values + +The color channels are assumed to not be premultiplied with the alpha channel +("un-premultiplied alpha"). + +A running array[64] (zero-initialized) of previously seen pixel values is +maintained by the encoder and decoder. Each pixel that is seen by the encoder +and decoder is put into this array at the position formed by a hash function of +the color value. In the encoder, if the pixel value at the index matches the +current pixel, this index position is written to the stream as QOI_OP_INDEX. +The hash function for the index is: + + index_position = (r * 3 + g * 5 + b * 7 + a * 11) % 64 + +Each chunk starts with a 2- or 8-bit tag, followed by a number of data bits. 
The +bit length of chunks is divisible by 8 - i.e. all chunks are byte aligned. All +values encoded in these data bits have the most significant bit on the left. + +The 8-bit tags have precedence over the 2-bit tags. A decoder must check for the +presence of an 8-bit tag first. + +The byte stream's end is marked with 7 0x00 bytes followed a single 0x01 byte. + + +The possible chunks are: + + +.- QOI_OP_INDEX ----------. +| Byte[0] | +| 7 6 5 4 3 2 1 0 | +|-------+-----------------| +| 0 0 | index | +`-------------------------` +2-bit tag b00 +6-bit index into the color index array: 0..63 + +A valid encoder must not issue 2 or more consecutive QOI_OP_INDEX chunks to the +same index. QOI_OP_RUN should be used instead. + + +.- QOI_OP_DIFF -----------. +| Byte[0] | +| 7 6 5 4 3 2 1 0 | +|-------+-----+-----+-----| +| 0 1 | dr | dg | db | +`-------------------------` +2-bit tag b01 +2-bit red channel difference from the previous pixel between -2..1 +2-bit green channel difference from the previous pixel between -2..1 +2-bit blue channel difference from the previous pixel between -2..1 + +The difference to the current channel values are using a wraparound operation, +so "1 - 2" will result in 255, while "255 + 1" will result in 0. + +Values are stored as unsigned integers with a bias of 2. E.g. -2 is stored as +0 (b00). 1 is stored as 3 (b11). + +The alpha value remains unchanged from the previous pixel. + + +.- QOI_OP_LUMA -------------------------------------. 
+| Byte[0] | Byte[1] | +| 7 6 5 4 3 2 1 0 | 7 6 5 4 3 2 1 0 | +|-------+-----------------+-------------+-----------| +| 1 0 | green diff | dr - dg | db - dg | +`---------------------------------------------------` +2-bit tag b10 +6-bit green channel difference from the previous pixel -32..31 +4-bit red channel difference minus green channel difference -8..7 +4-bit blue channel difference minus green channel difference -8..7 + +The green channel is used to indicate the general direction of change and is +encoded in 6 bits. The red and blue channels (dr and db) base their diffs off +of the green channel difference and are encoded in 4 bits. I.e.: + dr_dg = (cur_px.r - prev_px.r) - (cur_px.g - prev_px.g) + db_dg = (cur_px.b - prev_px.b) - (cur_px.g - prev_px.g) + +The difference to the current channel values are using a wraparound operation, +so "10 - 13" will result in 253, while "250 + 7" will result in 1. + +Values are stored as unsigned integers with a bias of 32 for the green channel +and a bias of 8 for the red and blue channel. + +The alpha value remains unchanged from the previous pixel. + + +.- QOI_OP_RUN ------------. +| Byte[0] | +| 7 6 5 4 3 2 1 0 | +|-------+-----------------| +| 1 1 | run | +`-------------------------` +2-bit tag b11 +6-bit run-length repeating the previous pixel: 1..62 + +The run-length is stored with a bias of -1. Note that the run-lengths 63 and 64 +(b111110 and b111111) are illegal as they are occupied by the QOI_OP_RGB and +QOI_OP_RGBA tags. + + +.- QOI_OP_RGB ------------------------------------------. +| Byte[0] | Byte[1] | Byte[2] | Byte[3] | +| 7 6 5 4 3 2 1 0 | 7 .. 0 | 7 .. 0 | 7 .. 0 | +|-------------------------+---------+---------+---------| +| 1 1 1 1 1 1 1 0 | red | green | blue | +`-------------------------------------------------------` +8-bit tag b11111110 +8-bit red channel value +8-bit green channel value +8-bit blue channel value + +The alpha value remains unchanged from the previous pixel. 
+ + +.- QOI_OP_RGBA ---------------------------------------------------. +| Byte[0] | Byte[1] | Byte[2] | Byte[3] | Byte[4] | +| 7 6 5 4 3 2 1 0 | 7 .. 0 | 7 .. 0 | 7 .. 0 | 7 .. 0 | +|-------------------------+---------+---------+---------+---------| +| 1 1 1 1 1 1 1 1 | red | green | blue | alpha | +`-----------------------------------------------------------------` +8-bit tag b11111111 +8-bit red channel value +8-bit green channel value +8-bit blue channel value +8-bit alpha channel value + +*/ + + +/* ----------------------------------------------------------------------------- +Header - Public functions */ + +#ifndef QOI_H +#define QOI_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* A pointer to a qoi_desc struct has to be supplied to all of qoi's functions. +It describes either the input format (for qoi_write and qoi_encode), or is +filled with the description read from the file header (for qoi_read and +qoi_decode). + +The colorspace in this qoi_desc is an enum where + 0 = sRGB, i.e. gamma scaled RGB channels and a linear alpha channel + 1 = all channels are linear +You may use the constants QOI_SRGB or QOI_LINEAR. The colorspace is purely +informative. It will be saved to the file header, but does not affect +how chunks are en-/decoded. */ + +#define QOI_SRGB 0 +#define QOI_LINEAR 1 + +typedef struct { + unsigned int width; + unsigned int height; + unsigned char channels; + unsigned char colorspace; +} qoi_desc; + +#ifndef QOI_NO_STDIO + +/* Encode raw RGB or RGBA pixels into a QOI image and write it to the file +system. The qoi_desc struct must be filled with the image width, height, +number of channels (3 = RGB, 4 = RGBA) and the colorspace. + +The function returns 0 on failure (invalid parameters, or fopen or malloc +failed) or the number of bytes written on success. */ + +int qoi_write(const char *filename, const void *data, const qoi_desc *desc); + + +/* Read and decode a QOI image from the file system. 
If channels is 0, the +number of channels from the file header is used. If channels is 3 or 4 the +output format will be forced into this number of channels. + +The function either returns NULL on failure (invalid data, or malloc or fopen +failed) or a pointer to the decoded pixels. On success, the qoi_desc struct +will be filled with the description from the file header. + +The returned pixel data should be free()d after use. */ + +void *qoi_read(const char *filename, qoi_desc *desc, int channels); + +#endif /* QOI_NO_STDIO */ + + +/* Encode raw RGB or RGBA pixels into a QOI image in memory. + +The function either returns NULL on failure (invalid parameters or malloc +failed) or a pointer to the encoded data on success. On success the out_len +is set to the size in bytes of the encoded data. + +The returned qoi data should be free()d after use. */ + +void *qoi_encode(const void *data, const qoi_desc *desc, int *out_len); + + +/* Decode a QOI image from memory. + +The function either returns NULL on failure (invalid parameters or malloc +failed) or a pointer to the decoded pixels. On success, the qoi_desc struct +is filled with the description from the file header. + +The returned pixel data should be free()d after use. 
*/ + +void *qoi_decode(const void *data, int size, qoi_desc *desc, int channels); + + +#ifdef __cplusplus +} +#endif +#endif /* QOI_H */ + + +/* ----------------------------------------------------------------------------- +Implementation */ + +#ifdef QOI_IMPLEMENTATION +#include +#include + +#ifndef QOI_MALLOC + #define QOI_MALLOC(sz) malloc(sz) + #define QOI_FREE(p) free(p) +#endif +#ifndef QOI_ZEROARR + #define QOI_ZEROARR(a) memset((a),0,sizeof(a)) +#endif + +#define QOI_OP_INDEX 0x00 /* 00xxxxxx */ +#define QOI_OP_DIFF 0x40 /* 01xxxxxx */ +#define QOI_OP_LUMA 0x80 /* 10xxxxxx */ +#define QOI_OP_RUN 0xc0 /* 11xxxxxx */ +#define QOI_OP_RGB 0xfe /* 11111110 */ +#define QOI_OP_RGBA 0xff /* 11111111 */ + +#define QOI_MASK_2 0xc0 /* 11000000 */ + +#define QOI_COLOR_HASH(C) (C.rgba.r*3 + C.rgba.g*5 + C.rgba.b*7 + C.rgba.a*11) +#define QOI_MAGIC \ + (((unsigned int)'q') << 24 | ((unsigned int)'o') << 16 | \ + ((unsigned int)'i') << 8 | ((unsigned int)'f')) +#define QOI_HEADER_SIZE 14 + +/* 2GB is the max file size that this implementation can safely handle. We guard +against anything larger than that, assuming the worst case with 5 bytes per +pixel, rounded down to a nice clean value. 400 million pixels ought to be +enough for anybody. 
*/ +#define QOI_PIXELS_MAX ((unsigned int)400000000) + +typedef union { + struct { unsigned char r, g, b, a; } rgba; + unsigned int v; +} qoi_rgba_t; + +static const unsigned char qoi_padding[8] = {0,0,0,0,0,0,0,1}; + +static void qoi_write_32(unsigned char *bytes, int *p, unsigned int v) { + bytes[(*p)++] = (uint8_t)((0xff000000 & v) >> 24); + bytes[(*p)++] = (uint8_t)((0x00ff0000 & v) >> 16); + bytes[(*p)++] = (uint8_t)((0x0000ff00 & v) >> 8); + bytes[(*p)++] = (uint8_t)((0x000000ff & v)); +} + +static unsigned int qoi_read_32(const unsigned char *bytes, int *p) { + unsigned int a = bytes[(*p)++]; + unsigned int b = bytes[(*p)++]; + unsigned int c = bytes[(*p)++]; + unsigned int d = bytes[(*p)++]; + return a << 24 | b << 16 | c << 8 | d; +} + +void *qoi_encode(const void *data, const qoi_desc *desc, int *out_len) { + int i, max_size, p, run; + int px_len, px_end, px_pos, channels; + unsigned char *bytes; + const unsigned char *pixels; + qoi_rgba_t index[64]; + qoi_rgba_t px, px_prev; + + if ( + data == NULL || out_len == NULL || desc == NULL || + desc->width == 0 || desc->height == 0 || + desc->channels < 3 || desc->channels > 4 || + desc->colorspace > 1 || + desc->height >= QOI_PIXELS_MAX / desc->width + ) { + return NULL; + } + + max_size = + desc->width * desc->height * (desc->channels + 1) + + QOI_HEADER_SIZE + sizeof(qoi_padding); + + p = 0; + bytes = (unsigned char *) QOI_MALLOC(max_size); + if (!bytes) { + return NULL; + } + + qoi_write_32(bytes, &p, QOI_MAGIC); + qoi_write_32(bytes, &p, desc->width); + qoi_write_32(bytes, &p, desc->height); + bytes[p++] = desc->channels; + bytes[p++] = desc->colorspace; + + + pixels = (const unsigned char *)data; + + QOI_ZEROARR(index); + + run = 0; + px_prev.rgba.r = 0; + px_prev.rgba.g = 0; + px_prev.rgba.b = 0; + px_prev.rgba.a = 255; + px = px_prev; + + px_len = desc->width * desc->height * desc->channels; + px_end = px_len - desc->channels; + channels = desc->channels; + + for (px_pos = 0; px_pos < px_len; px_pos += 
channels) { + px.rgba.r = pixels[px_pos + 0]; + px.rgba.g = pixels[px_pos + 1]; + px.rgba.b = pixels[px_pos + 2]; + + if (channels == 4) { + px.rgba.a = pixels[px_pos + 3]; + } + + if (px.v == px_prev.v) { + run++; + if (run == 62 || px_pos == px_end) { + bytes[p++] = (uint8_t)(QOI_OP_RUN | (run - 1)); + run = 0; + } + } + else { + int index_pos; + + if (run > 0) { + bytes[p++] = (uint8_t)(QOI_OP_RUN | (run - 1)); + run = 0; + } + + index_pos = QOI_COLOR_HASH(px) % 64; + + if (index[index_pos].v == px.v) { + bytes[p++] = (uint8_t)(QOI_OP_INDEX | index_pos); + } + else { + index[index_pos] = px; + + if (px.rgba.a == px_prev.rgba.a) { + signed char vr = px.rgba.r - px_prev.rgba.r; + signed char vg = px.rgba.g - px_prev.rgba.g; + signed char vb = px.rgba.b - px_prev.rgba.b; + + signed char vg_r = vr - vg; + signed char vg_b = vb - vg; + + if ( + vr > -3 && vr < 2 && + vg > -3 && vg < 2 && + vb > -3 && vb < 2 + ) { + bytes[p++] = QOI_OP_DIFF | (vr + 2) << 4 | (vg + 2) << 2 | (vb + 2); + } + else if ( + vg_r > -9 && vg_r < 8 && + vg > -33 && vg < 32 && + vg_b > -9 && vg_b < 8 + ) { + bytes[p++] = QOI_OP_LUMA | (vg + 32); + bytes[p++] = (vg_r + 8) << 4 | (vg_b + 8); + } + else { + bytes[p++] = QOI_OP_RGB; + bytes[p++] = px.rgba.r; + bytes[p++] = px.rgba.g; + bytes[p++] = px.rgba.b; + } + } + else { + bytes[p++] = QOI_OP_RGBA; + bytes[p++] = px.rgba.r; + bytes[p++] = px.rgba.g; + bytes[p++] = px.rgba.b; + bytes[p++] = px.rgba.a; + } + } + } + px_prev = px; + } + + for (i = 0; i < (int)sizeof(qoi_padding); i++) { + bytes[p++] = qoi_padding[i]; + } + + *out_len = p; + return bytes; +} + +void *qoi_decode(const void *data, int size, qoi_desc *desc, int channels) { + const unsigned char *bytes; + unsigned int header_magic; + unsigned char *pixels; + qoi_rgba_t index[64]; + qoi_rgba_t px; + int px_len, chunks_len, px_pos; + int p = 0, run = 0; + + if ( + data == NULL || desc == NULL || + (channels != 0 && channels != 3 && channels != 4) || + size < QOI_HEADER_SIZE + 
(int)sizeof(qoi_padding) + ) { + return NULL; + } + + bytes = (const unsigned char *)data; + + header_magic = qoi_read_32(bytes, &p); + desc->width = qoi_read_32(bytes, &p); + desc->height = qoi_read_32(bytes, &p); + desc->channels = bytes[p++]; + desc->colorspace = bytes[p++]; + + if ( + desc->width == 0 || desc->height == 0 || + desc->channels < 3 || desc->channels > 4 || + desc->colorspace > 1 || + header_magic != QOI_MAGIC || + desc->height >= QOI_PIXELS_MAX / desc->width + ) { + return NULL; + } + + if (channels == 0) { + channels = desc->channels; + } + + px_len = desc->width * desc->height * channels; + pixels = (unsigned char *) QOI_MALLOC(px_len); + if (!pixels) { + return NULL; + } + + QOI_ZEROARR(index); + px.rgba.r = 0; + px.rgba.g = 0; + px.rgba.b = 0; + px.rgba.a = 255; + + chunks_len = size - (int)sizeof(qoi_padding); + for (px_pos = 0; px_pos < px_len; px_pos += channels) { + if (run > 0) { + run--; + } + else if (p < chunks_len) { + int b1 = bytes[p++]; + + if (b1 == QOI_OP_RGB) { + px.rgba.r = bytes[p++]; + px.rgba.g = bytes[p++]; + px.rgba.b = bytes[p++]; + } + else if (b1 == QOI_OP_RGBA) { + px.rgba.r = bytes[p++]; + px.rgba.g = bytes[p++]; + px.rgba.b = bytes[p++]; + px.rgba.a = bytes[p++]; + } + else if ((b1 & QOI_MASK_2) == QOI_OP_INDEX) { + px = index[b1]; + } + else if ((b1 & QOI_MASK_2) == QOI_OP_DIFF) { + px.rgba.r += ((b1 >> 4) & 0x03) - 2; + px.rgba.g += ((b1 >> 2) & 0x03) - 2; + px.rgba.b += ( b1 & 0x03) - 2; + } + else if ((b1 & QOI_MASK_2) == QOI_OP_LUMA) { + int b2 = bytes[p++]; + int vg = (b1 & 0x3f) - 32; + px.rgba.r += (uint8_t)(vg - 8 + ((b2 >> 4) & 0x0f)); + px.rgba.g += (uint8_t)(vg); + px.rgba.b += (uint8_t)(vg - 8 + (b2 & 0x0f)); + } + else if ((b1 & QOI_MASK_2) == QOI_OP_RUN) { + run = (b1 & 0x3f); + } + + index[QOI_COLOR_HASH(px) % 64] = px; + } + + pixels[px_pos + 0] = px.rgba.r; + pixels[px_pos + 1] = px.rgba.g; + pixels[px_pos + 2] = px.rgba.b; + + if (channels == 4) { + pixels[px_pos + 3] = px.rgba.a; + } + } + + 
return pixels; +} + +#ifndef QOI_NO_STDIO +#include + +int qoi_write(const char *filename, const void *data, const qoi_desc *desc) { +#ifdef _MSC_VER + FILE* f = NULL; + fopen_s(&f, filename, "wb"); +#else + FILE *f = fopen(filename, "wb"); +#endif + int size, err; + void *encoded; + + if (!f) { + return 0; + } + + encoded = qoi_encode(data, desc, &size); + if (!encoded) { + fclose(f); + return 0; + } + + fwrite(encoded, 1, size, f); + fflush(f); + err = ferror(f); + fclose(f); + + QOI_FREE(encoded); + return err ? 0 : size; +} + +void *qoi_read(const char *filename, qoi_desc *desc, int channels) { +#ifdef _MSC_VER + FILE* f = NULL; + fopen_s(&f, filename, "rb"); +#else + FILE *f = fopen(filename, "rb"); +#endif + int size, bytes_read; + void *pixels, *data; + + if (!f) { + return NULL; + } + + fseek(f, 0, SEEK_END); + size = ftell(f); + if (size <= 0 || fseek(f, 0, SEEK_SET) != 0) { + fclose(f); + return NULL; + } + + data = QOI_MALLOC(size); + if (!data) { + fclose(f); + return NULL; + } + + bytes_read = (int)fread(data, 1, size, f); + fclose(f); + pixels = (bytes_read != size) ? 
NULL : qoi_decode(data, bytes_read, desc, channels); + QOI_FREE(data); + return pixels; +} + +#endif /* QOI_NO_STDIO */ +#endif /* QOI_IMPLEMENTATION */ diff --git a/thirdparty/basisu/encoder/3rdparty/tinydds.h b/thirdparty/basisu/encoder/3rdparty/tinydds.h new file mode 100644 index 000000000..d8bc17e60 --- /dev/null +++ b/thirdparty/basisu/encoder/3rdparty/tinydds.h @@ -0,0 +1,2083 @@ +// MIT license see full LICENSE text at end of file +#pragma once +#ifndef TINY_DDS_TINYDDS_H +#define TINY_DDS_TINYDDS_H + +#ifndef TINYDDS_HAVE_UINTXX_T +#include // for uint32_t and int64_t +#endif +#ifndef TINYDDS_HAVE_BOOL +#include // for bool +#endif +#ifndef TINYDDS_HAVE_SIZE_T +#include // for size_t +#endif +#ifndef TINYDDS_HAVE_MEMCPY +#include // for memcpy +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define TINYDDS_MAX_MIPMAPLEVELS 16 + +typedef struct TinyDDS_Context *TinyDDS_ContextHandle; + +typedef void *(*TinyDDS_AllocFunc)(void *user, size_t size); +typedef void (*TinyDDS_FreeFunc)(void *user, void *memory); +typedef size_t (*TinyDDS_ReadFunc)(void *user, void *buffer, size_t byteCount); +typedef bool (*TinyDDS_SeekFunc)(void *user, int64_t offset); +typedef int64_t (*TinyDDS_TellFunc)(void *user); +typedef void (*TinyDDS_ErrorFunc)(void *user, char const *msg); + +typedef struct TinyDDS_Callbacks { + TinyDDS_ErrorFunc errorFn; + TinyDDS_AllocFunc allocFn; + TinyDDS_FreeFunc freeFn; + TinyDDS_ReadFunc readFn; + TinyDDS_SeekFunc seekFn; + TinyDDS_TellFunc tellFn; +} TinyDDS_Callbacks; + +TinyDDS_ContextHandle TinyDDS_CreateContext(TinyDDS_Callbacks const *callbacks, void *user); +void TinyDDS_DestroyContext(TinyDDS_ContextHandle handle); + +// reset lets you reuse the context for another file (saves an alloc/free cycle) +void TinyDDS_Reset(TinyDDS_ContextHandle handle); + +// call this to read the header file should already be at the start of the KTX data +bool TinyDDS_ReadHeader(TinyDDS_ContextHandle handle); + +bool TinyDDS_Is1D(TinyDDS_ContextHandle 
handle); +bool TinyDDS_Is2D(TinyDDS_ContextHandle handle); +bool TinyDDS_Is3D(TinyDDS_ContextHandle handle); +bool TinyDDS_IsCubemap(TinyDDS_ContextHandle handle); +bool TinyDDS_IsArray(TinyDDS_ContextHandle handle); + +bool TinyDDS_Dimensions(TinyDDS_ContextHandle handle, + uint32_t *width, + uint32_t *height, + uint32_t *depth, + uint32_t *slices); +uint32_t TinyDDS_Width(TinyDDS_ContextHandle handle); +uint32_t TinyDDS_Height(TinyDDS_ContextHandle handle); +uint32_t TinyDDS_Depth(TinyDDS_ContextHandle handle); +uint32_t TinyDDS_ArraySlices(TinyDDS_ContextHandle handle); + +bool TinyDDS_NeedsGenerationOfMipmaps(TinyDDS_ContextHandle handle); +bool TinyDDS_NeedsEndianCorrecting(TinyDDS_ContextHandle handle); + +uint32_t TinyDDS_NumberOfMipmaps(TinyDDS_ContextHandle handle); +uint32_t TinyDDS_ImageSize(TinyDDS_ContextHandle handle, uint32_t mipmaplevel); + +// data return by ImageRawData is owned by the context. Don't free it! +void const *TinyDDS_ImageRawData(TinyDDS_ContextHandle handle, uint32_t mipmaplevel); + +typedef void (*TinyDDS_WriteFunc)(void *user, void const *buffer, size_t byteCount); + +typedef struct TinyDDS_WriteCallbacks { + TinyDDS_ErrorFunc error; + TinyDDS_AllocFunc alloc; + TinyDDS_FreeFunc free; + TinyDDS_WriteFunc write; +} TinyDDS_WriteCallbacks; + +#ifndef TINYIMAGEFORMAT_DXGIFORMAT +#define TINYIMAGEFORMAT_DXGIFORMAT + +// early DDS was a direct copy of the Draw Draw surface bits, later on (Dx10) it moved to +// DXGI_FORMAT we use a similar thing to DXGI_FORMAT second form but will synthesis +// the old style when required when saving and vice versa when loading. 
+typedef enum TinyImageFormat_DXGI_FORMAT { + TIF_DXGI_FORMAT_UNKNOWN = 0, + TIF_DXGI_FORMAT_R32G32B32A32_TYPELESS = 1, + TIF_DXGI_FORMAT_R32G32B32A32_FLOAT = 2, + TIF_DXGI_FORMAT_R32G32B32A32_UINT = 3, + TIF_DXGI_FORMAT_R32G32B32A32_SINT = 4, + TIF_DXGI_FORMAT_R32G32B32_TYPELESS = 5, + TIF_DXGI_FORMAT_R32G32B32_FLOAT = 6, + TIF_DXGI_FORMAT_R32G32B32_UINT = 7, + TIF_DXGI_FORMAT_R32G32B32_SINT = 8, + TIF_DXGI_FORMAT_R16G16B16A16_TYPELESS = 9, + TIF_DXGI_FORMAT_R16G16B16A16_FLOAT = 10, + TIF_DXGI_FORMAT_R16G16B16A16_UNORM = 11, + TIF_DXGI_FORMAT_R16G16B16A16_UINT = 12, + TIF_DXGI_FORMAT_R16G16B16A16_SNORM = 13, + TIF_DXGI_FORMAT_R16G16B16A16_SINT = 14, + TIF_DXGI_FORMAT_R32G32_TYPELESS = 15, + TIF_DXGI_FORMAT_R32G32_FLOAT = 16, + TIF_DXGI_FORMAT_R32G32_UINT = 17, + TIF_DXGI_FORMAT_R32G32_SINT = 18, + TIF_DXGI_FORMAT_R32G8X24_TYPELESS = 19, + TIF_DXGI_FORMAT_D32_FLOAT_S8X24_UINT = 20, + TIF_DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS = 21, + TIF_DXGI_FORMAT_X32_TYPELESS_G8X24_UINT = 22, + TIF_DXGI_FORMAT_R10G10B10A2_TYPELESS = 23, + TIF_DXGI_FORMAT_R10G10B10A2_UNORM = 24, + TIF_DXGI_FORMAT_R10G10B10A2_UINT = 25, + TIF_DXGI_FORMAT_R11G11B10_FLOAT = 26, + TIF_DXGI_FORMAT_R8G8B8A8_TYPELESS = 27, + TIF_DXGI_FORMAT_R8G8B8A8_UNORM = 28, + TIF_DXGI_FORMAT_R8G8B8A8_UNORM_SRGB = 29, + TIF_DXGI_FORMAT_R8G8B8A8_UINT = 30, + TIF_DXGI_FORMAT_R8G8B8A8_SNORM = 31, + TIF_DXGI_FORMAT_R8G8B8A8_SINT = 32, + TIF_DXGI_FORMAT_R16G16_TYPELESS = 33, + TIF_DXGI_FORMAT_R16G16_FLOAT = 34, + TIF_DXGI_FORMAT_R16G16_UNORM = 35, + TIF_DXGI_FORMAT_R16G16_UINT = 36, + TIF_DXGI_FORMAT_R16G16_SNORM = 37, + TIF_DXGI_FORMAT_R16G16_SINT = 38, + TIF_DXGI_FORMAT_R32_TYPELESS = 39, + TIF_DXGI_FORMAT_D32_FLOAT = 40, + TIF_DXGI_FORMAT_R32_FLOAT = 41, + TIF_DXGI_FORMAT_R32_UINT = 42, + TIF_DXGI_FORMAT_R32_SINT = 43, + TIF_DXGI_FORMAT_R24G8_TYPELESS = 44, + TIF_DXGI_FORMAT_D24_UNORM_S8_UINT = 45, + TIF_DXGI_FORMAT_R24_UNORM_X8_TYPELESS = 46, + TIF_DXGI_FORMAT_X24_TYPELESS_G8_UINT = 47, + TIF_DXGI_FORMAT_R8G8_TYPELESS = 
48, + TIF_DXGI_FORMAT_R8G8_UNORM = 49, + TIF_DXGI_FORMAT_R8G8_UINT = 50, + TIF_DXGI_FORMAT_R8G8_SNORM = 51, + TIF_DXGI_FORMAT_R8G8_SINT = 52, + TIF_DXGI_FORMAT_R16_TYPELESS = 53, + TIF_DXGI_FORMAT_R16_FLOAT = 54, + TIF_DXGI_FORMAT_D16_UNORM = 55, + TIF_DXGI_FORMAT_R16_UNORM = 56, + TIF_DXGI_FORMAT_R16_UINT = 57, + TIF_DXGI_FORMAT_R16_SNORM = 58, + TIF_DXGI_FORMAT_R16_SINT = 59, + TIF_DXGI_FORMAT_R8_TYPELESS = 60, + TIF_DXGI_FORMAT_R8_UNORM = 61, + TIF_DXGI_FORMAT_R8_UINT = 62, + TIF_DXGI_FORMAT_R8_SNORM = 63, + TIF_DXGI_FORMAT_R8_SINT = 64, + TIF_DXGI_FORMAT_A8_UNORM = 65, + TIF_DXGI_FORMAT_R1_UNORM = 66, + TIF_DXGI_FORMAT_R9G9B9E5_SHAREDEXP = 67, + TIF_DXGI_FORMAT_R8G8_B8G8_UNORM = 68, + TIF_DXGI_FORMAT_G8R8_G8B8_UNORM = 69, + TIF_DXGI_FORMAT_BC1_TYPELESS = 70, + TIF_DXGI_FORMAT_BC1_UNORM = 71, + TIF_DXGI_FORMAT_BC1_UNORM_SRGB = 72, + TIF_DXGI_FORMAT_BC2_TYPELESS = 73, + TIF_DXGI_FORMAT_BC2_UNORM = 74, + TIF_DXGI_FORMAT_BC2_UNORM_SRGB = 75, + TIF_DXGI_FORMAT_BC3_TYPELESS = 76, + TIF_DXGI_FORMAT_BC3_UNORM = 77, + TIF_DXGI_FORMAT_BC3_UNORM_SRGB = 78, + TIF_DXGI_FORMAT_BC4_TYPELESS = 79, + TIF_DXGI_FORMAT_BC4_UNORM = 80, + TIF_DXGI_FORMAT_BC4_SNORM = 81, + TIF_DXGI_FORMAT_BC5_TYPELESS = 82, + TIF_DXGI_FORMAT_BC5_UNORM = 83, + TIF_DXGI_FORMAT_BC5_SNORM = 84, + TIF_DXGI_FORMAT_B5G6R5_UNORM = 85, + TIF_DXGI_FORMAT_B5G5R5A1_UNORM = 86, + TIF_DXGI_FORMAT_B8G8R8A8_UNORM = 87, + TIF_DXGI_FORMAT_B8G8R8X8_UNORM = 88, + TIF_DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM = 89, + TIF_DXGI_FORMAT_B8G8R8A8_TYPELESS = 90, + TIF_DXGI_FORMAT_B8G8R8A8_UNORM_SRGB = 91, + TIF_DXGI_FORMAT_B8G8R8X8_TYPELESS = 92, + TIF_DXGI_FORMAT_B8G8R8X8_UNORM_SRGB = 93, + TIF_DXGI_FORMAT_BC6H_TYPELESS = 94, + TIF_DXGI_FORMAT_BC6H_UF16 = 95, + TIF_DXGI_FORMAT_BC6H_SF16 = 96, + TIF_DXGI_FORMAT_BC7_TYPELESS = 97, + TIF_DXGI_FORMAT_BC7_UNORM = 98, + TIF_DXGI_FORMAT_BC7_UNORM_SRGB = 99, + TIF_DXGI_FORMAT_AYUV = 100, + TIF_DXGI_FORMAT_Y410 = 101, + TIF_DXGI_FORMAT_Y416 = 102, + TIF_DXGI_FORMAT_NV12 = 103, + 
TIF_DXGI_FORMAT_P010 = 104, + TIF_DXGI_FORMAT_P016 = 105, + TIF_DXGI_FORMAT_420_OPAQUE = 106, + TIF_DXGI_FORMAT_YUY2 = 107, + TIF_DXGI_FORMAT_Y210 = 108, + TIF_DXGI_FORMAT_Y216 = 109, + TIF_DXGI_FORMAT_NV11 = 110, + TIF_DXGI_FORMAT_AI44 = 111, + TIF_DXGI_FORMAT_IA44 = 112, + TIF_DXGI_FORMAT_P8 = 113, + TIF_DXGI_FORMAT_A8P8 = 114, + TIF_DXGI_FORMAT_B4G4R4A4_UNORM = 115, + + // xbox 360 formats + TIF_DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT = 116, + TIF_DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT = 117, + TIF_DXGI_FORMAT_D16_UNORM_S8_UINT = 118, + TIF_DXGI_FORMAT_R16_UNORM_X8_TYPELESS = 119, + TIF_DXGI_FORMAT_X16_TYPELESS_G8_UINT = 120, + + TIF_DXGI_FORMAT_P208 = 130, + TIF_DXGI_FORMAT_V208 = 131, + TIF_DXGI_FORMAT_V408 = 132, + + // XBox One formats + TIF_DXGI_FORMAT_R10G10B10_SNORM_A2_UNORM = 189, + TIF_DXGI_FORMAT_R4G4_UNORM = 190, + +} TinyImageFormat_DXGI_FORMAT; +#endif + +typedef enum TinyDDS_Format { + TDDS_UNDEFINED = TIF_DXGI_FORMAT_UNKNOWN, + TDDS_B5G6R5_UNORM = TIF_DXGI_FORMAT_B5G6R5_UNORM, + TDDS_B5G5R5A1_UNORM = TIF_DXGI_FORMAT_B5G5R5A1_UNORM, + TDDS_R8_UNORM = TIF_DXGI_FORMAT_R8_UNORM, + TDDS_R8_SNORM = TIF_DXGI_FORMAT_R8_SNORM, + TDDS_A8_UNORM = TIF_DXGI_FORMAT_A8_UNORM, + TDDS_R1_UNORM = TIF_DXGI_FORMAT_R1_UNORM, + TDDS_R8_UINT = TIF_DXGI_FORMAT_R8_UINT, + TDDS_R8_SINT = TIF_DXGI_FORMAT_R8_SINT, + TDDS_R8G8_UNORM = TIF_DXGI_FORMAT_R8G8_UNORM, + TDDS_R8G8_SNORM = TIF_DXGI_FORMAT_R8G8_SNORM, + TDDS_R8G8_UINT = TIF_DXGI_FORMAT_R8G8_UINT, + TDDS_R8G8_SINT = TIF_DXGI_FORMAT_R8G8_SINT, + TDDS_R8G8B8A8_UNORM = TIF_DXGI_FORMAT_R8G8B8A8_UNORM, + TDDS_R8G8B8A8_SNORM = TIF_DXGI_FORMAT_R8G8B8A8_SNORM, + TDDS_R8G8B8A8_UINT = TIF_DXGI_FORMAT_R8G8B8A8_UINT, + TDDS_R8G8B8A8_SINT = TIF_DXGI_FORMAT_R8G8B8A8_SINT, + TDDS_R8G8B8A8_SRGB = TIF_DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, + TDDS_B8G8R8A8_UNORM = TIF_DXGI_FORMAT_B8G8R8A8_UNORM, + TDDS_B8G8R8A8_SRGB = TIF_DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, + + TDDS_R9G9B9E5_UFLOAT = TIF_DXGI_FORMAT_R9G9B9E5_SHAREDEXP, + TDDS_R10G10B10A2_UNORM = 
TIF_DXGI_FORMAT_R10G10B10A2_UNORM, + TDDS_R10G10B10A2_UINT = TIF_DXGI_FORMAT_R10G10B10A2_UINT, + TDDS_R11G11B10_UFLOAT = TIF_DXGI_FORMAT_R11G11B10_FLOAT, + + TDDS_R16_UNORM = TIF_DXGI_FORMAT_R16_UNORM, + TDDS_R16_SNORM = TIF_DXGI_FORMAT_R16_SNORM, + TDDS_R16_UINT = TIF_DXGI_FORMAT_R16_UINT, + TDDS_R16_SINT = TIF_DXGI_FORMAT_R16_SINT, + TDDS_R16_SFLOAT = TIF_DXGI_FORMAT_R16_FLOAT, + + TDDS_R16G16_UNORM = TIF_DXGI_FORMAT_R16G16_UNORM, + TDDS_R16G16_SNORM = TIF_DXGI_FORMAT_R16G16_SNORM, + TDDS_R16G16_UINT = TIF_DXGI_FORMAT_R16G16_UINT, + TDDS_R16G16_SINT = TIF_DXGI_FORMAT_R16G16_SINT, + TDDS_R16G16_SFLOAT = TIF_DXGI_FORMAT_R16G16_FLOAT, + + TDDS_R16G16B16A16_UNORM = TIF_DXGI_FORMAT_R16G16B16A16_UNORM, + TDDS_R16G16B16A16_SNORM = TIF_DXGI_FORMAT_R16G16B16A16_SNORM, + TDDS_R16G16B16A16_UINT = TIF_DXGI_FORMAT_R16G16B16A16_UINT, + TDDS_R16G16B16A16_SINT = TIF_DXGI_FORMAT_R16G16B16A16_SINT, + TDDS_R16G16B16A16_SFLOAT = TIF_DXGI_FORMAT_R16G16B16A16_FLOAT, + + TDDS_R32_UINT = TIF_DXGI_FORMAT_R32_UINT, + TDDS_R32_SINT = TIF_DXGI_FORMAT_R32_SINT, + TDDS_R32_SFLOAT = TIF_DXGI_FORMAT_R32_FLOAT, + + TDDS_R32G32_UINT = TIF_DXGI_FORMAT_R32G32_UINT, + TDDS_R32G32_SINT = TIF_DXGI_FORMAT_R32G32_SINT, + TDDS_R32G32_SFLOAT = TIF_DXGI_FORMAT_R32G32_FLOAT, + + TDDS_R32G32B32_UINT = TIF_DXGI_FORMAT_R32G32B32_UINT, + TDDS_R32G32B32_SINT = TIF_DXGI_FORMAT_R32G32B32_SINT, + TDDS_R32G32B32_SFLOAT = TIF_DXGI_FORMAT_R32G32B32_FLOAT, + + TDDS_R32G32B32A32_UINT = TIF_DXGI_FORMAT_R32G32B32A32_UINT, + TDDS_R32G32B32A32_SINT = TIF_DXGI_FORMAT_R32G32B32A32_SINT, + TDDS_R32G32B32A32_SFLOAT = TIF_DXGI_FORMAT_R32G32B32A32_FLOAT, + + TDDS_BC1_RGBA_UNORM_BLOCK = TIF_DXGI_FORMAT_BC1_UNORM, + TDDS_BC1_RGBA_SRGB_BLOCK = TIF_DXGI_FORMAT_BC1_UNORM_SRGB, + TDDS_BC2_UNORM_BLOCK = TIF_DXGI_FORMAT_BC2_UNORM, + TDDS_BC2_SRGB_BLOCK = TIF_DXGI_FORMAT_BC2_UNORM_SRGB, + TDDS_BC3_UNORM_BLOCK = TIF_DXGI_FORMAT_BC3_UNORM, + TDDS_BC3_SRGB_BLOCK = TIF_DXGI_FORMAT_BC3_UNORM_SRGB, + TDDS_BC4_UNORM_BLOCK = 
TIF_DXGI_FORMAT_BC4_UNORM, + TDDS_BC4_SNORM_BLOCK = TIF_DXGI_FORMAT_BC4_SNORM, + TDDS_BC5_UNORM_BLOCK = TIF_DXGI_FORMAT_BC5_UNORM, + TDDS_BC5_SNORM_BLOCK = TIF_DXGI_FORMAT_BC5_SNORM, + + TDDS_BC6H_UFLOAT_BLOCK = TIF_DXGI_FORMAT_BC6H_UF16, + TDDS_BC6H_SFLOAT_BLOCK = TIF_DXGI_FORMAT_BC6H_SF16, + TDDS_BC7_UNORM_BLOCK = TIF_DXGI_FORMAT_BC7_UNORM, + TDDS_BC7_SRGB_BLOCK = TIF_DXGI_FORMAT_BC7_UNORM_SRGB, + + TDDS_AYUV = TIF_DXGI_FORMAT_AYUV, + TDDS_Y410 = TIF_DXGI_FORMAT_Y410, + TDDS_Y416 = TIF_DXGI_FORMAT_Y416, + TDDS_NV12 = TIF_DXGI_FORMAT_NV12, + TDDS_P010 = TIF_DXGI_FORMAT_P010, + TDDS_P016 = TIF_DXGI_FORMAT_P016, + TDDS_420_OPAQUE = TIF_DXGI_FORMAT_420_OPAQUE, + TDDS_YUY2 = TIF_DXGI_FORMAT_YUY2, + TDDS_Y210 = TIF_DXGI_FORMAT_Y210, + TDDS_Y216 = TIF_DXGI_FORMAT_Y216, + TDDS_NV11 = TIF_DXGI_FORMAT_NV11, + TDDS_AI44 = TIF_DXGI_FORMAT_AI44, + TDDS_IA44 = TIF_DXGI_FORMAT_IA44, + TDDS_P8 = TIF_DXGI_FORMAT_P8, + TDDS_A8P8 = TIF_DXGI_FORMAT_A8P8, + TDDS_B4G4R4A4_UNORM = TIF_DXGI_FORMAT_B4G4R4A4_UNORM, + TDDS_R10G10B10_7E3_A2_FLOAT = TIF_DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT, + TDDS_R10G10B10_6E4_A2_FLOAT = TIF_DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT, + TDDS_D16_UNORM_S8_UINT = TIF_DXGI_FORMAT_D16_UNORM_S8_UINT, + TDDS_R16_UNORM_X8_TYPELESS = TIF_DXGI_FORMAT_R16_UNORM_X8_TYPELESS, + TDDS_X16_TYPELESS_G8_UINT = TIF_DXGI_FORMAT_X16_TYPELESS_G8_UINT, + TDDS_P208 = TIF_DXGI_FORMAT_P208, + TDDS_V208 = TIF_DXGI_FORMAT_V208, + TDDS_V408 = TIF_DXGI_FORMAT_V408, + TDDS_R10G10B10_SNORM_A2_UNORM = TIF_DXGI_FORMAT_R10G10B10_SNORM_A2_UNORM, + TDDS_R4G4_UNORM = TIF_DXGI_FORMAT_R4G4_UNORM, + + TDDS_SYNTHESISED_DXGIFORMATS = 0xFFFF, + TDDS_G4R4_UNORM = TDDS_SYNTHESISED_DXGIFORMATS, + + TDDS_A4B4G4R4_UNORM, + TDDS_X4B4G4R4_UNORM, + + TDDS_A4R4G4B4_UNORM, + TDDS_X4R4G4B4_UNORM, + + TDDS_B4G4R4X4_UNORM, + + TDDS_R4G4B4A4_UNORM, + TDDS_R4G4B4X4_UNORM, + + TDDS_B5G5R5X1_UNORM, + + TDDS_R5G5B5A1_UNORM, + TDDS_R5G5B5X1_UNORM, + + TDDS_A1R5G5B5_UNORM, + TDDS_X1R5G5B5_UNORM, + + TDDS_A1B5G5R5_UNORM, + 
TDDS_X1B5G5R5_UNORM, + + TDDS_R5G6B5_UNORM, + + TDDS_B2G3R3_UNORM, + TDDS_B2G3R3A8_UNORM, + + TDDS_G8R8_UNORM, + TDDS_G8R8_SNORM, + + TDDS_R8G8B8_UNORM, + TDDS_B8G8R8_UNORM, + + TDDS_A8B8G8R8_SNORM, + TDDS_B8G8R8A8_SNORM, + + TDDS_R8G8B8X8_UNORM, + TDDS_B8G8R8X8_UNORM, + TDDS_A8B8G8R8_UNORM, + TDDS_X8B8G8R8_UNORM, + TDDS_A8R8G8B8_UNORM, + TDDS_X8R8G8B8_UNORM, + + TDDS_R10G10B10A2_SNORM, + TDDS_B10G10R10A2_UNORM, + TDDS_B10G10R10A2_SNORM, + TDDS_A2B10G10R10_UNORM, + TDDS_A2B10G10R10_SNORM, + TDDS_A2R10G10B10_UNORM, + TDDS_A2R10G10B10_SNORM, + + TDDS_G16R16_UNORM, + TDDS_G16R16_SNORM, + +} TinyDDS_Format; + +// tiny_imageformat/format needs included before tinydds.h for this functionality +#ifdef TINYIMAGEFORMAT_BASE_H_ + +static TinyImageFormat TinyImageFormat_FromTinyDDSFormat(TinyDDS_Format fmt) { + switch (fmt) { + case TDDS_UNDEFINED: return TinyImageFormat_UNDEFINED; + + case TDDS_R32G32B32A32_SFLOAT: return TinyImageFormat_R32G32B32A32_SFLOAT; + case TDDS_R32G32B32A32_UINT: return TinyImageFormat_R32G32B32A32_UINT; + case TDDS_R32G32B32A32_SINT: return TinyImageFormat_R32G32B32A32_SINT; + case TDDS_R32G32B32_SFLOAT: return TinyImageFormat_R32G32B32_SFLOAT; + case TDDS_R32G32B32_UINT: return TinyImageFormat_R32G32B32_UINT; + case TDDS_R32G32B32_SINT: return TinyImageFormat_R32G32B32_SINT; + case TDDS_R16G16B16A16_SFLOAT: return TinyImageFormat_R16G16B16A16_SFLOAT; + case TDDS_R16G16B16A16_UNORM: return TinyImageFormat_R16G16B16A16_UNORM; + case TDDS_R16G16B16A16_UINT: return TinyImageFormat_R16G16B16A16_UINT; + case TDDS_R16G16B16A16_SNORM: return TinyImageFormat_R16G16B16A16_SNORM; + case TDDS_R16G16B16A16_SINT: return TinyImageFormat_R16G16B16A16_SINT; + case TDDS_R32G32_SFLOAT: return TinyImageFormat_R32G32_SFLOAT; + case TDDS_R32G32_UINT: return TinyImageFormat_R32G32_UINT; + case TDDS_R32G32_SINT: return TinyImageFormat_R32G32_SINT; + case TDDS_R8G8B8A8_UNORM: return TinyImageFormat_R8G8B8A8_UNORM; + case TDDS_R8G8B8A8_SRGB: return 
TinyImageFormat_R8G8B8A8_SRGB; + case TDDS_R8G8B8A8_UINT: return TinyImageFormat_R8G8B8A8_UINT; + case TDDS_R8G8B8A8_SNORM: return TinyImageFormat_R8G8B8A8_SNORM; + case TDDS_R8G8B8A8_SINT: return TinyImageFormat_R8G8B8A8_SINT; + case TDDS_R16G16_SFLOAT: return TinyImageFormat_R16G16_SFLOAT; + case TDDS_R16G16_UNORM: return TinyImageFormat_R16G16_UNORM; + case TDDS_R16G16_UINT: return TinyImageFormat_R16G16_UINT; + case TDDS_R16G16_SNORM: return TinyImageFormat_R16G16_SNORM; + case TDDS_R16G16_SINT: return TinyImageFormat_R16G16_SINT; + case TDDS_R32_SFLOAT: return TinyImageFormat_R32_SFLOAT; + case TDDS_R32_UINT: return TinyImageFormat_R32_UINT; + case TDDS_R32_SINT: return TinyImageFormat_R32_SINT; + + case TDDS_R8G8_UNORM: return TinyImageFormat_R8G8_UNORM; + case TDDS_R8G8_UINT: return TinyImageFormat_R8G8_UINT; + case TDDS_R8G8_SNORM: return TinyImageFormat_R8G8_SNORM; + case TDDS_R8G8_SINT: return TinyImageFormat_R8G8_SINT; + case TDDS_G8R8_UNORM: return TinyImageFormat_G8R8_UNORM; + case TDDS_G8R8_SNORM: return TinyImageFormat_G8R8_SNORM; + + case TDDS_R16_SFLOAT: return TinyImageFormat_R16_SFLOAT; + case TDDS_R16_UNORM: return TinyImageFormat_R16_UNORM; + case TDDS_R16_UINT: return TinyImageFormat_R16_UINT; + case TDDS_R16_SNORM: return TinyImageFormat_R16_SNORM; + case TDDS_R16_SINT: return TinyImageFormat_R16_SINT; + case TDDS_R8_UNORM: return TinyImageFormat_R8_UNORM; + case TDDS_R8_UINT: return TinyImageFormat_R8_UINT; + case TDDS_R8_SNORM: return TinyImageFormat_R8_SNORM; + case TDDS_R8_SINT: return TinyImageFormat_R8_SINT; + case TDDS_A8_UNORM: return TinyImageFormat_A8_UNORM; + case TDDS_BC1_RGBA_UNORM_BLOCK: return TinyImageFormat_DXBC1_RGBA_UNORM; + case TDDS_BC1_RGBA_SRGB_BLOCK: return TinyImageFormat_DXBC1_RGBA_SRGB; + case TDDS_BC2_UNORM_BLOCK: return TinyImageFormat_DXBC2_UNORM; + case TDDS_BC2_SRGB_BLOCK: return TinyImageFormat_DXBC2_SRGB; + case TDDS_BC3_UNORM_BLOCK: return TinyImageFormat_DXBC3_UNORM; + case TDDS_BC3_SRGB_BLOCK: return 
TinyImageFormat_DXBC3_SRGB; + case TDDS_BC4_UNORM_BLOCK: return TinyImageFormat_DXBC4_UNORM; + case TDDS_BC4_SNORM_BLOCK: return TinyImageFormat_DXBC4_SNORM; + case TDDS_BC5_UNORM_BLOCK: return TinyImageFormat_DXBC5_UNORM; + case TDDS_BC5_SNORM_BLOCK: return TinyImageFormat_DXBC5_SNORM; + case TDDS_BC6H_UFLOAT_BLOCK: return TinyImageFormat_DXBC6H_UFLOAT; + case TDDS_BC6H_SFLOAT_BLOCK: return TinyImageFormat_DXBC6H_SFLOAT; + case TDDS_BC7_UNORM_BLOCK: return TinyImageFormat_DXBC7_UNORM; + case TDDS_BC7_SRGB_BLOCK: return TinyImageFormat_DXBC7_SRGB; + case TDDS_B8G8R8A8_UNORM: return TinyImageFormat_B8G8R8A8_UNORM; + case TDDS_B8G8R8A8_SRGB: return TinyImageFormat_B8G8R8A8_SRGB; + + case TDDS_B2G3R3A8_UNORM: return TinyImageFormat_B2G3R3A8_UNORM; + case TDDS_B2G3R3_UNORM: return TinyImageFormat_B2G3R3_UNORM; + case TDDS_R4G4_UNORM: return TinyImageFormat_R4G4_UNORM; + + case TDDS_R8G8B8_UNORM: return TinyImageFormat_R8G8B8_UNORM; + case TDDS_B8G8R8_UNORM: return TinyImageFormat_B8G8R8_UNORM; + case TDDS_B8G8R8A8_SNORM: return TinyImageFormat_B8G8R8A8_SNORM; + + case TDDS_R9G9B9E5_UFLOAT: return TinyImageFormat_E5B9G9R9_UFLOAT; + case TDDS_R11G11B10_UFLOAT: return TinyImageFormat_B10G11R11_UFLOAT; + case TDDS_G4R4_UNORM: return TinyImageFormat_G4R4_UNORM; + + case TDDS_R5G6B5_UNORM: return TinyImageFormat_R5G6B5_UNORM; + case TDDS_B5G6R5_UNORM: return TinyImageFormat_B5G6R5_UNORM; + + case TDDS_B5G5R5A1_UNORM: return TinyImageFormat_B5G5R5A1_UNORM; + case TDDS_B5G5R5X1_UNORM: return TinyImageFormat_B5G5R5X1_UNORM; + + case TDDS_R5G5B5A1_UNORM: return TinyImageFormat_R5G5B5A1_UNORM; + case TDDS_R5G5B5X1_UNORM: return TinyImageFormat_R5G5B5X1_UNORM; + + case TDDS_A1R5G5B5_UNORM: return TinyImageFormat_A1R5G5B5_UNORM; + case TDDS_X1R5G5B5_UNORM: return TinyImageFormat_X1R5G5B5_UNORM; + + case TDDS_X1B5G5R5_UNORM: return TinyImageFormat_X1B5G5R5_UNORM; + case TDDS_A1B5G5R5_UNORM: return TinyImageFormat_A1B5G5R5_UNORM; + + case TDDS_X4B4G4R4_UNORM: return 
TinyImageFormat_X4B4G4R4_UNORM; + case TDDS_X4R4G4B4_UNORM: return TinyImageFormat_X4R4G4B4_UNORM; + case TDDS_A4R4G4B4_UNORM: return TinyImageFormat_A4R4G4B4_UNORM; + case TDDS_B4G4R4A4_UNORM: return TinyImageFormat_B4G4R4A4_UNORM; + case TDDS_A4B4G4R4_UNORM: return TinyImageFormat_A4B4G4R4_UNORM; + case TDDS_B4G4R4X4_UNORM: return TinyImageFormat_B4G4R4X4_UNORM; + case TDDS_R4G4B4A4_UNORM: return TinyImageFormat_R4G4B4A4_UNORM; + case TDDS_R4G4B4X4_UNORM: return TinyImageFormat_R4G4B4X4_UNORM; + + case TDDS_R8G8B8X8_UNORM: return TinyImageFormat_R8G8B8X8_UNORM; + + // DDS A2R10B10G10 support is basically broken historically so expect channels to need swapping + case TDDS_A2B10G10R10_UNORM: return TinyImageFormat_A2B10G10R10_UNORM; + case TDDS_A2B10G10R10_SNORM: return TinyImageFormat_A2B10G10R10_SNORM; + case TDDS_A2R10G10B10_UNORM: return TinyImageFormat_A2R10G10B10_UNORM; + case TDDS_A2R10G10B10_SNORM: return TinyImageFormat_A2R10G10B10_SNORM; + case TDDS_B10G10R10A2_UNORM: return TinyImageFormat_R10G10B10A2_UNORM; + case TDDS_B10G10R10A2_SNORM: return TinyImageFormat_R10G10B10A2_SNORM; + case TDDS_R10G10B10A2_UNORM: return TinyImageFormat_B10G10R10A2_UNORM; + case TDDS_R10G10B10A2_SNORM: return TinyImageFormat_B10G10R10A2_SNORM; + case TDDS_R10G10B10A2_UINT: return TinyImageFormat_B10G10R10A2_UINT; + + case TDDS_B8G8R8X8_UNORM: return TinyImageFormat_B8G8R8X8_UNORM; + + case TDDS_G16R16_UNORM: return TinyImageFormat_G16R16_UNORM; + case TDDS_G16R16_SNORM: return TinyImageFormat_G16R16_SNORM; + case TDDS_X8B8G8R8_UNORM: return TinyImageFormat_R8G8B8X8_UNORM; + case TDDS_X8R8G8B8_UNORM: return TinyImageFormat_B8G8R8X8_UNORM; + case TDDS_A8B8G8R8_UNORM: return TinyImageFormat_R8G8B8A8_UNORM; + case TDDS_A8R8G8B8_UNORM: return TinyImageFormat_B8G8R8A8_UNORM; + case TDDS_A8B8G8R8_SNORM: return TinyImageFormat_R8G8B8X8_UNORM; + case TDDS_P8: return TinyImageFormat_CLUT_P8; + case TDDS_A8P8: return TinyImageFormat_CLUT_P8A8; + case TDDS_R1_UNORM: return 
TinyImageFormat_R1_UNORM; + + case TDDS_AYUV:break; + case TDDS_Y410:break; + case TDDS_Y416:break; + case TDDS_NV12:break; + case TDDS_P010:break; + case TDDS_P016:break; + case TDDS_420_OPAQUE:break; + case TDDS_YUY2:break; + case TDDS_Y210:break; + case TDDS_Y216:break; + case TDDS_NV11:break; + case TDDS_AI44:break; + case TDDS_IA44:break; + case TDDS_R10G10B10_7E3_A2_FLOAT:break; + case TDDS_R10G10B10_6E4_A2_FLOAT:break; + case TDDS_D16_UNORM_S8_UINT:break; + case TDDS_R16_UNORM_X8_TYPELESS:break; + case TDDS_X16_TYPELESS_G8_UINT:break; + case TDDS_P208:break; + case TDDS_V208:break; + case TDDS_V408:break; + case TDDS_R10G10B10_SNORM_A2_UNORM:break; + } + + return TinyImageFormat_UNDEFINED; +} + +static TinyDDS_Format TinyImageFormat_ToTinyDDSFormat(TinyImageFormat fmt) { + switch (fmt) { + case TinyImageFormat_R4G4_UNORM: return TDDS_R4G4_UNORM; + case TinyImageFormat_G4R4_UNORM: return TDDS_G4R4_UNORM; + + case TinyImageFormat_A4R4G4B4_UNORM: return TDDS_A4R4G4B4_UNORM; + case TinyImageFormat_B4G4R4A4_UNORM: return TDDS_B4G4R4A4_UNORM; + case TinyImageFormat_A4B4G4R4_UNORM: return TDDS_A4B4G4R4_UNORM; + case TinyImageFormat_X4R4G4B4_UNORM: return TDDS_X4R4G4B4_UNORM; + case TinyImageFormat_X4B4G4R4_UNORM: return TDDS_X4B4G4R4_UNORM; + case TinyImageFormat_R4G4B4A4_UNORM: return TDDS_R4G4B4A4_UNORM; + case TinyImageFormat_R4G4B4X4_UNORM: return TDDS_R4G4B4X4_UNORM; + + case TinyImageFormat_A1B5G5R5_UNORM: return TDDS_A1B5G5R5_UNORM; + case TinyImageFormat_X1B5G5R5_UNORM: return TDDS_X1B5G5R5_UNORM; + + case TinyImageFormat_A1R5G5B5_UNORM: return TDDS_A1R5G5B5_UNORM; + case TinyImageFormat_X1R5G5B5_UNORM: return TDDS_X1R5G5B5_UNORM; + + case TinyImageFormat_B5G5R5A1_UNORM: return TDDS_B5G5R5A1_UNORM; + case TinyImageFormat_B5G5R5X1_UNORM: return TDDS_B5G5R5X1_UNORM; + + case TinyImageFormat_R5G5B5A1_UNORM: return TDDS_R5G5B5A1_UNORM; + case TinyImageFormat_R5G5B5X1_UNORM: return TDDS_R5G5B5X1_UNORM; + + case TinyImageFormat_R5G6B5_UNORM: return 
TDDS_R5G6B5_UNORM; + case TinyImageFormat_B5G6R5_UNORM: return TDDS_B5G6R5_UNORM; + + case TinyImageFormat_A2B10G10R10_UNORM: return TDDS_A2B10G10R10_UNORM; + case TinyImageFormat_A2B10G10R10_SNORM: return TDDS_A2B10G10R10_SNORM; + case TinyImageFormat_A2R10G10B10_UNORM: return TDDS_A2R10G10B10_UNORM; + case TinyImageFormat_A2R10G10B10_SNORM: return TDDS_A2R10G10B10_SNORM; + case TinyImageFormat_R10G10B10A2_UNORM: return TDDS_B10G10R10A2_UNORM; + case TinyImageFormat_R10G10B10A2_SNORM: return TDDS_B10G10R10A2_SNORM; + case TinyImageFormat_B10G10R10A2_UNORM: return TDDS_R10G10B10A2_UNORM; + case TinyImageFormat_B10G10R10A2_SNORM: return TDDS_R10G10B10A2_SNORM; + case TinyImageFormat_B10G10R10A2_UINT: return TDDS_R10G10B10A2_UINT; + + case TinyImageFormat_E5B9G9R9_UFLOAT: return TDDS_R9G9B9E5_UFLOAT; + case TinyImageFormat_B10G11R11_UFLOAT: return TDDS_R11G11B10_UFLOAT; + + case TinyImageFormat_R8_UNORM: return TDDS_R8_UNORM; + case TinyImageFormat_R8_SNORM: return TDDS_R8_SNORM; + case TinyImageFormat_R8_UINT: return TDDS_R8_UINT; + case TinyImageFormat_R8_SINT: return TDDS_R8_SINT; + case TinyImageFormat_A8_UNORM: return TDDS_A8_UNORM; + case TinyImageFormat_B2G3R3_UNORM: return TDDS_B2G3R3_UNORM; + + case TinyImageFormat_B2G3R3A8_UNORM: return TDDS_B2G3R3A8_UNORM; + case TinyImageFormat_R8G8_UNORM: return TDDS_R8G8_UNORM; + case TinyImageFormat_R8G8_SNORM: return TDDS_R8G8_SNORM; + case TinyImageFormat_R8G8_UINT: return TDDS_R8G8_UINT; + case TinyImageFormat_R8G8_SINT: return TDDS_R8G8_SINT; + case TinyImageFormat_G8R8_UNORM: return TDDS_G8R8_UNORM; + case TinyImageFormat_G8R8_SNORM: return TDDS_G8R8_SNORM; + + case TinyImageFormat_R8G8B8_UNORM: return TDDS_R8G8B8_UNORM; + case TinyImageFormat_B8G8R8_UNORM: return TDDS_B8G8R8_UNORM; + + case TinyImageFormat_R8G8B8A8_UNORM: return TDDS_R8G8B8A8_UNORM; + case TinyImageFormat_R8G8B8A8_SNORM: return TDDS_R8G8B8A8_SNORM; + case TinyImageFormat_R8G8B8A8_UINT: return TDDS_R8G8B8A8_UINT; + case 
TinyImageFormat_R8G8B8A8_SINT: return TDDS_R8G8B8A8_SINT; + case TinyImageFormat_R8G8B8A8_SRGB: return TDDS_R8G8B8A8_SRGB; + case TinyImageFormat_B8G8R8A8_UNORM: return TDDS_B8G8R8A8_UNORM; + case TinyImageFormat_B8G8R8A8_SRGB: return TDDS_B8G8R8A8_SRGB; + + case TinyImageFormat_R16_UNORM: return TDDS_R16_UNORM; + case TinyImageFormat_R16_SNORM: return TDDS_R16_SNORM; + case TinyImageFormat_R16_UINT: return TDDS_R16_UINT; + case TinyImageFormat_R16_SINT: return TDDS_R16_SINT; + case TinyImageFormat_R16_SFLOAT: return TDDS_R16_SFLOAT; + + case TinyImageFormat_R16G16_UNORM: return TDDS_R16G16_UNORM; + case TinyImageFormat_R16G16_SNORM: return TDDS_R16G16_SNORM; + case TinyImageFormat_R16G16_UINT: return TDDS_R16G16_UINT; + case TinyImageFormat_R16G16_SINT: return TDDS_R16G16_SINT; + case TinyImageFormat_R16G16_SFLOAT: return TDDS_R16G16_SFLOAT; + + case TinyImageFormat_G16R16_UNORM: return TDDS_G16R16_UNORM; + case TinyImageFormat_G16R16_SNORM: return TDDS_G16R16_SNORM; + + case TinyImageFormat_R16G16B16A16_UNORM: return TDDS_R16G16B16A16_UNORM; + case TinyImageFormat_R16G16B16A16_SNORM: return TDDS_R16G16B16A16_SNORM; + case TinyImageFormat_R16G16B16A16_UINT: return TDDS_R16G16B16A16_UINT; + case TinyImageFormat_R16G16B16A16_SINT: return TDDS_R16G16B16A16_SINT; + case TinyImageFormat_R16G16B16A16_SFLOAT: return TDDS_R16G16B16A16_SFLOAT; + + case TinyImageFormat_R32_UINT: return TDDS_R32_UINT; + case TinyImageFormat_R32_SINT: return TDDS_R32_SINT; + case TinyImageFormat_R32_SFLOAT: return TDDS_R32_SFLOAT; + + case TinyImageFormat_R32G32_UINT: return TDDS_R32G32_UINT; + case TinyImageFormat_R32G32_SINT: return TDDS_R32G32_SINT; + case TinyImageFormat_R32G32_SFLOAT: return TDDS_R32G32_SFLOAT; + + case TinyImageFormat_R32G32B32_UINT: return TDDS_R32G32B32_UINT; + case TinyImageFormat_R32G32B32_SINT: return TDDS_R32G32B32_SINT; + case TinyImageFormat_R32G32B32_SFLOAT:return TDDS_R32G32B32_SFLOAT; + + case TinyImageFormat_R32G32B32A32_UINT: return TDDS_R32G32B32A32_UINT; 
+ case TinyImageFormat_R32G32B32A32_SINT: return TDDS_R32G32B32A32_SINT; + case TinyImageFormat_R32G32B32A32_SFLOAT: return TDDS_R32G32B32A32_SFLOAT; + + case TinyImageFormat_D16_UNORM: return TDDS_R16_UNORM; + case TinyImageFormat_D32_SFLOAT: return TDDS_R32_SFLOAT; + case TinyImageFormat_S8_UINT: return TDDS_R8_UINT; + case TinyImageFormat_DXBC1_RGB_UNORM: return TDDS_BC1_RGBA_UNORM_BLOCK; + case TinyImageFormat_DXBC1_RGB_SRGB: return TDDS_BC1_RGBA_SRGB_BLOCK; + case TinyImageFormat_DXBC1_RGBA_UNORM: return TDDS_BC1_RGBA_UNORM_BLOCK; + case TinyImageFormat_DXBC1_RGBA_SRGB: return TDDS_BC1_RGBA_SRGB_BLOCK; + case TinyImageFormat_DXBC2_UNORM: return TDDS_BC2_UNORM_BLOCK; + case TinyImageFormat_DXBC2_SRGB: return TDDS_BC2_SRGB_BLOCK; + case TinyImageFormat_DXBC3_UNORM: return TDDS_BC3_UNORM_BLOCK; + case TinyImageFormat_DXBC3_SRGB: return TDDS_BC3_SRGB_BLOCK; + case TinyImageFormat_DXBC4_UNORM: return TDDS_BC4_UNORM_BLOCK; + case TinyImageFormat_DXBC4_SNORM: return TDDS_BC4_SNORM_BLOCK; + case TinyImageFormat_DXBC5_UNORM: return TDDS_BC5_UNORM_BLOCK; + case TinyImageFormat_DXBC5_SNORM: return TDDS_BC5_SNORM_BLOCK; + case TinyImageFormat_DXBC6H_UFLOAT: return TDDS_BC6H_UFLOAT_BLOCK; + case TinyImageFormat_DXBC6H_SFLOAT: return TDDS_BC6H_SFLOAT_BLOCK; + case TinyImageFormat_DXBC7_UNORM: return TDDS_BC7_UNORM_BLOCK; + case TinyImageFormat_DXBC7_SRGB: return TDDS_BC7_SRGB_BLOCK; + + case TinyImageFormat_CLUT_P8: return TDDS_P8; + case TinyImageFormat_CLUT_P8A8: return TDDS_A8P8; + case TinyImageFormat_R1_UNORM: return TDDS_R1_UNORM; + + // unsupported + // TODO Some of these can be via Dx10/4CC codes I think + default: return TDDS_UNDEFINED; + } + + return TDDS_UNDEFINED; +} +#endif + +TinyDDS_Format TinyDDS_GetFormat(TinyDDS_ContextHandle handle); + +bool TinyDDS_WriteImage(TinyDDS_WriteCallbacks const *callbacks, + void *user, + uint32_t width, + uint32_t height, + uint32_t depth, + uint32_t slices, + uint32_t mipmaplevels, + TinyDDS_Format format, + bool cubemap, + 
bool preferDx10Format, + uint32_t const *mipmapsizes, + void const **mipmaps); + +#ifdef TINYDDS_IMPLEMENTATION + +#define TINYDDS_DDSD_CAPS 0x00000001 +#define TINYDDS_DDSD_HEIGHT 0x00000002 +#define TINYDDS_DDSD_WIDTH 0x00000004 +#define TINYDDS_DDSD_PITCH 0x00000008 +#define TINYDDS_DDSD_PIXELFORMAT 0x00001000 +#define TINYDDS_DDSD_MIPMAPCOUNT 0x00020000 +#define TINYDDS_DDSD_LINEARSIZE 0x00080000 +#define TINYDDS_DDSD_DEPTH 0x00800000 +#define TINYDDS_DDSCAPS_COMPLEX 0x00000008 +#define TINYDDS_DDSCAPS_TEXTURE 0x00001000 +#define TINYDDS_DDSCAPS_MIPMAP 0x00400000 +#define TINYDDS_DDSCAPS2_CUBEMAP 0x00000200 +#define TINYDDS_DDSCAPS2_VOLUME 0x00200000 +#define TINYDDS_DDSCAPS2_CUBEMAP_ALL 0x0000FC000 +#define TINYDDS_D3D10_RESOURCE_MISC_TEXTURECUBE 0x4 +#define TINYDDS_D3D10_RESOURCE_DIMENSION_BUFFER 1 +#define TINYDDS_D3D10_RESOURCE_DIMENSION_TEXTURE1D 2 +#define TINYDDS_D3D10_RESOURCE_DIMENSION_TEXTURE2D 3 +#define TINYDDS_D3D10_RESOURCE_DIMENSION_TEXTURE3D 4 +#define TINYDDS_DDPF_ALPHAPIXELS 0x00000001l +#define TINYDDS_DDPF_ALPHA 0x00000002l +#define TINYDDS_DDPF_FOURCC 0x00000004l +#define TINYDDS_DDPF_PALETTEINDEXED4 0x00000008l +#define TINYDDS_DDPF_PALETTEINDEXEDTO8 0x00000010l +#define TINYDDS_DDPF_PALETTEINDEXED8 0x00000020l +#define TINYDDS_DDPF_RGB 0x00000040l +#define TINYDDS_DDPF_LUMINANCE 0x00020000l +#define TINYDDS_DDPF_BUMPLUMINANCE 0x00040000l +#define TINYDDS_DDPF_BUMPDUDV 0x00080000l + +// some of these get stuck in unofficial DDS v9 FourCC code +typedef enum TINYDDS_D3DFORMAT { + TINYDDS_D3DFMT_UNKNOWN = 0, + TINYDDS_D3DFMT_R8G8B8 = 20, + TINYDDS_D3DFMT_A8R8G8B8 = 21, + TINYDDS_D3DFMT_X8R8G8B8 = 22, + TINYDDS_D3DFMT_R5G6B5 = 23, + TINYDDS_D3DFMT_X1R5G5B5 = 24, + TINYDDS_D3DFMT_A1R5G5B5 = 25, + TINYDDS_D3DFMT_A4R4G4B4 = 26, + TINYDDS_D3DFMT_R3G3B2 = 27, + TINYDDS_D3DFMT_A8 = 28, + TINYDDS_D3DFMT_A8R3G3B2 = 29, + TINYDDS_D3DFMT_X4R4G4B4 = 30, + TINYDDS_D3DFMT_A2B10G10R10 = 31, + TINYDDS_D3DFMT_A8B8G8R8 = 32, + TINYDDS_D3DFMT_X8B8G8R8 = 33, + 
TINYDDS_D3DFMT_G16R16 = 34, + TINYDDS_D3DFMT_A2R10G10B10 = 35, + TINYDDS_D3DFMT_A16B16G16R16 = 36, + TINYDDS_D3DFMT_A8P8 = 40, + TINYDDS_D3DFMT_P8 = 41, + TINYDDS_D3DFMT_L8 = 50, + TINYDDS_D3DFMT_A8L8 = 51, + TINYDDS_D3DFMT_A4L4 = 52, + TINYDDS_D3DFMT_V8U8 = 60, + TINYDDS_D3DFMT_L6V5U5 = 61, + TINYDDS_D3DFMT_X8L8V8U8 = 62, + TINYDDS_D3DFMT_Q8W8V8U8 = 63, + TINYDDS_D3DFMT_V16U16 = 64, + TINYDDS_D3DFMT_A2W10V10U10 = 67, + TINYDDS_D3DFMT_L16 = 81, + TINYDDS_D3DFMT_Q16W16V16U16 = 110, + TINYDDS_D3DFMT_R16F = 111, + TINYDDS_D3DFMT_G16R16F = 112, + TINYDDS_D3DFMT_A16B16G16R16F = 113, + TINYDDS_D3DFMT_R32F = 114, + TINYDDS_D3DFMT_G32R32F = 115, + TINYDDS_D3DFMT_A32B32G32R32F = 116, + TINYDDS_D3DFMT_CxV8U8 = 117, + TINYDDS_D3DFMT_A1 = 118, + TINYDDS_D3DFMT_A2B10G10R10_XR_BIAS = 119, +} TINYDDS_D3DFORMAT; + +typedef struct TinyDDS_Header { + uint32_t magic; + uint32_t size; + uint32_t flags; + uint32_t height; + uint32_t width; + uint32_t pitchOrLinearSize; + uint32_t depth; + uint32_t mipMapCount; + uint32_t reserved0[11]; + + uint32_t formatSize; + uint32_t formatFlags; + uint32_t formatFourCC; + uint32_t formatRGBBitCount; + uint32_t formatRBitMask; + uint32_t formatGBitMask; + uint32_t formatBBitMask; + uint32_t formatABitMask; + + uint32_t caps1; + uint32_t caps2; + uint32_t caps3; // not used? + uint32_t caps4; // not used? 
+ + uint32_t reserved1; +} TinyDDS_Header; + +typedef struct TinyDDS_HeaderDX10 { + uint32_t DXGIFormat; + uint32_t resourceDimension; + uint32_t miscFlag; + uint32_t arraySize; + uint32_t reserved; +} TinyDDS_HeaderDX10; + +typedef struct TinyDDS_Context { + TinyDDS_Callbacks callbacks; + void *user; + uint64_t headerPos; + uint64_t firstImagePos; + + TinyDDS_Header header; + TinyDDS_HeaderDX10 headerDx10; + TinyDDS_Format format; + + bool headerValid; + uint8_t const *mipmaps[TINYDDS_MAX_MIPMAPLEVELS]; + uint32_t const *clut; + +} TinyDDS_Context; + +#define TINYDDS_MAKE_RIFFCODE(a, b, c, d) (a | (b << 8) | (c << 16) | (d << 24)) + +static uint32_t TinyDDS_fileIdentifier = TINYDDS_MAKE_RIFFCODE('D', 'D', 'S', ' '); + +static void TinyDDS_NullErrorFunc(void *user, char const *msg) { BASISU_NOTE_UNUSED(user); BASISU_NOTE_UNUSED(msg); } + +TinyDDS_ContextHandle TinyDDS_CreateContext(TinyDDS_Callbacks const *callbacks, void *user) { + TinyDDS_Context *ctx = (TinyDDS_Context *) callbacks->allocFn(user, sizeof(TinyDDS_Context)); + if (ctx == NULL) + return NULL; + + memset(ctx, 0, sizeof(TinyDDS_Context)); + memcpy(&ctx->callbacks, callbacks, sizeof(TinyDDS_Callbacks)); + ctx->user = user; + if (ctx->callbacks.errorFn == NULL) { + ctx->callbacks.errorFn = &TinyDDS_NullErrorFunc; + } + + if (ctx->callbacks.readFn == NULL) { + ctx->callbacks.errorFn(user, "TinyDDS must have read callback"); + return NULL; + } + if (ctx->callbacks.allocFn == NULL) { + ctx->callbacks.errorFn(user, "TinyDDS must have alloc callback"); + return NULL; + } + if (ctx->callbacks.freeFn == NULL) { + ctx->callbacks.errorFn(user, "TinyDDS must have free callback"); + return NULL; + } + if (ctx->callbacks.seekFn == NULL) { + ctx->callbacks.errorFn(user, "TinyDDS must have seek callback"); + return NULL; + } + if (ctx->callbacks.tellFn == NULL) { + ctx->callbacks.errorFn(user, "TinyDDS must have tell callback"); + return NULL; + } + + TinyDDS_Reset(ctx); + + return ctx; +} + +void 
TinyDDS_DestroyContext(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return; + TinyDDS_Reset(handle); + + ctx->callbacks.freeFn(ctx->user, ctx); +} + +void TinyDDS_Reset(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return; + + // backup user provided callbacks and data + TinyDDS_Callbacks callbacks; + memcpy(&callbacks, &ctx->callbacks, sizeof(TinyDDS_Callbacks)); + void *user = ctx->user; + + for (int i = 0; i < TINYDDS_MAX_MIPMAPLEVELS; ++i) { + if (ctx->mipmaps[i] != NULL) { + callbacks.freeFn(user, (void *) ctx->mipmaps[i]); + } + } + + if(ctx->clut) { + callbacks.freeFn(user, (void *) ctx->clut); + ctx->clut = NULL; + } + + // reset to default state + memset(ctx, 0, sizeof(TinyDDS_Context)); + memcpy(&ctx->callbacks, &callbacks, sizeof(TinyDDS_Callbacks)); + ctx->user = user; + +} + +static bool TinyDDS_IsCLUT(TinyDDS_Format fmt) { + switch (fmt) { + case TDDS_P8: + case TDDS_A8P8: + return true; + default: return false; + } +} + +static bool TinyDDS_IsCompressed(TinyDDS_Format fmt) { + switch (fmt) { + case TDDS_BC1_RGBA_UNORM_BLOCK: + case TDDS_BC1_RGBA_SRGB_BLOCK: + case TDDS_BC2_UNORM_BLOCK: + case TDDS_BC2_SRGB_BLOCK: + case TDDS_BC3_UNORM_BLOCK: + case TDDS_BC3_SRGB_BLOCK: + case TDDS_BC4_UNORM_BLOCK: + case TDDS_BC4_SNORM_BLOCK: + case TDDS_BC5_UNORM_BLOCK: + case TDDS_BC5_SNORM_BLOCK: + case TDDS_BC6H_UFLOAT_BLOCK: + case TDDS_BC6H_SFLOAT_BLOCK: + case TDDS_BC7_UNORM_BLOCK: + case TDDS_BC7_SRGB_BLOCK: return true; + default: return false; + } +} + +// the size is per pixel (except R1) for uncompressed and per block of 16 pixels for compressed +static uint32_t TinyDDS_FormatSize(TinyDDS_Format fmt) { + switch(fmt) { + // 8 pixels at 1 bits each + case TDDS_R1_UNORM: + return 1; + // 2 * 4 bits + case TDDS_R4G4_UNORM: + case TDDS_G4R4_UNORM: + // 1 * 8 bits + case TDDS_P8:; + case TDDS_R8_UNORM: + case TDDS_R8_SNORM: + case 
TDDS_R8_UINT: + case TDDS_R8_SINT: + case TDDS_A8_UNORM: + // 2 + 2 * 3 bits + case TDDS_B2G3R3_UNORM: + return 1; + + // 2 + 2 * 3 +8 bits + case TDDS_B2G3R3A8_UNORM: + // 4 * 4 bits + case TDDS_B4G4R4A4_UNORM: + case TDDS_A4B4G4R4_UNORM: + case TDDS_X4B4G4R4_UNORM: + case TDDS_A4R4G4B4_UNORM: + case TDDS_X4R4G4B4_UNORM: + case TDDS_B4G4R4X4_UNORM: + case TDDS_R4G4B4A4_UNORM: + case TDDS_R4G4B4X4_UNORM: + + // 3 * 5 bits + 1 bit + case TDDS_B5G5R5A1_UNORM: + case TDDS_B5G5R5X1_UNORM: + case TDDS_R5G5B5A1_UNORM: + case TDDS_R5G5B5X1_UNORM: + case TDDS_A1R5G5B5_UNORM: + case TDDS_X1R5G5B5_UNORM: + case TDDS_A1B5G5R5_UNORM: + case TDDS_X1B5G5R5_UNORM: + + // 1 * 6 bit + 2 * 5 bits + case TDDS_R5G6B5_UNORM: + case TDDS_B5G6R5_UNORM: + // 2 x 8 bits + case TDDS_A8P8: + case TDDS_R8G8_UNORM: + case TDDS_R8G8_SNORM: + case TDDS_G8R8_UNORM: + case TDDS_G8R8_SNORM: + case TDDS_R8G8_UINT: + case TDDS_R8G8_SINT: + // 1 * 16 bits + case TDDS_R16_UNORM: + case TDDS_R16_SNORM: + case TDDS_R16_UINT: + case TDDS_R16_SINT: + case TDDS_R16_SFLOAT: + return 2; + + // 3 * 8 bits + case TDDS_R8G8B8_UNORM: + case TDDS_B8G8R8_UNORM: + return 3; + // 4 * 8 bits + case TDDS_A8B8G8R8_SNORM: + case TDDS_R8G8B8A8_SNORM: + case TDDS_R8G8B8A8_UINT: + case TDDS_R8G8B8A8_SINT: + case TDDS_R8G8B8A8_SRGB: + case TDDS_B8G8R8A8_SRGB: + case TDDS_B8G8R8A8_SNORM: + + case TDDS_R8G8B8A8_UNORM: + case TDDS_R8G8B8X8_UNORM: + case TDDS_B8G8R8A8_UNORM: + case TDDS_B8G8R8X8_UNORM: + case TDDS_A8B8G8R8_UNORM: + case TDDS_X8B8G8R8_UNORM: + case TDDS_A8R8G8B8_UNORM: + case TDDS_X8R8G8B8_UNORM: + + // 3 * 9 bits + 5 bits + case TDDS_R9G9B9E5_UFLOAT: + // 3 * 10 bits + 2 bits + case TDDS_R10G10B10_7E3_A2_FLOAT: + case TDDS_R10G10B10_6E4_A2_FLOAT: + case TDDS_R10G10B10_SNORM_A2_UNORM: + + case TDDS_B10G10R10A2_UNORM: + case TDDS_B10G10R10A2_SNORM: + case TDDS_A2B10G10R10_UNORM: + case TDDS_A2B10G10R10_SNORM: + case TDDS_A2R10G10B10_UNORM: + case TDDS_A2R10G10B10_SNORM: + case TDDS_R10G10B10A2_UNORM: + case 
TDDS_R10G10B10A2_SNORM: + case TDDS_R10G10B10A2_UINT: + + // 2 * 11 bits + 10 bits + case TDDS_R11G11B10_UFLOAT: + // 2 * 16 bits + case TDDS_R16G16_UNORM: + case TDDS_R16G16_SNORM: + case TDDS_R16G16_UINT: + case TDDS_R16G16_SINT: + case TDDS_R16G16_SFLOAT: + case TDDS_G16R16_UNORM: + case TDDS_G16R16_SNORM: + // 1 * 32 bits + case TDDS_R32_UINT: + case TDDS_R32_SINT: + case TDDS_R32_SFLOAT: + return 4; + // 4 * 16 bits + case TDDS_R16G16B16A16_UNORM: + case TDDS_R16G16B16A16_SNORM: + case TDDS_R16G16B16A16_UINT: + case TDDS_R16G16B16A16_SINT: + case TDDS_R16G16B16A16_SFLOAT: + // 2 * 32 bits + case TDDS_R32G32_UINT: + case TDDS_R32G32_SINT: + case TDDS_R32G32_SFLOAT: + return 8; + // 3 * 32 bits + case TDDS_R32G32B32_UINT: + case TDDS_R32G32B32_SINT: + case TDDS_R32G32B32_SFLOAT: + return 12; + // 4 * 32 bits + case TDDS_R32G32B32A32_UINT: + case TDDS_R32G32B32A32_SINT: + case TDDS_R32G32B32A32_SFLOAT: + return 16; + // block formats + case TDDS_BC1_RGBA_UNORM_BLOCK: + case TDDS_BC1_RGBA_SRGB_BLOCK: + case TDDS_BC4_UNORM_BLOCK: + case TDDS_BC4_SNORM_BLOCK: + return 8; + + case TDDS_BC2_UNORM_BLOCK: + case TDDS_BC2_SRGB_BLOCK: + case TDDS_BC3_UNORM_BLOCK: + case TDDS_BC3_SRGB_BLOCK: + case TDDS_BC5_UNORM_BLOCK: + case TDDS_BC5_SNORM_BLOCK: + case TDDS_BC6H_UFLOAT_BLOCK: + case TDDS_BC6H_SFLOAT_BLOCK: + case TDDS_BC7_UNORM_BLOCK: + case TDDS_BC7_SRGB_BLOCK: + return 16; + + case TDDS_UNDEFINED: return 0; + // default: return 0; + case TDDS_AYUV:break; + case TDDS_Y410:break; + case TDDS_Y416:break; + case TDDS_NV12:break; + case TDDS_P010:break; + case TDDS_P016:break; + case TDDS_420_OPAQUE:break; + case TDDS_YUY2:break; + case TDDS_Y210:break; + case TDDS_Y216:break; + case TDDS_NV11:break; + case TDDS_AI44:break; + case TDDS_IA44:break; + case TDDS_D16_UNORM_S8_UINT:break; + case TDDS_R16_UNORM_X8_TYPELESS:break; + case TDDS_X16_TYPELESS_G8_UINT:break; + case TDDS_P208:break; + case TDDS_V208:break; + case TDDS_V408:break; + } + return 0; +} + +#define 
TINYDDS_CHK_DDSFORMAT(bits, rm, gm, bm, am, fmt) \ + if ((ctx->header.formatRGBBitCount == bits) && \ + (ctx->header.formatRBitMask == rm) && \ + (ctx->header.formatGBitMask == gm) && \ + (ctx->header.formatBBitMask == bm) && \ + (ctx->header.formatABitMask == am)) { return fmt; } + +static TinyDDS_Format TinyDDS_DecodeFormat(TinyDDS_Context *ctx) { + if (ctx->header.formatFlags & TINYDDS_DDPF_FOURCC) { + if (ctx->headerDx10.DXGIFormat != TIF_DXGI_FORMAT_UNKNOWN) { + return (TinyDDS_Format) ctx->headerDx10.DXGIFormat; + } + + // check fourCC and some special numbers.. + // unofficially during the dx9 timeline, D3D_FORMAT were stuck directly into + // formatFourCC field we handle FourCC and these < 119 codes here + // its unclear if this was only for formats that couldn't be exposed via + // Direct Draw Surfaces (like floats etc.) so I decode most of them anyway + switch (ctx->header.formatFourCC) { + case TINYDDS_D3DFMT_R8G8B8: return TDDS_R8G8B8_UNORM; + case TINYDDS_D3DFMT_A8R8G8B8: return TDDS_A8R8G8B8_UNORM; + case TINYDDS_D3DFMT_X8R8G8B8: return TDDS_X8R8G8B8_UNORM; + case TINYDDS_D3DFMT_R5G6B5: return TDDS_R5G6B5_UNORM; + case TINYDDS_D3DFMT_X1R5G5B5: return TDDS_X1R5G5B5_UNORM; + case TINYDDS_D3DFMT_A1R5G5B5: return TDDS_A1R5G5B5_UNORM; + case TINYDDS_D3DFMT_A4R4G4B4: return TDDS_A4R4G4B4_UNORM; + case TINYDDS_D3DFMT_R3G3B2: return TDDS_B2G3R3_UNORM; + case TINYDDS_D3DFMT_A8: return TDDS_A8_UNORM; + case TINYDDS_D3DFMT_A8R3G3B2: return TDDS_B2G3R3A8_UNORM; + case TINYDDS_D3DFMT_X4R4G4B4: return TDDS_A4R4G4B4_UNORM; + case TINYDDS_D3DFMT_A2B10G10R10: return TDDS_A2B10G10R10_UNORM; + case TINYDDS_D3DFMT_A8B8G8R8: return TDDS_A8B8G8R8_UNORM; + case TINYDDS_D3DFMT_X8B8G8R8: return TDDS_A8B8G8R8_UNORM; + case TINYDDS_D3DFMT_A2R10G10B10: return TDDS_A2R10G10B10_UNORM; + case TINYDDS_D3DFMT_G16R16: return TDDS_R16G16_UNORM; + case TINYDDS_D3DFMT_A16B16G16R16: return TDDS_R16G16B16A16_UNORM; + case TINYDDS_D3DFMT_R16F: return TDDS_R16_SFLOAT; + case 
TINYDDS_D3DFMT_G16R16F: return TDDS_R16G16_SFLOAT; + case TINYDDS_D3DFMT_A16B16G16R16F: return TDDS_R16G16B16A16_SFLOAT; + case TINYDDS_D3DFMT_A8P8: return TDDS_A8P8; + case TINYDDS_D3DFMT_P8: return TDDS_P8; + case TINYDDS_D3DFMT_L8: return TDDS_R8_UNORM; + case TINYDDS_D3DFMT_A8L8: return TDDS_R8G8_UNORM; + case TINYDDS_D3DFMT_A4L4: return TDDS_R4G4_UNORM; + case TINYDDS_D3DFMT_V8U8: return TDDS_G8R8_SNORM; + case TINYDDS_D3DFMT_L6V5U5: return TDDS_UNDEFINED; // TODO TDDS_R5G6B5_SNORM_PACK16; + case TINYDDS_D3DFMT_X8L8V8U8: return TDDS_R8G8B8A8_SNORM; + case TINYDDS_D3DFMT_Q8W8V8U8: return TDDS_R8G8B8A8_SNORM; + case TINYDDS_D3DFMT_V16U16: return TDDS_R16G16_SNORM; + case TINYDDS_D3DFMT_A2W10V10U10: return TDDS_A2B10G10R10_SNORM; + case TINYDDS_D3DFMT_L16: return TDDS_R16_UNORM; + case TINYDDS_D3DFMT_Q16W16V16U16: return TDDS_R16G16B16A16_SNORM; + case TINYDDS_D3DFMT_R32F: return TDDS_R32_SFLOAT; + case TINYDDS_D3DFMT_G32R32F: return TDDS_R32G32_SFLOAT; + case TINYDDS_D3DFMT_A32B32G32R32F: return TDDS_R32G32B32A32_SFLOAT; + case TINYDDS_D3DFMT_CxV8U8: return TDDS_UNDEFINED; + case TINYDDS_D3DFMT_A1: return TDDS_R1_UNORM; + case TINYDDS_D3DFMT_A2B10G10R10_XR_BIAS: return TDDS_UNDEFINED; + + // real 4CC no exotics yet just the block compression ones + case TINYDDS_MAKE_RIFFCODE('D', 'X', 'T', '1'): return TDDS_BC1_RGBA_UNORM_BLOCK; + case TINYDDS_MAKE_RIFFCODE('D', 'X', 'T', '2'): return TDDS_BC2_UNORM_BLOCK; + case TINYDDS_MAKE_RIFFCODE('D', 'X', 'T', '3'): return TDDS_BC2_UNORM_BLOCK; + case TINYDDS_MAKE_RIFFCODE('D', 'X', 'T', '4'): return TDDS_BC3_UNORM_BLOCK; + case TINYDDS_MAKE_RIFFCODE('D', 'X', 'T', '5'): return TDDS_BC3_UNORM_BLOCK; + case TINYDDS_MAKE_RIFFCODE('A', 'T', 'I', '1'): return TDDS_BC4_UNORM_BLOCK; + case TINYDDS_MAKE_RIFFCODE('A', 'T', 'I', '2'): return TDDS_BC5_UNORM_BLOCK; + case TINYDDS_MAKE_RIFFCODE('B', 'C', '4', 'U'): return TDDS_BC4_UNORM_BLOCK; + case TINYDDS_MAKE_RIFFCODE('B', 'C', '4', 'S'): return TDDS_BC4_SNORM_BLOCK; + case 
TINYDDS_MAKE_RIFFCODE('B', 'C', '5', 'U'): return TDDS_BC5_UNORM_BLOCK; + case TINYDDS_MAKE_RIFFCODE('B', 'C', '5', 'S'): return TDDS_BC5_SNORM_BLOCK; + } + } + + // okay back to direct draw surface bit fields to try and work format out. + // TODO this could be better i'm sure + + if ((ctx->header.formatFlags & TINYDDS_DDPF_PALETTEINDEXED4)) { + return TDDS_UNDEFINED; // TODO 4 bit CLUTs + } + + if ((ctx->header.formatFlags & TINYDDS_DDPF_PALETTEINDEXED8)) { + if(ctx->header.formatRGBBitCount != 8) return TDDS_UNDEFINED; + if(ctx->header.formatFlags & TINYDDS_DDPF_ALPHA) { + return TDDS_A8P8; + } else { + return TDDS_P8; + } + } + // what is this? TINYDDS_DDPF_PALETTEINDEXEDTO8 + + // most have RGB data and/or alpha + if ((ctx->header.formatFlags & TINYDDS_DDPF_RGB) || + (ctx->header.formatFlags & TINYDDS_DDPF_ALPHA)) { + + TINYDDS_CHK_DDSFORMAT(1, 0x1, 0x0, 0, 0, TDDS_R1_UNORM); + + TINYDDS_CHK_DDSFORMAT(8, 0xF0, 0x0F, 0, 0, TDDS_G4R4_UNORM); + TINYDDS_CHK_DDSFORMAT(8, 0x0F, 0xF0, 0, 0, TDDS_R4G4_UNORM); + TINYDDS_CHK_DDSFORMAT(8, 0xFF, 0, 0, 0, TDDS_R8_UNORM); + TINYDDS_CHK_DDSFORMAT(8, 0, 0, 0, 0xFF, TDDS_A8_UNORM); + TINYDDS_CHK_DDSFORMAT(8, 0xE0, 0x1C, 0x3, 0, TDDS_B2G3R3_UNORM); + + TINYDDS_CHK_DDSFORMAT(16, 0xF000, 0x0F00, 0x00F0, 0x000F, TDDS_A4B4G4R4_UNORM); + TINYDDS_CHK_DDSFORMAT(16, 0xF000, 0x0F00, 0x00F0, 0x0000, TDDS_X4B4G4R4_UNORM); + + TINYDDS_CHK_DDSFORMAT(16, 0x00F0, 0x0F00, 0xF000, 0x000F, TDDS_A4R4G4B4_UNORM); + TINYDDS_CHK_DDSFORMAT(16, 0x00F0, 0x0F00, 0xF000, 0x0000, TDDS_X4R4G4B4_UNORM); + + TINYDDS_CHK_DDSFORMAT(16, 0x0F00, 0x00F0, 0x000F, 0xF000, TDDS_B4G4R4A4_UNORM); + TINYDDS_CHK_DDSFORMAT(16, 0x0F00, 0x00F0, 0x000F, 0x0000, TDDS_B4G4R4X4_UNORM); + + TINYDDS_CHK_DDSFORMAT(16, 0x000F, 0x00F0, 0x0F00, 0xF000, TDDS_R4G4B4A4_UNORM); + TINYDDS_CHK_DDSFORMAT(16, 0x000F, 0x00F0, 0x0F00, 0x0000, TDDS_R4G4B4X4_UNORM); + + TINYDDS_CHK_DDSFORMAT(16, 0x7C00, 0x03E0, 0x001F, 0x8000, TDDS_B5G5R5A1_UNORM); + TINYDDS_CHK_DDSFORMAT(16, 0x7C00, 0x03E0, 
0x001F, 0x0000, TDDS_B5G5R5X1_UNORM); + + TINYDDS_CHK_DDSFORMAT(16, 0x001F, 0x03E0, 0x7C00, 0x8000, TDDS_R5G5B5A1_UNORM); + TINYDDS_CHK_DDSFORMAT(16, 0x001F, 0x03E0, 0x7C00, 0x0000, TDDS_R5G5B5X1_UNORM); + + TINYDDS_CHK_DDSFORMAT(16, 0x003E, 0x07C0, 0xF800, 0x0001, TDDS_A1R5G5B5_UNORM); + TINYDDS_CHK_DDSFORMAT(16, 0x003E, 0x07C0, 0xF800, 0x0000, TDDS_X1R5G5B5_UNORM); + + TINYDDS_CHK_DDSFORMAT(16, 0xF800, 0x07C0, 0x003E, 0x0001, TDDS_A1B5G5R5_UNORM); + TINYDDS_CHK_DDSFORMAT(16, 0xF800, 0x07C0, 0x003E, 0x0000, TDDS_X1B5G5R5_UNORM); + + TINYDDS_CHK_DDSFORMAT(16, 0xF800, 0x07E0, 0x001F, 0x0000, TDDS_B5G6R5_UNORM); + TINYDDS_CHK_DDSFORMAT(16, 0x001F, 0x07E0, 0xF800, 0x0000, TDDS_R5G6B5_UNORM); + + TINYDDS_CHK_DDSFORMAT(16, 0x00FF, 0xFF00, 0x0000, 0x0000, TDDS_R8G8_UNORM); + TINYDDS_CHK_DDSFORMAT(16, 0xFF00, 0x00FF, 0x0000, 0x0000, TDDS_G8R8_UNORM); + + TINYDDS_CHK_DDSFORMAT(16, 0xFFFF, 0x0000, 0x0000, 0x0000, TDDS_R16_UNORM); + + TINYDDS_CHK_DDSFORMAT(16, 0xE0, 0x1C, 0x3, 0xFF00, TDDS_B2G3R3A8_UNORM); + + TINYDDS_CHK_DDSFORMAT(24, 0xFF0000, 0x00FF00, 0x0000FF, 0x0, TDDS_B8G8R8_UNORM); + TINYDDS_CHK_DDSFORMAT(24, 0x0000FF, 0x00FF00, 0xFF0000, 0x0, TDDS_R8G8B8_UNORM); + + TINYDDS_CHK_DDSFORMAT(32, 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000, TDDS_R8G8B8A8_UNORM); + TINYDDS_CHK_DDSFORMAT(32, 0x000000FF, 0x0000FF00, 0x00FF0000, 0x00000000, TDDS_R8G8B8X8_UNORM); + + TINYDDS_CHK_DDSFORMAT(32, 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, TDDS_B8G8R8A8_UNORM); + TINYDDS_CHK_DDSFORMAT(32, 0x00FF0000, 0x0000FF00, 0x000000FF, 0x00000000, TDDS_B8G8R8X8_UNORM); + + TINYDDS_CHK_DDSFORMAT(32, 0xFF000000, 0x00FF0000, 0x0000FF00, 0x000000FF, TDDS_A8B8G8R8_UNORM); + TINYDDS_CHK_DDSFORMAT(32, 0xFF000000, 0x00FF0000, 0x0000FF00, 0x00000000, TDDS_X8B8G8R8_UNORM); + + TINYDDS_CHK_DDSFORMAT(32, 0x0000FF00, 0x00FF0000, 0xFF000000, 0x000000FF, TDDS_A8R8G8B8_UNORM); + TINYDDS_CHK_DDSFORMAT(32, 0x0000FF00, 0x00FF0000, 0xFF000000, 0x00000000, TDDS_X8R8G8B8_UNORM); + + 
TINYDDS_CHK_DDSFORMAT(32, 0x000003FF, 0x000FFC00, 0x3FF00000, 0xC0000000, TDDS_R10G10B10A2_UNORM); + TINYDDS_CHK_DDSFORMAT(32, 0xFFC00000, 0x003FF000, 0x00000FFC, 0x00000003, TDDS_A2B10G10R10_UNORM); + TINYDDS_CHK_DDSFORMAT(32, 0x00000FFC, 0x003FF000, 0xFFC00000, 0x00000003, TDDS_A2R10G10B10_UNORM); + + // this is often written incorrectly so we use the most 'common' version + TINYDDS_CHK_DDSFORMAT(32, 0x3FF00000, 0x000FFC00, 0x000003FF, 0xC0000000, TDDS_B10G10R10A2_UNORM); + + + TINYDDS_CHK_DDSFORMAT(32, 0xFFFF0000, 0x0000FFFF, 0x00000000, 0x00000000, TDDS_G16R16_UNORM); + TINYDDS_CHK_DDSFORMAT(32, 0x0000FFFF, 0xFFFF0000, 0x00000000, 0x00000000, TDDS_R16G16_UNORM); + TINYDDS_CHK_DDSFORMAT(32, 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000, TDDS_R32_UINT); + + if (ctx->header.formatRGBBitCount == 8) return TDDS_R8_UINT; + if (ctx->header.formatRGBBitCount == 16) return TDDS_R16_UINT; + if (ctx->header.formatRGBBitCount == 32) return TDDS_R32_UINT; + } + + if ((ctx->header.formatFlags & TINYDDS_DDPF_BUMPDUDV) || + (ctx->header.formatFlags & TINYDDS_DDPF_BUMPLUMINANCE)) { + TINYDDS_CHK_DDSFORMAT(16, 0xFF00, 0x00FF, 0x0000, 0x0000, TDDS_G8R8_SNORM); + TINYDDS_CHK_DDSFORMAT(16, 0x00FF, 0xFF00, 0x0000, 0x0000, TDDS_R8G8_SNORM); + + TINYDDS_CHK_DDSFORMAT(32, 0xFFFF0000, 0x0000FFFF, 0x0000, 0x0, TDDS_G16R16_SNORM); + TINYDDS_CHK_DDSFORMAT(32, 0x0000FFFF, 0xFFFF0000, 0x0000, 0x0, TDDS_R16G16_SNORM); + TINYDDS_CHK_DDSFORMAT(32, 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000, TDDS_R8G8B8A8_SNORM); + TINYDDS_CHK_DDSFORMAT(32, 0x000003FF, 0x000FFC00, 0x3FF00000, 0xC0000000, TDDS_R10G10B10A2_SNORM); + TINYDDS_CHK_DDSFORMAT(32, 0x3FF00000, 0x000FFC00, 0x000003FF, 0xC0000000, TDDS_B10G10R10A2_SNORM); + TINYDDS_CHK_DDSFORMAT(32, 0x00000FFC, 0x003FF000, 0xFFC00000, 0x00000003, TDDS_A2R10G10B10_SNORM); + TINYDDS_CHK_DDSFORMAT(32, 0xFFC00000, 0x003FF000, 0x00000FFC, 0x00000003, TDDS_A2B10G10R10_SNORM); + + if (ctx->header.formatRGBBitCount == 8) return TDDS_R8_SINT; + if 
(ctx->header.formatRGBBitCount == 16) return TDDS_R16_SINT; + if (ctx->header.formatRGBBitCount == 32) return TDDS_R32_SINT; + } + + if (ctx->header.formatFlags & TINYDDS_DDPF_LUMINANCE) { + TINYDDS_CHK_DDSFORMAT(8, 0x0F, 0x00, 0x00, 0xF0, TDDS_R4G4_UNORM); // this is A4L4 aka A4R4 we decode this as R4G4 + TINYDDS_CHK_DDSFORMAT(16, 0x00FF, 0x0000, 0x0000, 0xFF00, TDDS_R8G8_UNORM); // this is A8L8 aka A4R8 we decode this as R8G8 + + if (ctx->header.formatRGBBitCount == 8) return TDDS_R8_UNORM; + if (ctx->header.formatRGBBitCount == 16) return TDDS_R16_UNORM; + if (ctx->header.formatRGBBitCount == 32) return TDDS_R32_UINT; + + } + + return TDDS_UNDEFINED; +} +#undef TINYDDS_CHK_DDSFORMAT + +static uint32_t TinyDDS_MipMapReduce(uint32_t value, uint32_t mipmaplevel) { + + // handle 0 being passed in + if (value <= 1) + return 1; + + // there are better ways of doing this (log2 etc.) but this doesn't require any + // dependecies and isn't used enough to matter imho + for (uint32_t i = 0u; i < mipmaplevel; ++i) { + if (value <= 1) + return 1; + value = value / 2; + } + return value; +} + +bool TinyDDS_ReadHeader(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return false; + + ctx->headerPos = ctx->callbacks.tellFn(ctx->user); + if( ctx->callbacks.readFn(ctx->user, &ctx->header, sizeof(TinyDDS_Header)) != sizeof(TinyDDS_Header)) { + ctx->callbacks.errorFn(ctx->user, "Could not read DDS header"); + return false; + } + + // try the easy case of a modern dx10 DDS file + if ((ctx->header.formatFlags & TINYDDS_DDPF_FOURCC) && + (ctx->header.formatFourCC == TINYDDS_MAKE_RIFFCODE('D', 'X', '1', '0'))) { + ctx->callbacks.readFn(ctx->user, &ctx->headerDx10, sizeof(TinyDDS_HeaderDX10)); + + if (ctx->headerDx10.DXGIFormat >= TDDS_SYNTHESISED_DXGIFORMATS) { + ctx->callbacks.errorFn(ctx->user, "DX10 Header has an invalid DXGI_FORMAT code"); + return false; + } + } + + ctx->format = TinyDDS_DecodeFormat(ctx); + if (ctx->format 
== TDDS_UNDEFINED) { + ctx->callbacks.errorFn(ctx->user, "Could not decode DDS format"); + return false; + } + + if( (ctx->header.formatFourCC == 0) && + (ctx->header.formatRGBBitCount != 0) && + ((ctx->header.formatRGBBitCount/8) != TinyDDS_FormatSize(ctx->format))) { + ctx->callbacks.errorFn(ctx->user, "Format size mismatch"); + return false; + } + + // correct for dodgy mipmap levels counts + if(ctx->header.mipMapCount > 1) { + uint32_t w = ctx->header.width; + uint32_t h = ctx->header.height; + + for(uint32_t i = 0; i < ctx->header.mipMapCount;++i) { + if (TinyDDS_IsCompressed(ctx->format)) { + if (w <= 4 || h <= 4) { + ctx->header.mipMapCount = i + 1; + break; + } + } else if (w <= 1 || h <= 1) { + ctx->header.mipMapCount = i + 1; + break; + } + + + w = w / 2; + h = h / 2; + } + + } + + if (TinyDDS_IsCompressed(ctx->format)) { + // compressed images never get asked to make mip maps which is good as + // requires decompress/compress cycle + if(ctx->header.mipMapCount == 0) ctx->header.mipMapCount = 1; + } + + if(TinyDDS_IsCLUT(ctx->format)) { + // for now don't ask to generate mipmaps for cluts + if(ctx->header.mipMapCount == 0) ctx->header.mipMapCount = 1; + + size_t const clutSize = 256 * sizeof(uint32_t); + + ctx->clut = (uint32_t*) ctx->callbacks.allocFn(ctx->user, clutSize); + + if( ctx->callbacks.readFn(ctx->user, (void*)ctx->clut, clutSize) != clutSize) { + ctx->callbacks.errorFn(ctx->user, "Could not read DDS CLUT"); + return false; + } + } + + ctx->firstImagePos = ctx->callbacks.tellFn(ctx->user); + ctx->headerValid = true; + return true; +} + +bool TinyDDS_IsCubemap(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return false; + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return false; + } + + return (ctx->header.caps2 & TINYDDS_DDSCAPS2_CUBEMAP); +} + +bool TinyDDS_Dimensions(TinyDDS_ContextHandle handle, + uint32_t *width, 
+ uint32_t *height, + uint32_t *depth, + uint32_t *slices) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return false; + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return false; + } + + if (width) + *width = ctx->header.width; + if (height) + *height = ctx->header.height; + if (depth) + *depth = ctx->header.depth; + if (slices) + *slices = ctx->headerDx10.arraySize; + return true; +} + +uint32_t TinyDDS_Width(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return 0; + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return 0; + } + return ctx->header.width; +} + +uint32_t TinyDDS_Height(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return 0; + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return 0; + } + return ctx->header.height; +} + +uint32_t TinyDDS_Depth(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return 0; + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return 0; + } + + return ctx->header.depth; +} + +uint32_t TinyDDS_ArraySlices(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return 0; + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return 0; + } + + return ctx->headerDx10.arraySize; +} + +bool TinyDDS_Is1D(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return false; + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return false; + 
} + return (ctx->header.height <= 1 && ctx->header.depth <= 1); +} +bool TinyDDS_Is2D(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return false; + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return false; + } + return (ctx->header.height > 1 && ctx->header.depth <= 1); +} +bool TinyDDS_Is3D(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return false; + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return false; + } + + return (ctx->header.height > 1 && ctx->header.depth > 1); +} + +bool TinyDDS_IsArray(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return false; + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return false; + } + + return (ctx->headerDx10.arraySize >= 1); +} + +uint32_t TinyDDS_NumberOfMipmaps(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return 0; + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return 0; + } + + return ctx->header.mipMapCount ? 
ctx->header.mipMapCount : 1; +} + +bool TinyDDS_NeedsGenerationOfMipmaps(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return false; + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return false; + } + + return ctx->header.mipMapCount == 0; +} + +bool TinyDDS_NeedsEndianCorrecting(TinyDDS_ContextHandle handle) { + // TODO should return true if this file is compiled on big endian machines + BASISU_NOTE_UNUSED(handle); + return false; +} + +uint32_t TinyDDS_FaceSize(TinyDDS_ContextHandle handle, uint32_t mipmaplevel) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return 0; + + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return 0; + } + uint32_t w = TinyDDS_MipMapReduce(ctx->header.width, mipmaplevel); + uint32_t h = TinyDDS_MipMapReduce(ctx->header.height, mipmaplevel); + uint32_t d = TinyDDS_MipMapReduce(ctx->header.depth, mipmaplevel); + uint32_t s = ctx->headerDx10.arraySize ? 
ctx->headerDx10.arraySize : 1; + + if(d > 1 && s > 1) { + ctx->callbacks.errorFn(ctx->user, "Volume textures can't have array slices or be cubemap"); + return 0; + } + + if (TinyDDS_IsCompressed(ctx->format)) { + // padd to block boundaries + w = (w + 3) / 4; + h = (h + 3) / 4; + } + // 1 bit special case + if(ctx->format == TDDS_R1_UNORM) { + w = (w + 7) / 8; + } + + uint32_t const formatSize = TinyDDS_FormatSize(ctx->format); + return w * h * d * s * formatSize; +} + +uint32_t TinyDDS_ImageSize(TinyDDS_ContextHandle handle, uint32_t mipmaplevel) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return 0; + + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return 0; + } + + if( ctx->header.caps2 & TINYDDS_DDSCAPS2_CUBEMAP || + ctx->headerDx10.miscFlag & TINYDDS_D3D10_RESOURCE_MISC_TEXTURECUBE ) { + return TinyDDS_FaceSize(handle, mipmaplevel) * 6; + } else { + return TinyDDS_FaceSize(handle, mipmaplevel); + } +} + +void const *TinyDDS_ImageRawData(TinyDDS_ContextHandle handle, uint32_t mipmaplevel) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return NULL; + + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return NULL; + } + + if (mipmaplevel >= (ctx->header.mipMapCount ? 
ctx->header.mipMapCount : 1) ) { + ctx->callbacks.errorFn(ctx->user, "Invalid mipmap level"); + return NULL; + } + + if (mipmaplevel >= TINYDDS_MAX_MIPMAPLEVELS) { + ctx->callbacks.errorFn(ctx->user, "Invalid mipmap level"); + return NULL; + } + + if (ctx->mipmaps[mipmaplevel] != NULL) + return ctx->mipmaps[mipmaplevel]; + + if( ctx->header.caps2 & TINYDDS_DDSCAPS2_CUBEMAP || + ctx->headerDx10.miscFlag & TINYDDS_D3D10_RESOURCE_MISC_TEXTURECUBE ) { + + uint64_t offset = 0; + for(uint32_t i=0;i < mipmaplevel;++i) { + offset += TinyDDS_FaceSize(handle, i); + } + + uint32_t mipMapCount = ctx->header.mipMapCount; + if(mipMapCount == 0) mipMapCount = 1; + + // at least one cubemap generater has mipMapCount wrong which causes + // image artifacts :( + uint64_t nextFaceOffset = 0; + for(uint32_t i = 0;i < mipMapCount;++i) { + nextFaceOffset += TinyDDS_FaceSize(handle, i); + } + + size_t const faceSize = TinyDDS_FaceSize(handle, mipmaplevel); + ctx->mipmaps[mipmaplevel] = (uint8_t const *) ctx->callbacks.allocFn(ctx->user, faceSize * 6); + if(!ctx->mipmaps[mipmaplevel]) return NULL; + + uint8_t *dstPtr = (uint8_t*)ctx->mipmaps[mipmaplevel]; + for (uint32_t i = 0u;i < 6;++i) { + ctx->callbacks.seekFn(ctx->user, offset + ctx->firstImagePos); + size_t read = ctx->callbacks.readFn(ctx->user, (void *) dstPtr, faceSize); + if(read != faceSize) { + ctx->callbacks.freeFn(ctx->user, (void*)&ctx->mipmaps[mipmaplevel]); + return NULL; + } + dstPtr += faceSize; + offset += nextFaceOffset; + } + return ctx->mipmaps[mipmaplevel]; + } + + uint64_t offset = 0; + for(uint32_t i=0;i < mipmaplevel;++i) { + offset += TinyDDS_ImageSize(handle, i); + } + + uint32_t size = TinyDDS_ImageSize(handle, mipmaplevel); + if (size == 0) + return NULL; + + ctx->callbacks.seekFn(ctx->user, offset + ctx->firstImagePos); + + ctx->mipmaps[mipmaplevel] = (uint8_t const *) ctx->callbacks.allocFn(ctx->user, size); + if (!ctx->mipmaps[mipmaplevel]) return NULL; + size_t read = ctx->callbacks.readFn(ctx->user, 
(void *) ctx->mipmaps[mipmaplevel], size); + if(read != size) { + ctx->callbacks.freeFn(ctx->user, (void*)&ctx->mipmaps[mipmaplevel]); + return NULL; + } + + return ctx->mipmaps[mipmaplevel]; +} + +TinyDDS_Format TinyDDS_GetFormat(TinyDDS_ContextHandle handle) { + TinyDDS_Context *ctx = (TinyDDS_Context *) handle; + if (ctx == NULL) + return TDDS_UNDEFINED; + + if (!ctx->headerValid) { + ctx->callbacks.errorFn(ctx->user, "Header data hasn't been read yet or its invalid"); + return TDDS_UNDEFINED; + } + return ctx->format; +} + +#define TDDS_EF(bits, rm, gm, bm, am, fl) \ + header->formatRGBBitCount = bits; \ + header->formatRBitMask = rm; \ + header->formatGBitMask = gm; \ + header->formatBBitMask = bm; \ + header->formatABitMask = am; \ + header->formatFlags = fl; \ + header->formatFourCC = 0; \ + return true; + +#define TDDS_EF_RGB(bits, rm, gm, bm) TDDS_EF(bits, rm, gm, bm, 0, TINYDDS_DDPF_RGB ) +#define TDDS_EF_RGBA(bits, rm, gm, bm, am) TDDS_EF(bits, rm, gm, bm, am, TINYDDS_DDPF_RGB | TINYDDS_DDPF_ALPHAPIXELS) +#define TDDS_EF_ALPHA(bits, am) TDDS_EF(bits, 0, 0, 0, am, TINYDDS_DDPF_ALPHA) + +#define TDDS_EF_BUMP_RG(bits, rm, gm) TDDS_EF(bits, rm, gm, 0, 0, TINYDDS_DDPF_BUMPDUDV) +#define TDDS_EF_BUMP_RGB(bits, rm, gm, bm) TDDS_EF(bits, rm, gm, bm, 0, TINYDDS_DDPF_BUMPLUMINANCE) +#define TDDS_EF_BUMP_RGBA(bits, rm, gm, bm, am) TDDS_EF(bits, rm, gm, bm, am, TINYDDS_DDPF_BUMPLUMINANCE | TINYDDS_DDPF_ALPHAPIXELS) + +static bool TinyDDS_EncodeFormat(TinyDDS_Format fmt, TinyDDS_Header* header, TinyDDS_HeaderDX10* headerDx10) { + // lets start with the easy part. 
if its real DXGI_FORMAT we can just fill in the Dx10 part + if(fmt < TDDS_SYNTHESISED_DXGIFORMATS) { + headerDx10->DXGIFormat = (TinyImageFormat_DXGI_FORMAT)fmt; + header->formatFourCC = TINYDDS_MAKE_RIFFCODE('D','X','1','0'); + header->formatFlags = TINYDDS_DDPF_FOURCC; + } else { + headerDx10->DXGIFormat = TIF_DXGI_FORMAT_UNKNOWN; + } + // now lets try synthesising if possible + // if we can reset the DX10 fourCC but leave the format in place + // that way if we have slices which can only be DXGI_FORMAT we can use it + switch(fmt) { + case TDDS_UNDEFINED: break; + + case TDDS_R1_UNORM: TDDS_EF_RGB(1, 0x1, 0, 0) + case TDDS_R4G4_UNORM: TDDS_EF_RGB(8, 0x0F, 0xF0, 0) + case TDDS_G4R4_UNORM: TDDS_EF_RGB(8, 0xF0, 0x0F, 0) + case TDDS_B2G3R3_UNORM: TDDS_EF_RGB(8, 0x3, 0x7, 0x7 ) + case TDDS_R8_UNORM: TDDS_EF_RGB(8, 0xFF, 0, 0 ); + case TDDS_A8_UNORM: TDDS_EF_ALPHA( 8, 0xFF); + + case TDDS_R16_UNORM:TDDS_EF_RGB( 16,0x0000FFFF, 0, 0) + case TDDS_A4B4G4R4_UNORM: + TDDS_EF_RGBA(16, 0xF000, 0x0F00, 0x00F0, 0x000F); + case TDDS_X4B4G4R4_UNORM: + TDDS_EF_RGBA(16, 0xF000, 0x0F00, 0x00F0, 0x0000); + case TDDS_B4G4R4A4_UNORM: + TDDS_EF_RGBA(16, 0x0F00, 0x00F0, 0x000F, 0xF000); + case TDDS_B4G4R4X4_UNORM: + TDDS_EF_RGBA(16, 0x0F00, 0x00F0, 0x000F, 0x0000); + case TDDS_A4R4G4B4_UNORM: + TDDS_EF_RGBA(16, 0x00F0, 0x0F00, 0xF000, 0x000F); + case TDDS_X4R4G4B4_UNORM: + TDDS_EF_RGBA(16, 0x00F0, 0x0F00, 0xF000, 0x0000); + case TDDS_R4G4B4A4_UNORM: + TDDS_EF_RGBA(16, 0x000F, 0x00F0, 0x0F00, 0xF000); + case TDDS_R4G4B4X4_UNORM: + TDDS_EF_RGBA(16, 0x000F, 0x00F0, 0x0F00, 0x0000); + + case TDDS_B5G5R5A1_UNORM: + TDDS_EF_RGBA(16, 0x7C00, 0x03E0, 0x001F, 0x8000); + case TDDS_B5G5R5X1_UNORM: + TDDS_EF_RGBA(16, 0x7C00, 0x03E0, 0x001F, 0x0000); + + case TDDS_R5G5B5A1_UNORM: + TDDS_EF_RGBA(16, 0x001F, 0x03E0, 0x7C00, 0x8000); + case TDDS_R5G5B5X1_UNORM: + TDDS_EF_RGBA(16, 0x001F, 0x03E0, 0x7C00, 0x0000); + + case TDDS_A1R5G5B5_UNORM: + TDDS_EF_RGBA(16, 0x003E, 0x07C0, 0xF800, 0x0001); + case 
TDDS_X1R5G5B5_UNORM: + TDDS_EF_RGBA(16, 0x003E, 0x07C0, 0xF800, 0x0000); + case TDDS_A1B5G5R5_UNORM: + TDDS_EF_RGBA(16, 0xF800, 0x07C0, 0x003E, 0x0001); + case TDDS_X1B5G5R5_UNORM: + TDDS_EF_RGBA(16, 0xF800, 0x07C0, 0x003E, 0x0000); + + case TDDS_B5G6R5_UNORM: + TDDS_EF_RGB(16, 0xF800, 0x07E0, 0x001F); + case TDDS_R5G6B5_UNORM: + TDDS_EF_RGB(16, 0x001F, 0x07E0, 0xF800); + + case TDDS_R8G8_UNORM: + TDDS_EF_RGB(16, 0x00FF, 0xFF00, 0); + case TDDS_G8R8_UNORM: + TDDS_EF_RGB(16, 0xFF00, 0x00FF, 0); + case TDDS_G8R8_SNORM: + TDDS_EF_BUMP_RG(16, 0xFF00, 0x00FF); + + case TDDS_B2G3R3A8_UNORM: TDDS_EF_RGBA(8, 0x3, 0x7, 0x7, 0xFF00 ) + + case TDDS_R8G8B8_UNORM: + TDDS_EF_RGB( 24,0x000000FF, 0x0000FF00, 0x00FF0000) + case TDDS_B8G8R8_UNORM: + TDDS_EF_RGB( 24,0x00FF0000, 0x0000FF00, 0x000000FF) + + case TDDS_R8G8B8A8_UNORM: + TDDS_EF_RGBA( 32,0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000) + case TDDS_R8G8B8X8_UNORM: + TDDS_EF_RGBA( 32,0x000000FF, 0x0000FF00, 0x00FF0000, 0x00000000) + case TDDS_B8G8R8A8_UNORM: + TDDS_EF_RGBA( 32,0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000) + case TDDS_B8G8R8X8_UNORM: + TDDS_EF_RGBA( 32,0x00FF0000, 0x0000FF00, 0x000000FF, 0x00000000) + case TDDS_A8B8G8R8_UNORM: + TDDS_EF_RGBA( 32,0xFF000000, 0x00FF0000, 0x0000FF00, 0x000000FF) + case TDDS_X8B8G8R8_UNORM: + TDDS_EF_RGBA( 32,0xFF000000, 0x00FF0000, 0x0000FF00, 0x00000000) + case TDDS_A8R8G8B8_UNORM: + TDDS_EF_RGBA( 32,0x0000FF00, 0x00FF0000, 0xFF000000, 0x000000FF) + case TDDS_X8R8G8B8_UNORM: + TDDS_EF_RGBA( 32,0x0000FF00, 0x00FF0000, 0xFF000000, 0x00000000) + + /* A2R10G10B10 is broken via the traditional DDS descriptions, so we + * always use the Dx10 header for those + case TDDS_R10G10B10A2_UNORM: + TDDS_EF_RGBA( 32,0x3FF00000, 0x000FFC00, 0x000003FF, 0xC0000000) + case TDDS_A2B10G10R10_UNORM: + TDDS_EF_RGBA( 32,0xFFC00000, 0x003FF000, 0x00000FFC, 0x00000003) + case TDDS_A2R10G10B10_UNORM: + TDDS_EF_RGBA( 32,0x00000FFC, 0x003FF000, 0xFFC00000, 0x00000003) + case TDDS_B10G10R10A2_UNORM: + 
TDDS_EF_RGBA( 32,0x3FF00000, 0x000FFC00, 0x000003FF, 0xC0000000) + */ + case TDDS_R10G10B10A2_UNORM: + case TDDS_B10G10R10A2_UNORM: + case TDDS_A2B10G10R10_UNORM: + case TDDS_A2R10G10B10_UNORM: + case TDDS_R10G10B10A2_SNORM: + case TDDS_B10G10R10A2_SNORM: + case TDDS_A2B10G10R10_SNORM: + case TDDS_A2R10G10B10_SNORM: + break; + + case TDDS_R16G16_UNORM: TDDS_EF_RGB( 32,0x0000FFFF, 0xFFFF0000, 0) + case TDDS_G16R16_UNORM: TDDS_EF_RGB( 32,0xFFFF0000, 0x0000FFFF, 0) + + case TDDS_BC1_RGBA_UNORM_BLOCK: + header->formatFourCC = TINYDDS_MAKE_RIFFCODE('D','X','T','1'); + header->formatFlags = TINYDDS_DDPF_FOURCC; + return true; + case TDDS_BC2_UNORM_BLOCK: + header->formatFourCC = TINYDDS_MAKE_RIFFCODE('D','X','T','3'); + header->formatFlags = TINYDDS_DDPF_FOURCC; + return true; + case TDDS_BC3_UNORM_BLOCK: + header->formatFourCC = TINYDDS_MAKE_RIFFCODE('D','X','T','5'); + header->formatFlags = TINYDDS_DDPF_FOURCC; + return true; + case TDDS_BC4_UNORM_BLOCK: + header->formatFourCC = TINYDDS_MAKE_RIFFCODE('A','T','I','1'); + header->formatFlags = TINYDDS_DDPF_FOURCC; + return true; + case TDDS_BC5_UNORM_BLOCK: + header->formatFourCC = TINYDDS_MAKE_RIFFCODE('A','T','I','2'); + header->formatFlags = TINYDDS_DDPF_FOURCC; + return true; + + + case TDDS_R8_SNORM: + case TDDS_R8G8_SNORM: + case TDDS_R8G8B8A8_SNORM: + case TDDS_R16_SNORM: + case TDDS_R16G16_SNORM: + case TDDS_A8B8G8R8_SNORM: + case TDDS_B8G8R8A8_SNORM: + case TDDS_G16R16_SNORM: + + case TDDS_R8_UINT: + case TDDS_R8_SINT: + case TDDS_R8G8_UINT: + case TDDS_R8G8_SINT: + case TDDS_R8G8B8A8_UINT: + case TDDS_R8G8B8A8_SINT: + case TDDS_R8G8B8A8_SRGB: + case TDDS_B8G8R8A8_SRGB: + case TDDS_R9G9B9E5_UFLOAT: + case TDDS_R10G10B10A2_UINT: + case TDDS_R11G11B10_UFLOAT: + case TDDS_R16_UINT: + case TDDS_R16_SINT: + case TDDS_R16_SFLOAT: + case TDDS_R16G16_UINT: + case TDDS_R16G16_SINT: + case TDDS_R16G16_SFLOAT: + case TDDS_R16G16B16A16_UNORM: + case TDDS_R16G16B16A16_SNORM: + case TDDS_R16G16B16A16_UINT: + case 
TDDS_R16G16B16A16_SINT: + case TDDS_R16G16B16A16_SFLOAT: + case TDDS_R32_UINT: + case TDDS_R32_SINT: + case TDDS_R32_SFLOAT: + case TDDS_R32G32_UINT: + case TDDS_R32G32_SINT: + case TDDS_R32G32_SFLOAT: + case TDDS_R32G32B32_UINT: + case TDDS_R32G32B32_SINT: + case TDDS_R32G32B32_SFLOAT: + case TDDS_R32G32B32A32_UINT: + case TDDS_R32G32B32A32_SINT: + case TDDS_R32G32B32A32_SFLOAT: + case TDDS_BC1_RGBA_SRGB_BLOCK: + case TDDS_BC2_SRGB_BLOCK: + case TDDS_BC3_SRGB_BLOCK: + case TDDS_BC4_SNORM_BLOCK: + case TDDS_BC5_SNORM_BLOCK: + case TDDS_BC6H_UFLOAT_BLOCK: + case TDDS_BC6H_SFLOAT_BLOCK: + case TDDS_BC7_UNORM_BLOCK: + case TDDS_BC7_SRGB_BLOCK: + case TDDS_AYUV: + case TDDS_Y410: + case TDDS_Y416: + case TDDS_NV12: + case TDDS_P010: + case TDDS_P016: + case TDDS_420_OPAQUE: + case TDDS_YUY2: + case TDDS_Y210: + case TDDS_Y216: + case TDDS_NV11: + case TDDS_AI44: + case TDDS_IA44: + case TDDS_P8: + case TDDS_A8P8: + case TDDS_R10G10B10_7E3_A2_FLOAT: + case TDDS_R10G10B10_6E4_A2_FLOAT: + case TDDS_D16_UNORM_S8_UINT: + case TDDS_R16_UNORM_X8_TYPELESS: + case TDDS_X16_TYPELESS_G8_UINT: + case TDDS_P208: + case TDDS_V208: + case TDDS_V408: + case TDDS_R10G10B10_SNORM_A2_UNORM: + break; + + } + // these formats can probably be done via dx10 header so check + if(headerDx10->DXGIFormat == TIF_DXGI_FORMAT_UNKNOWN) return false; + else return true; +} + +#undef TDDS_EF +#undef TDDS_EF_RGB +#undef TDDS_EF_RGBA +#undef TDDS_EF_ALPHA + +bool TinyDDS_WriteImage(TinyDDS_WriteCallbacks const *callbacks, + void *user, + uint32_t width, + uint32_t height, + uint32_t depth, // 3D texture depth + uint32_t slices, // Array slices + uint32_t mipmaplevels, + TinyDDS_Format format, + bool cubemap, + bool preferDx10Format, + uint32_t const *mipmapsizes, + void const **mipmaps) { + TinyDDS_Header header; + TinyDDS_HeaderDX10 headerDX10; + memset(&header, 0, sizeof(header)); + memset(&headerDX10, 0, sizeof(headerDX10)); + + header.magic = TINYDDS_MAKE_RIFFCODE('D', 'D', 'S', ' '); + header.size 
= 124; + header.formatSize = 32; + + header.width = width; + header.height = height; + header.depth = (depth > 1) ? depth : 0; + header.mipMapCount = mipmaplevels; + + if(!TinyDDS_EncodeFormat(format, &header, &headerDX10)) return false; + + // do we have to force dx10 (for slices) + if (slices > 1) { + if(headerDX10.DXGIFormat == TIF_DXGI_FORMAT_UNKNOWN) { + // DDS doesn't support slices for formats that aren't DXGI compatible + return false; + } + header.formatFlags = TINYDDS_DDPF_FOURCC; + header.formatFourCC = TINYDDS_MAKE_RIFFCODE('D','X','1','0'); + headerDX10.arraySize = slices; + } + header.flags = TINYDDS_DDSD_CAPS | TINYDDS_DDSD_PIXELFORMAT | TINYDDS_DDSD_MIPMAPCOUNT; + header.caps1 = TINYDDS_DDSCAPS_TEXTURE | TINYDDS_DDSCAPS_COMPLEX | TINYDDS_DDSCAPS_MIPMAP; + + if(depth > 1) { + headerDX10.resourceDimension = TINYDDS_D3D10_RESOURCE_DIMENSION_TEXTURE3D; + header.flags |= TINYDDS_DDSD_DEPTH; + header.caps2 |= TINYDDS_DDSCAPS2_VOLUME; + } + else if(height > 1) { + headerDX10.resourceDimension = TINYDDS_D3D10_RESOURCE_DIMENSION_TEXTURE2D; + header.flags |= TINYDDS_DDSD_HEIGHT; + } + else if(width > 1) { + headerDX10.resourceDimension = TINYDDS_D3D10_RESOURCE_DIMENSION_TEXTURE1D; + header.flags |= TINYDDS_DDSD_WIDTH; + } + if(cubemap) { + headerDX10.miscFlag |= TINYDDS_D3D10_RESOURCE_MISC_TEXTURECUBE; + header.caps2 |= TINYDDS_DDSCAPS2_CUBEMAP | TINYDDS_DDSCAPS2_CUBEMAP_ALL; + } + + // unclear whether we need to save this or exactly what it should be... 
+ header.pitchOrLinearSize = 0; + if(preferDx10Format && headerDX10.DXGIFormat != TIF_DXGI_FORMAT_UNKNOWN) { + header.formatFlags = TINYDDS_DDPF_FOURCC; + header.formatFourCC = TINYDDS_MAKE_RIFFCODE('D','X','1','0'); + } + + // now write + callbacks->write(user, &header, sizeof(TinyDDS_Header)); + if(header.formatFlags & TINYDDS_DDPF_FOURCC && + header.formatFourCC == TINYDDS_MAKE_RIFFCODE('D','X','1','0')) { + callbacks->write(user, &headerDX10, sizeof(TinyDDS_HeaderDX10)); + } + + // rg 8/27/2024: The original tinydds.h code is wrong for mipmapped cubemaps. + // I'm going to work around this by having the caller compose the top mip data correctly. + // https://learn.microsoft.com/en-us/windows/win32/direct3ddds/dds-file-layout-for-cubic-environment-maps + for (uint32_t mipMapLevel = 0; mipMapLevel < header.mipMapCount; mipMapLevel++) + { + // rg: Adding this check, in case the caller wants to compose all the data themselves. + if (mipmapsizes[mipMapLevel]) + { + callbacks->write(user, mipmaps[mipMapLevel], mipmapsizes[mipMapLevel]); + } + } + return true; +} + +#endif + +#ifdef __cplusplus +}; +#endif + +#endif // end header +/* +MIT License + +Copyright (c) 2019 DeanoC + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ diff --git a/thirdparty/basisu/encoder/3rdparty/tinyexr.cpp b/thirdparty/basisu/encoder/3rdparty/tinyexr.cpp new file mode 100644 index 000000000..5548c5a45 --- /dev/null +++ b/thirdparty/basisu/encoder/3rdparty/tinyexr.cpp @@ -0,0 +1,12 @@ +#if defined(_WIN32) +#ifndef NOMINMAX +#define NOMINMAX +#endif +#endif + +#ifdef _MSC_VER +#pragma warning (disable:4530) // warning C4530: C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc +#endif + +#define TINYEXR_IMPLEMENTATION +#include "tinyexr.h" diff --git a/thirdparty/basisu/encoder/3rdparty/tinyexr.h b/thirdparty/basisu/encoder/3rdparty/tinyexr.h new file mode 100644 index 000000000..a2a065a06 --- /dev/null +++ b/thirdparty/basisu/encoder/3rdparty/tinyexr.h @@ -0,0 +1,9334 @@ +// rg 8/23/2024: I fixed some minor undefined behavior in this module (signed 32-bit left shifts). + +#ifndef TINYEXR_H_ +#define TINYEXR_H_ +/* +Copyright (c) 2014 - 2021, Syoyo Fujita and many contributors. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Syoyo Fujita nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +// TinyEXR contains some OpenEXR code, which is licensed under ------------ + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas +// Digital Ltd. LLC +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Industrial Light & Magic nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// End of OpenEXR license ------------------------------------------------- + + +// +// +// Do this: +// #define TINYEXR_IMPLEMENTATION +// before you include this file in *one* C or C++ file to create the +// implementation. +// +// // i.e. it should look like this: +// #include ... +// #include ... +// #include ... +// #define TINYEXR_IMPLEMENTATION +// #include "tinyexr.h" +// +// + +#include // for size_t +#include // guess stdint.h is available(C99) + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ + defined(__i386) || defined(__i486__) || defined(__i486) || \ + defined(i386) || defined(__ia64__) || defined(__x86_64__) +#define TINYEXR_X86_OR_X64_CPU 1 +#else +#define TINYEXR_X86_OR_X64_CPU 0 +#endif + +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || TINYEXR_X86_OR_X64_CPU +#define TINYEXR_LITTLE_ENDIAN 1 +#else +#define TINYEXR_LITTLE_ENDIAN 0 +#endif + +// Use miniz or not to decode ZIP format pixel. Linking with zlib +// required if this flag is 0 and TINYEXR_USE_STB_ZLIB is 0. +#ifndef TINYEXR_USE_MINIZ +#define TINYEXR_USE_MINIZ (1) +#ifndef MINIZ_HEADER_FILE_ONLY +#define MINIZ_HEADER_FILE_ONLY (1) +#endif +#endif + +// Use the ZIP implementation of stb_image.h and stb_image_write.h. +#ifndef TINYEXR_USE_STB_ZLIB +#define TINYEXR_USE_STB_ZLIB (0) +#endif + +// Use nanozlib. 
+#ifndef TINYEXR_USE_NANOZLIB +#define TINYEXR_USE_NANOZLIB (0) +#endif + +// Disable PIZ compression when applying cpplint. +#ifndef TINYEXR_USE_PIZ +#define TINYEXR_USE_PIZ (1) +#endif + +#ifndef TINYEXR_USE_ZFP +#define TINYEXR_USE_ZFP (0) // TinyEXR extension. +// http://computation.llnl.gov/projects/floating-point-compression +#endif + +#ifndef TINYEXR_USE_THREAD +#define TINYEXR_USE_THREAD (0) // No threaded loading. +// http://computation.llnl.gov/projects/floating-point-compression +#endif + +#ifndef TINYEXR_USE_OPENMP +#ifdef _OPENMP +#define TINYEXR_USE_OPENMP (1) +#else +#define TINYEXR_USE_OPENMP (0) +#endif +#endif + +#define TINYEXR_SUCCESS (0) +#define TINYEXR_ERROR_INVALID_MAGIC_NUMBER (-1) +#define TINYEXR_ERROR_INVALID_EXR_VERSION (-2) +#define TINYEXR_ERROR_INVALID_ARGUMENT (-3) +#define TINYEXR_ERROR_INVALID_DATA (-4) +#define TINYEXR_ERROR_INVALID_FILE (-5) +#define TINYEXR_ERROR_INVALID_PARAMETER (-6) +#define TINYEXR_ERROR_CANT_OPEN_FILE (-7) +#define TINYEXR_ERROR_UNSUPPORTED_FORMAT (-8) +#define TINYEXR_ERROR_INVALID_HEADER (-9) +#define TINYEXR_ERROR_UNSUPPORTED_FEATURE (-10) +#define TINYEXR_ERROR_CANT_WRITE_FILE (-11) +#define TINYEXR_ERROR_SERIALIZATION_FAILED (-12) +#define TINYEXR_ERROR_LAYER_NOT_FOUND (-13) +#define TINYEXR_ERROR_DATA_TOO_LARGE (-14) + +// @note { OpenEXR file format: http://www.openexr.com/openexrfilelayout.pdf } + +// pixel type: possible values are: UINT = 0 HALF = 1 FLOAT = 2 +#define TINYEXR_PIXELTYPE_UINT (0) +#define TINYEXR_PIXELTYPE_HALF (1) +#define TINYEXR_PIXELTYPE_FLOAT (2) + +#define TINYEXR_MAX_HEADER_ATTRIBUTES (1024) +#define TINYEXR_MAX_CUSTOM_ATTRIBUTES (128) + +#define TINYEXR_COMPRESSIONTYPE_NONE (0) +#define TINYEXR_COMPRESSIONTYPE_RLE (1) +#define TINYEXR_COMPRESSIONTYPE_ZIPS (2) +#define TINYEXR_COMPRESSIONTYPE_ZIP (3) +#define TINYEXR_COMPRESSIONTYPE_PIZ (4) +#define TINYEXR_COMPRESSIONTYPE_ZFP (128) // TinyEXR extension + +#define TINYEXR_ZFP_COMPRESSIONTYPE_RATE (0) +#define 
TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION (1) +#define TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY (2) + +#define TINYEXR_TILE_ONE_LEVEL (0) +#define TINYEXR_TILE_MIPMAP_LEVELS (1) +#define TINYEXR_TILE_RIPMAP_LEVELS (2) + +#define TINYEXR_TILE_ROUND_DOWN (0) +#define TINYEXR_TILE_ROUND_UP (1) + +typedef struct TEXRVersion { + int version; // this must be 2 + // tile format image; + // not zero for only a single-part "normal" tiled file (according to spec.) + int tiled; + int long_name; // long name attribute + // deep image(EXR 2.0); + // for a multi-part file, indicates that at least one part is of type deep* (according to spec.) + int non_image; + int multipart; // multi-part(EXR 2.0) +} EXRVersion; + +typedef struct TEXRAttribute { + char name[256]; // name and type are up to 255 chars long. + char type[256]; + unsigned char *value; // uint8_t* + int size; + int pad0; +} EXRAttribute; + +typedef struct TEXRChannelInfo { + char name[256]; // less than 255 bytes long + int pixel_type; + int x_sampling; + int y_sampling; + unsigned char p_linear; + unsigned char pad[3]; +} EXRChannelInfo; + +typedef struct TEXRTile { + int offset_x; + int offset_y; + int level_x; + int level_y; + + int width; // actual width in a tile. + int height; // actual height int a tile. + + unsigned char **images; // image[channels][pixels] +} EXRTile; + +typedef struct TEXRBox2i { + int min_x; + int min_y; + int max_x; + int max_y; +} EXRBox2i; + +typedef struct TEXRHeader { + float pixel_aspect_ratio; + int line_order; + EXRBox2i data_window; + EXRBox2i display_window; + float screen_window_center[2]; + float screen_window_width; + + int chunk_count; + + // Properties for tiled format(`tiledesc`). 
+ int tiled; + int tile_size_x; + int tile_size_y; + int tile_level_mode; + int tile_rounding_mode; + + int long_name; + // for a single-part file, agree with the version field bit 11 + // for a multi-part file, it is consistent with the type of part + int non_image; + int multipart; + unsigned int header_len; + + // Custom attributes(exludes required attributes(e.g. `channels`, + // `compression`, etc) + int num_custom_attributes; + EXRAttribute *custom_attributes; // array of EXRAttribute. size = + // `num_custom_attributes`. + + EXRChannelInfo *channels; // [num_channels] + + int *pixel_types; // Loaded pixel type(TINYEXR_PIXELTYPE_*) of `images` for + // each channel. This is overwritten with `requested_pixel_types` when + // loading. + int num_channels; + + int compression_type; // compression type(TINYEXR_COMPRESSIONTYPE_*) + int *requested_pixel_types; // Filled initially by + // ParseEXRHeaderFrom(Meomory|File), then users + // can edit it(only valid for HALF pixel type + // channel) + // name attribute required for multipart files; + // must be unique and non empty (according to spec.); + // use EXRSetNameAttr for setting value; + // max 255 character allowed - excluding terminating zero + char name[256]; +} EXRHeader; + +typedef struct TEXRMultiPartHeader { + int num_headers; + EXRHeader *headers; + +} EXRMultiPartHeader; + +typedef struct TEXRImage { + EXRTile *tiles; // Tiled pixel data. The application must reconstruct image + // from tiles manually. NULL if scanline format. + struct TEXRImage* next_level; // NULL if scanline format or image is the last level. + int level_x; // x level index + int level_y; // y level index + + unsigned char **images; // image[channels][pixels]. NULL if tiled format. + + int width; + int height; + int num_channels; + + // Properties for tile format. 
+ int num_tiles; + +} EXRImage; + +typedef struct TEXRMultiPartImage { + int num_images; + EXRImage *images; + +} EXRMultiPartImage; + +typedef struct TDeepImage { + const char **channel_names; + float ***image; // image[channels][scanlines][samples] + int **offset_table; // offset_table[scanline][offsets] + int num_channels; + int width; + int height; + int pad0; +} DeepImage; + +// @deprecated { For backward compatibility. Not recommended to use. } +// Loads single-frame OpenEXR image. Assume EXR image contains A(single channel +// alpha) or RGB(A) channels. +// Application must free image data as returned by `out_rgba` +// Result image format is: float x RGBA x width x hight +// Returns negative value and may set error string in `err` when there's an +// error +extern int LoadEXR(float **out_rgba, int *width, int *height, + const char *filename, const char **err); + +// Loads single-frame OpenEXR image by specifying layer name. Assume EXR image +// contains A(single channel alpha) or RGB(A) channels. Application must free +// image data as returned by `out_rgba` Result image format is: float x RGBA x +// width x hight Returns negative value and may set error string in `err` when +// there's an error When the specified layer name is not found in the EXR file, +// the function will return `TINYEXR_ERROR_LAYER_NOT_FOUND`. +extern int LoadEXRWithLayer(float **out_rgba, int *width, int *height, + const char *filename, const char *layer_name, + const char **err, int *num_chans = NULL); + +// +// Get layer infos from EXR file. +// +// @param[out] layer_names List of layer names. Application must free memory +// after using this. +// @param[out] num_layers The number of layers +// @param[out] err Error string(will be filled when the function returns error +// code). Free it using FreeEXRErrorMessage after using this value. +// +// @return TINYEXR_SUCCEES upon success. 
+// +extern int EXRLayers(const char *filename, const char **layer_names[], + int *num_layers, const char **err); + +// @deprecated +// Simple wrapper API for ParseEXRHeaderFromFile. +// checking given file is a EXR file(by just look up header) +// @return TINYEXR_SUCCEES for EXR image, TINYEXR_ERROR_INVALID_HEADER for +// others +extern int IsEXR(const char *filename); + +// Simple wrapper API for ParseEXRHeaderFromMemory. +// Check if given data is a EXR image(by just looking up a header section) +// @return TINYEXR_SUCCEES for EXR image, TINYEXR_ERROR_INVALID_HEADER for +// others +extern int IsEXRFromMemory(const unsigned char *memory, size_t size); + +// @deprecated +// Saves single-frame OpenEXR image to a buffer. Assume EXR image contains RGB(A) channels. +// components must be 1(Grayscale), 3(RGB) or 4(RGBA). +// Input image format is: `float x width x height`, or `float x RGB(A) x width x +// hight` +// Save image as fp16(HALF) format when `save_as_fp16` is positive non-zero +// value. +// Save image as fp32(FLOAT) format when `save_as_fp16` is 0. +// Use ZIP compression by default. +// `buffer` is the pointer to write EXR data. +// Memory for `buffer` is allocated internally in SaveEXRToMemory. +// Returns the data size of EXR file when the value is positive(up to 2GB EXR data). +// Returns negative value and may set error string in `err` when there's an +// error +extern int SaveEXRToMemory(const float *data, const int width, const int height, + const int components, const int save_as_fp16, + const unsigned char **buffer, const char **err); + +// @deprecated { Not recommended, but handy to use. } +// Saves single-frame OpenEXR image to a buffer. Assume EXR image contains RGB(A) channels. +// components must be 1(Grayscale), 3(RGB) or 4(RGBA). +// Input image format is: `float x width x height`, or `float x RGB(A) x width x +// hight` +// Save image as fp16(HALF) format when `save_as_fp16` is positive non-zero +// value. 
+// Save image as fp32(FLOAT) format when `save_as_fp16` is 0. +// Use ZIP compression by default. +// Returns TINYEXR_SUCCEES(0) when success. +// Returns negative value and may set error string in `err` when there's an +// error +extern int SaveEXR(const float *data, const int width, const int height, + const int components, const int save_as_fp16, + const char *filename, const char **err); + +// Returns the number of resolution levels of the image (including the base) +extern int EXRNumLevels(const EXRImage* exr_image); + +// Initialize EXRHeader struct +extern void InitEXRHeader(EXRHeader *exr_header); + +// Set name attribute of EXRHeader struct (it makes a copy) +extern void EXRSetNameAttr(EXRHeader *exr_header, const char* name); + +// Initialize EXRImage struct +extern void InitEXRImage(EXRImage *exr_image); + +// Frees internal data of EXRHeader struct +extern int FreeEXRHeader(EXRHeader *exr_header); + +// Frees internal data of EXRImage struct +extern int FreeEXRImage(EXRImage *exr_image); + +// Frees error message +extern void FreeEXRErrorMessage(const char *msg); + +// Parse EXR version header of a file. +extern int ParseEXRVersionFromFile(EXRVersion *version, const char *filename); + +// Parse EXR version header from memory-mapped EXR data. +extern int ParseEXRVersionFromMemory(EXRVersion *version, + const unsigned char *memory, size_t size); + +// Parse single-part OpenEXR header from a file and initialize `EXRHeader`. +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int ParseEXRHeaderFromFile(EXRHeader *header, const EXRVersion *version, + const char *filename, const char **err); + +// Parse single-part OpenEXR header from a memory and initialize `EXRHeader`. 
+// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int ParseEXRHeaderFromMemory(EXRHeader *header, + const EXRVersion *version, + const unsigned char *memory, size_t size, + const char **err); + +// Parse multi-part OpenEXR headers from a file and initialize `EXRHeader*` +// array. +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int ParseEXRMultipartHeaderFromFile(EXRHeader ***headers, + int *num_headers, + const EXRVersion *version, + const char *filename, + const char **err); + +// Parse multi-part OpenEXR headers from a memory and initialize `EXRHeader*` +// array +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int ParseEXRMultipartHeaderFromMemory(EXRHeader ***headers, + int *num_headers, + const EXRVersion *version, + const unsigned char *memory, + size_t size, const char **err); + +// Loads single-part OpenEXR image from a file. +// Application must setup `ParseEXRHeaderFromFile` before calling this function. +// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRImageFromFile(EXRImage *image, const EXRHeader *header, + const char *filename, const char **err); + +// Loads single-part OpenEXR image from a memory. +// Application must setup `EXRHeader` with +// `ParseEXRHeaderFromMemory` before calling this function. 
+// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRImageFromMemory(EXRImage *image, const EXRHeader *header, + const unsigned char *memory, + const size_t size, const char **err); + +// Loads multi-part OpenEXR image from a file. +// Application must setup `ParseEXRMultipartHeaderFromFile` before calling this +// function. +// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRMultipartImageFromFile(EXRImage *images, + const EXRHeader **headers, + unsigned int num_parts, + const char *filename, + const char **err); + +// Loads multi-part OpenEXR image from a memory. +// Application must setup `EXRHeader*` array with +// `ParseEXRMultipartHeaderFromMemory` before calling this function. +// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRMultipartImageFromMemory(EXRImage *images, + const EXRHeader **headers, + unsigned int num_parts, + const unsigned char *memory, + const size_t size, const char **err); + +// Saves multi-channel, single-frame OpenEXR image to a file. +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int SaveEXRImageToFile(const EXRImage *image, + const EXRHeader *exr_header, const char *filename, + const char **err); + +// Saves multi-channel, single-frame OpenEXR image to a memory. 
+// Image is compressed using EXRImage.compression value. +// Return the number of bytes if success. +// Return zero and will set error string in `err` when there's an +// error. +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern size_t SaveEXRImageToMemory(const EXRImage *image, + const EXRHeader *exr_header, + unsigned char **memory, const char **err); + +// Saves multi-channel, multi-frame OpenEXR image to a memory. +// Image is compressed using EXRImage.compression value. +// File global attributes (eg. display_window) must be set in the first header. +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int SaveEXRMultipartImageToFile(const EXRImage *images, + const EXRHeader **exr_headers, + unsigned int num_parts, + const char *filename, const char **err); + +// Saves multi-channel, multi-frame OpenEXR image to a memory. +// Image is compressed using EXRImage.compression value. +// File global attributes (eg. display_window) must be set in the first header. +// Return the number of bytes if success. +// Return zero and will set error string in `err` when there's an +// error. +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern size_t SaveEXRMultipartImageToMemory(const EXRImage *images, + const EXRHeader **exr_headers, + unsigned int num_parts, + unsigned char **memory, const char **err); +// Loads single-frame OpenEXR deep image. 
+// Application must free memory of variables in DeepImage(image, offset_table) +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadDeepEXR(DeepImage *out_image, const char *filename, + const char **err); + +// NOT YET IMPLEMENTED: +// Saves single-frame OpenEXR deep image. +// Returns negative value and may set error string in `err` when there's an +// error +// extern int SaveDeepEXR(const DeepImage *in_image, const char *filename, +// const char **err); + +// NOT YET IMPLEMENTED: +// Loads multi-part OpenEXR deep image. +// Application must free memory of variables in DeepImage(image, offset_table) +// extern int LoadMultiPartDeepEXR(DeepImage **out_image, int num_parts, const +// char *filename, +// const char **err); + +// For emscripten. +// Loads single-frame OpenEXR image from memory. Assume EXR image contains +// RGB(A) channels. +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height, + const unsigned char *memory, size_t size, + const char **err); + +#ifdef __cplusplus +} +#endif + +#endif // TINYEXR_H_ + +#ifdef TINYEXR_IMPLEMENTATION +#ifndef TINYEXR_IMPLEMENTATION_DEFINED +#define TINYEXR_IMPLEMENTATION_DEFINED + +#ifdef _WIN32 + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include // for UTF-8 and memory-mapping + +#if !defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP) +#define TINYEXR_USE_WIN32_MMAP (1) +#endif + +#elif defined(__linux__) || defined(__unix__) +#include // for open() +#include // for memory-mapping +#include // for stat +#include // for close() +#define TINYEXR_USE_POSIX_MMAP (1) +#endif + +#include 
+#include +#include +#include +#include + +//#include // debug + +#include +#include +#include +#include + +// https://stackoverflow.com/questions/5047971/how-do-i-check-for-c11-support +#if __cplusplus > 199711L || (defined(_MSC_VER) && _MSC_VER >= 1900) +#define TINYEXR_HAS_CXX11 (1) +// C++11 +#include + +#if TINYEXR_USE_THREAD +#include +#include +#endif + +#else // __cplusplus > 199711L +#define TINYEXR_HAS_CXX11 (0) +#endif // __cplusplus > 199711L + +#if TINYEXR_USE_OPENMP +#include +#endif + +#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) +#include "../basisu_miniz.h" +#else +// Issue #46. Please include your own zlib-compatible API header before +// including `tinyexr.h` +//#include "zlib.h" +#endif + +#if defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1) +#define NANOZLIB_IMPLEMENTATION +#include "nanozlib.h" +#endif + +#if TINYEXR_USE_STB_ZLIB +// Since we don't know where a project has stb_image.h and stb_image_write.h +// and whether they are in the include path, we don't include them here, and +// instead declare the two relevant functions manually. +// from stb_image.h: +extern "C" int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); +// from stb_image_write.h: +extern "C" unsigned char *stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality); +#endif + + +#if TINYEXR_USE_ZFP + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Weverything" +#endif + +#include "zfp.h" + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#endif + +// cond: conditional expression +// msg: std::string +// err: std::string* +#define TINYEXR_CHECK_AND_RETURN_MSG(cond, msg, err) do { \ + if (!(cond)) { \ + if (!err) { \ + std::ostringstream ss_e; \ + ss_e << __func__ << "():" << __LINE__ << msg << "\n"; \ + (*err) += ss_e.str(); \ + } \ + return false;\ + } \ + } while(0) + +// no error message. 
+#define TINYEXR_CHECK_AND_RETURN_C(cond, retcode) do { \ + if (!(cond)) { \ + return retcode; \ + } \ + } while(0) + +namespace tinyexr { + +#if __cplusplus > 199711L +// C++11 +typedef uint64_t tinyexr_uint64; +typedef int64_t tinyexr_int64; +#else +// Although `long long` is not a standard type pre C++11, assume it is defined +// as a compiler's extension. +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++11-long-long" +#endif +typedef unsigned long long tinyexr_uint64; +typedef long long tinyexr_int64; +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +#endif + +// static bool IsBigEndian(void) { +// union { +// unsigned int i; +// char c[4]; +// } bint = {0x01020304}; +// +// return bint.c[0] == 1; +//} + +static void SetErrorMessage(const std::string &msg, const char **err) { + if (err) { +#ifdef _WIN32 + (*err) = _strdup(msg.c_str()); +#else + (*err) = strdup(msg.c_str()); +#endif + } +} + +#if 0 +static void SetWarningMessage(const std::string &msg, const char **warn) { + if (warn) { +#ifdef _WIN32 + (*warn) = _strdup(msg.c_str()); +#else + (*warn) = strdup(msg.c_str()); +#endif + } +} +#endif + +static const int kEXRVersionSize = 8; + +static void cpy2(unsigned short *dst_val, const unsigned short *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; +} + +static void swap2(unsigned short *val) { +#if TINYEXR_LITTLE_ENDIAN + (void)val; +#else + unsigned short tmp = *val; + unsigned char *dst = reinterpret_cast(val); + unsigned char *src = reinterpret_cast(&tmp); + + dst[0] = src[1]; + dst[1] = src[0]; +#endif +} + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-function" +#endif + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif +static void cpy4(int *dst_val, const int *src_val) { + unsigned char *dst = 
reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + +static void cpy4(unsigned int *dst_val, const unsigned int *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + +static void cpy4(float *dst_val, const float *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +static void swap4(unsigned int *val) { +#if TINYEXR_LITTLE_ENDIAN + (void)val; +#else + unsigned int tmp = *val; + unsigned char *dst = reinterpret_cast(val); + unsigned char *src = reinterpret_cast(&tmp); + + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; +#endif +} + +static void swap4(int *val) { +#if TINYEXR_LITTLE_ENDIAN + (void)val; +#else + int tmp = *val; + unsigned char *dst = reinterpret_cast(val); + unsigned char *src = reinterpret_cast(&tmp); + + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; +#endif +} + +static void swap4(float *val) { +#if TINYEXR_LITTLE_ENDIAN + (void)val; +#else + float tmp = *val; + unsigned char *dst = reinterpret_cast(val); + unsigned char *src = reinterpret_cast(&tmp); + + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; +#endif +} + +#if 0 +static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + dst[4] = src[4]; + dst[5] = src[5]; + dst[6] = src[6]; + dst[7] = 
src[7]; +} +#endif + +static void swap8(tinyexr::tinyexr_uint64 *val) { +#if TINYEXR_LITTLE_ENDIAN + (void)val; +#else + tinyexr::tinyexr_uint64 tmp = (*val); + unsigned char *dst = reinterpret_cast(val); + unsigned char *src = reinterpret_cast(&tmp); + + dst[0] = src[7]; + dst[1] = src[6]; + dst[2] = src[5]; + dst[3] = src[4]; + dst[4] = src[3]; + dst[5] = src[2]; + dst[6] = src[1]; + dst[7] = src[0]; +#endif +} + +// https://gist.github.com/rygorous/2156668 +union FP32 { + unsigned int u; + float f; + struct { +#if TINYEXR_LITTLE_ENDIAN + unsigned int Mantissa : 23; + unsigned int Exponent : 8; + unsigned int Sign : 1; +#else + unsigned int Sign : 1; + unsigned int Exponent : 8; + unsigned int Mantissa : 23; +#endif + } s; +}; + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpadded" +#endif + +union FP16 { + unsigned short u; + struct { +#if TINYEXR_LITTLE_ENDIAN + unsigned int Mantissa : 10; + unsigned int Exponent : 5; + unsigned int Sign : 1; +#else + unsigned int Sign : 1; + unsigned int Exponent : 5; + unsigned int Mantissa : 10; +#endif + } s; +}; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +static FP32 half_to_float(FP16 h) { + static const FP32 magic = {113 << 23}; + static const unsigned int shifted_exp = 0x7c00 + << 13; // exponent mask after shift + FP32 o; + + o.u = (h.u & 0x7fffU) << 13U; // exponent/mantissa bits + unsigned int exp_ = shifted_exp & o.u; // just the exponent + o.u += (127 - 15) << 23; // exponent adjust + + // handle exponent special cases + if (exp_ == shifted_exp) // Inf/NaN? + o.u += (128 - 16) << 23; // extra exp adjust + else if (exp_ == 0) // Zero/Denormal? 
+ { + o.u += 1 << 23; // extra exp adjust + o.f -= magic.f; // renormalize + } + + o.u |= (h.u & 0x8000U) << 16U; // sign bit + return o; +} + +static FP16 float_to_half_full(FP32 f) { + FP16 o = {0}; + + // Based on ISPC reference code (with minor modifications) + if (f.s.Exponent == 0) // Signed zero/denormal (which will underflow) + o.s.Exponent = 0; + else if (f.s.Exponent == 255) // Inf or NaN (all exponent bits set) + { + o.s.Exponent = 31; + o.s.Mantissa = f.s.Mantissa ? 0x200 : 0; // NaN->qNaN and Inf->Inf + } else // Normalized number + { + // Exponent unbias the single, then bias the halfp + int newexp = f.s.Exponent - 127 + 15; + if (newexp >= 31) // Overflow, return signed infinity + o.s.Exponent = 31; + else if (newexp <= 0) // Underflow + { + if ((14 - newexp) <= 24) // Mantissa might be non-zero + { + unsigned int mant = f.s.Mantissa | 0x800000; // Hidden 1 bit + o.s.Mantissa = mant >> (14 - newexp); + if ((mant >> (13 - newexp)) & 1) // Check for rounding + o.u++; // Round, might overflow into exp bit, but this is OK + } + } else { + o.s.Exponent = static_cast(newexp); + o.s.Mantissa = f.s.Mantissa >> 13; + if (f.s.Mantissa & 0x1000) // Check for rounding + o.u++; // Round, might overflow to inf, this is OK + } + } + + o.s.Sign = f.s.Sign; + return o; +} + +// NOTE: From OpenEXR code +// #define IMF_INCREASING_Y 0 +// #define IMF_DECREASING_Y 1 +// #define IMF_RAMDOM_Y 2 +// +// #define IMF_NO_COMPRESSION 0 +// #define IMF_RLE_COMPRESSION 1 +// #define IMF_ZIPS_COMPRESSION 2 +// #define IMF_ZIP_COMPRESSION 3 +// #define IMF_PIZ_COMPRESSION 4 +// #define IMF_PXR24_COMPRESSION 5 +// #define IMF_B44_COMPRESSION 6 +// #define IMF_B44A_COMPRESSION 7 + +#ifdef __clang__ +#pragma clang diagnostic push + +#if __has_warning("-Wzero-as-null-pointer-constant") +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif + +#endif + +static const char *ReadString(std::string *s, const char *ptr, size_t len) { + // Read untile NULL(\0). 
+ const char *p = ptr; + const char *q = ptr; + while ((size_t(q - ptr) < len) && (*q) != 0) { + q++; + } + + if (size_t(q - ptr) >= len) { + (*s).clear(); + return NULL; + } + + (*s) = std::string(p, q); + + return q + 1; // skip '\0' +} + +static bool ReadAttribute(std::string *name, std::string *type, + std::vector *data, size_t *marker_size, + const char *marker, size_t size) { + size_t name_len = strnlen(marker, size); + if (name_len == size) { + // String does not have a terminating character. + return false; + } + *name = std::string(marker, name_len); + + marker += name_len + 1; + size -= name_len + 1; + + size_t type_len = strnlen(marker, size); + if (type_len == size) { + return false; + } + *type = std::string(marker, type_len); + + marker += type_len + 1; + size -= type_len + 1; + + if (size < sizeof(uint32_t)) { + return false; + } + + uint32_t data_len; + memcpy(&data_len, marker, sizeof(uint32_t)); + tinyexr::swap4(reinterpret_cast(&data_len)); + + if (data_len == 0) { + if ((*type).compare("string") == 0) { + // Accept empty string attribute. 
+ + marker += sizeof(uint32_t); + size -= sizeof(uint32_t); + + *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t); + + data->resize(1); + (*data)[0] = '\0'; + + return true; + } else { + return false; + } + } + + marker += sizeof(uint32_t); + size -= sizeof(uint32_t); + + if (size < data_len) { + return false; + } + + data->resize(static_cast(data_len)); + memcpy(&data->at(0), marker, static_cast(data_len)); + + *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t) + data_len; + return true; +} + +static void WriteAttributeToMemory(std::vector *out, + const char *name, const char *type, + const unsigned char *data, int len) { + out->insert(out->end(), name, name + strlen(name) + 1); + out->insert(out->end(), type, type + strlen(type) + 1); + + int outLen = len; + tinyexr::swap4(&outLen); + out->insert(out->end(), reinterpret_cast(&outLen), + reinterpret_cast(&outLen) + sizeof(int)); + out->insert(out->end(), data, data + len); +} + +typedef struct TChannelInfo { + std::string name; // less than 255 bytes long + int pixel_type; + int requested_pixel_type; + int x_sampling; + int y_sampling; + unsigned char p_linear; + unsigned char pad[3]; +} ChannelInfo; + +typedef struct { + int min_x; + int min_y; + int max_x; + int max_y; +} Box2iInfo; + +struct HeaderInfo { + std::vector channels; + std::vector attributes; + + Box2iInfo data_window; + int line_order; + Box2iInfo display_window; + float screen_window_center[2]; + float screen_window_width; + float pixel_aspect_ratio; + + int chunk_count; + + // Tiled format + int tiled; // Non-zero if the part is tiled. 
+ int tile_size_x; + int tile_size_y; + int tile_level_mode; + int tile_rounding_mode; + + unsigned int header_len; + + int compression_type; + + // required for multi-part or non-image files + std::string name; + // required for multi-part or non-image files + std::string type; + + void clear() { + channels.clear(); + attributes.clear(); + + data_window.min_x = 0; + data_window.min_y = 0; + data_window.max_x = 0; + data_window.max_y = 0; + line_order = 0; + display_window.min_x = 0; + display_window.min_y = 0; + display_window.max_x = 0; + display_window.max_y = 0; + screen_window_center[0] = 0.0f; + screen_window_center[1] = 0.0f; + screen_window_width = 0.0f; + pixel_aspect_ratio = 0.0f; + + chunk_count = 0; + + // Tiled format + tiled = 0; + tile_size_x = 0; + tile_size_y = 0; + tile_level_mode = 0; + tile_rounding_mode = 0; + + header_len = 0; + compression_type = 0; + + name.clear(); + type.clear(); + } +}; + +static bool ReadChannelInfo(std::vector &channels, + const std::vector &data) { + const char *p = reinterpret_cast(&data.at(0)); + + for (;;) { + if ((*p) == 0) { + break; + } + ChannelInfo info; + info.requested_pixel_type = 0; + + tinyexr_int64 data_len = static_cast(data.size()) - + (p - reinterpret_cast(data.data())); + if (data_len < 0) { + return false; + } + + p = ReadString(&info.name, p, size_t(data_len)); + if ((p == NULL) && (info.name.empty())) { + // Buffer overrun. Issue #51. 
+ return false; + } + + const unsigned char *data_end = + reinterpret_cast(p) + 16; + if (data_end >= (data.data() + data.size())) { + return false; + } + + memcpy(&info.pixel_type, p, sizeof(int)); + p += 4; + info.p_linear = static_cast(p[0]); // uchar + p += 1 + 3; // reserved: uchar[3] + memcpy(&info.x_sampling, p, sizeof(int)); // int + p += 4; + memcpy(&info.y_sampling, p, sizeof(int)); // int + p += 4; + + tinyexr::swap4(&info.pixel_type); + tinyexr::swap4(&info.x_sampling); + tinyexr::swap4(&info.y_sampling); + + channels.push_back(info); + } + + return true; +} + +static void WriteChannelInfo(std::vector &data, + const std::vector &channels) { + size_t sz = 0; + + // Calculate total size. + for (size_t c = 0; c < channels.size(); c++) { + sz += channels[c].name.length() + 1; // +1 for \0 + sz += 16; // 4 * int + } + data.resize(sz + 1); + + unsigned char *p = &data.at(0); + + for (size_t c = 0; c < channels.size(); c++) { + memcpy(p, channels[c].name.c_str(), channels[c].name.length()); + p += channels[c].name.length(); + (*p) = '\0'; + p++; + + int pixel_type = channels[c].requested_pixel_type; + int x_sampling = channels[c].x_sampling; + int y_sampling = channels[c].y_sampling; + tinyexr::swap4(&pixel_type); + tinyexr::swap4(&x_sampling); + tinyexr::swap4(&y_sampling); + + memcpy(p, &pixel_type, sizeof(int)); + p += sizeof(int); + + (*p) = channels[c].p_linear; + p += 4; + + memcpy(p, &x_sampling, sizeof(int)); + p += sizeof(int); + + memcpy(p, &y_sampling, sizeof(int)); + p += sizeof(int); + } + + (*p) = '\0'; +} + +static bool CompressZip(unsigned char *dst, + tinyexr::tinyexr_uint64 &compressedSize, + const unsigned char *src, unsigned long src_size) { + std::vector tmpBuf(src_size); + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfZipCompressor.cpp + // + + // + // Reorder the pixel data. 
+ // + + const char *srcPtr = reinterpret_cast(src); + + { + char *t1 = reinterpret_cast(&tmpBuf.at(0)); + char *t2 = reinterpret_cast(&tmpBuf.at(0)) + (src_size + 1) / 2; + const char *stop = srcPtr + src_size; + + for (;;) { + if (srcPtr < stop) + *(t1++) = *(srcPtr++); + else + break; + + if (srcPtr < stop) + *(t2++) = *(srcPtr++); + else + break; + } + } + + // + // Predictor. + // + + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + src_size; + int p = t[-1]; + + while (t < stop) { + int d = int(t[0]) - p + (128 + 256); + p = t[0]; + t[0] = static_cast(d); + ++t; + } + } + +#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) + // + // Compress the data using miniz + // + + buminiz::mz_ulong outSize = buminiz::mz_compressBound(src_size); + int ret = buminiz::mz_compress( + dst, &outSize, static_cast(&tmpBuf.at(0)), + src_size); + if (ret != buminiz::MZ_OK) { + return false; + } + + compressedSize = outSize; +#elif defined(TINYEXR_USE_STB_ZLIB) && (TINYEXR_USE_STB_ZLIB==1) + int outSize; + unsigned char* ret = stbi_zlib_compress(const_cast(&tmpBuf.at(0)), src_size, &outSize, 8); + if (!ret) { + return false; + } + memcpy(dst, ret, outSize); + free(ret); + + compressedSize = outSize; +#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1) + uint64_t dstSize = nanoz_compressBound(static_cast(src_size)); + int outSize{0}; + unsigned char *ret = nanoz_compress(&tmpBuf.at(0), src_size, &outSize, /* quality */8); + if (!ret) { + return false; + } + + memcpy(dst, ret, outSize); + free(ret); + + compressedSize = outSize; +#else + uLong outSize = compressBound(static_cast(src_size)); + int ret = compress(dst, &outSize, static_cast(&tmpBuf.at(0)), + src_size); + if (ret != Z_OK) { + return false; + } + + compressedSize = outSize; +#endif + + // Use uncompressed data when compressed data is larger than uncompressed. 
+ // (Issue 40) + if (compressedSize >= src_size) { + compressedSize = src_size; + memcpy(dst, src, src_size); + } + + return true; +} + +static bool DecompressZip(unsigned char *dst, + unsigned long *uncompressed_size /* inout */, + const unsigned char *src, unsigned long src_size) { + if ((*uncompressed_size) == src_size) { + // Data is not compressed(Issue 40). + memcpy(dst, src, src_size); + return true; + } + std::vector tmpBuf(*uncompressed_size); + +#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) + int ret = + buminiz::mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); + if (buminiz::MZ_OK != ret) { + return false; + } +#elif TINYEXR_USE_STB_ZLIB + int ret = stbi_zlib_decode_buffer(reinterpret_cast(&tmpBuf.at(0)), + *uncompressed_size, reinterpret_cast(src), src_size); + if (ret < 0) { + return false; + } +#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1) + uint64_t dest_size = (*uncompressed_size); + uint64_t uncomp_size{0}; + nanoz_status_t ret = + nanoz_uncompress(src, src_size, dest_size, &tmpBuf.at(0), &uncomp_size); + if (NANOZ_SUCCESS != ret) { + return false; + } + if ((*uncompressed_size) != uncomp_size) { + return false; + } +#else + int ret = uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); + if (Z_OK != ret) { + return false; + } +#endif + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfZipCompressor.cpp + // + + // Predictor. + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + (*uncompressed_size); + + while (t < stop) { + int d = int(t[-1]) + int(t[0]) - 128; + t[0] = static_cast(d); + ++t; + } + } + + // Reorder the pixel data. 
+ { + const char *t1 = reinterpret_cast(&tmpBuf.at(0)); + const char *t2 = reinterpret_cast(&tmpBuf.at(0)) + + (*uncompressed_size + 1) / 2; + char *s = reinterpret_cast(dst); + char *stop = s + (*uncompressed_size); + + for (;;) { + if (s < stop) + *(s++) = *(t1++); + else + break; + + if (s < stop) + *(s++) = *(t2++); + else + break; + } + } + + return true; +} + +// RLE code from OpenEXR -------------------------------------- + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wsign-conversion" +#if __has_warning("-Wextra-semi-stmt") +#pragma clang diagnostic ignored "-Wextra-semi-stmt" +#endif +#endif + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4204) // nonstandard extension used : non-constant + // aggregate initializer (also supported by GNU + // C and C99, so no big deal) +#pragma warning(disable : 4244) // 'initializing': conversion from '__int64' to + // 'int', possible loss of data +#pragma warning(disable : 4267) // 'argument': conversion from '__int64' to + // 'int', possible loss of data +#pragma warning(disable : 4996) // 'strdup': The POSIX name for this item is + // deprecated. Instead, use the ISO C and C++ + // conformant name: _strdup. +#endif + +const int MIN_RUN_LENGTH = 3; +const int MAX_RUN_LENGTH = 127; + +// +// Compress an array of bytes, using run-length encoding, +// and return the length of the compressed data. 
+// + +static int rleCompress(int inLength, const char in[], signed char out[]) { + const char *inEnd = in + inLength; + const char *runStart = in; + const char *runEnd = in + 1; + signed char *outWrite = out; + + while (runStart < inEnd) { + while (runEnd < inEnd && *runStart == *runEnd && + runEnd - runStart - 1 < MAX_RUN_LENGTH) { + ++runEnd; + } + + if (runEnd - runStart >= MIN_RUN_LENGTH) { + // + // Compressible run + // + + *outWrite++ = static_cast(runEnd - runStart) - 1; + *outWrite++ = *(reinterpret_cast(runStart)); + runStart = runEnd; + } else { + // + // Uncompressable run + // + + while (runEnd < inEnd && + ((runEnd + 1 >= inEnd || *runEnd != *(runEnd + 1)) || + (runEnd + 2 >= inEnd || *(runEnd + 1) != *(runEnd + 2))) && + runEnd - runStart < MAX_RUN_LENGTH) { + ++runEnd; + } + + *outWrite++ = static_cast(runStart - runEnd); + + while (runStart < runEnd) { + *outWrite++ = *(reinterpret_cast(runStart++)); + } + } + + ++runEnd; + } + + return static_cast(outWrite - out); +} + +// +// Uncompress an array of bytes compressed with rleCompress(). +// Returns the length of the uncompressed data, or 0 if the +// length of the uncompressed data would be more than maxLength. +// + +static int rleUncompress(int inLength, int maxLength, const signed char in[], + char out[]) { + char *outStart = out; + + while (inLength > 0) { + if (*in < 0) { + int count = -(static_cast(*in++)); + inLength -= count + 1; + + // Fixes #116: Add bounds check to in buffer. 
+ if ((0 > (maxLength -= count)) || (inLength < 0)) return 0; + + memcpy(out, in, count); + out += count; + in += count; + } else { + int count = *in++; + inLength -= 2; + + if ((0 > (maxLength -= count + 1)) || (inLength < 0)) return 0; + + memset(out, *reinterpret_cast(in), count + 1); + out += count + 1; + + in++; + } + } + + return static_cast(out - outStart); +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +// End of RLE code from OpenEXR ----------------------------------- + +static bool CompressRle(unsigned char *dst, + tinyexr::tinyexr_uint64 &compressedSize, + const unsigned char *src, unsigned long src_size) { + std::vector tmpBuf(src_size); + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfRleCompressor.cpp + // + + // + // Reorder the pixel data. + // + + const char *srcPtr = reinterpret_cast(src); + + { + char *t1 = reinterpret_cast(&tmpBuf.at(0)); + char *t2 = reinterpret_cast(&tmpBuf.at(0)) + (src_size + 1) / 2; + const char *stop = srcPtr + src_size; + + for (;;) { + if (srcPtr < stop) + *(t1++) = *(srcPtr++); + else + break; + + if (srcPtr < stop) + *(t2++) = *(srcPtr++); + else + break; + } + } + + // + // Predictor. + // + + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + src_size; + int p = t[-1]; + + while (t < stop) { + int d = int(t[0]) - p + (128 + 256); + p = t[0]; + t[0] = static_cast(d); + ++t; + } + } + + // outSize will be (srcSiz * 3) / 2 at max. + int outSize = rleCompress(static_cast(src_size), + reinterpret_cast(&tmpBuf.at(0)), + reinterpret_cast(dst)); + TINYEXR_CHECK_AND_RETURN_C(outSize > 0, false); + + compressedSize = static_cast(outSize); + + // Use uncompressed data when compressed data is larger than uncompressed. 
+ // (Issue 40) + if (compressedSize >= src_size) { + compressedSize = src_size; + memcpy(dst, src, src_size); + } + + return true; +} + +static bool DecompressRle(unsigned char *dst, + const unsigned long uncompressed_size, + const unsigned char *src, unsigned long src_size) { + if (uncompressed_size == src_size) { + // Data is not compressed(Issue 40). + memcpy(dst, src, src_size); + return true; + } + + // Workaround for issue #112. + // TODO(syoyo): Add more robust out-of-bounds check in `rleUncompress`. + if (src_size <= 2) { + return false; + } + + std::vector tmpBuf(uncompressed_size); + + int ret = rleUncompress(static_cast(src_size), + static_cast(uncompressed_size), + reinterpret_cast(src), + reinterpret_cast(&tmpBuf.at(0))); + if (ret != static_cast(uncompressed_size)) { + return false; + } + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfRleCompressor.cpp + // + + // Predictor. + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + uncompressed_size; + + while (t < stop) { + int d = int(t[-1]) + int(t[0]) - 128; + t[0] = static_cast(d); + ++t; + } + } + + // Reorder the pixel data. 
+ { + const char *t1 = reinterpret_cast(&tmpBuf.at(0)); + const char *t2 = reinterpret_cast(&tmpBuf.at(0)) + + (uncompressed_size + 1) / 2; + char *s = reinterpret_cast(dst); + char *stop = s + uncompressed_size; + + for (;;) { + if (s < stop) + *(s++) = *(t1++); + else + break; + + if (s < stop) + *(s++) = *(t2++); + else + break; + } + } + + return true; +} + +#if TINYEXR_USE_PIZ + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++11-long-long" +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wpadded" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wc++11-extensions" +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" + +#if __has_warning("-Wcast-qual") +#pragma clang diagnostic ignored "-Wcast-qual" +#endif + +#if __has_warning("-Wextra-semi-stmt") +#pragma clang diagnostic ignored "-Wextra-semi-stmt" +#endif + +#endif + +// +// PIZ compress/uncompress, based on OpenEXR's ImfPizCompressor.cpp +// +// ----------------------------------------------------------------- +// Copyright (c) 2004, Industrial Light & Magic, a division of Lucas +// Digital Ltd. LLC) +// (3 clause BSD license) +// + +struct PIZChannelData { + unsigned short *start; + unsigned short *end; + int nx; + int ny; + int ys; + int size; +}; + +//----------------------------------------------------------------------------- +// +// 16-bit Haar Wavelet encoding and decoding +// +// The source code in this file is derived from the encoding +// and decoding routines written by Christian Rouet for his +// PIZ image file format. 
//
//-----------------------------------------------------------------------------

//
// Wavelet basis functions without modulo arithmetic; they produce
// the best compression ratios when the wavelet-transformed data are
// Huffman-encoded, but the wavelet transform works only for 14-bit
// data (untransformed data values must be less than (1 << 14)).
//

// Forward 14-bit wavelet step: (a, b) -> (low = average, high = difference).
inline void wenc14(unsigned short a, unsigned short b, unsigned short &l,
                   unsigned short &h) {
  short as = static_cast<short>(a);
  short bs = static_cast<short>(b);

  short ms = (as + bs) >> 1;
  short ds = as - bs;

  l = static_cast<unsigned short>(ms);
  h = static_cast<unsigned short>(ds);
}

// Inverse 14-bit wavelet step; exact inverse of wenc14.
inline void wdec14(unsigned short l, unsigned short h, unsigned short &a,
                   unsigned short &b) {
  short ls = static_cast<short>(l);
  short hs = static_cast<short>(h);

  int hi = hs;
  int ai = ls + (hi & 1) + (hi >> 1);

  short as = static_cast<short>(ai);
  short bs = static_cast<short>(ai - hi);

  a = static_cast<unsigned short>(as);
  b = static_cast<unsigned short>(bs);
}

//
// Wavelet basis functions with modulo arithmetic; they work with full
// 16-bit data, but Huffman-encoding the wavelet-transformed data doesn't
// compress the data quite as well.
//

const int NBITS = 16;
const int A_OFFSET = 1 << (NBITS - 1);
const int M_OFFSET = 1 << (NBITS - 1);
const int MOD_MASK = (1 << NBITS) - 1;

// Forward 16-bit wavelet step with modulo arithmetic (lossless on full range).
inline void wenc16(unsigned short a, unsigned short b, unsigned short &l,
                   unsigned short &h) {
  int ao = (a + A_OFFSET) & MOD_MASK;
  int m = ((ao + b) >> 1);
  int d = ao - b;

  if (d < 0) m = (m + M_OFFSET) & MOD_MASK;

  d &= MOD_MASK;

  l = static_cast<unsigned short>(m);
  h = static_cast<unsigned short>(d);
}

// Inverse 16-bit wavelet step; exact inverse of wenc16.
inline void wdec16(unsigned short l, unsigned short h, unsigned short &a,
                   unsigned short &b) {
  int m = l;
  int d = h;
  int bb = (m - (d >> 1)) & MOD_MASK;
  int aa = (d + bb - A_OFFSET) & MOD_MASK;
  b = static_cast<unsigned short>(bb);
  a = static_cast<unsigned short>(aa);
}

//
// 2D Wavelet encoding:
//

static void wav2Encode(
    unsigned short *in,  // io: values are transformed in place
    int nx,              // i : x size
    int ox,              // i : x offset
    int ny,              // i : y size
    int oy,              // i : y offset
    unsigned short mx)   // i : maximum in[x][y] value
{
  // Use the exact (non-modulo) basis when all values fit in 14 bits.
  bool w14 = (mx < (1 << 14));
  int n = (nx > ny) ? ny : nx;
  int p = 1;   // == 1 <<  level
  int p2 = 2;  // == 1 << (level+1)

  //
  // Hierarchical loop on smaller dimension n
  //

  while (p2 <= n) {
    unsigned short *py = in;
    unsigned short *ey = in + oy * (ny - p2);
    int oy1 = oy * p;
    int oy2 = oy * p2;
    int ox1 = ox * p;
    int ox2 = ox * p2;
    unsigned short i00, i01, i10, i11;

    //
    // Y loop
    //

    for (; py <= ey; py += oy2) {
      unsigned short *px = py;
      unsigned short *ex = py + ox * (nx - p2);

      //
      // X loop
      //

      for (; px <= ex; px += ox2) {
        unsigned short *p01 = px + ox1;
        unsigned short *p10 = px + oy1;
        unsigned short *p11 = p10 + ox1;

        //
        // 2D wavelet encoding
        //

        if (w14) {
          wenc14(*px, *p01, i00, i01);
          wenc14(*p10, *p11, i10, i11);
          wenc14(i00, i10, *px, *p10);
          wenc14(i01, i11, *p01, *p11);
        } else {
          wenc16(*px, *p01, i00, i01);
          wenc16(*p10, *p11, i10, i11);
          wenc16(i00, i10, *px, *p10);
          wenc16(i01, i11, *p01, *p11);
        }
      }

      //
      // Encode (1D) odd column (still in Y loop)
      //

      if (nx & p) {
        unsigned short *p10 = px + oy1;

        if (w14)
          wenc14(*px, *p10, i00, *p10);
        else
          wenc16(*px, *p10, i00, *p10);

        *px = i00;
      }
    }

    //
    // Encode (1D) odd line (must loop in X)
    //

    if (ny & p) {
      unsigned short *px = py;
      unsigned short *ex = py + ox * (nx - p2);

      for (; px <= ex; px += ox2) {
        unsigned short *p01 = px + ox1;

        if (w14)
          wenc14(*px, *p01, i00, *p01);
        else
          wenc16(*px, *p01, i00, *p01);

        *px = i00;
      }
    }

    //
    // Next level
    //

    p = p2;
    p2 <<= 1;
  }
}

//
// 2D Wavelet decoding:
//

static void wav2Decode(
    unsigned short *in,  // io: values are transformed in place
    int nx,              // i : x size
    int ox,              // i : x offset
    int ny,              // i : y size
    int oy,              // i : y offset
    unsigned short mx)   // i : maximum in[x][y] value
{
  // Must mirror wav2Encode's choice of basis.
  bool w14 = (mx < (1 << 14));
  int n = (nx > ny) ? ny : nx;
  int p = 1;
  int p2;

  //
  // Search max level
  //

  while (p <= n) p <<= 1;

  p >>= 1;
  p2 = p;
  p >>= 1;

  //
  // Hierarchical loop on smaller dimension n
  //

  while (p >= 1) {
    unsigned short *py = in;
    unsigned short *ey = in + oy * (ny - p2);
    int oy1 = oy * p;
    int oy2 = oy * p2;
    int ox1 = ox * p;
    int ox2 = ox * p2;
    unsigned short i00, i01, i10, i11;

    //
    // Y loop
    //

    for (; py <= ey; py += oy2) {
      unsigned short *px = py;
      unsigned short *ex = py + ox * (nx - p2);

      //
      // X loop
      //

      for (; px <= ex; px += ox2) {
        unsigned short *p01 = px + ox1;
        unsigned short *p10 = px + oy1;
        unsigned short *p11 = p10 + ox1;

        //
        // 2D wavelet decoding
        //

        if (w14) {
          wdec14(*px, *p10, i00, i10);
          wdec14(*p01, *p11, i01, i11);
          wdec14(i00, i01, *px, *p01);
          wdec14(i10, i11, *p10, *p11);
        } else {
          wdec16(*px, *p10, i00, i10);
          wdec16(*p01, *p11, i01, i11);
          wdec16(i00, i01, *px, *p01);
          wdec16(i10, i11, *p10, *p11);
        }
      }

      //
      // Decode (1D) odd column (still in Y loop)
      //

      if (nx & p) {
        unsigned short *p10 = px + oy1;

        if (w14)
          wdec14(*px, *p10, i00, *p10);
        else
          wdec16(*px, *p10, i00, *p10);

        *px = i00;
      }
    }

    //
    // Decode (1D) odd line (must loop in X)
    //

    if (ny & p) {
      unsigned short *px = py;
      unsigned short *ex = py + ox * (nx - p2);

      for (; px <= ex; px += ox2) {
        unsigned short *p01 = px + ox1;

        if (w14)
          wdec14(*px, *p01, i00, *p01);
        else
          wdec16(*px, *p01, i00, *p01);

        *px = i00;
      }
    }

    //
    // Next level
    //

    p2 = p;
    p >>= 1;
  }
}

//-----------------------------------------------------------------------------
//
// 16-bit Huffman compression and decompression.
//
// The source code in this file is derived from the 8-bit
// Huffman compression and decompression routines written
// by Christian Rouet for his PIZ image file format.
+// +//----------------------------------------------------------------------------- + +// Adds some modification for tinyexr. + +const int HUF_ENCBITS = 16; // literal (value) bit length +const int HUF_DECBITS = 14; // decoding bit size (>= 8) + +const int HUF_ENCSIZE = (1 << HUF_ENCBITS) + 1; // encoding table size +const int HUF_DECSIZE = 1 << HUF_DECBITS; // decoding table size +const int HUF_DECMASK = HUF_DECSIZE - 1; + +struct HufDec { // short code long code + //------------------------------- + unsigned int len : 8; // code length 0 + unsigned int lit : 24; // lit p size + unsigned int *p; // 0 lits +}; + +inline long long hufLength(long long code) { return code & 63; } + +inline long long hufCode(long long code) { return code >> 6; } + +inline void outputBits(int nBits, long long bits, long long &c, int &lc, + char *&out) { + c <<= nBits; + lc += nBits; + + c |= bits; + + while (lc >= 8) *out++ = static_cast((c >> (lc -= 8))); +} + +inline long long getBits(int nBits, long long &c, int &lc, const char *&in) { + while (lc < nBits) { + c = (long long)((unsigned long long)c << 8) | *(reinterpret_cast(in++)); + lc += 8; + } + + lc -= nBits; + return (c >> lc) & ((1 << nBits) - 1); +} + +// +// ENCODING TABLE BUILDING & (UN)PACKING +// + +// +// Build a "canonical" Huffman code table: +// - for each (uncompressed) symbol, hcode contains the length +// of the corresponding code (in the compressed data) +// - canonical codes are computed and stored in hcode +// - the rules for constructing canonical codes are as follows: +// * shorter codes (if filled with zeroes to the right) +// have a numerically higher value than longer codes +// * for codes with the same length, numerical values +// increase with numerical symbol values +// - because the canonical code table can be constructed from +// symbol lengths alone, the code table can be transmitted +// without sending the actual code values +// - see http://www.compressconsult.com/huffman/ +// + +static void 
hufCanonicalCodeTable(long long hcode[HUF_ENCSIZE]) { + long long n[59]; + + // + // For each i from 0 through 58, count the + // number of different codes of length i, and + // store the count in n[i]. + // + + for (int i = 0; i <= 58; ++i) n[i] = 0; + + for (int i = 0; i < HUF_ENCSIZE; ++i) n[hcode[i]] += 1; + + // + // For each i from 58 through 1, compute the + // numerically lowest code with length i, and + // store that code in n[i]. + // + + long long c = 0; + + for (int i = 58; i > 0; --i) { + long long nc = ((c + n[i]) >> 1); + n[i] = c; + c = nc; + } + + // + // hcode[i] contains the length, l, of the + // code for symbol i. Assign the next available + // code of length l to the symbol and store both + // l and the code in hcode[i]. + // + + for (int i = 0; i < HUF_ENCSIZE; ++i) { + int l = static_cast(hcode[i]); + + if (l > 0) hcode[i] = l | (n[l]++ << 6); + } +} + +// +// Compute Huffman codes (based on frq input) and store them in frq: +// - code structure is : [63:lsb - 6:msb] | [5-0: bit length]; +// - max code length is 58 bits; +// - codes outside the range [im-iM] have a null length (unused values); +// - original frequencies are destroyed; +// - encoding tables are used by hufEncode() and hufBuildDecTable(); +// + +struct FHeapCompare { + bool operator()(long long *a, long long *b) { return *a > *b; } +}; + +static bool hufBuildEncTable( + long long *frq, // io: input frequencies [HUF_ENCSIZE], output table + int *im, // o: min frq index + int *iM) // o: max frq index +{ + // + // This function assumes that when it is called, array frq + // indicates the frequency of all possible symbols in the data + // that are to be Huffman-encoded. (frq[i] contains the number + // of occurrences of symbol i in the data.) 
+ // + // The loop below does three things: + // + // 1) Finds the minimum and maximum indices that point + // to non-zero entries in frq: + // + // frq[im] != 0, and frq[i] == 0 for all i < im + // frq[iM] != 0, and frq[i] == 0 for all i > iM + // + // 2) Fills array fHeap with pointers to all non-zero + // entries in frq. + // + // 3) Initializes array hlink such that hlink[i] == i + // for all array entries. + // + + std::vector hlink(HUF_ENCSIZE); + std::vector fHeap(HUF_ENCSIZE); + + *im = 0; + + while (!frq[*im]) (*im)++; + + int nf = 0; + + for (int i = *im; i < HUF_ENCSIZE; i++) { + hlink[i] = i; + + if (frq[i]) { + fHeap[nf] = &frq[i]; + nf++; + *iM = i; + } + } + + // + // Add a pseudo-symbol, with a frequency count of 1, to frq; + // adjust the fHeap and hlink array accordingly. Function + // hufEncode() uses the pseudo-symbol for run-length encoding. + // + + (*iM)++; + frq[*iM] = 1; + fHeap[nf] = &frq[*iM]; + nf++; + + // + // Build an array, scode, such that scode[i] contains the number + // of bits assigned to symbol i. Conceptually this is done by + // constructing a tree whose leaves are the symbols with non-zero + // frequency: + // + // Make a heap that contains all symbols with a non-zero frequency, + // with the least frequent symbol on top. + // + // Repeat until only one symbol is left on the heap: + // + // Take the two least frequent symbols off the top of the heap. + // Create a new node that has first two nodes as children, and + // whose frequency is the sum of the frequencies of the first + // two nodes. Put the new node back into the heap. + // + // The last node left on the heap is the root of the tree. For each + // leaf node, the distance between the root and the leaf is the length + // of the code for the corresponding symbol. + // + // The loop below doesn't actually build the tree; instead we compute + // the distances of the leaves from the root on the fly. 
When a new + // node is added to the heap, then that node's descendants are linked + // into a single linear list that starts at the new node, and the code + // lengths of the descendants (that is, their distance from the root + // of the tree) are incremented by one. + // + + std::make_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); + + std::vector scode(HUF_ENCSIZE); + memset(scode.data(), 0, sizeof(long long) * HUF_ENCSIZE); + + while (nf > 1) { + // + // Find the indices, mm and m, of the two smallest non-zero frq + // values in fHeap, add the smallest frq to the second-smallest + // frq, and remove the smallest frq value from fHeap. + // + + int mm = fHeap[0] - frq; + std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); + --nf; + + int m = fHeap[0] - frq; + std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); + + frq[m] += frq[mm]; + std::push_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); + + // + // The entries in scode are linked into lists with the + // entries in hlink serving as "next" pointers and with + // the end of a list marked by hlink[j] == j. + // + // Traverse the lists that start at scode[m] and scode[mm]. + // For each element visited, increment the length of the + // corresponding code by one bit. (If we visit scode[j] + // during the traversal, then the code for symbol j becomes + // one bit longer.) + // + // Merge the lists that start at scode[m] and scode[mm] + // into a single list that starts at scode[m]. + // + + // + // Add a bit to all codes in the first list. + // + + for (int j = m;; j = hlink[j]) { + scode[j]++; + + TINYEXR_CHECK_AND_RETURN_C(scode[j] <= 58, false); + + if (hlink[j] == j) { + // + // Merge the two lists. 
+ // + + hlink[j] = mm; + break; + } + } + + // + // Add a bit to all codes in the second list + // + + for (int j = mm;; j = hlink[j]) { + scode[j]++; + + TINYEXR_CHECK_AND_RETURN_C(scode[j] <= 58, false); + + if (hlink[j] == j) break; + } + } + + // + // Build a canonical Huffman code table, replacing the code + // lengths in scode with (code, code length) pairs. Copy the + // code table from scode into frq. + // + + hufCanonicalCodeTable(scode.data()); + memcpy(frq, scode.data(), sizeof(long long) * HUF_ENCSIZE); + + return true; +} + +// +// Pack an encoding table: +// - only code lengths, not actual codes, are stored +// - runs of zeroes are compressed as follows: +// +// unpacked packed +// -------------------------------- +// 1 zero 0 (6 bits) +// 2 zeroes 59 +// 3 zeroes 60 +// 4 zeroes 61 +// 5 zeroes 62 +// n zeroes (6 or more) 63 n-6 (6 + 8 bits) +// + +const int SHORT_ZEROCODE_RUN = 59; +const int LONG_ZEROCODE_RUN = 63; +const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN; +const int LONGEST_LONG_RUN = 255 + SHORTEST_LONG_RUN; + +static void hufPackEncTable( + const long long *hcode, // i : encoding table [HUF_ENCSIZE] + int im, // i : min hcode index + int iM, // i : max hcode index + char **pcode) // o: ptr to packed table (updated) +{ + char *p = *pcode; + long long c = 0; + int lc = 0; + + for (; im <= iM; im++) { + int l = hufLength(hcode[im]); + + if (l == 0) { + int zerun = 1; + + while ((im < iM) && (zerun < LONGEST_LONG_RUN)) { + if (hufLength(hcode[im + 1]) > 0) break; + im++; + zerun++; + } + + if (zerun >= 2) { + if (zerun >= SHORTEST_LONG_RUN) { + outputBits(6, LONG_ZEROCODE_RUN, c, lc, p); + outputBits(8, zerun - SHORTEST_LONG_RUN, c, lc, p); + } else { + outputBits(6, SHORT_ZEROCODE_RUN + zerun - 2, c, lc, p); + } + continue; + } + } + + outputBits(6, l, c, lc, p); + } + + if (lc > 0) *p++ = (unsigned char)(c << (8 - lc)); + + *pcode = p; +} + +// +// Unpack an encoding table packed by hufPackEncTable(): +// + 
+static bool hufUnpackEncTable( + const char **pcode, // io: ptr to packed table (updated) + int ni, // i : input size (in bytes) + int im, // i : min hcode index + int iM, // i : max hcode index + long long *hcode) // o: encoding table [HUF_ENCSIZE] +{ + memset(hcode, 0, sizeof(long long) * HUF_ENCSIZE); + + const char *p = *pcode; + long long c = 0; + int lc = 0; + + for (; im <= iM; im++) { + if (p - *pcode >= ni) { + return false; + } + + long long l = hcode[im] = getBits(6, c, lc, p); // code length + + if (l == (long long)LONG_ZEROCODE_RUN) { + if (p - *pcode > ni) { + return false; + } + + int zerun = getBits(8, c, lc, p) + SHORTEST_LONG_RUN; + + if (im + zerun > iM + 1) { + return false; + } + + while (zerun--) hcode[im++] = 0; + + im--; + } else if (l >= (long long)SHORT_ZEROCODE_RUN) { + int zerun = l - SHORT_ZEROCODE_RUN + 2; + + if (im + zerun > iM + 1) { + return false; + } + + while (zerun--) hcode[im++] = 0; + + im--; + } + } + + *pcode = const_cast(p); + + hufCanonicalCodeTable(hcode); + + return true; +} + +// +// DECODING TABLE BUILDING +// + +// +// Clear a newly allocated decoding table so that it contains only zeroes. 
//

// Reset every primary entry of a decoding table to the empty state
// (no length, no literal, no secondary long-code list).
static void hufClearDecTable(HufDec *hdecod)  // io: (allocated by caller)
// decoding table [HUF_DECSIZE]
{
  for (int i = 0; i < HUF_DECSIZE; i++) {
    hdecod[i].len = 0;
    hdecod[i].lit = 0;
    hdecod[i].p = NULL;
  }
  // memset(hdecod, 0, sizeof(HufDec) * HUF_DECSIZE);
}

//
// Build a decoding hash table based on the encoding table hcode:
// - short codes (<= HUF_DECBITS) are resolved with a single table access;
// - long code entry allocations are not optimized, because long codes are
//   unfrequent;
// - decoding tables are used by hufDecode();
//

static bool hufBuildDecTable(const long long *hcode,  // i : encoding table
                             int im,                  // i : min index in hcode
                             int iM,                  // i : max index in hcode
                             HufDec *hdecod)  // o: (allocated by caller)
// decoding table [HUF_DECSIZE]
{
  //
  // Init hashtable & loop on all codes.
  // Assumes that hufClearDecTable(hdecod) has already been called.
  //

  for (; im <= iM; im++) {
    long long c = hufCode(hcode[im]);
    int l = hufLength(hcode[im]);

    if (c >> l) {
      //
      // Error: c is supposed to be an l-bit code,
      // but c contains a value that is greater
      // than the largest l-bit number.
      //

      // invalidTableEntry();
      return false;
    }

    if (l > HUF_DECBITS) {
      //
      // Long code: add a secondary entry
      //
      // The table slot is selected by the top HUF_DECBITS bits of the
      // code; all long codes sharing that prefix are appended to the
      // slot's p[] array and disambiguated later by hufDecode().

      HufDec *pl = hdecod + (c >> (l - HUF_DECBITS));

      if (pl->len) {
        //
        // Error: a short code has already
        // been stored in table entry *pl.
        //

        // invalidTableEntry();
        return false;
      }

      pl->lit++;

      if (pl->p) {
        // Grow the secondary list by one element (reallocate + copy).
        unsigned int *p = pl->p;
        pl->p = new unsigned int[pl->lit];

        for (unsigned int i = 0; i < pl->lit - 1u; ++i) pl->p[i] = p[i];

        delete[] p;
      } else {
        pl->p = new unsigned int[1];
      }

      pl->p[pl->lit - 1] = im;
    } else if (l) {
      //
      // Short code: init all primary entries
      //
      // Every table slot whose top l bits equal this code decodes to
      // the same symbol, so fill all 2^(HUF_DECBITS - l) of them.

      HufDec *pl = hdecod + (c << (HUF_DECBITS - l));

      for (long long i = 1ULL << (HUF_DECBITS - l); i > 0; i--, pl++) {
        if (pl->len || pl->p) {
          //
          // Error: a short code or a long code has
          // already been stored in table entry *pl.
          //

          // invalidTableEntry();
          return false;
        }

        pl->len = l;
        pl->lit = im;
      }
    }
  }

  return true;
}

//
// Free the long code entries of a decoding table built by hufBuildDecTable()
//

static void hufFreeDecTable(HufDec *hdecod)  // io: Decoding table
{
  for (int i = 0; i < HUF_DECSIZE; i++) {
    if (hdecod[i].p) {
      delete[] hdecod[i].p;
      hdecod[i].p = 0;
    }
  }
}

//
// ENCODING
//

// Append one Huffman code (packed as (code, length) by hufCanonicalCodeTable)
// to the output bit stream.
inline void outputCode(long long code, long long &c, int &lc, char *&out) {
  outputBits(hufLength(code), hufCode(code), c, lc, out);
}

inline void sendCode(long long sCode, int runCount, long long runCode,
                     long long &c, int &lc, char *&out) {
  //
  // Output a run of (runCount + 1) instances of the symbol sCode.
  // Output the symbols explicitly, or if that is shorter, output
  // the sCode symbol once followed by a runCode symbol and runCount
  // expressed as an 8-bit number.
  //

  if (hufLength(sCode) + hufLength(runCode) + 8 < hufLength(sCode) * runCount) {
    outputCode(sCode, c, lc, out);
    outputCode(runCode, c, lc, out);
    outputBits(8, runCount, c, lc, out);
  } else {
    // runCount-- >= 0 iterates runCount + 1 times: the first symbol
    // plus its runCount repeats.
    while (runCount-- >= 0) outputCode(sCode, c, lc, out);
  }
}

//
// Encode (compress) ni values based on the Huffman encoding table hcode:
//

static int hufEncode  // return: output size (in bits)
    (const long long *hcode,    // i : encoding table
     const unsigned short *in,  // i : uncompressed input buffer
     const int ni,  // i : input buffer size (in 16-bit values, not bytes)
     int rlc,       // i : rl code
     char *out)     // o: compressed output buffer
{
  char *outStart = out;
  long long c = 0;  // bits not yet written to out
  int lc = 0;       // number of valid bits in c (LSB)
  int s = in[0];
  int cs = 0;

  //
  // Loop on input values
  //

  for (int i = 1; i < ni; i++) {
    //
    // Count same values or send code
    //
    // cs counts repeats of s; runs are capped at 255 because the run
    // length is later emitted as a single byte by sendCode().

    if (s == in[i] && cs < 255) {
      cs++;
    } else {
      sendCode(hcode[s], cs, hcode[rlc], c, lc, out);
      cs = 0;
    }

    s = in[i];
  }

  //
  // Send remaining code
  //

  sendCode(hcode[s], cs, hcode[rlc], c, lc, out);

  // Flush the last partial byte (high bits first).
  if (lc) *out = (c << (8 - lc)) & 0xff;

  return (out - outStart) * 8 + lc;
}

//
// DECODING
//

//
// In order to force the compiler to inline them,
// getChar() and getCode() are implemented as macros
// instead of "inline" functions.
+// + +#define getChar(c, lc, in) \ + { \ + c = ((unsigned long long)c << 8) | *(unsigned char *)(in++); \ + lc += 8; \ + } + +#if 0 +#define getCode(po, rlc, c, lc, in, out, ob, oe) \ + { \ + if (po == rlc) { \ + if (lc < 8) getChar(c, lc, in); \ + \ + lc -= 8; \ + \ + unsigned char cs = (c >> lc); \ + \ + if (out + cs > oe) return false; \ + \ + /* TinyEXR issue 78 */ \ + unsigned short s = out[-1]; \ + \ + while (cs-- > 0) *out++ = s; \ + } else if (out < oe) { \ + *out++ = po; \ + } else { \ + return false; \ + } \ + } +#else +static bool getCode(int po, int rlc, long long &c, int &lc, const char *&in, + const char *in_end, unsigned short *&out, + const unsigned short *ob, const unsigned short *oe) { + (void)ob; + if (po == rlc) { + if (lc < 8) { + /* TinyEXR issue 78 */ + /* TinyEXR issue 160. in + 1 -> in */ + if (in >= in_end) { + return false; + } + + getChar(c, lc, in); + } + + lc -= 8; + + unsigned char cs = (c >> lc); + + if (out + cs > oe) return false; + + // Bounds check for safety + // Issue 100. 
+ if ((out - 1) < ob) return false; + unsigned short s = out[-1]; + + while (cs-- > 0) *out++ = s; + } else if (out < oe) { + *out++ = po; + } else { + return false; + } + return true; +} +#endif + +// +// Decode (uncompress) ni bits based on encoding & decoding tables: +// + +static bool hufDecode(const long long *hcode, // i : encoding table + const HufDec *hdecod, // i : decoding table + const char *in, // i : compressed input buffer + int ni, // i : input size (in bits) + int rlc, // i : run-length code + int no, // i : expected output size (in bytes) + unsigned short *out) // o: uncompressed output buffer +{ + long long c = 0; + int lc = 0; + unsigned short *outb = out; // begin + unsigned short *oe = out + no; // end + const char *ie = in + (ni + 7) / 8; // input byte size + + // + // Loop on input bytes + // + + while (in < ie) { + getChar(c, lc, in); + + // + // Access decoding table + // + + while (lc >= HUF_DECBITS) { + const HufDec pl = hdecod[(c >> (lc - HUF_DECBITS)) & HUF_DECMASK]; + + if (pl.len) { + // + // Get short code + // + + lc -= pl.len; + // std::cout << "lit = " << pl.lit << std::endl; + // std::cout << "rlc = " << rlc << std::endl; + // std::cout << "c = " << c << std::endl; + // std::cout << "lc = " << lc << std::endl; + // std::cout << "in = " << in << std::endl; + // std::cout << "out = " << out << std::endl; + // std::cout << "oe = " << oe << std::endl; + if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) { + return false; + } + } else { + if (!pl.p) { + return false; + } + // invalidCode(); // wrong code + + // + // Search long code + // + + unsigned int j; + + for (j = 0; j < pl.lit; j++) { + int l = hufLength(hcode[pl.p[j]]); + + while (lc < l && in < ie) // get more bits + getChar(c, lc, in); + + if (lc >= l) { + if (hufCode(hcode[pl.p[j]]) == + ((c >> (lc - l)) & (((long long)(1) << l) - 1))) { + // + // Found : get long code + // + + lc -= l; + if (!getCode(pl.p[j], rlc, c, lc, in, ie, out, outb, oe)) { + return false; + } 
+ break; + } + } + } + + if (j == pl.lit) { + return false; + // invalidCode(); // Not found + } + } + } + } + + // + // Get remaining (short) codes + // + + int i = (8 - ni) & 7; + c >>= i; + lc -= i; + + while (lc > 0) { + const HufDec pl = hdecod[((unsigned long long)c << (HUF_DECBITS - lc)) & HUF_DECMASK]; + + if (pl.len) { + lc -= pl.len; + if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) { + return false; + } + } else { + return false; + // invalidCode(); // wrong (long) code + } + } + + if (out - outb != no) { + return false; + } + // notEnoughData (); + + return true; +} + +static void countFrequencies(std::vector &freq, + const unsigned short data[/*n*/], int n) { + for (int i = 0; i < HUF_ENCSIZE; ++i) freq[i] = 0; + + for (int i = 0; i < n; ++i) ++freq[data[i]]; +} + +static void writeUInt(char buf[4], unsigned int i) { + unsigned char *b = (unsigned char *)buf; + + b[0] = i; + b[1] = i >> 8; + b[2] = i >> 16; + b[3] = i >> 24; +} + +static unsigned int readUInt(const char buf[4]) { + const unsigned char *b = (const unsigned char *)buf; + + return (b[0] & 0x000000ff) | ((b[1] << 8) & 0x0000ff00) | + ((b[2] << 16) & 0x00ff0000) | ((b[3] << 24) & 0xff000000); +} + +// +// EXTERNAL INTERFACE +// + +static int hufCompress(const unsigned short raw[], int nRaw, + char compressed[]) { + if (nRaw == 0) return 0; + + std::vector freq(HUF_ENCSIZE); + + countFrequencies(freq, raw, nRaw); + + int im = 0; + int iM = 0; + hufBuildEncTable(freq.data(), &im, &iM); + + char *tableStart = compressed + 20; + char *tableEnd = tableStart; + hufPackEncTable(freq.data(), im, iM, &tableEnd); + int tableLength = tableEnd - tableStart; + + char *dataStart = tableEnd; + int nBits = hufEncode(freq.data(), raw, nRaw, iM, dataStart); + int data_length = (nBits + 7) / 8; + + writeUInt(compressed, im); + writeUInt(compressed + 4, iM); + writeUInt(compressed + 8, tableLength); + writeUInt(compressed + 12, nBits); + writeUInt(compressed + 16, 0); // room for future extensions + + 
return dataStart + data_length - compressed; +} + +static bool hufUncompress(const char compressed[], int nCompressed, + std::vector *raw) { + if (nCompressed == 0) { + if (raw->size() != 0) return false; + + return false; + } + + int im = readUInt(compressed); + int iM = readUInt(compressed + 4); + // int tableLength = readUInt (compressed + 8); + int nBits = readUInt(compressed + 12); + + if (im < 0 || im >= HUF_ENCSIZE || iM < 0 || iM >= HUF_ENCSIZE) return false; + + const char *ptr = compressed + 20; + + // + // Fast decoder needs at least 2x64-bits of compressed data, and + // needs to be run-able on this platform. Otherwise, fall back + // to the original decoder + // + + // if (FastHufDecoder::enabled() && nBits > 128) + //{ + // FastHufDecoder fhd (ptr, nCompressed - (ptr - compressed), im, iM, iM); + // fhd.decode ((unsigned char*)ptr, nBits, raw, nRaw); + //} + // else + { + std::vector freq(HUF_ENCSIZE); + std::vector hdec(HUF_DECSIZE); + + hufClearDecTable(&hdec.at(0)); + + hufUnpackEncTable(&ptr, nCompressed - (ptr - compressed), im, iM, + &freq.at(0)); + + { + if (nBits > 8 * (nCompressed - (ptr - compressed))) { + return false; + } + + hufBuildDecTable(&freq.at(0), im, iM, &hdec.at(0)); + hufDecode(&freq.at(0), &hdec.at(0), ptr, nBits, iM, raw->size(), + raw->data()); + } + // catch (...) 
+ //{ + // hufFreeDecTable (hdec); + // throw; + //} + + hufFreeDecTable(&hdec.at(0)); + } + + return true; +} + +// +// Functions to compress the range of values in the pixel data +// + +const int USHORT_RANGE = (1 << 16); +const int BITMAP_SIZE = (USHORT_RANGE >> 3); + +static void bitmapFromData(const unsigned short data[/*nData*/], int nData, + unsigned char bitmap[BITMAP_SIZE], + unsigned short &minNonZero, + unsigned short &maxNonZero) { + for (int i = 0; i < BITMAP_SIZE; ++i) bitmap[i] = 0; + + for (int i = 0; i < nData; ++i) bitmap[data[i] >> 3] |= (1 << (data[i] & 7)); + + bitmap[0] &= ~1; // zero is not explicitly stored in + // the bitmap; we assume that the + // data always contain zeroes + minNonZero = BITMAP_SIZE - 1; + maxNonZero = 0; + + for (int i = 0; i < BITMAP_SIZE; ++i) { + if (bitmap[i]) { + if (minNonZero > i) minNonZero = i; + if (maxNonZero < i) maxNonZero = i; + } + } +} + +static unsigned short forwardLutFromBitmap( + const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) { + int k = 0; + + for (int i = 0; i < USHORT_RANGE; ++i) { + if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) + lut[i] = k++; + else + lut[i] = 0; + } + + return k - 1; // maximum value stored in lut[], +} // i.e. number of ones in bitmap minus 1 + +static unsigned short reverseLutFromBitmap( + const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) { + int k = 0; + + for (int i = 0; i < USHORT_RANGE; ++i) { + if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) lut[k++] = i; + } + + int n = k - 1; + + while (k < USHORT_RANGE) lut[k++] = 0; + + return n; // maximum k where lut[k] is non-zero, +} // i.e. 
number of ones in bitmap minus 1 + +static void applyLut(const unsigned short lut[USHORT_RANGE], + unsigned short data[/*nData*/], int nData) { + for (int i = 0; i < nData; ++i) data[i] = lut[data[i]]; +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif // __clang__ + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +static bool CompressPiz(unsigned char *outPtr, unsigned int *outSize, + const unsigned char *inPtr, size_t inSize, + const std::vector &channelInfo, + int data_width, int num_lines) { + std::vector bitmap(BITMAP_SIZE); + unsigned short minNonZero; + unsigned short maxNonZero; + +#if !TINYEXR_LITTLE_ENDIAN + // @todo { PIZ compression on BigEndian architecture. } + return false; +#endif + + // Assume `inSize` is multiple of 2 or 4. + std::vector tmpBuffer(inSize / sizeof(unsigned short)); + + std::vector channelData(channelInfo.size()); + unsigned short *tmpBufferEnd = &tmpBuffer.at(0); + + for (size_t c = 0; c < channelData.size(); c++) { + PIZChannelData &cd = channelData[c]; + + cd.start = tmpBufferEnd; + cd.end = cd.start; + + cd.nx = data_width; + cd.ny = num_lines; + // cd.ys = c.channel().ySampling; + + size_t pixelSize = sizeof(int); // UINT and FLOAT + if (channelInfo[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF) { + pixelSize = sizeof(short); + } + + cd.size = static_cast(pixelSize / sizeof(short)); + + tmpBufferEnd += cd.nx * cd.ny * cd.size; + } + + const unsigned char *ptr = inPtr; + for (int y = 0; y < num_lines; ++y) { + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + // if (modp (y, cd.ys) != 0) + // continue; + + size_t n = static_cast(cd.nx * cd.size); + memcpy(cd.end, ptr, n * sizeof(unsigned short)); + ptr += n * sizeof(unsigned short); + cd.end += n; + } + } + + bitmapFromData(&tmpBuffer.at(0), static_cast(tmpBuffer.size()), + bitmap.data(), minNonZero, maxNonZero); + + std::vector lut(USHORT_RANGE); + unsigned short maxValue = forwardLutFromBitmap(bitmap.data(), 
lut.data()); + applyLut(lut.data(), &tmpBuffer.at(0), static_cast(tmpBuffer.size())); + + // + // Store range compression info in _outBuffer + // + + char *buf = reinterpret_cast(outPtr); + + memcpy(buf, &minNonZero, sizeof(unsigned short)); + buf += sizeof(unsigned short); + memcpy(buf, &maxNonZero, sizeof(unsigned short)); + buf += sizeof(unsigned short); + + if (minNonZero <= maxNonZero) { + memcpy(buf, reinterpret_cast(&bitmap[0] + minNonZero), + maxNonZero - minNonZero + 1); + buf += maxNonZero - minNonZero + 1; + } + + // + // Apply wavelet encoding + // + + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + for (int j = 0; j < cd.size; ++j) { + wav2Encode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size, + maxValue); + } + } + + // + // Apply Huffman encoding; append the result to _outBuffer + // + + // length header(4byte), then huff data. Initialize length header with zero, + // then later fill it by `length`. + char *lengthPtr = buf; + int zero = 0; + memcpy(buf, &zero, sizeof(int)); + buf += sizeof(int); + + int length = + hufCompress(&tmpBuffer.at(0), static_cast(tmpBuffer.size()), buf); + memcpy(lengthPtr, &length, sizeof(int)); + + (*outSize) = static_cast( + (reinterpret_cast(buf) - outPtr) + + static_cast(length)); + + // Use uncompressed data when compressed data is larger than uncompressed. + // (Issue 40) + if ((*outSize) >= inSize) { + (*outSize) = static_cast(inSize); + memcpy(outPtr, inPtr, inSize); + } + return true; +} + +static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, + size_t tmpBufSizeInBytes, size_t inLen, int num_channels, + const EXRChannelInfo *channels, int data_width, + int num_lines) { + if (inLen == tmpBufSizeInBytes) { + // Data is not compressed(Issue 40). 
+ memcpy(outPtr, inPtr, inLen); + return true; + } + + std::vector bitmap(BITMAP_SIZE); + unsigned short minNonZero; + unsigned short maxNonZero; + +#if !TINYEXR_LITTLE_ENDIAN + // @todo { PIZ compression on BigEndian architecture. } + return false; +#endif + + memset(bitmap.data(), 0, BITMAP_SIZE); + + if (inLen < 4) { + return false; + } + + size_t readLen = 0; + + const unsigned char *ptr = inPtr; + // minNonZero = *(reinterpret_cast(ptr)); + tinyexr::cpy2(&minNonZero, reinterpret_cast(ptr)); + // maxNonZero = *(reinterpret_cast(ptr + 2)); + tinyexr::cpy2(&maxNonZero, reinterpret_cast(ptr + 2)); + ptr += 4; + readLen += 4; + + if (maxNonZero >= BITMAP_SIZE) { + return false; + } + + //printf("maxNonZero = %d\n", maxNonZero); + //printf("minNonZero = %d\n", minNonZero); + //printf("len = %d\n", (maxNonZero - minNonZero + 1)); + //printf("BITMAPSIZE - min = %d\n", (BITMAP_SIZE - minNonZero)); + + if (minNonZero <= maxNonZero) { + if (((maxNonZero - minNonZero + 1) + readLen) > inLen) { + // Input too short + return false; + } + + memcpy(reinterpret_cast(&bitmap[0] + minNonZero), ptr, + maxNonZero - minNonZero + 1); + ptr += maxNonZero - minNonZero + 1; + readLen += maxNonZero - minNonZero + 1; + } else { + // Issue 194 + if ((minNonZero == (BITMAP_SIZE - 1)) && (maxNonZero == 0)) { + // OK. all pixels are zero. And no need to read `bitmap` data. + } else { + // invalid minNonZero/maxNonZero combination. 
+ return false; + } + } + + std::vector lut(USHORT_RANGE); + memset(lut.data(), 0, sizeof(unsigned short) * USHORT_RANGE); + unsigned short maxValue = reverseLutFromBitmap(bitmap.data(), lut.data()); + + // + // Huffman decoding + // + + if ((readLen + 4) > inLen) { + return false; + } + + int length=0; + + // length = *(reinterpret_cast(ptr)); + tinyexr::cpy4(&length, reinterpret_cast(ptr)); + ptr += sizeof(int); + + if (size_t((ptr - inPtr) + length) > inLen) { + return false; + } + + std::vector tmpBuffer(tmpBufSizeInBytes / sizeof(unsigned short)); + hufUncompress(reinterpret_cast(ptr), length, &tmpBuffer); + + // + // Wavelet decoding + // + + std::vector channelData(static_cast(num_channels)); + + unsigned short *tmpBufferEnd = &tmpBuffer.at(0); + + for (size_t i = 0; i < static_cast(num_channels); ++i) { + const EXRChannelInfo &chan = channels[i]; + + size_t pixelSize = sizeof(int); // UINT and FLOAT + if (chan.pixel_type == TINYEXR_PIXELTYPE_HALF) { + pixelSize = sizeof(short); + } + + channelData[i].start = tmpBufferEnd; + channelData[i].end = channelData[i].start; + channelData[i].nx = data_width; + channelData[i].ny = num_lines; + // channelData[i].ys = 1; + channelData[i].size = static_cast(pixelSize / sizeof(short)); + + tmpBufferEnd += channelData[i].nx * channelData[i].ny * channelData[i].size; + } + + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + for (int j = 0; j < cd.size; ++j) { + wav2Decode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size, + maxValue); + } + } + + // + // Expand the pixel data to their original range + // + + applyLut(lut.data(), &tmpBuffer.at(0), static_cast(tmpBufSizeInBytes / sizeof(unsigned short))); + + for (int y = 0; y < num_lines; y++) { + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + // if (modp (y, cd.ys) != 0) + // continue; + + size_t n = static_cast(cd.nx * cd.size); + memcpy(outPtr, cd.end, static_cast(n * 
sizeof(unsigned short))); + outPtr += n * sizeof(unsigned short); + cd.end += n; + } + } + + return true; +} +#endif // TINYEXR_USE_PIZ + +#if TINYEXR_USE_ZFP + +struct ZFPCompressionParam { + double rate; + unsigned int precision; + unsigned int __pad0; + double tolerance; + int type; // TINYEXR_ZFP_COMPRESSIONTYPE_* + unsigned int __pad1; + + ZFPCompressionParam() { + type = TINYEXR_ZFP_COMPRESSIONTYPE_RATE; + rate = 2.0; + precision = 0; + tolerance = 0.0; + } +}; + +static bool FindZFPCompressionParam(ZFPCompressionParam *param, + const EXRAttribute *attributes, + int num_attributes, std::string *err) { + bool foundType = false; + + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionType") == 0)) { + if (attributes[i].size == 1) { + param->type = static_cast(attributes[i].value[0]); + foundType = true; + break; + } else { + if (err) { + (*err) += + "zfpCompressionType attribute must be uchar(1 byte) type.\n"; + } + return false; + } + } + } + + if (!foundType) { + if (err) { + (*err) += "`zfpCompressionType` attribute not found.\n"; + } + return false; + } + + if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionRate") == 0) && + (attributes[i].size == 8)) { + param->rate = *(reinterpret_cast(attributes[i].value)); + return true; + } + } + + if (err) { + (*err) += "`zfpCompressionRate` attribute not found.\n"; + } + + } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionPrecision") == 0) && + (attributes[i].size == 4)) { + param->rate = *(reinterpret_cast(attributes[i].value)); + return true; + } + } + + if (err) { + (*err) += "`zfpCompressionPrecision` attribute not found.\n"; + } + + } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { + for (int i = 0; i < num_attributes; i++) { + if 
((strcmp(attributes[i].name, "zfpCompressionTolerance") == 0) && + (attributes[i].size == 8)) { + param->tolerance = *(reinterpret_cast(attributes[i].value)); + return true; + } + } + + if (err) { + (*err) += "`zfpCompressionTolerance` attribute not found.\n"; + } + } else { + if (err) { + (*err) += "Unknown value specified for `zfpCompressionType`.\n"; + } + } + + return false; +} + +// Assume pixel format is FLOAT for all channels. +static bool DecompressZfp(float *dst, int dst_width, int dst_num_lines, + size_t num_channels, const unsigned char *src, + unsigned long src_size, + const ZFPCompressionParam ¶m) { + size_t uncompressed_size = + size_t(dst_width) * size_t(dst_num_lines) * num_channels; + + if (uncompressed_size == src_size) { + // Data is not compressed(Issue 40). + memcpy(dst, src, src_size); + } + + zfp_stream *zfp = NULL; + zfp_field *field = NULL; + + TINYEXR_CHECK_AND_RETURN_C((dst_width % 4) == 0, false); + TINYEXR_CHECK_AND_RETURN_C((dst_num_lines % 4) == 0, false); + + if ((size_t(dst_width) & 3U) || (size_t(dst_num_lines) & 3U)) { + return false; + } + + field = + zfp_field_2d(reinterpret_cast(const_cast(src)), + zfp_type_float, static_cast(dst_width), + static_cast(dst_num_lines) * + static_cast(num_channels)); + zfp = zfp_stream_open(NULL); + + if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { + zfp_stream_set_rate(zfp, param.rate, zfp_type_float, /* dimension */ 2, + /* write random access */ 0); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { + zfp_stream_set_precision(zfp, param.precision); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { + zfp_stream_set_accuracy(zfp, param.tolerance); + } else { + return false; + } + + size_t buf_size = zfp_stream_maximum_size(zfp, field); + std::vector buf(buf_size); + memcpy(&buf.at(0), src, src_size); + + bitstream *stream = stream_open(&buf.at(0), buf_size); + zfp_stream_set_bit_stream(zfp, stream); + zfp_stream_rewind(zfp); + + size_t image_size = 
size_t(dst_width) * size_t(dst_num_lines); + + for (size_t c = 0; c < size_t(num_channels); c++) { + // decompress 4x4 pixel block. + for (size_t y = 0; y < size_t(dst_num_lines); y += 4) { + for (size_t x = 0; x < size_t(dst_width); x += 4) { + float fblock[16]; + zfp_decode_block_float_2(zfp, fblock); + for (size_t j = 0; j < 4; j++) { + for (size_t i = 0; i < 4; i++) { + dst[c * image_size + ((y + j) * size_t(dst_width) + (x + i))] = + fblock[j * 4 + i]; + } + } + } + } + } + + zfp_field_free(field); + zfp_stream_close(zfp); + stream_close(stream); + + return true; +} + +// Assume pixel format is FLOAT for all channels. +static bool CompressZfp(std::vector *outBuf, + unsigned int *outSize, const float *inPtr, int width, + int num_lines, int num_channels, + const ZFPCompressionParam ¶m) { + zfp_stream *zfp = NULL; + zfp_field *field = NULL; + + TINYEXR_CHECK_AND_RETURN_C((width % 4) == 0, false); + TINYEXR_CHECK_AND_RETURN_C((num_lines % 4) == 0, false); + + if ((size_t(width) & 3U) || (size_t(num_lines) & 3U)) { + return false; + } + + // create input array. 
+ field = zfp_field_2d(reinterpret_cast(const_cast(inPtr)), + zfp_type_float, static_cast(width), + static_cast(num_lines * num_channels)); + + zfp = zfp_stream_open(NULL); + + if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { + zfp_stream_set_rate(zfp, param.rate, zfp_type_float, 2, 0); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { + zfp_stream_set_precision(zfp, param.precision); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { + zfp_stream_set_accuracy(zfp, param.tolerance); + } else { + return false; + } + + size_t buf_size = zfp_stream_maximum_size(zfp, field); + + outBuf->resize(buf_size); + + bitstream *stream = stream_open(&outBuf->at(0), buf_size); + zfp_stream_set_bit_stream(zfp, stream); + zfp_field_free(field); + + size_t image_size = size_t(width) * size_t(num_lines); + + for (size_t c = 0; c < size_t(num_channels); c++) { + // compress 4x4 pixel block. + for (size_t y = 0; y < size_t(num_lines); y += 4) { + for (size_t x = 0; x < size_t(width); x += 4) { + float fblock[16]; + for (size_t j = 0; j < 4; j++) { + for (size_t i = 0; i < 4; i++) { + fblock[j * 4 + i] = + inPtr[c * image_size + ((y + j) * size_t(width) + (x + i))]; + } + } + zfp_encode_block_float_2(zfp, fblock); + } + } + } + + zfp_stream_flush(zfp); + (*outSize) = static_cast(zfp_stream_compressed_size(zfp)); + + zfp_stream_close(zfp); + + return true; +} + +#endif + +// +// ----------------------------------------------------------------- +// + +// heuristics +#define TINYEXR_DIMENSION_THRESHOLD (1024 * 8192) + +// TODO(syoyo): Refactor function arguments. 
+static bool DecodePixelData(/* out */ unsigned char **out_images, + const int *requested_pixel_types, + const unsigned char *data_ptr, size_t data_len, + int compression_type, int line_order, int width, + int height, int x_stride, int y, int line_no, + int num_lines, size_t pixel_data_size, + size_t num_attributes, + const EXRAttribute *attributes, size_t num_channels, + const EXRChannelInfo *channels, + const std::vector &channel_offset_list) { + if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { // PIZ +#if TINYEXR_USE_PIZ + if ((width == 0) || (num_lines == 0) || (pixel_data_size == 0)) { + // Invalid input #90 + return false; + } + + // Allocate original data size. + std::vector outBuf(static_cast( + static_cast(width * num_lines) * pixel_data_size)); + size_t tmpBufLen = outBuf.size(); + + bool ret = tinyexr::DecompressPiz( + reinterpret_cast(&outBuf.at(0)), data_ptr, tmpBufLen, + data_len, static_cast(num_channels), channels, width, num_lines); + + if (!ret) { + return false; + } + + // For PIZ_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... + for (size_t c = 0; c < static_cast(num_channels); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned short *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + FP16 hf; + + // hf.u = line_ptr[u]; + // use `cpy` to avoid unaligned memory access when compiler's + // optimization is on. 
+ tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += static_cast( + (height - 1 - (line_no + static_cast(v)))) * + static_cast(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast(out_images)[c]; + size_t offset = 0; + if (line_order == 0) { + offset = (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + offset = static_cast( + (height - 1 - (line_no + static_cast(v)))) * + static_cast(x_stride) + + u; + } + image += offset; + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false); + + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned int *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + unsigned int val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += static_cast( + (height - 1 - (line_no + static_cast(v)))) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false); + for (size_t v = 0; v < static_cast(num_lines); v++) { + const float *line_ptr = reinterpret_cast(&outBuf.at( + v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * 
static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + float val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += static_cast( + (height - 1 - (line_no + static_cast(v)))) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + return false; + } + } +#else + return false; +#endif + + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS || + compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + // Allocate original data size. + std::vector outBuf(static_cast(width) * + static_cast(num_lines) * + pixel_data_size); + + unsigned long dstLen = static_cast(outBuf.size()); + TINYEXR_CHECK_AND_RETURN_C(dstLen > 0, false); + if (!tinyexr::DecompressZip( + reinterpret_cast(&outBuf.at(0)), &dstLen, data_ptr, + static_cast(data_len))) { + return false; + } + + // For ZIP_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... 
+ for (size_t c = 0; c < static_cast(num_channels); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned short *line_ptr = reinterpret_cast( + &outBuf.at(v * static_cast(pixel_data_size) * + static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + tinyexr::FP16 hf; + + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + tinyexr::FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast(out_images)[c]; + size_t offset = 0; + if (line_order == 0) { + offset = (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + offset = (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + image += offset; + + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false); + + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned int *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + unsigned int val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += 
(static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false); + for (size_t v = 0; v < static_cast(num_lines); v++) { + const float *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + float val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + return false; + } + } + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { + // Allocate original data size. + std::vector outBuf(static_cast(width) * + static_cast(num_lines) * + pixel_data_size); + + unsigned long dstLen = static_cast(outBuf.size()); + if (dstLen == 0) { + return false; + } + + if (!tinyexr::DecompressRle( + reinterpret_cast(&outBuf.at(0)), dstLen, data_ptr, + static_cast(data_len))) { + return false; + } + + // For RLE_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... 
+ for (size_t c = 0; c < static_cast(num_channels); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned short *line_ptr = reinterpret_cast( + &outBuf.at(v * static_cast(pixel_data_size) * + static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + tinyexr::FP16 hf; + + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + tinyexr::FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false); + + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned int *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + unsigned int val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + 
(static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false); + for (size_t v = 0; v < static_cast(num_lines); v++) { + const float *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + float val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + return false; + } + } + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if TINYEXR_USE_ZFP + tinyexr::ZFPCompressionParam zfp_compression_param; + std::string e; + if (!tinyexr::FindZFPCompressionParam(&zfp_compression_param, attributes, + int(num_attributes), &e)) { + // This code path should not be reachable. + return false; + } + + // Allocate original data size. + std::vector outBuf(static_cast(width) * + static_cast(num_lines) * + pixel_data_size); + + unsigned long dstLen = outBuf.size(); + TINYEXR_CHECK_AND_RETURN_C(dstLen > 0, false); + tinyexr::DecompressZfp(reinterpret_cast(&outBuf.at(0)), width, + num_lines, num_channels, data_ptr, + static_cast(data_len), + zfp_compression_param); + + // For ZFP_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... 
for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... + for (size_t c = 0; c < static_cast(num_channels); c++) { + TINYEXR_CHECK_AND_RETURN_C(channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT, false); + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false); + for (size_t v = 0; v < static_cast(num_lines); v++) { + const float *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + float val; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + return false; + } + } +#else + (void)attributes; + (void)num_attributes; + (void)num_channels; + return false; +#endif + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { + for (size_t c = 0; c < num_channels; c++) { + for (size_t v = 0; v < static_cast(num_lines); v++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + const unsigned short *line_ptr = + reinterpret_cast( + data_ptr + v * pixel_data_size * size_t(width) + + channel_offset_list[c] * static_cast(width)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *outLine = + reinterpret_cast(out_images[c]); + if (line_order == 0) { + outLine += (size_t(y) + v) * size_t(x_stride); + } else { + outLine += + (size_t(height) - 1 - (size_t(y) + v)) * 
size_t(x_stride); + } + + for (int u = 0; u < width; u++) { + tinyexr::FP16 hf; + + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + outLine[u] = hf.u; + } + } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { + float *outLine = reinterpret_cast(out_images[c]); + if (line_order == 0) { + outLine += (size_t(y) + v) * size_t(x_stride); + } else { + outLine += + (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); + } + + if (reinterpret_cast(line_ptr + width) > + (data_ptr + data_len)) { + // Insufficient data size + return false; + } + + for (int u = 0; u < width; u++) { + tinyexr::FP16 hf; + + // address may not be aligned. use byte-wise copy for safety.#76 + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + tinyexr::FP32 f32 = half_to_float(hf); + + outLine[u] = f32.f; + } + } else { + return false; + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + const float *line_ptr = reinterpret_cast( + data_ptr + v * pixel_data_size * size_t(width) + + channel_offset_list[c] * static_cast(width)); + + float *outLine = reinterpret_cast(out_images[c]); + if (line_order == 0) { + outLine += (size_t(y) + v) * size_t(x_stride); + } else { + outLine += + (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); + } + + if (reinterpret_cast(line_ptr + width) > + (data_ptr + data_len)) { + // Insufficient data size + return false; + } + + for (int u = 0; u < width; u++) { + float val; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + outLine[u] = val; + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + const unsigned int *line_ptr = reinterpret_cast( + data_ptr + v * pixel_data_size * size_t(width) + + channel_offset_list[c] * static_cast(width)); + + unsigned int *outLine = + reinterpret_cast(out_images[c]); + if (line_order == 0) { + outLine += (size_t(y) + v) * 
size_t(x_stride); + } else { + outLine += + (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); + } + + if (reinterpret_cast(line_ptr + width) > + (data_ptr + data_len)) { + // Corrupted data + return false; + } + + for (int u = 0; u < width; u++) { + + unsigned int val; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + outLine[u] = val; + } + } + } + } + } + + return true; +} + +static bool DecodeTiledPixelData( + unsigned char **out_images, int *width, int *height, + const int *requested_pixel_types, const unsigned char *data_ptr, + size_t data_len, int compression_type, int line_order, int data_width, + int data_height, int tile_offset_x, int tile_offset_y, int tile_size_x, + int tile_size_y, size_t pixel_data_size, size_t num_attributes, + const EXRAttribute *attributes, size_t num_channels, + const EXRChannelInfo *channels, + const std::vector &channel_offset_list) { + // Here, data_width and data_height are the dimensions of the current (sub)level. + if (tile_size_x * tile_offset_x > data_width || + tile_size_y * tile_offset_y > data_height) { + return false; + } + + // Compute actual image size in a tile. + if ((tile_offset_x + 1) * tile_size_x >= data_width) { + (*width) = data_width - (tile_offset_x * tile_size_x); + } else { + (*width) = tile_size_x; + } + + if ((tile_offset_y + 1) * tile_size_y >= data_height) { + (*height) = data_height - (tile_offset_y * tile_size_y); + } else { + (*height) = tile_size_y; + } + + // Image size = tile size. 
+ return DecodePixelData(out_images, requested_pixel_types, data_ptr, data_len, + compression_type, line_order, (*width), tile_size_y, + /* stride */ tile_size_x, /* y */ 0, /* line_no */ 0, + (*height), pixel_data_size, num_attributes, attributes, + num_channels, channels, channel_offset_list); +} + +static bool ComputeChannelLayout(std::vector *channel_offset_list, + int *pixel_data_size, size_t *channel_offset, + int num_channels, + const EXRChannelInfo *channels) { + channel_offset_list->resize(static_cast(num_channels)); + + (*pixel_data_size) = 0; + (*channel_offset) = 0; + + for (size_t c = 0; c < static_cast(num_channels); c++) { + (*channel_offset_list)[c] = (*channel_offset); + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + (*pixel_data_size) += sizeof(unsigned short); + (*channel_offset) += sizeof(unsigned short); + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + (*pixel_data_size) += sizeof(float); + (*channel_offset) += sizeof(float); + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + (*pixel_data_size) += sizeof(unsigned int); + (*channel_offset) += sizeof(unsigned int); + } else { + // ??? + return false; + } + } + return true; +} + +// TODO: Simply return nullptr when failed to allocate? +static unsigned char **AllocateImage(int num_channels, + const EXRChannelInfo *channels, + const int *requested_pixel_types, + int data_width, int data_height, bool *success) { + unsigned char **images = + reinterpret_cast(static_cast( + malloc(sizeof(float *) * static_cast(num_channels)))); + + for (size_t c = 0; c < static_cast(num_channels); c++) { + images[c] = NULL; + } + + bool valid = true; + + for (size_t c = 0; c < static_cast(num_channels); c++) { + size_t data_len = + static_cast(data_width) * static_cast(data_height); + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + // pixel_data_size += sizeof(unsigned short); + // channel_offset += sizeof(unsigned short); + // Alloc internal image for half type. 
+ if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + images[c] = + reinterpret_cast(static_cast( + malloc(sizeof(unsigned short) * data_len))); + } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { + images[c] = reinterpret_cast( + static_cast(malloc(sizeof(float) * data_len))); + } else { + images[c] = NULL; // just in case. + valid = false; + break; + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + // pixel_data_size += sizeof(float); + // channel_offset += sizeof(float); + images[c] = reinterpret_cast( + static_cast(malloc(sizeof(float) * data_len))); + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + // pixel_data_size += sizeof(unsigned int); + // channel_offset += sizeof(unsigned int); + images[c] = reinterpret_cast( + static_cast(malloc(sizeof(unsigned int) * data_len))); + } else { + images[c] = NULL; // just in case. + valid = false; + break; + } + } + + if (!valid) { + for (size_t c = 0; c < static_cast(num_channels); c++) { + if (images[c]) { + free(images[c]); + images[c] = NULL; + } + } + + if (success) { + (*success) = false; + } + } else { + if (success) { + (*success) = true; + } + } + + return images; +} + +#ifdef _WIN32 +static inline std::wstring UTF8ToWchar(const std::string &str) { + int wstr_size = + MultiByteToWideChar(CP_UTF8, 0, str.data(), (int)str.size(), NULL, 0); + std::wstring wstr(wstr_size, 0); + MultiByteToWideChar(CP_UTF8, 0, str.data(), (int)str.size(), &wstr[0], + (int)wstr.size()); + return wstr; +} +#endif + + +static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, + const EXRVersion *version, std::string *err, + const unsigned char *buf, size_t size) { + const char *marker = reinterpret_cast(&buf[0]); + + if (empty_header) { + (*empty_header) = false; + } + + if (version->multipart) { + if (size > 0 && marker[0] == '\0') { + // End of header list. 
+ if (empty_header) { + (*empty_header) = true; + } + return TINYEXR_SUCCESS; + } + } + + // According to the spec, the header of every OpenEXR file must contain at + // least the following attributes: + // + // channels chlist + // compression compression + // dataWindow box2i + // displayWindow box2i + // lineOrder lineOrder + // pixelAspectRatio float + // screenWindowCenter v2f + // screenWindowWidth float + bool has_channels = false; + bool has_compression = false; + bool has_data_window = false; + bool has_display_window = false; + bool has_line_order = false; + bool has_pixel_aspect_ratio = false; + bool has_screen_window_center = false; + bool has_screen_window_width = false; + bool has_name = false; + bool has_type = false; + + info->name.clear(); + info->type.clear(); + + info->data_window.min_x = 0; + info->data_window.min_y = 0; + info->data_window.max_x = 0; + info->data_window.max_y = 0; + info->line_order = 0; // @fixme + info->display_window.min_x = 0; + info->display_window.min_y = 0; + info->display_window.max_x = 0; + info->display_window.max_y = 0; + info->screen_window_center[0] = 0.0f; + info->screen_window_center[1] = 0.0f; + info->screen_window_width = -1.0f; + info->pixel_aspect_ratio = -1.0f; + + info->tiled = 0; + info->tile_size_x = -1; + info->tile_size_y = -1; + info->tile_level_mode = -1; + info->tile_rounding_mode = -1; + + info->attributes.clear(); + + // Read attributes + size_t orig_size = size; + for (size_t nattr = 0; nattr < TINYEXR_MAX_HEADER_ATTRIBUTES; nattr++) { + if (0 == size) { + if (err) { + (*err) += "Insufficient data size for attributes.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } else if (marker[0] == '\0') { + size--; + break; + } + + std::string attr_name; + std::string attr_type; + std::vector data; + size_t marker_size; + if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size, + marker, size)) { + if (err) { + (*err) += "Failed to read attribute.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; 
+ } + marker += marker_size; + size -= marker_size; + + // For a multipart file, the version field 9th bit is 0. + if ((version->tiled || version->multipart || version->non_image) && attr_name.compare("tiles") == 0) { + unsigned int x_size, y_size; + unsigned char tile_mode; + if (data.size() != 9) { + if (err) { + (*err) += "(ParseEXRHeader) Invalid attribute data size. Attribute data size must be 9.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + memcpy(&x_size, &data.at(0), sizeof(int)); + memcpy(&y_size, &data.at(4), sizeof(int)); + tile_mode = data[8]; + tinyexr::swap4(&x_size); + tinyexr::swap4(&y_size); + + if (x_size > static_cast(std::numeric_limits::max()) || + y_size > static_cast(std::numeric_limits::max())) { + if (err) { + (*err) = "Tile sizes were invalid."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + info->tile_size_x = static_cast(x_size); + info->tile_size_y = static_cast(y_size); + + // mode = levelMode + roundingMode * 16 + info->tile_level_mode = tile_mode & 0x3; + info->tile_rounding_mode = (tile_mode >> 4) & 0x1; + info->tiled = 1; + } else if (attr_name.compare("compression") == 0) { + bool ok = false; + if (data[0] < TINYEXR_COMPRESSIONTYPE_PIZ) { + ok = true; + } + + if (data[0] == TINYEXR_COMPRESSIONTYPE_PIZ) { +#if TINYEXR_USE_PIZ + ok = true; +#else + if (err) { + (*err) = "PIZ compression is not supported."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; +#endif + } + + if (data[0] == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if TINYEXR_USE_ZFP + ok = true; +#else + if (err) { + (*err) = "ZFP compression is not supported."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; +#endif + } + + if (!ok) { + if (err) { + (*err) = "Unknown compression type."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + info->compression_type = static_cast(data[0]); + has_compression = true; + + } else if (attr_name.compare("channels") == 0) { + // name: zero-terminated string, from 1 to 255 bytes long + // pixel type: int, possible values are: 
UINT = 0 HALF = 1 FLOAT = 2 + // pLinear: unsigned char, possible values are 0 and 1 + // reserved: three chars, should be zero + // xSampling: int + // ySampling: int + + if (!ReadChannelInfo(info->channels, data)) { + if (err) { + (*err) += "Failed to parse channel info.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (info->channels.size() < 1) { + if (err) { + (*err) += "# of channels is zero.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + has_channels = true; + + } else if (attr_name.compare("dataWindow") == 0) { + if (data.size() >= 16) { + memcpy(&info->data_window.min_x, &data.at(0), sizeof(int)); + memcpy(&info->data_window.min_y, &data.at(4), sizeof(int)); + memcpy(&info->data_window.max_x, &data.at(8), sizeof(int)); + memcpy(&info->data_window.max_y, &data.at(12), sizeof(int)); + tinyexr::swap4(&info->data_window.min_x); + tinyexr::swap4(&info->data_window.min_y); + tinyexr::swap4(&info->data_window.max_x); + tinyexr::swap4(&info->data_window.max_y); + has_data_window = true; + } + } else if (attr_name.compare("displayWindow") == 0) { + if (data.size() >= 16) { + memcpy(&info->display_window.min_x, &data.at(0), sizeof(int)); + memcpy(&info->display_window.min_y, &data.at(4), sizeof(int)); + memcpy(&info->display_window.max_x, &data.at(8), sizeof(int)); + memcpy(&info->display_window.max_y, &data.at(12), sizeof(int)); + tinyexr::swap4(&info->display_window.min_x); + tinyexr::swap4(&info->display_window.min_y); + tinyexr::swap4(&info->display_window.max_x); + tinyexr::swap4(&info->display_window.max_y); + + has_display_window = true; + } + } else if (attr_name.compare("lineOrder") == 0) { + if (data.size() >= 1) { + info->line_order = static_cast(data[0]); + has_line_order = true; + } + } else if (attr_name.compare("pixelAspectRatio") == 0) { + if (data.size() >= sizeof(float)) { + memcpy(&info->pixel_aspect_ratio, &data.at(0), sizeof(float)); + tinyexr::swap4(&info->pixel_aspect_ratio); + has_pixel_aspect_ratio = true; + } + } else if 
(attr_name.compare("screenWindowCenter") == 0) { + if (data.size() >= 8) { + memcpy(&info->screen_window_center[0], &data.at(0), sizeof(float)); + memcpy(&info->screen_window_center[1], &data.at(4), sizeof(float)); + tinyexr::swap4(&info->screen_window_center[0]); + tinyexr::swap4(&info->screen_window_center[1]); + has_screen_window_center = true; + } + } else if (attr_name.compare("screenWindowWidth") == 0) { + if (data.size() >= sizeof(float)) { + memcpy(&info->screen_window_width, &data.at(0), sizeof(float)); + tinyexr::swap4(&info->screen_window_width); + + has_screen_window_width = true; + } + } else if (attr_name.compare("chunkCount") == 0) { + if (data.size() >= sizeof(int)) { + memcpy(&info->chunk_count, &data.at(0), sizeof(int)); + tinyexr::swap4(&info->chunk_count); + } + } else if (attr_name.compare("name") == 0) { + if (!data.empty() && data[0]) { + data.push_back(0); + size_t len = strlen(reinterpret_cast(&data[0])); + info->name.resize(len); + info->name.assign(reinterpret_cast(&data[0]), len); + has_name = true; + } + } else if (attr_name.compare("type") == 0) { + if (!data.empty() && data[0]) { + data.push_back(0); + size_t len = strlen(reinterpret_cast(&data[0])); + info->type.resize(len); + info->type.assign(reinterpret_cast(&data[0]), len); + has_type = true; + } + } else { + // Custom attribute(up to TINYEXR_MAX_CUSTOM_ATTRIBUTES) + if (info->attributes.size() < TINYEXR_MAX_CUSTOM_ATTRIBUTES) { + EXRAttribute attrib; +#ifdef _MSC_VER + strncpy_s(attrib.name, attr_name.c_str(), 255); + strncpy_s(attrib.type, attr_type.c_str(), 255); +#else + strncpy(attrib.name, attr_name.c_str(), 255); + strncpy(attrib.type, attr_type.c_str(), 255); +#endif + attrib.name[255] = '\0'; + attrib.type[255] = '\0'; + //std::cout << "i = " << info->attributes.size() << ", dsize = " << data.size() << "\n"; + attrib.size = static_cast(data.size()); + attrib.value = static_cast(malloc(data.size())); + memcpy(reinterpret_cast(attrib.value), &data.at(0), + data.size()); + 
info->attributes.push_back(attrib); + } + } + } + + // Check if required attributes exist + { + std::stringstream ss_err; + + if (!has_compression) { + ss_err << "\"compression\" attribute not found in the header." + << std::endl; + } + + if (!has_channels) { + ss_err << "\"channels\" attribute not found in the header." << std::endl; + } + + if (!has_line_order) { + ss_err << "\"lineOrder\" attribute not found in the header." << std::endl; + } + + if (!has_display_window) { + ss_err << "\"displayWindow\" attribute not found in the header." + << std::endl; + } + + if (!has_data_window) { + ss_err << "\"dataWindow\" attribute not found in the header or invalid." + << std::endl; + } + + if (!has_pixel_aspect_ratio) { + ss_err << "\"pixelAspectRatio\" attribute not found in the header." + << std::endl; + } + + if (!has_screen_window_width) { + ss_err << "\"screenWindowWidth\" attribute not found in the header." + << std::endl; + } + + if (!has_screen_window_center) { + ss_err << "\"screenWindowCenter\" attribute not found in the header." + << std::endl; + } + + if (version->multipart || version->non_image) { + if (!has_name) { + ss_err << "\"name\" attribute not found in the header." + << std::endl; + } + if (!has_type) { + ss_err << "\"type\" attribute not found in the header." + << std::endl; + } + } + + if (!(ss_err.str().empty())) { + if (err) { + (*err) += ss_err.str(); + } + + return TINYEXR_ERROR_INVALID_HEADER; + } + } + + info->header_len = static_cast(orig_size - size); + + return TINYEXR_SUCCESS; +} + +// C++ HeaderInfo to C EXRHeader conversion. 
+static bool ConvertHeader(EXRHeader *exr_header, const HeaderInfo &info, std::string *warn, std::string *err) { + exr_header->pixel_aspect_ratio = info.pixel_aspect_ratio; + exr_header->screen_window_center[0] = info.screen_window_center[0]; + exr_header->screen_window_center[1] = info.screen_window_center[1]; + exr_header->screen_window_width = info.screen_window_width; + exr_header->chunk_count = info.chunk_count; + exr_header->display_window.min_x = info.display_window.min_x; + exr_header->display_window.min_y = info.display_window.min_y; + exr_header->display_window.max_x = info.display_window.max_x; + exr_header->display_window.max_y = info.display_window.max_y; + exr_header->data_window.min_x = info.data_window.min_x; + exr_header->data_window.min_y = info.data_window.min_y; + exr_header->data_window.max_x = info.data_window.max_x; + exr_header->data_window.max_y = info.data_window.max_y; + exr_header->line_order = info.line_order; + exr_header->compression_type = info.compression_type; + exr_header->tiled = info.tiled; + exr_header->tile_size_x = info.tile_size_x; + exr_header->tile_size_y = info.tile_size_y; + exr_header->tile_level_mode = info.tile_level_mode; + exr_header->tile_rounding_mode = info.tile_rounding_mode; + + EXRSetNameAttr(exr_header, info.name.c_str()); + + + if (!info.type.empty()) { + bool valid = true; + if (info.type == "scanlineimage") { + if (exr_header->tiled) { + if (err) { + (*err) += "(ConvertHeader) tiled bit must be off for `scanlineimage` type.\n"; + } + valid = false; + } + } else if (info.type == "tiledimage") { + if (!exr_header->tiled) { + if (err) { + (*err) += "(ConvertHeader) tiled bit must be on for `tiledimage` type.\n"; + } + valid = false; + } + } else if (info.type == "deeptile") { + exr_header->non_image = 1; + if (!exr_header->tiled) { + if (err) { + (*err) += "(ConvertHeader) tiled bit must be on for `deeptile` type.\n"; + } + valid = false; + } + } else if (info.type == "deepscanline") { + exr_header->non_image 
= 1; + if (exr_header->tiled) { + if (err) { + (*err) += "(ConvertHeader) tiled bit must be off for `deepscanline` type.\n"; + } + //valid = false; + } + } else { + if (warn) { + std::stringstream ss; + ss << "(ConvertHeader) Unsupported or unknown info.type: " << info.type << "\n"; + (*warn) += ss.str(); + } + } + + if (!valid) { + return false; + } + } + + exr_header->num_channels = static_cast(info.channels.size()); + + exr_header->channels = static_cast(malloc( + sizeof(EXRChannelInfo) * static_cast(exr_header->num_channels))); + for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { +#ifdef _MSC_VER + strncpy_s(exr_header->channels[c].name, info.channels[c].name.c_str(), 255); +#else + strncpy(exr_header->channels[c].name, info.channels[c].name.c_str(), 255); +#endif + // manually add '\0' for safety. + exr_header->channels[c].name[255] = '\0'; + + exr_header->channels[c].pixel_type = info.channels[c].pixel_type; + exr_header->channels[c].p_linear = info.channels[c].p_linear; + exr_header->channels[c].x_sampling = info.channels[c].x_sampling; + exr_header->channels[c].y_sampling = info.channels[c].y_sampling; + } + + exr_header->pixel_types = static_cast( + malloc(sizeof(int) * static_cast(exr_header->num_channels))); + for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { + exr_header->pixel_types[c] = info.channels[c].pixel_type; + } + + // Initially fill with values of `pixel_types` + exr_header->requested_pixel_types = static_cast( + malloc(sizeof(int) * static_cast(exr_header->num_channels))); + for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { + exr_header->requested_pixel_types[c] = info.channels[c].pixel_type; + } + + exr_header->num_custom_attributes = static_cast(info.attributes.size()); + + if (exr_header->num_custom_attributes > 0) { + // TODO(syoyo): Report warning when # of attributes exceeds + // `TINYEXR_MAX_CUSTOM_ATTRIBUTES` + if (exr_header->num_custom_attributes > 
TINYEXR_MAX_CUSTOM_ATTRIBUTES) { + exr_header->num_custom_attributes = TINYEXR_MAX_CUSTOM_ATTRIBUTES; + } + + exr_header->custom_attributes = static_cast(malloc( + sizeof(EXRAttribute) * size_t(exr_header->num_custom_attributes))); + + for (size_t i = 0; i < size_t(exr_header->num_custom_attributes); i++) { + memcpy(exr_header->custom_attributes[i].name, info.attributes[i].name, + 256); + memcpy(exr_header->custom_attributes[i].type, info.attributes[i].type, + 256); + exr_header->custom_attributes[i].size = info.attributes[i].size; + // Just copy pointer + exr_header->custom_attributes[i].value = info.attributes[i].value; + } + + } else { + exr_header->custom_attributes = NULL; + } + + exr_header->header_len = info.header_len; + + return true; +} + +struct OffsetData { + OffsetData() : num_x_levels(0), num_y_levels(0) {} + std::vector > > offsets; + int num_x_levels; + int num_y_levels; +}; + +// -1 = error +static int LevelIndex(int lx, int ly, int tile_level_mode, int num_x_levels) { + switch (tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + return 0; + + case TINYEXR_TILE_MIPMAP_LEVELS: + return lx; + + case TINYEXR_TILE_RIPMAP_LEVELS: + return lx + ly * num_x_levels; + + default: + return -1; + } +// return 0; +} + +static int LevelSize(int toplevel_size, int level, int tile_rounding_mode) { + if (level < 0) { + return -1; + } + + int b = static_cast(1u << static_cast(level)); + int level_size = toplevel_size / b; + + if (tile_rounding_mode == TINYEXR_TILE_ROUND_UP && level_size * b < toplevel_size) + level_size += 1; + + return std::max(level_size, 1); +} + +static int DecodeTiledLevel(EXRImage* exr_image, const EXRHeader* exr_header, + const OffsetData& offset_data, + const std::vector& channel_offset_list, + int pixel_data_size, + const unsigned char* head, const size_t size, + std::string* err) { + int num_channels = exr_header->num_channels; + + int level_index = LevelIndex(exr_image->level_x, exr_image->level_y, exr_header->tile_level_mode, 
offset_data.num_x_levels); + int num_y_tiles = int(offset_data.offsets[size_t(level_index)].size()); + if (num_y_tiles < 1) { + return TINYEXR_ERROR_INVALID_DATA; + } + int num_x_tiles = int(offset_data.offsets[size_t(level_index)][0].size()); + if (num_x_tiles < 1) { + return TINYEXR_ERROR_INVALID_DATA; + } + int num_tiles = num_x_tiles * num_y_tiles; + + int err_code = TINYEXR_SUCCESS; + + enum { + EF_SUCCESS = 0, + EF_INVALID_DATA = 1, + EF_INSUFFICIENT_DATA = 2, + EF_FAILED_TO_DECODE = 4 + }; +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::atomic error_flag(EF_SUCCESS); +#else + unsigned error_flag(EF_SUCCESS); +#endif + + // Although the spec says : "...the data window is subdivided into an array of smaller rectangles...", + // the IlmImf library allows the dimensions of the tile to be larger (or equal) than the dimensions of the data window. +#if 0 + if ((exr_header->tile_size_x > exr_image->width || exr_header->tile_size_y > exr_image->height) && + exr_image->level_x == 0 && exr_image->level_y == 0) { + if (err) { + (*err) += "Failed to decode tile data.\n"; + } + err_code = TINYEXR_ERROR_INVALID_DATA; + } +#endif + exr_image->tiles = static_cast( + calloc(sizeof(EXRTile), static_cast(num_tiles))); + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::vector workers; + std::atomic tile_count(0); + + int num_threads = std::max(1, int(std::thread::hardware_concurrency())); + if (num_threads > int(num_tiles)) { + num_threads = int(num_tiles); + } + + for (int t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() + { + int tile_idx = 0; + while ((tile_idx = tile_count++) < num_tiles) { + +#else +#if TINYEXR_USE_OPENMP +#pragma omp parallel for +#endif + for (int tile_idx = 0; tile_idx < num_tiles; tile_idx++) { +#endif + // Allocate memory for each tile. 
+ bool alloc_success = false; + exr_image->tiles[tile_idx].images = tinyexr::AllocateImage( + num_channels, exr_header->channels, + exr_header->requested_pixel_types, exr_header->tile_size_x, + exr_header->tile_size_y, &alloc_success); + + if (!alloc_success) { + error_flag |= EF_INVALID_DATA; + continue; + } + + int x_tile = tile_idx % num_x_tiles; + int y_tile = tile_idx / num_x_tiles; + // 16 byte: tile coordinates + // 4 byte : data size + // ~ : data(uncompressed or compressed) + tinyexr::tinyexr_uint64 offset = offset_data.offsets[size_t(level_index)][size_t(y_tile)][size_t(x_tile)]; + if (offset + sizeof(int) * 5 > size) { + // Insufficient data size. + error_flag |= EF_INSUFFICIENT_DATA; + continue; + } + + size_t data_size = + size_t(size - (offset + sizeof(int) * 5)); + const unsigned char* data_ptr = + reinterpret_cast(head + offset); + + int tile_coordinates[4]; + memcpy(tile_coordinates, data_ptr, sizeof(int) * 4); + tinyexr::swap4(&tile_coordinates[0]); + tinyexr::swap4(&tile_coordinates[1]); + tinyexr::swap4(&tile_coordinates[2]); + tinyexr::swap4(&tile_coordinates[3]); + + if (tile_coordinates[2] != exr_image->level_x) { + // Invalid data. + error_flag |= EF_INVALID_DATA; + continue; + } + if (tile_coordinates[3] != exr_image->level_y) { + // Invalid data. + error_flag |= EF_INVALID_DATA; + continue; + } + + int data_len; + memcpy(&data_len, data_ptr + 16, + sizeof(int)); // 16 = sizeof(tile_coordinates) + tinyexr::swap4(&data_len); + + if (data_len < 2 || size_t(data_len) > data_size) { + // Insufficient data size. 
+ error_flag |= EF_INSUFFICIENT_DATA; + continue; + } + + // Move to data addr: 20 = 16 + 4; + data_ptr += 20; + bool ret = tinyexr::DecodeTiledPixelData( + exr_image->tiles[tile_idx].images, + &(exr_image->tiles[tile_idx].width), + &(exr_image->tiles[tile_idx].height), + exr_header->requested_pixel_types, data_ptr, + static_cast(data_len), exr_header->compression_type, + exr_header->line_order, + exr_image->width, exr_image->height, + tile_coordinates[0], tile_coordinates[1], exr_header->tile_size_x, + exr_header->tile_size_y, static_cast(pixel_data_size), + static_cast(exr_header->num_custom_attributes), + exr_header->custom_attributes, + static_cast(exr_header->num_channels), + exr_header->channels, channel_offset_list); + + if (!ret) { + // Failed to decode tile data. + error_flag |= EF_FAILED_TO_DECODE; + } + + exr_image->tiles[tile_idx].offset_x = tile_coordinates[0]; + exr_image->tiles[tile_idx].offset_y = tile_coordinates[1]; + exr_image->tiles[tile_idx].level_x = tile_coordinates[2]; + exr_image->tiles[tile_idx].level_y = tile_coordinates[3]; + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + } + })); + } // num_thread loop + + for (auto& t : workers) { + t.join(); + } + +#else + } // parallel for +#endif + + // Even in the event of an error, the reserved memory may be freed. 
+ exr_image->num_channels = num_channels; + exr_image->num_tiles = static_cast(num_tiles); + + if (error_flag) err_code = TINYEXR_ERROR_INVALID_DATA; + if (err) { + if (error_flag & EF_INSUFFICIENT_DATA) { + (*err) += "Insufficient data length.\n"; + } + if (error_flag & EF_FAILED_TO_DECODE) { + (*err) += "Failed to decode tile data.\n"; + } + } + return err_code; +} + +static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, + const OffsetData& offset_data, + const unsigned char *head, const size_t size, + std::string *err) { + int num_channels = exr_header->num_channels; + + int num_scanline_blocks = 1; + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanline_blocks = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + num_scanline_blocks = 32; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + num_scanline_blocks = 16; + +#if TINYEXR_USE_ZFP + tinyexr::ZFPCompressionParam zfp_compression_param; + if (!FindZFPCompressionParam(&zfp_compression_param, + exr_header->custom_attributes, + int(exr_header->num_custom_attributes), err)) { + return TINYEXR_ERROR_INVALID_HEADER; + } +#endif + } + + if (exr_header->data_window.max_x < exr_header->data_window.min_x || + exr_header->data_window.max_y < exr_header->data_window.min_y) { + if (err) { + (*err) += "Invalid data window.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + tinyexr_int64 data_width = + static_cast(exr_header->data_window.max_x) - static_cast(exr_header->data_window.min_x) + static_cast(1); + tinyexr_int64 data_height = + static_cast(exr_header->data_window.max_y) - static_cast(exr_header->data_window.min_y) + static_cast(1); + + if (data_width <= 0) { + if (err) { + (*err) += "Invalid data window width.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (data_height <= 0) { + if (err) { + (*err) += "Invalid data window height.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + // Do not allow 
too large data_width and data_height. header invalid? + { + if ((data_width > TINYEXR_DIMENSION_THRESHOLD) || (data_height > TINYEXR_DIMENSION_THRESHOLD)) { + if (err) { + std::stringstream ss; + ss << "data_with or data_height too large. data_width: " << data_width + << ", " + << "data_height = " << data_height << std::endl; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_DATA; + } + if (exr_header->tiled) { + if ((exr_header->tile_size_x > TINYEXR_DIMENSION_THRESHOLD) || (exr_header->tile_size_y > TINYEXR_DIMENSION_THRESHOLD)) { + if (err) { + std::stringstream ss; + ss << "tile with or tile height too large. tile width: " << exr_header->tile_size_x + << ", " + << "tile height = " << exr_header->tile_size_y << std::endl; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_DATA; + } + } + } + + const std::vector& offsets = offset_data.offsets[0][0]; + size_t num_blocks = offsets.size(); + + std::vector channel_offset_list; + int pixel_data_size = 0; + size_t channel_offset = 0; + if (!tinyexr::ComputeChannelLayout(&channel_offset_list, &pixel_data_size, + &channel_offset, num_channels, + exr_header->channels)) { + if (err) { + (*err) += "Failed to compute channel layout.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::atomic invalid_data(false); +#else + bool invalid_data(false); +#endif + + if (exr_header->tiled) { + // value check + if (exr_header->tile_size_x < 0) { + if (err) { + std::stringstream ss; + ss << "Invalid tile size x : " << exr_header->tile_size_x << "\n"; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_HEADER; + } + + if (exr_header->tile_size_y < 0) { + if (err) { + std::stringstream ss; + ss << "Invalid tile size y : " << exr_header->tile_size_y << "\n"; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_HEADER; + } + if (exr_header->tile_level_mode != TINYEXR_TILE_RIPMAP_LEVELS) { + EXRImage* level_image = NULL; + for (int level = 0; level < 
offset_data.num_x_levels; ++level) { + if (!level_image) { + level_image = exr_image; + } else { + level_image->next_level = new EXRImage; + InitEXRImage(level_image->next_level); + level_image = level_image->next_level; + } + level_image->width = + LevelSize(exr_header->data_window.max_x - exr_header->data_window.min_x + 1, level, exr_header->tile_rounding_mode); + if (level_image->width < 1) { + return TINYEXR_ERROR_INVALID_DATA; + } + + level_image->height = + LevelSize(exr_header->data_window.max_y - exr_header->data_window.min_y + 1, level, exr_header->tile_rounding_mode); + + if (level_image->height < 1) { + return TINYEXR_ERROR_INVALID_DATA; + } + + level_image->level_x = level; + level_image->level_y = level; + + int ret = DecodeTiledLevel(level_image, exr_header, + offset_data, + channel_offset_list, + pixel_data_size, + head, size, + err); + if (ret != TINYEXR_SUCCESS) return ret; + } + } else { + EXRImage* level_image = NULL; + for (int level_y = 0; level_y < offset_data.num_y_levels; ++level_y) + for (int level_x = 0; level_x < offset_data.num_x_levels; ++level_x) { + if (!level_image) { + level_image = exr_image; + } else { + level_image->next_level = new EXRImage; + InitEXRImage(level_image->next_level); + level_image = level_image->next_level; + } + + level_image->width = + LevelSize(exr_header->data_window.max_x - exr_header->data_window.min_x + 1, level_x, exr_header->tile_rounding_mode); + if (level_image->width < 1) { + return TINYEXR_ERROR_INVALID_DATA; + } + + level_image->height = + LevelSize(exr_header->data_window.max_y - exr_header->data_window.min_y + 1, level_y, exr_header->tile_rounding_mode); + if (level_image->height < 1) { + return TINYEXR_ERROR_INVALID_DATA; + } + + level_image->level_x = level_x; + level_image->level_y = level_y; + + int ret = DecodeTiledLevel(level_image, exr_header, + offset_data, + channel_offset_list, + pixel_data_size, + head, size, + err); + if (ret != TINYEXR_SUCCESS) return ret; + } + } + } else { // 
scanline format + // Don't allow too large image(256GB * pixel_data_size or more). Workaround + // for #104. + size_t total_data_len = + size_t(data_width) * size_t(data_height) * size_t(num_channels); + const bool total_data_len_overflown = + sizeof(void *) == 8 ? (total_data_len >= 0x4000000000) : false; + if ((total_data_len == 0) || total_data_len_overflown) { + if (err) { + std::stringstream ss; + ss << "Image data size is zero or too large: width = " << data_width + << ", height = " << data_height << ", channels = " << num_channels + << std::endl; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_DATA; + } + + bool alloc_success = false; + exr_image->images = tinyexr::AllocateImage( + num_channels, exr_header->channels, exr_header->requested_pixel_types, + int(data_width), int(data_height), &alloc_success); + + if (!alloc_success) { + if (err) { + std::stringstream ss; + ss << "Failed to allocate memory for Images. Maybe EXR header is corrupted or Image data size is too large: width = " << data_width + << ", height = " << data_height << ", channels = " << num_channels + << std::endl; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_DATA; + } + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::vector workers; + std::atomic y_count(0); + + int num_threads = std::max(1, int(std::thread::hardware_concurrency())); + if (num_threads > int(num_blocks)) { + num_threads = int(num_blocks); + } + + for (int t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() { + int y = 0; + while ((y = y_count++) < int(num_blocks)) { + +#else + +#if TINYEXR_USE_OPENMP +#pragma omp parallel for +#endif + for (int y = 0; y < static_cast(num_blocks); y++) { + +#endif + size_t y_idx = static_cast(y); + + if (offsets[y_idx] + sizeof(int) * 2 > size) { + invalid_data = true; + } else { + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(uncompressed or compressed) + size_t data_size = + size_t(size - (offsets[y_idx] + sizeof(int) * 
2)); + const unsigned char *data_ptr = + reinterpret_cast(head + offsets[y_idx]); + + int line_no; + memcpy(&line_no, data_ptr, sizeof(int)); + int data_len; + memcpy(&data_len, data_ptr + 4, sizeof(int)); + tinyexr::swap4(&line_no); + tinyexr::swap4(&data_len); + + if (size_t(data_len) > data_size) { + invalid_data = true; + + } else if ((line_no > (2 << 20)) || (line_no < -(2 << 20))) { + // Too large value. Assume this is invalid + // 2**20 = 1048576 = heuristic value. + invalid_data = true; + } else if (data_len == 0) { + // TODO(syoyo): May be ok to raise the threshold for example + // `data_len < 4` + invalid_data = true; + } else { + // line_no may be negative. + int end_line_no = (std::min)(line_no + num_scanline_blocks, + (exr_header->data_window.max_y + 1)); + + int num_lines = end_line_no - line_no; + + if (num_lines <= 0) { + invalid_data = true; + } else { + // Move to data addr: 8 = 4 + 4; + data_ptr += 8; + + // Adjust line_no with data_window.bmin.y + + // overflow check + tinyexr_int64 lno = + static_cast(line_no) - + static_cast(exr_header->data_window.min_y); + if (lno > std::numeric_limits::max()) { + line_no = -1; // invalid + } else if (lno < -std::numeric_limits::max()) { + line_no = -1; // invalid + } else { + line_no -= exr_header->data_window.min_y; + } + + if (line_no < 0) { + invalid_data = true; + } else { + if (!tinyexr::DecodePixelData( + exr_image->images, exr_header->requested_pixel_types, + data_ptr, static_cast(data_len), + exr_header->compression_type, exr_header->line_order, + int(data_width), int(data_height), int(data_width), y, line_no, + num_lines, static_cast(pixel_data_size), + static_cast( + exr_header->num_custom_attributes), + exr_header->custom_attributes, + static_cast(exr_header->num_channels), + exr_header->channels, channel_offset_list)) { + invalid_data = true; + } + } + } + } + } + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + } + })); + } + + for (auto &t : workers) { + t.join(); + } +#else + } // omp 
parallel +#endif + } + + if (invalid_data) { + if (err) { + (*err) += "Invalid/Corrupted data found when decoding pixels.\n"; + } + + // free alloced image. + for (size_t c = 0; c < static_cast(num_channels); c++) { + if (exr_image->images[c]) { + free(exr_image->images[c]); + exr_image->images[c] = NULL; + } + } + return TINYEXR_ERROR_INVALID_DATA; + } + + // Overwrite `pixel_type` with `requested_pixel_type`. + { + for (int c = 0; c < exr_header->num_channels; c++) { + exr_header->pixel_types[c] = exr_header->requested_pixel_types[c]; + } + } + + { + exr_image->num_channels = num_channels; + + exr_image->width = int(data_width); + exr_image->height = int(data_height); + } + + return TINYEXR_SUCCESS; +} + +static bool ReconstructLineOffsets( + std::vector *offsets, size_t n, + const unsigned char *head, const unsigned char *marker, const size_t size) { + if (head >= marker) { + return false; + } + if (offsets->size() != n) { + return false; + } + + for (size_t i = 0; i < n; i++) { + size_t offset = static_cast(marker - head); + // Offset should not exceed whole EXR file/data size. + if ((offset + sizeof(tinyexr::tinyexr_uint64)) >= size) { + return false; + } + + int y; + unsigned int data_len; + + memcpy(&y, marker, sizeof(int)); + memcpy(&data_len, marker + 4, sizeof(unsigned int)); + + if (data_len >= size) { + return false; + } + + tinyexr::swap4(&y); + tinyexr::swap4(&data_len); + + (*offsets)[i] = offset; + + marker += data_len + 8; // 8 = 4 bytes(y) + 4 bytes(data_len) + } + + return true; +} + + +static int FloorLog2(unsigned x) { + // + // For x > 0, floorLog2(y) returns floor(log(x)/log(2)). + // + int y = 0; + while (x > 1) { + y += 1; + x >>= 1u; + } + return y; +} + + +static int CeilLog2(unsigned x) { + // + // For x > 0, ceilLog2(y) returns ceil(log(x)/log(2)). 
+ // + int y = 0; + int r = 0; + while (x > 1) { + if (x & 1) + r = 1; + + y += 1; + x >>= 1u; + } + return y + r; +} + +static int RoundLog2(int x, int tile_rounding_mode) { + return (tile_rounding_mode == TINYEXR_TILE_ROUND_DOWN) ? FloorLog2(static_cast(x)) : CeilLog2(static_cast(x)); +} + +static int CalculateNumXLevels(const EXRHeader* exr_header) { + int min_x = exr_header->data_window.min_x; + int max_x = exr_header->data_window.max_x; + int min_y = exr_header->data_window.min_y; + int max_y = exr_header->data_window.max_y; + + int num = 0; + switch (exr_header->tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + + num = 1; + break; + + case TINYEXR_TILE_MIPMAP_LEVELS: + + { + int w = max_x - min_x + 1; + int h = max_y - min_y + 1; + num = RoundLog2(std::max(w, h), exr_header->tile_rounding_mode) + 1; + } + break; + + case TINYEXR_TILE_RIPMAP_LEVELS: + + { + int w = max_x - min_x + 1; + num = RoundLog2(w, exr_header->tile_rounding_mode) + 1; + } + break; + + default: + + return -1; + } + + return num; +} + +static int CalculateNumYLevels(const EXRHeader* exr_header) { + int min_x = exr_header->data_window.min_x; + int max_x = exr_header->data_window.max_x; + int min_y = exr_header->data_window.min_y; + int max_y = exr_header->data_window.max_y; + int num = 0; + + switch (exr_header->tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + + num = 1; + break; + + case TINYEXR_TILE_MIPMAP_LEVELS: + + { + int w = max_x - min_x + 1; + int h = max_y - min_y + 1; + num = RoundLog2(std::max(w, h), exr_header->tile_rounding_mode) + 1; + } + break; + + case TINYEXR_TILE_RIPMAP_LEVELS: + + { + int h = max_y - min_y + 1; + num = RoundLog2(h, exr_header->tile_rounding_mode) + 1; + } + break; + + default: + + return -1; + } + + return num; +} + +static bool CalculateNumTiles(std::vector& numTiles, + int toplevel_size, + int size, + int tile_rounding_mode) { + for (unsigned i = 0; i < numTiles.size(); i++) { + int l = LevelSize(toplevel_size, int(i), tile_rounding_mode); + if 
(l < 0) { + return false; + } + TINYEXR_CHECK_AND_RETURN_C(l <= std::numeric_limits::max() - size + 1, false); + + numTiles[i] = (l + size - 1) / size; + } + return true; +} + +static bool PrecalculateTileInfo(std::vector& num_x_tiles, + std::vector& num_y_tiles, + const EXRHeader* exr_header) { + int min_x = exr_header->data_window.min_x; + int max_x = exr_header->data_window.max_x; + int min_y = exr_header->data_window.min_y; + int max_y = exr_header->data_window.max_y; + + int num_x_levels = CalculateNumXLevels(exr_header); + + if (num_x_levels < 0) { + return false; + } + + int num_y_levels = CalculateNumYLevels(exr_header); + + if (num_y_levels < 0) { + return false; + } + + num_x_tiles.resize(size_t(num_x_levels)); + num_y_tiles.resize(size_t(num_y_levels)); + + if (!CalculateNumTiles(num_x_tiles, + max_x - min_x + 1, + exr_header->tile_size_x, + exr_header->tile_rounding_mode)) { + return false; + } + + if (!CalculateNumTiles(num_y_tiles, + max_y - min_y + 1, + exr_header->tile_size_y, + exr_header->tile_rounding_mode)) { + return false; + } + + return true; +} + +static void InitSingleResolutionOffsets(OffsetData& offset_data, size_t num_blocks) { + offset_data.offsets.resize(1); + offset_data.offsets[0].resize(1); + offset_data.offsets[0][0].resize(num_blocks); + offset_data.num_x_levels = 1; + offset_data.num_y_levels = 1; +} + +// Return sum of tile blocks. 
+// 0 = error +static int InitTileOffsets(OffsetData& offset_data, + const EXRHeader* exr_header, + const std::vector& num_x_tiles, + const std::vector& num_y_tiles) { + int num_tile_blocks = 0; + offset_data.num_x_levels = static_cast(num_x_tiles.size()); + offset_data.num_y_levels = static_cast(num_y_tiles.size()); + switch (exr_header->tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + case TINYEXR_TILE_MIPMAP_LEVELS: + TINYEXR_CHECK_AND_RETURN_C(offset_data.num_x_levels == offset_data.num_y_levels, 0); + offset_data.offsets.resize(size_t(offset_data.num_x_levels)); + + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) { + offset_data.offsets[l].resize(size_t(num_y_tiles[l])); + + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) { + offset_data.offsets[l][dy].resize(size_t(num_x_tiles[l])); + num_tile_blocks += num_x_tiles[l]; + } + } + break; + + case TINYEXR_TILE_RIPMAP_LEVELS: + + offset_data.offsets.resize(static_cast(offset_data.num_x_levels) * static_cast(offset_data.num_y_levels)); + + for (int ly = 0; ly < offset_data.num_y_levels; ++ly) { + for (int lx = 0; lx < offset_data.num_x_levels; ++lx) { + int l = ly * offset_data.num_x_levels + lx; + offset_data.offsets[size_t(l)].resize(size_t(num_y_tiles[size_t(ly)])); + + for (size_t dy = 0; dy < offset_data.offsets[size_t(l)].size(); ++dy) { + offset_data.offsets[size_t(l)][dy].resize(size_t(num_x_tiles[size_t(lx)])); + num_tile_blocks += num_x_tiles[size_t(lx)]; + } + } + } + break; + + default: + return 0; + } + return num_tile_blocks; +} + +static bool IsAnyOffsetsAreInvalid(const OffsetData& offset_data) { + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) + if (reinterpret_cast(offset_data.offsets[l][dy][dx]) <= 0) + return true; + + return false; +} + +static bool isValidTile(const EXRHeader* exr_header, + const 
OffsetData& offset_data, + int dx, int dy, int lx, int ly) { + if (lx < 0 || ly < 0 || dx < 0 || dy < 0) return false; + int num_x_levels = offset_data.num_x_levels; + int num_y_levels = offset_data.num_y_levels; + switch (exr_header->tile_level_mode) { + case TINYEXR_TILE_ONE_LEVEL: + + if (lx == 0 && + ly == 0 && + offset_data.offsets.size() > 0 && + offset_data.offsets[0].size() > static_cast(dy) && + offset_data.offsets[0][size_t(dy)].size() > static_cast(dx)) { + return true; + } + + break; + + case TINYEXR_TILE_MIPMAP_LEVELS: + + if (lx < num_x_levels && + ly < num_y_levels && + offset_data.offsets.size() > static_cast(lx) && + offset_data.offsets[size_t(lx)].size() > static_cast(dy) && + offset_data.offsets[size_t(lx)][size_t(dy)].size() > static_cast(dx)) { + return true; + } + + break; + + case TINYEXR_TILE_RIPMAP_LEVELS: + { + size_t idx = static_cast(lx) + static_cast(ly)* static_cast(num_x_levels); + if (lx < num_x_levels && + ly < num_y_levels && + (offset_data.offsets.size() > idx) && + offset_data.offsets[idx].size() > static_cast(dy) && + offset_data.offsets[idx][size_t(dy)].size() > static_cast(dx)) { + return true; + } + } + + break; + + default: + + return false; + } + + return false; +} + +static bool ReconstructTileOffsets(OffsetData& offset_data, + const EXRHeader* exr_header, + const unsigned char* head, const unsigned char* marker, const size_t size, + bool isMultiPartFile, + bool isDeep) { + int numXLevels = offset_data.num_x_levels; + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) { + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) { + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) { + tinyexr::tinyexr_uint64 tileOffset = tinyexr::tinyexr_uint64(marker - head); + + + if (isMultiPartFile) { + if ((marker + sizeof(int)) >= (head + size)) { + return false; + } + + //int partNumber; + marker += sizeof(int); + } + + if ((marker + 4 * sizeof(int)) >= (head + size)) { + return false; + } 
+ + int tileX; + memcpy(&tileX, marker, sizeof(int)); + tinyexr::swap4(&tileX); + marker += sizeof(int); + + int tileY; + memcpy(&tileY, marker, sizeof(int)); + tinyexr::swap4(&tileY); + marker += sizeof(int); + + int levelX; + memcpy(&levelX, marker, sizeof(int)); + tinyexr::swap4(&levelX); + marker += sizeof(int); + + int levelY; + memcpy(&levelY, marker, sizeof(int)); + tinyexr::swap4(&levelY); + marker += sizeof(int); + + if (isDeep) { + if ((marker + 2 * sizeof(tinyexr::tinyexr_int64)) >= (head + size)) { + return false; + } + tinyexr::tinyexr_int64 packed_offset_table_size; + memcpy(&packed_offset_table_size, marker, sizeof(tinyexr::tinyexr_int64)); + tinyexr::swap8(reinterpret_cast(&packed_offset_table_size)); + marker += sizeof(tinyexr::tinyexr_int64); + + tinyexr::tinyexr_int64 packed_sample_size; + memcpy(&packed_sample_size, marker, sizeof(tinyexr::tinyexr_int64)); + tinyexr::swap8(reinterpret_cast(&packed_sample_size)); + marker += sizeof(tinyexr::tinyexr_int64); + + // next Int64 is unpacked sample size - skip that too + marker += packed_offset_table_size + packed_sample_size + 8; + + if (marker >= (head + size)) { + return false; + } + + } else { + + if ((marker + sizeof(uint32_t)) >= (head + size)) { + return false; + } + + uint32_t dataSize; + memcpy(&dataSize, marker, sizeof(uint32_t)); + tinyexr::swap4(&dataSize); + marker += sizeof(uint32_t); + + marker += dataSize; + + if (marker >= (head + size)) { + return false; + } + } + + if (!isValidTile(exr_header, offset_data, + tileX, tileY, levelX, levelY)) { + return false; + } + + int level_idx = LevelIndex(levelX, levelY, exr_header->tile_level_mode, numXLevels); + if (level_idx < 0) { + return false; + } + + if (size_t(level_idx) >= offset_data.offsets.size()) { + return false; + } + + if (size_t(tileY) >= offset_data.offsets[size_t(level_idx)].size()) { + return false; + } + + if (size_t(tileX) >= offset_data.offsets[size_t(level_idx)][size_t(tileY)].size()) { + return false; + } + + 
offset_data.offsets[size_t(level_idx)][size_t(tileY)][size_t(tileX)] = tileOffset; + } + } + } + return true; +} + +// marker output is also +static int ReadOffsets(OffsetData& offset_data, + const unsigned char* head, + const unsigned char*& marker, + const size_t size, + const char** err) { + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) { + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) { + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) { + tinyexr::tinyexr_uint64 offset; + if ((marker + sizeof(tinyexr_uint64)) >= (head + size)) { + tinyexr::SetErrorMessage("Insufficient data size in offset table.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); + tinyexr::swap8(&offset); + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset value in DecodeEXRImage.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + marker += sizeof(tinyexr::tinyexr_uint64); // = 8 + offset_data.offsets[l][dy][dx] = offset; + } + } + } + return TINYEXR_SUCCESS; +} + +static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header, + const unsigned char *head, + const unsigned char *marker, const size_t size, + const char **err) { + if (exr_image == NULL || exr_header == NULL || head == NULL || + marker == NULL || (size <= tinyexr::kEXRVersionSize)) { + tinyexr::SetErrorMessage("Invalid argument for DecodeEXRImage().", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + int num_scanline_blocks = 1; + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanline_blocks = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + num_scanline_blocks = 32; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + num_scanline_blocks = 16; + } + + if (exr_header->data_window.max_x < exr_header->data_window.min_x || + exr_header->data_window.max_x - exr_header->data_window.min_x == + 
std::numeric_limits::max()) { + // Issue 63 + tinyexr::SetErrorMessage("Invalid data width value", err); + return TINYEXR_ERROR_INVALID_DATA; + } + tinyexr_int64 data_width = + static_cast(exr_header->data_window.max_x) - static_cast(exr_header->data_window.min_x) + static_cast(1); + if (data_width <= 0) { + tinyexr::SetErrorMessage("Invalid data window width value", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + if (exr_header->data_window.max_y < exr_header->data_window.min_y || + exr_header->data_window.max_y - exr_header->data_window.min_y == + std::numeric_limits::max()) { + tinyexr::SetErrorMessage("Invalid data height value", err); + return TINYEXR_ERROR_INVALID_DATA; + } + tinyexr_int64 data_height = + static_cast(exr_header->data_window.max_y) - static_cast(exr_header->data_window.min_y) + static_cast(1); + + if (data_height <= 0) { + tinyexr::SetErrorMessage("Invalid data window height value", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + // Do not allow too large data_width and data_height. header invalid? + { + if (data_width > TINYEXR_DIMENSION_THRESHOLD) { + tinyexr::SetErrorMessage("data width too large.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + if (data_height > TINYEXR_DIMENSION_THRESHOLD) { + tinyexr::SetErrorMessage("data height too large.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + + if (exr_header->tiled) { + if (exr_header->tile_size_x > TINYEXR_DIMENSION_THRESHOLD) { + tinyexr::SetErrorMessage("tile width too large.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + if (exr_header->tile_size_y > TINYEXR_DIMENSION_THRESHOLD) { + tinyexr::SetErrorMessage("tile height too large.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + + // Read offset tables. + OffsetData offset_data; + size_t num_blocks = 0; + // For a multi-resolution image, the size of the offset table will be calculated from the other attributes of the header. + // If chunk_count > 0 then chunk_count must be equal to the calculated tile count. 
+ if (exr_header->tiled) { + { + std::vector num_x_tiles, num_y_tiles; + if (!PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_header)) { + tinyexr::SetErrorMessage("Failed to precalculate tile info.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + num_blocks = size_t(InitTileOffsets(offset_data, exr_header, num_x_tiles, num_y_tiles)); + if (exr_header->chunk_count > 0) { + if (exr_header->chunk_count != static_cast(num_blocks)) { + tinyexr::SetErrorMessage("Invalid offset table size.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + } + + int ret = ReadOffsets(offset_data, head, marker, size, err); + if (ret != TINYEXR_SUCCESS) return ret; + if (IsAnyOffsetsAreInvalid(offset_data)) { + if (!ReconstructTileOffsets(offset_data, exr_header, + head, marker, size, + exr_header->multipart, exr_header->non_image)) { + + tinyexr::SetErrorMessage("Invalid Tile Offsets data.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + } else if (exr_header->chunk_count > 0) { + // Use `chunkCount` attribute. 
+ num_blocks = static_cast(exr_header->chunk_count); + InitSingleResolutionOffsets(offset_data, num_blocks); + } else { + num_blocks = static_cast(data_height) / + static_cast(num_scanline_blocks); + if (num_blocks * static_cast(num_scanline_blocks) < + static_cast(data_height)) { + num_blocks++; + } + + InitSingleResolutionOffsets(offset_data, num_blocks); + } + + if (!exr_header->tiled) { + std::vector& offsets = offset_data.offsets[0][0]; + for (size_t y = 0; y < num_blocks; y++) { + tinyexr::tinyexr_uint64 offset; + // Issue #81 + if ((marker + sizeof(tinyexr_uint64)) >= (head + size)) { + tinyexr::SetErrorMessage("Insufficient data size in offset table.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); + tinyexr::swap8(&offset); + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset value in DecodeEXRImage.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + marker += sizeof(tinyexr::tinyexr_uint64); // = 8 + offsets[y] = offset; + } + + // If line offsets are invalid, we try to reconstruct it. + // See OpenEXR/IlmImf/ImfScanLineInputFile.cpp::readLineOffsets() for details. + for (size_t y = 0; y < num_blocks; y++) { + if (offsets[y] <= 0) { + // TODO(syoyo) Report as warning? + // if (err) { + // stringstream ss; + // ss << "Incomplete lineOffsets." 
<< std::endl; + // (*err) += ss.str(); + //} + bool ret = + ReconstructLineOffsets(&offsets, num_blocks, head, marker, size); + if (ret) { + // OK + break; + } else { + tinyexr::SetErrorMessage( + "Cannot reconstruct lineOffset table in DecodeEXRImage.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + } + } + + { + std::string e; + int ret = DecodeChunk(exr_image, exr_header, offset_data, head, size, &e); + + if (ret != TINYEXR_SUCCESS) { + if (!e.empty()) { + tinyexr::SetErrorMessage(e, err); + } + +#if 1 + FreeEXRImage(exr_image); +#else + // release memory(if exists) + if ((exr_header->num_channels > 0) && exr_image && exr_image->images) { + for (size_t c = 0; c < size_t(exr_header->num_channels); c++) { + if (exr_image->images[c]) { + free(exr_image->images[c]); + exr_image->images[c] = NULL; + } + } + free(exr_image->images); + exr_image->images = NULL; + } +#endif + } + + return ret; + } +} + +static void GetLayers(const EXRHeader &exr_header, + std::vector &layer_names) { + // Naive implementation + // Group channels by layers + // go over all channel names, split by periods + // collect unique names + layer_names.clear(); + for (int c = 0; c < exr_header.num_channels; c++) { + std::string full_name(exr_header.channels[c].name); + const size_t pos = full_name.find_last_of('.'); + if (pos != std::string::npos && pos != 0 && pos + 1 < full_name.size()) { + full_name.erase(pos); + if (std::find(layer_names.begin(), layer_names.end(), full_name) == + layer_names.end()) + layer_names.push_back(full_name); + } + } +} + +struct LayerChannel { + explicit LayerChannel(size_t i, std::string n) : index(i), name(n) {} + size_t index; + std::string name; +}; + +static void ChannelsInLayer(const EXRHeader &exr_header, + const std::string &layer_name, + std::vector &channels) { + channels.clear(); + //std::cout << "layer_name = " << layer_name << "\n"; + for (int c = 0; c < exr_header.num_channels; c++) { + //std::cout << "chan[" << c << "] = " << 
exr_header.channels[c].name << "\n"; + std::string ch_name(exr_header.channels[c].name); + if (layer_name.empty()) { + const size_t pos = ch_name.find_last_of('.'); + if (pos != std::string::npos && pos < ch_name.size()) { + if (pos != 0) continue; + ch_name = ch_name.substr(pos + 1); + } + } else { + const size_t pos = ch_name.find(layer_name + '.'); + if (pos == std::string::npos) continue; + if (pos == 0) { + ch_name = ch_name.substr(layer_name.size() + 1); + } + } + LayerChannel ch(size_t(c), ch_name); + channels.push_back(ch); + } +} + +} // namespace tinyexr + +int EXRLayers(const char *filename, const char **layer_names[], int *num_layers, + const char **err) { + EXRVersion exr_version; + EXRHeader exr_header; + InitEXRHeader(&exr_header); + + { + int ret = ParseEXRVersionFromFile(&exr_version, filename); + if (ret != TINYEXR_SUCCESS) { + tinyexr::SetErrorMessage("Invalid EXR header.", err); + return ret; + } + + if (exr_version.multipart || exr_version.non_image) { + tinyexr::SetErrorMessage( + "Loading multipart or DeepImage is not supported in LoadEXR() API", + err); + return TINYEXR_ERROR_INVALID_DATA; // @fixme. 
+ } + } + + int ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, err); + if (ret != TINYEXR_SUCCESS) { + FreeEXRHeader(&exr_header); + return ret; + } + + std::vector layer_vec; + tinyexr::GetLayers(exr_header, layer_vec); + + (*num_layers) = int(layer_vec.size()); + (*layer_names) = static_cast( + malloc(sizeof(const char *) * static_cast(layer_vec.size()))); + for (size_t c = 0; c < static_cast(layer_vec.size()); c++) { +#ifdef _MSC_VER + (*layer_names)[c] = _strdup(layer_vec[c].c_str()); +#else + (*layer_names)[c] = strdup(layer_vec[c].c_str()); +#endif + } + + FreeEXRHeader(&exr_header); + return TINYEXR_SUCCESS; +} + +int LoadEXR(float **out_rgba, int *width, int *height, const char *filename, + const char **err, int *num_chans) { + return LoadEXRWithLayer(out_rgba, width, height, filename, + /* layername */ NULL, err, num_chans); +} + +int LoadEXRWithLayer(float **out_rgba, int *width, int *height, + const char *filename, const char *layername, + const char **err, int *num_chans) { + if (num_chans) + *num_chans = 0; + + if (out_rgba == NULL) { + tinyexr::SetErrorMessage("Invalid argument for LoadEXR()", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + EXRVersion exr_version; + EXRImage exr_image; + EXRHeader exr_header; + InitEXRHeader(&exr_header); + InitEXRImage(&exr_image); + + { + int ret = ParseEXRVersionFromFile(&exr_version, filename); + if (ret != TINYEXR_SUCCESS) { + std::stringstream ss; + ss << "Failed to open EXR file or read version info from EXR file. code(" + << ret << ")"; + tinyexr::SetErrorMessage(ss.str(), err); + return ret; + } + + if (exr_version.multipart || exr_version.non_image) { + tinyexr::SetErrorMessage( + "Loading multipart or DeepImage is not supported in LoadEXR() API", + err); + return TINYEXR_ERROR_INVALID_DATA; // @fixme. 
+ } + } + + { + int ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, err); + if (ret != TINYEXR_SUCCESS) { + FreeEXRHeader(&exr_header); + return ret; + } + } + + // Read HALF channel as FLOAT. + for (int i = 0; i < exr_header.num_channels; i++) { + if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) { + exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; + } + } + + // TODO: Probably limit loading to layers (channels) selected by layer index + { + int ret = LoadEXRImageFromFile(&exr_image, &exr_header, filename, err); + if (ret != TINYEXR_SUCCESS) { + FreeEXRHeader(&exr_header); + return ret; + } + } + + // RGBA + int idxR = -1; + int idxG = -1; + int idxB = -1; + int idxA = -1; + + std::vector layer_names; + tinyexr::GetLayers(exr_header, layer_names); + + std::vector channels; + tinyexr::ChannelsInLayer( + exr_header, layername == NULL ? "" : std::string(layername), channels); + + + if (channels.size() < 1) { + if (layername == NULL) { + tinyexr::SetErrorMessage("Layer Not Found. Seems EXR contains channels with layer(e.g. `diffuse.R`). if you are using LoadEXR(), please try LoadEXRWithLayer(). LoadEXR() cannot load EXR having channels with layer.", err); + + } else { + tinyexr::SetErrorMessage("Layer Not Found", err); + } + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + return TINYEXR_ERROR_LAYER_NOT_FOUND; + } + + size_t ch_count = channels.size() < 4 ? channels.size() : 4; + for (size_t c = 0; c < ch_count; c++) { + const tinyexr::LayerChannel &ch = channels[c]; + + if (ch.name == "R") { + idxR = int(ch.index); + } else if (ch.name == "G") { + idxG = int(ch.index); + } else if (ch.name == "B") { + idxB = int(ch.index); + } else if (ch.name == "A") { + idxA = int(ch.index); + } + } + + if (channels.size() == 1) { + if (num_chans) + *num_chans = 1; + + int chIdx = int(channels.front().index); + // Grayscale channel only. 
+ + (*out_rgba) = reinterpret_cast( + malloc(4 * sizeof(float) * static_cast(exr_image.width) * + static_cast(exr_image.height))); + + if (exr_header.tiled) { + const size_t tile_size_x = static_cast(exr_header.tile_size_x); + const size_t tile_size_y = static_cast(exr_header.tile_size_y); + for (int it = 0; it < exr_image.num_tiles; it++) { + for (size_t j = 0; j < tile_size_y; j++) { + for (size_t i = 0; i < tile_size_x; i++) { + const size_t ii = + static_cast(exr_image.tiles[it].offset_x) * tile_size_x + + i; + const size_t jj = + static_cast(exr_image.tiles[it].offset_y) * tile_size_y + + j; + const size_t idx = ii + jj * static_cast(exr_image.width); + + // out of region check. + if (ii >= static_cast(exr_image.width)) { + continue; + } + if (jj >= static_cast(exr_image.height)) { + continue; + } + const size_t srcIdx = i + j * tile_size_x; + unsigned char **src = exr_image.tiles[it].images; + (*out_rgba)[4 * idx + 0] = + reinterpret_cast(src)[chIdx][srcIdx]; + (*out_rgba)[4 * idx + 1] = + reinterpret_cast(src)[chIdx][srcIdx]; + (*out_rgba)[4 * idx + 2] = + reinterpret_cast(src)[chIdx][srcIdx]; + (*out_rgba)[4 * idx + 3] = + reinterpret_cast(src)[chIdx][srcIdx]; + } + } + } + } else { + const size_t pixel_size = static_cast(exr_image.width) * + static_cast(exr_image.height); + for (size_t i = 0; i < pixel_size; i++) { + const float val = + reinterpret_cast(exr_image.images)[chIdx][i]; + (*out_rgba)[4 * i + 0] = val; + (*out_rgba)[4 * i + 1] = val; + (*out_rgba)[4 * i + 2] = val; + (*out_rgba)[4 * i + 3] = val; + } + } + } else { + // Assume RGB(A) + + if (idxR == -1) { + tinyexr::SetErrorMessage("R channel not found", err); + + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxG == -1) { + tinyexr::SetErrorMessage("G channel not found", err); + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxB == -1) { + tinyexr::SetErrorMessage("B channel not 
found", err); + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + return TINYEXR_ERROR_INVALID_DATA; + } + + if (num_chans) + *num_chans = (idxA != -1) ? 4 : 3; + + (*out_rgba) = reinterpret_cast( + malloc(4 * sizeof(float) * static_cast(exr_image.width) * + static_cast(exr_image.height))); + if (exr_header.tiled) { + const size_t tile_size_x = static_cast(exr_header.tile_size_x); + const size_t tile_size_y = static_cast(exr_header.tile_size_y); + for (int it = 0; it < exr_image.num_tiles; it++) { + for (size_t j = 0; j < tile_size_y; j++) { + for (size_t i = 0; i < tile_size_x; i++) { + const size_t ii = + static_cast(exr_image.tiles[it].offset_x) * + tile_size_x + + i; + const size_t jj = + static_cast(exr_image.tiles[it].offset_y) * + tile_size_y + + j; + const size_t idx = ii + jj * static_cast(exr_image.width); + + // out of region check. + if (ii >= static_cast(exr_image.width)) { + continue; + } + if (jj >= static_cast(exr_image.height)) { + continue; + } + const size_t srcIdx = i + j * tile_size_x; + unsigned char **src = exr_image.tiles[it].images; + (*out_rgba)[4 * idx + 0] = + reinterpret_cast(src)[idxR][srcIdx]; + (*out_rgba)[4 * idx + 1] = + reinterpret_cast(src)[idxG][srcIdx]; + (*out_rgba)[4 * idx + 2] = + reinterpret_cast(src)[idxB][srcIdx]; + if (idxA != -1) { + (*out_rgba)[4 * idx + 3] = + reinterpret_cast(src)[idxA][srcIdx]; + } else { + (*out_rgba)[4 * idx + 3] = 1.0; + } + } + } + } + } else { + const size_t pixel_size = static_cast(exr_image.width) * + static_cast(exr_image.height); + for (size_t i = 0; i < pixel_size; i++) { + (*out_rgba)[4 * i + 0] = + reinterpret_cast(exr_image.images)[idxR][i]; + (*out_rgba)[4 * i + 1] = + reinterpret_cast(exr_image.images)[idxG][i]; + (*out_rgba)[4 * i + 2] = + reinterpret_cast(exr_image.images)[idxB][i]; + if (idxA != -1) { + (*out_rgba)[4 * i + 3] = + reinterpret_cast(exr_image.images)[idxA][i]; + } else { + (*out_rgba)[4 * i + 3] = 1.0; + } + } + } + } + + (*width) = exr_image.width; + 
(*height) = exr_image.height; + + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + + return TINYEXR_SUCCESS; +} + +int IsEXR(const char *filename) { + EXRVersion exr_version; + + int ret = ParseEXRVersionFromFile(&exr_version, filename); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + return TINYEXR_SUCCESS; +} + +int IsEXRFromMemory(const unsigned char *memory, size_t size) { + EXRVersion exr_version; + + int ret = ParseEXRVersionFromMemory(&exr_version, memory, size); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + return TINYEXR_SUCCESS; +} + +int ParseEXRHeaderFromMemory(EXRHeader *exr_header, const EXRVersion *version, + const unsigned char *memory, size_t size, + const char **err) { + if (memory == NULL || exr_header == NULL) { + tinyexr::SetErrorMessage( + "Invalid argument. `memory` or `exr_header` argument is null in " + "ParseEXRHeaderFromMemory()", + err); + + // Invalid argument + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (size < tinyexr::kEXRVersionSize) { + tinyexr::SetErrorMessage("Insufficient header/data size.\n", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + const unsigned char *marker = memory + tinyexr::kEXRVersionSize; + size_t marker_size = size - tinyexr::kEXRVersionSize; + + tinyexr::HeaderInfo info; + info.clear(); + + int ret; + { + std::string err_str; + ret = ParseEXRHeader(&info, NULL, version, &err_str, marker, marker_size); + + if (ret != TINYEXR_SUCCESS) { + if (err && !err_str.empty()) { + tinyexr::SetErrorMessage(err_str, err); + } + } + } + + { + std::string warn; + std::string err_str; + + if (!ConvertHeader(exr_header, info, &warn, &err_str)) { + // release mem + for (size_t i = 0; i < info.attributes.size(); i++) { + if (info.attributes[i].value) { + free(info.attributes[i].value); + } + } + if (err && !err_str.empty()) { + tinyexr::SetErrorMessage(err_str, err); + } + ret = TINYEXR_ERROR_INVALID_HEADER; + } + } + + exr_header->multipart = version->multipart ? 
1 : 0; + exr_header->non_image = version->non_image ? 1 : 0; + + return ret; +} + +int LoadEXRFromMemory(float **out_rgba, int *width, int *height, + const unsigned char *memory, size_t size, + const char **err) { + if (out_rgba == NULL || memory == NULL) { + tinyexr::SetErrorMessage("Invalid argument for LoadEXRFromMemory", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + EXRVersion exr_version; + EXRImage exr_image; + EXRHeader exr_header; + + InitEXRHeader(&exr_header); + + int ret = ParseEXRVersionFromMemory(&exr_version, memory, size); + if (ret != TINYEXR_SUCCESS) { + std::stringstream ss; + ss << "Failed to parse EXR version. code(" << ret << ")"; + tinyexr::SetErrorMessage(ss.str(), err); + return ret; + } + + ret = ParseEXRHeaderFromMemory(&exr_header, &exr_version, memory, size, err); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + // Read HALF channel as FLOAT. + for (int i = 0; i < exr_header.num_channels; i++) { + if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) { + exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; + } + } + + InitEXRImage(&exr_image); + ret = LoadEXRImageFromMemory(&exr_image, &exr_header, memory, size, err); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + // RGBA + int idxR = -1; + int idxG = -1; + int idxB = -1; + int idxA = -1; + for (int c = 0; c < exr_header.num_channels; c++) { + if (strcmp(exr_header.channels[c].name, "R") == 0) { + idxR = c; + } else if (strcmp(exr_header.channels[c].name, "G") == 0) { + idxG = c; + } else if (strcmp(exr_header.channels[c].name, "B") == 0) { + idxB = c; + } else if (strcmp(exr_header.channels[c].name, "A") == 0) { + idxA = c; + } + } + + // TODO(syoyo): Refactor removing same code as used in LoadEXR(). + if (exr_header.num_channels == 1) { + // Grayscale channel only. 
+ + (*out_rgba) = reinterpret_cast( + malloc(4 * sizeof(float) * static_cast(exr_image.width) * + static_cast(exr_image.height))); + + if (exr_header.tiled) { + const size_t tile_size_x = static_cast(exr_header.tile_size_x); + const size_t tile_size_y = static_cast(exr_header.tile_size_y); + for (int it = 0; it < exr_image.num_tiles; it++) { + for (size_t j = 0; j < tile_size_y; j++) { + for (size_t i = 0; i < tile_size_x; i++) { + const size_t ii = + static_cast(exr_image.tiles[it].offset_x) * + tile_size_x + + i; + const size_t jj = + static_cast(exr_image.tiles[it].offset_y) * + tile_size_y + + j; + const size_t idx = ii + jj * static_cast(exr_image.width); + + // out of region check. + if (ii >= static_cast(exr_image.width)) { + continue; + } + if (jj >= static_cast(exr_image.height)) { + continue; + } + const size_t srcIdx = i + j * tile_size_x; + unsigned char **src = exr_image.tiles[it].images; + (*out_rgba)[4 * idx + 0] = + reinterpret_cast(src)[0][srcIdx]; + (*out_rgba)[4 * idx + 1] = + reinterpret_cast(src)[0][srcIdx]; + (*out_rgba)[4 * idx + 2] = + reinterpret_cast(src)[0][srcIdx]; + (*out_rgba)[4 * idx + 3] = + reinterpret_cast(src)[0][srcIdx]; + } + } + } + } else { + const size_t pixel_size = static_cast(exr_image.width) * + static_cast(exr_image.height); + for (size_t i = 0; i < pixel_size; i++) { + const float val = reinterpret_cast(exr_image.images)[0][i]; + (*out_rgba)[4 * i + 0] = val; + (*out_rgba)[4 * i + 1] = val; + (*out_rgba)[4 * i + 2] = val; + (*out_rgba)[4 * i + 3] = val; + } + } + + } else { + // TODO(syoyo): Support non RGBA image. 
+ + if (idxR == -1) { + tinyexr::SetErrorMessage("R channel not found", err); + + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxG == -1) { + tinyexr::SetErrorMessage("G channel not found", err); + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxB == -1) { + tinyexr::SetErrorMessage("B channel not found", err); + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + (*out_rgba) = reinterpret_cast( + malloc(4 * sizeof(float) * static_cast(exr_image.width) * + static_cast(exr_image.height))); + + if (exr_header.tiled) { + const size_t tile_size_x = static_cast(exr_header.tile_size_x); + const size_t tile_size_y = static_cast(exr_header.tile_size_y); + for (int it = 0; it < exr_image.num_tiles; it++) { + for (size_t j = 0; j < tile_size_y; j++) + for (size_t i = 0; i < tile_size_x; i++) { + const size_t ii = + static_cast(exr_image.tiles[it].offset_x) * + tile_size_x + + i; + const size_t jj = + static_cast(exr_image.tiles[it].offset_y) * + tile_size_y + + j; + const size_t idx = ii + jj * static_cast(exr_image.width); + + // out of region check. 
+ if (ii >= static_cast(exr_image.width)) { + continue; + } + if (jj >= static_cast(exr_image.height)) { + continue; + } + const size_t srcIdx = i + j * tile_size_x; + unsigned char **src = exr_image.tiles[it].images; + (*out_rgba)[4 * idx + 0] = + reinterpret_cast(src)[idxR][srcIdx]; + (*out_rgba)[4 * idx + 1] = + reinterpret_cast(src)[idxG][srcIdx]; + (*out_rgba)[4 * idx + 2] = + reinterpret_cast(src)[idxB][srcIdx]; + if (idxA != -1) { + (*out_rgba)[4 * idx + 3] = + reinterpret_cast(src)[idxA][srcIdx]; + } else { + (*out_rgba)[4 * idx + 3] = 1.0; + } + } + } + } else { + const size_t pixel_size = static_cast(exr_image.width) * + static_cast(exr_image.height); + for (size_t i = 0; i < pixel_size; i++) { + (*out_rgba)[4 * i + 0] = + reinterpret_cast(exr_image.images)[idxR][i]; + (*out_rgba)[4 * i + 1] = + reinterpret_cast(exr_image.images)[idxG][i]; + (*out_rgba)[4 * i + 2] = + reinterpret_cast(exr_image.images)[idxB][i]; + if (idxA != -1) { + (*out_rgba)[4 * i + 3] = + reinterpret_cast(exr_image.images)[idxA][i]; + } else { + (*out_rgba)[4 * i + 3] = 1.0; + } + } + } + } + + (*width) = exr_image.width; + (*height) = exr_image.height; + + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + + return TINYEXR_SUCCESS; +} + +// Represents a read-only file mapped to an address space in memory. +// If no memory-mapping API is available, falls back to allocating a buffer +// with a copy of the file's data. +struct MemoryMappedFile { + unsigned char *data; // To the start of the file's data. + size_t size; // The size of the file in bytes. +#ifdef TINYEXR_USE_WIN32_MMAP + HANDLE windows_file; + HANDLE windows_file_mapping; +#elif defined(TINYEXR_USE_POSIX_MMAP) + int posix_descriptor; +#endif + + // MemoryMappedFile's constructor tries to map memory to a file. + // If this succeeds, valid() will return true and all fields + // are usable; otherwise, valid() will return false. 
+ MemoryMappedFile(const char *filename) { + data = NULL; + size = 0; +#ifdef TINYEXR_USE_WIN32_MMAP + windows_file_mapping = NULL; + windows_file = + CreateFileW(tinyexr::UTF8ToWchar(filename).c_str(), // lpFileName + GENERIC_READ, // dwDesiredAccess + FILE_SHARE_READ, // dwShareMode + NULL, // lpSecurityAttributes + OPEN_EXISTING, // dwCreationDisposition + FILE_ATTRIBUTE_READONLY, // dwFlagsAndAttributes + NULL); // hTemplateFile + if (windows_file == INVALID_HANDLE_VALUE) { + return; + } + + windows_file_mapping = CreateFileMapping(windows_file, // hFile + NULL, // lpFileMappingAttributes + PAGE_READONLY, // flProtect + 0, // dwMaximumSizeHigh + 0, // dwMaximumSizeLow + NULL); // lpName + if (windows_file_mapping == NULL) { + return; + } + + data = reinterpret_cast( + MapViewOfFile(windows_file_mapping, // hFileMappingObject + FILE_MAP_READ, // dwDesiredAccess + 0, // dwFileOffsetHigh + 0, // dwFileOffsetLow + 0)); // dwNumberOfBytesToMap + if (!data) { + return; + } + + LARGE_INTEGER windows_file_size = {}; + if (!GetFileSizeEx(windows_file, &windows_file_size) || + static_cast(windows_file_size.QuadPart) > + std::numeric_limits::max()) { + UnmapViewOfFile(data); + data = NULL; + return; + } + size = static_cast(windows_file_size.QuadPart); +#elif defined(TINYEXR_USE_POSIX_MMAP) + posix_descriptor = open(filename, O_RDONLY); + if (posix_descriptor == -1) { + return; + } + + struct stat info; + if (fstat(posix_descriptor, &info) < 0) { + return; + } + // Make sure st_size is in the valid range for a size_t. The second case + // can only fail if a POSIX implementation defines off_t to be a larger + // type than size_t - for instance, compiling with _FILE_OFFSET_BITS=64 + // on a 32-bit system. On current 64-bit systems, this check can never + // fail, so we turn off clang's Wtautological-type-limit-compare warning + // around this code. 
+#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wtautological-type-limit-compare" +#endif + if (info.st_size < 0 || + info.st_size > std::numeric_limits::max()) { + return; + } +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + size = static_cast(info.st_size); + + data = reinterpret_cast( + mmap(0, size, PROT_READ, MAP_SHARED, posix_descriptor, 0)); + if (data == MAP_FAILED) { + data = nullptr; + return; + } +#else + FILE *fp = fopen(filename, "rb"); + if (!fp) { + return; + } + + // Calling fseek(fp, 0, SEEK_END) isn't strictly-conforming C code, but + // since neither the WIN32 nor POSIX APIs are available in this branch, this + // is a reasonable fallback option. + if (fseek(fp, 0, SEEK_END) != 0) { + fclose(fp); + return; + } + const long ftell_result = ftell(fp); + if (ftell_result < 0) { + // Error from ftell + fclose(fp); + return; + } + size = static_cast(ftell_result); + if (fseek(fp, 0, SEEK_SET) != 0) { + fclose(fp); + size = 0; + return; + } + + data = reinterpret_cast(malloc(size)); + if (!data) { + size = 0; + fclose(fp); + return; + } + size_t read_bytes = fread(data, 1, size, fp); + if (read_bytes != size) { + // TODO: Try to read data until reading `size` bytes. + fclose(fp); + size = 0; + data = nullptr; + return; + } + fclose(fp); +#endif + } + + // MemoryMappedFile's destructor closes all its handles. + ~MemoryMappedFile() { +#ifdef TINYEXR_USE_WIN32_MMAP + if (data) { + (void)UnmapViewOfFile(data); + data = NULL; + } + + if (windows_file_mapping != NULL) { + (void)CloseHandle(windows_file_mapping); + } + + if (windows_file != INVALID_HANDLE_VALUE) { + (void)CloseHandle(windows_file); + } +#elif defined(TINYEXR_USE_POSIX_MMAP) + if (data) { + (void)munmap(data, size); + data = NULL; + } + + if (posix_descriptor != -1) { + (void)close(posix_descriptor); + } +#else + if (data) { + (void)free(data); + } + data = NULL; +#endif + } + + // A MemoryMappedFile cannot be copied or moved. 
+ // Only check for this when compiling with C++11 or higher, since deleted + // function definitions were added then. +#if TINYEXR_HAS_CXX11 +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++98-compat" +#endif + MemoryMappedFile(const MemoryMappedFile &) = delete; + MemoryMappedFile &operator=(const MemoryMappedFile &) = delete; + MemoryMappedFile(MemoryMappedFile &&other) noexcept = delete; + MemoryMappedFile &operator=(MemoryMappedFile &&other) noexcept = delete; +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +#endif + + // Returns whether this was successfully opened. + bool valid() const { return data; } +}; + +int LoadEXRImageFromFile(EXRImage *exr_image, const EXRHeader *exr_header, + const char *filename, const char **err) { + if (exr_image == NULL) { + tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromFile", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + MemoryMappedFile file(filename); + if (!file.valid()) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + if (file.size < 16) { + tinyexr::SetErrorMessage("File size too short : " + std::string(filename), + err); + return TINYEXR_ERROR_INVALID_FILE; + } + + return LoadEXRImageFromMemory(exr_image, exr_header, file.data, file.size, + err); +} + +int LoadEXRImageFromMemory(EXRImage *exr_image, const EXRHeader *exr_header, + const unsigned char *memory, const size_t size, + const char **err) { + if (exr_image == NULL || memory == NULL || + (size < tinyexr::kEXRVersionSize)) { + tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromMemory", + err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (exr_header->header_len == 0) { + tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + const unsigned char *head = memory; + const unsigned char *marker = reinterpret_cast( + memory + 
exr_header->header_len + + 8); // +8 for magic number + version header. + return tinyexr::DecodeEXRImage(exr_image, exr_header, head, marker, size, + err); +} + +namespace tinyexr +{ + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wsign-conversion" +#endif + +// out_data must be allocated initially with the block-header size +// of the current image(-part) type +static bool EncodePixelData(/* out */ std::vector& out_data, + const unsigned char* const* images, + int compression_type, + int /*line_order*/, + int width, // for tiled : tile.width + int /*height*/, // for tiled : header.tile_size_y + int x_stride, // for tiled : header.tile_size_x + int line_no, // for tiled : 0 + int num_lines, // for tiled : tile.height + size_t pixel_data_size, + const std::vector& channels, + const std::vector& channel_offset_list, + std::string *err, + const void* compression_param = 0) // zfp compression param +{ + size_t buf_size = static_cast(width) * + static_cast(num_lines) * + static_cast(pixel_data_size); + //int last2bit = (buf_size & 3); + // buf_size must be multiple of four + //if(last2bit) buf_size += 4 - last2bit; + std::vector buf(buf_size); + + size_t start_y = static_cast(line_no); + for (size_t c = 0; c < channels.size(); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + float *line_ptr = reinterpret_cast(&buf.at( + static_cast(pixel_data_size * size_t(y) * size_t(width)) + + channel_offset_list[c] * + static_cast(width))); + for (int x = 0; x < width; x++) { + tinyexr::FP16 h16; + h16.u = reinterpret_cast( + images)[c][(y + start_y) * size_t(x_stride) + size_t(x)]; + + tinyexr::FP32 f32 = half_to_float(h16); + + tinyexr::swap4(&f32.f); + + // line_ptr[x] = f32.f; + tinyexr::cpy4(line_ptr + x, &(f32.f)); + } + } + } else if (channels[c].requested_pixel_type == 
TINYEXR_PIXELTYPE_HALF) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + unsigned short *line_ptr = reinterpret_cast( + &buf.at(static_cast(pixel_data_size * y * + width) + + channel_offset_list[c] * + static_cast(width))); + for (int x = 0; x < width; x++) { + unsigned short val = reinterpret_cast( + images)[c][(y + start_y) * x_stride + x]; + + tinyexr::swap2(&val); + + // line_ptr[x] = val; + tinyexr::cpy2(line_ptr + x, &val); + } + } + } else { + if (err) { + (*err) += "Invalid requested_pixel_type.\n"; + } + return false; + } + + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + unsigned short *line_ptr = reinterpret_cast( + &buf.at(static_cast(pixel_data_size * y * + width) + + channel_offset_list[c] * + static_cast(width))); + for (int x = 0; x < width; x++) { + tinyexr::FP32 f32; + f32.f = reinterpret_cast( + images)[c][(y + start_y) * x_stride + x]; + + tinyexr::FP16 h16; + h16 = float_to_half_full(f32); + + tinyexr::swap2(reinterpret_cast(&h16.u)); + + // line_ptr[x] = h16.u; + tinyexr::cpy2(line_ptr + x, &(h16.u)); + } + } + } else if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + float *line_ptr = reinterpret_cast(&buf.at( + static_cast(pixel_data_size * y * width) + + channel_offset_list[c] * + static_cast(width))); + for (int x = 0; x < width; x++) { + float val = reinterpret_cast( + images)[c][(y + start_y) * x_stride + x]; + + tinyexr::swap4(&val); + + // line_ptr[x] = val; + tinyexr::cpy4(line_ptr + x, &val); + } + } + } else { + if (err) { + (*err) += "Invalid requested_pixel_type.\n"; + } + return false; + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + for (int y = 0; y < num_lines; y++) { + // Assume increasing Y + unsigned int *line_ptr = reinterpret_cast(&buf.at( + 
static_cast(pixel_data_size * y * width) + + channel_offset_list[c] * static_cast(width))); + for (int x = 0; x < width; x++) { + unsigned int val = reinterpret_cast( + images)[c][(y + start_y) * x_stride + x]; + + tinyexr::swap4(&val); + + // line_ptr[x] = val; + tinyexr::cpy4(line_ptr + x, &val); + } + } + } + } + + if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(uncompressed) + out_data.insert(out_data.end(), buf.begin(), buf.end()); + + } else if ((compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { +#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) + std::vector block(buminiz::mz_compressBound( + static_cast(buf.size()))); +#elif TINYEXR_USE_STB_ZLIB + // there is no compressBound() function, so we use a value that + // is grossly overestimated, but should always work + std::vector block(256 + 2 * buf.size()); +#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB == 1) + std::vector block(nanoz_compressBound( + static_cast(buf.size()))); +#else + std::vector block( + compressBound(static_cast(buf.size()))); +#endif + tinyexr::tinyexr_uint64 outSize = block.size(); + + if (!tinyexr::CompressZip(&block.at(0), outSize, + reinterpret_cast(&buf.at(0)), + static_cast(buf.size()))) { + if (err) { + (*err) += "Zip compresssion failed.\n"; + } + return false; + } + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + unsigned int data_len = static_cast(outSize); // truncate + + out_data.insert(out_data.end(), block.begin(), block.begin() + data_len); + + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { + // (buf.size() * 3) / 2 would be enough. 
+ std::vector block((buf.size() * 3) / 2); + + tinyexr::tinyexr_uint64 outSize = block.size(); + + if (!tinyexr::CompressRle(&block.at(0), outSize, + reinterpret_cast(&buf.at(0)), + static_cast(buf.size()))) { + if (err) { + (*err) += "RLE compresssion failed.\n"; + } + return false; + } + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + unsigned int data_len = static_cast(outSize); // truncate + out_data.insert(out_data.end(), block.begin(), block.begin() + data_len); + + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { +#if TINYEXR_USE_PIZ + unsigned int bufLen = + 8192 + static_cast( + 2 * static_cast( + buf.size())); // @fixme { compute good bound. } + std::vector block(bufLen); + unsigned int outSize = static_cast(block.size()); + + if (!CompressPiz(&block.at(0), &outSize, + reinterpret_cast(&buf.at(0)), + buf.size(), channels, width, num_lines)) { + if (err) { + (*err) += "PIZ compresssion failed.\n"; + } + return false; + } + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + unsigned int data_len = outSize; + out_data.insert(out_data.end(), block.begin(), block.begin() + data_len); + +#else + if (err) { + (*err) += "PIZ compression is disabled in this build.\n"; + } + return false; +#endif + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if TINYEXR_USE_ZFP + const ZFPCompressionParam* zfp_compression_param = reinterpret_cast(compression_param); + std::vector block; + unsigned int outSize; + + tinyexr::CompressZfp( + &block, &outSize, reinterpret_cast(&buf.at(0)), + width, num_lines, static_cast(channels.size()), *zfp_compression_param); + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + unsigned int data_len = outSize; + out_data.insert(out_data.end(), block.begin(), block.begin() + data_len); + +#else + if (err) { + (*err) += "ZFP compression is disabled in this build.\n"; + } + (void)compression_param; + return false; +#endif + } else { 
+ return false; + } + + return true; +} + +static int EncodeTiledLevel(const EXRImage* level_image, const EXRHeader* exr_header, + const std::vector& channels, + std::vector >& data_list, + size_t start_index, // for data_list + int num_x_tiles, int num_y_tiles, + const std::vector& channel_offset_list, + int pixel_data_size, + const void* compression_param, // must be set if zfp compression is enabled + std::string* err) { + int num_tiles = num_x_tiles * num_y_tiles; + if (num_tiles != level_image->num_tiles) { + if (err) { + (*err) += "Invalid number of tiles in argument.\n"; + } + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if ((exr_header->tile_size_x > level_image->width || exr_header->tile_size_y > level_image->height) && + level_image->level_x == 0 && level_image->level_y == 0) { + if (err) { + (*err) += "Failed to encode tile data.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::atomic invalid_data(false); +#else + bool invalid_data(false); +#endif + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::vector workers; + std::atomic tile_count(0); + + int num_threads = std::max(1, int(std::thread::hardware_concurrency())); + if (num_threads > int(num_tiles)) { + num_threads = int(num_tiles); + } + + for (int t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() { + int i = 0; + while ((i = tile_count++) < num_tiles) { + +#else + // Use signed int since some OpenMP compiler doesn't allow unsigned type for + // `parallel for` +#if TINYEXR_USE_OPENMP +#pragma omp parallel for +#endif + for (int i = 0; i < num_tiles; i++) { + +#endif + size_t tile_idx = static_cast(i); + size_t data_idx = tile_idx + start_index; + + int x_tile = i % num_x_tiles; + int y_tile = i / num_x_tiles; + + EXRTile& tile = level_image->tiles[tile_idx]; + + const unsigned char* const* images = + static_cast(tile.images); + + data_list[data_idx].resize(5*sizeof(int)); + size_t data_header_size = 
data_list[data_idx].size(); + bool ret = EncodePixelData(data_list[data_idx], + images, + exr_header->compression_type, + 0, // increasing y + tile.width, + exr_header->tile_size_y, + exr_header->tile_size_x, + 0, + tile.height, + pixel_data_size, + channels, + channel_offset_list, + err, compression_param); + if (!ret) { + invalid_data = true; + continue; + } + if (data_list[data_idx].size() <= data_header_size) { + invalid_data = true; + continue; + } + + int data_len = static_cast(data_list[data_idx].size() - data_header_size); + //tileX, tileY, levelX, levelY // pixel_data_size(int) + memcpy(&data_list[data_idx][0], &x_tile, sizeof(int)); + memcpy(&data_list[data_idx][4], &y_tile, sizeof(int)); + memcpy(&data_list[data_idx][8], &level_image->level_x, sizeof(int)); + memcpy(&data_list[data_idx][12], &level_image->level_y, sizeof(int)); + memcpy(&data_list[data_idx][16], &data_len, sizeof(int)); + + swap4(reinterpret_cast(&data_list[data_idx][0])); + swap4(reinterpret_cast(&data_list[data_idx][4])); + swap4(reinterpret_cast(&data_list[data_idx][8])); + swap4(reinterpret_cast(&data_list[data_idx][12])); + swap4(reinterpret_cast(&data_list[data_idx][16])); + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + } +})); + } + + for (auto &t : workers) { + t.join(); + } +#else + } // omp parallel +#endif + + if (invalid_data) { + if (err) { + (*err) += "Failed to encode tile data.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + return TINYEXR_SUCCESS; +} + +static int NumScanlines(int compression_type) { + int num_scanlines = 1; + if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanlines = 16; + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + num_scanlines = 32; + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + num_scanlines = 16; + } + return num_scanlines; +} + +static int EncodeChunk(const EXRImage* exr_image, const EXRHeader* exr_header, + const std::vector& channels, + int num_blocks, + tinyexr_uint64 chunk_offset, 
// starting offset of current chunk + bool is_multipart, + OffsetData& offset_data, // output block offsets, must be initialized + std::vector >& data_list, // output + tinyexr_uint64& total_size, // output: ending offset of current chunk + std::string* err) { + int num_scanlines = NumScanlines(exr_header->compression_type); + + data_list.resize(num_blocks); + + std::vector channel_offset_list( + static_cast(exr_header->num_channels)); + + int pixel_data_size = 0; + { + size_t channel_offset = 0; + for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { + channel_offset_list[c] = channel_offset; + if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_HALF) { + pixel_data_size += sizeof(unsigned short); + channel_offset += sizeof(unsigned short); + } else if (channels[c].requested_pixel_type == + TINYEXR_PIXELTYPE_FLOAT) { + pixel_data_size += sizeof(float); + channel_offset += sizeof(float); + } else if (channels[c].requested_pixel_type == TINYEXR_PIXELTYPE_UINT) { + pixel_data_size += sizeof(unsigned int); + channel_offset += sizeof(unsigned int); + } else { + if (err) { + (*err) += "Invalid requested_pixel_type.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + } + } + + const void* compression_param = 0; +#if TINYEXR_USE_ZFP + tinyexr::ZFPCompressionParam zfp_compression_param; + + // Use ZFP compression parameter from custom attributes(if such a parameter + // exists) + { + std::string e; + bool ret = tinyexr::FindZFPCompressionParam( + &zfp_compression_param, exr_header->custom_attributes, + exr_header->num_custom_attributes, &e); + + if (!ret) { + // Use predefined compression parameter. + zfp_compression_param.type = 0; + zfp_compression_param.rate = 2; + } + compression_param = &zfp_compression_param; + } +#endif + + tinyexr_uint64 offset = chunk_offset; + tinyexr_uint64 doffset = is_multipart ? 
4u : 0u; + + if (exr_image->tiles) { + const EXRImage* level_image = exr_image; + size_t block_idx = 0; + //tinyexr::tinyexr_uint64 block_data_size = 0; + int num_levels = (exr_header->tile_level_mode != TINYEXR_TILE_RIPMAP_LEVELS) ? + offset_data.num_x_levels : (offset_data.num_x_levels * offset_data.num_y_levels); + for (int level_index = 0; level_index < num_levels; ++level_index) { + if (!level_image) { + if (err) { + (*err) += "Invalid number of tiled levels for EncodeChunk\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + int level_index_from_image = LevelIndex(level_image->level_x, level_image->level_y, + exr_header->tile_level_mode, offset_data.num_x_levels); + if (level_index_from_image < 0) { + if (err) { + (*err) += "Invalid tile level mode\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (level_index_from_image != level_index) { + if (err) { + (*err) += "Incorrect level ordering in tiled image\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + int num_y_tiles = int(offset_data.offsets[level_index].size()); + if (num_y_tiles <= 0) { + if (err) { + (*err) += "Invalid Y tile size\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + int num_x_tiles = int(offset_data.offsets[level_index][0].size()); + if (num_x_tiles <= 0) { + if (err) { + (*err) += "Invalid X tile size\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + std::string e; + int ret = EncodeTiledLevel(level_image, + exr_header, + channels, + data_list, + block_idx, + num_x_tiles, + num_y_tiles, + channel_offset_list, + pixel_data_size, + compression_param, + &e); + if (ret != TINYEXR_SUCCESS) { + if (!e.empty() && err) { + (*err) += e; + } + return ret; + } + + for (size_t j = 0; j < static_cast(num_y_tiles); ++j) + for (size_t i = 0; i < static_cast(num_x_tiles); ++i) { + offset_data.offsets[level_index][j][i] = offset; + swap8(reinterpret_cast(&offset_data.offsets[level_index][j][i])); + offset += data_list[block_idx].size() + doffset; + //block_data_size += 
data_list[block_idx].size(); + ++block_idx; + } + level_image = level_image->next_level; + } + TINYEXR_CHECK_AND_RETURN_C(static_cast(block_idx) == num_blocks, TINYEXR_ERROR_INVALID_DATA); + total_size = offset; + } else { // scanlines + std::vector& offsets = offset_data.offsets[0][0]; + +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + std::atomic invalid_data(false); + std::vector workers; + std::atomic block_count(0); + + int num_threads = std::min(std::max(1, int(std::thread::hardware_concurrency())), num_blocks); + + for (int t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() { + int i = 0; + while ((i = block_count++) < num_blocks) { + +#else + bool invalid_data(false); +#if TINYEXR_USE_OPENMP +#pragma omp parallel for +#endif + for (int i = 0; i < num_blocks; i++) { + +#endif + int start_y = num_scanlines * i; + int end_Y = (std::min)(num_scanlines * (i + 1), exr_image->height); + int num_lines = end_Y - start_y; + + const unsigned char* const* images = + static_cast(exr_image->images); + + data_list[i].resize(2*sizeof(int)); + size_t data_header_size = data_list[i].size(); + + bool ret = EncodePixelData(data_list[i], + images, + exr_header->compression_type, + 0, // increasing y + exr_image->width, + exr_image->height, + exr_image->width, + start_y, + num_lines, + pixel_data_size, + channels, + channel_offset_list, + err, + compression_param); + if (!ret) { + invalid_data = true; + continue; // "break" cannot be used with OpenMP + } + if (data_list[i].size() <= data_header_size) { + invalid_data = true; + continue; // "break" cannot be used with OpenMP + } + int data_len = static_cast(data_list[i].size() - data_header_size); + memcpy(&data_list[i][0], &start_y, sizeof(int)); + memcpy(&data_list[i][4], &data_len, sizeof(int)); + + swap4(reinterpret_cast(&data_list[i][0])); + swap4(reinterpret_cast(&data_list[i][4])); +#if TINYEXR_HAS_CXX11 && (TINYEXR_USE_THREAD > 0) + } + })); + } + + for (auto &t : workers) { + t.join(); + } +#else 
+ } // omp parallel +#endif + + if (invalid_data) { + if (err) { + (*err) += "Failed to encode scanline data.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + for (size_t i = 0; i < static_cast(num_blocks); i++) { + offsets[i] = offset; + tinyexr::swap8(reinterpret_cast(&offsets[i])); + offset += data_list[i].size() + doffset; + } + + total_size = static_cast(offset); + } + return TINYEXR_SUCCESS; +} + +// can save a single or multi-part image (no deep* formats) +static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images, + const EXRHeader** exr_headers, + unsigned int num_parts, + unsigned char** memory_out, const char** err) { + if (exr_images == NULL || exr_headers == NULL || num_parts == 0 || + memory_out == NULL) { + SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory", + err); + return 0; + } + { + for (unsigned int i = 0; i < num_parts; ++i) { + if (exr_headers[i]->compression_type < 0) { + SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory", + err); + return 0; + } +#if !TINYEXR_USE_PIZ + if (exr_headers[i]->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + SetErrorMessage("PIZ compression is not supported in this build", + err); + return 0; + } +#endif + if (exr_headers[i]->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if !TINYEXR_USE_ZFP + SetErrorMessage("ZFP compression is not supported in this build", + err); + return 0; +#else + // All channels must be fp32. 
+ // No fp16 support in ZFP atm(as of 2023 June) + // https://github.com/LLNL/fpzip/issues/2 + for (int c = 0; c < exr_headers[i]->num_channels; ++c) { + if (exr_headers[i]->requested_pixel_types[c] != TINYEXR_PIXELTYPE_FLOAT) { + SetErrorMessage("Pixel type must be FLOAT for ZFP compression", + err); + return 0; + } + } +#endif + } + } + } + + std::vector memory; + + // Header + { + const char header[] = { 0x76, 0x2f, 0x31, 0x01 }; + memory.insert(memory.end(), header, header + 4); + } + + // Version + // using value from the first header + int long_name = exr_headers[0]->long_name; + { + char marker[] = { 2, 0, 0, 0 }; + /* @todo + if (exr_header->non_image) { + marker[1] |= 0x8; + } + */ + // tiled + if (num_parts == 1 && exr_images[0].tiles) { + marker[1] |= 0x2; + } + // long_name + if (long_name) { + marker[1] |= 0x4; + } + // multipart + if (num_parts > 1) { + marker[1] |= 0x10; + } + memory.insert(memory.end(), marker, marker + 4); + } + + int total_chunk_count = 0; + std::vector chunk_count(num_parts); + std::vector offset_data(num_parts); + for (unsigned int i = 0; i < num_parts; ++i) { + if (!exr_images[i].tiles) { + int num_scanlines = NumScanlines(exr_headers[i]->compression_type); + chunk_count[i] = + (exr_images[i].height + num_scanlines - 1) / num_scanlines; + InitSingleResolutionOffsets(offset_data[i], chunk_count[i]); + total_chunk_count += chunk_count[i]; + } else { + { + std::vector num_x_tiles, num_y_tiles; + if (!PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_headers[i])) { + SetErrorMessage("Failed to precalculate Tile info", + err); + return (size_t)TINYEXR_ERROR_INVALID_DATA; + } + int ntiles = InitTileOffsets(offset_data[i], exr_headers[i], num_x_tiles, num_y_tiles); + if (ntiles > 0) { + chunk_count[i] = ntiles; + } else { + SetErrorMessage("Failed to compute Tile offsets", + err); + return (size_t)TINYEXR_ERROR_INVALID_DATA; + + } + total_chunk_count += chunk_count[i]; + } + } + } + // Write attributes to memory buffer. 
+ std::vector< std::vector > channels(num_parts); + { + std::set partnames; + for (unsigned int i = 0; i < num_parts; ++i) { + //channels + { + std::vector data; + + for (int c = 0; c < exr_headers[i]->num_channels; c++) { + tinyexr::ChannelInfo info; + info.p_linear = 0; + info.pixel_type = exr_headers[i]->pixel_types[c]; + info.requested_pixel_type = exr_headers[i]->requested_pixel_types[c]; + info.x_sampling = 1; + info.y_sampling = 1; + info.name = std::string(exr_headers[i]->channels[c].name); + channels[i].push_back(info); + } + + tinyexr::WriteChannelInfo(data, channels[i]); + + tinyexr::WriteAttributeToMemory(&memory, "channels", "chlist", &data.at(0), + static_cast(data.size())); + } + + { + int comp = exr_headers[i]->compression_type; + swap4(&comp); + WriteAttributeToMemory( + &memory, "compression", "compression", + reinterpret_cast(&comp), 1); + } + + { + int data[4] = { 0, 0, exr_images[i].width - 1, exr_images[i].height - 1 }; + swap4(&data[0]); + swap4(&data[1]); + swap4(&data[2]); + swap4(&data[3]); + WriteAttributeToMemory( + &memory, "dataWindow", "box2i", + reinterpret_cast(data), sizeof(int) * 4); + + int data0[4] = { 0, 0, exr_images[0].width - 1, exr_images[0].height - 1 }; + swap4(&data0[0]); + swap4(&data0[1]); + swap4(&data0[2]); + swap4(&data0[3]); + // Note: must be the same across parts (currently, using value from the first header) + WriteAttributeToMemory( + &memory, "displayWindow", "box2i", + reinterpret_cast(data0), sizeof(int) * 4); + } + + { + unsigned char line_order = 0; // @fixme { read line_order from EXRHeader } + WriteAttributeToMemory(&memory, "lineOrder", "lineOrder", + &line_order, 1); + } + + { + // Note: must be the same across parts + float aspectRatio = 1.0f; + swap4(&aspectRatio); + WriteAttributeToMemory( + &memory, "pixelAspectRatio", "float", + reinterpret_cast(&aspectRatio), sizeof(float)); + } + + { + float center[2] = { 0.0f, 0.0f }; + swap4(¢er[0]); + swap4(¢er[1]); + WriteAttributeToMemory( + &memory, 
"screenWindowCenter", "v2f", + reinterpret_cast(center), 2 * sizeof(float)); + } + + { + float w = 1.0f; + swap4(&w); + WriteAttributeToMemory(&memory, "screenWindowWidth", "float", + reinterpret_cast(&w), + sizeof(float)); + } + + if (exr_images[i].tiles) { + unsigned char tile_mode = static_cast(exr_headers[i]->tile_level_mode & 0x3); + if (exr_headers[i]->tile_rounding_mode) tile_mode |= (1u << 4u); + //unsigned char data[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + unsigned int datai[3] = { 0, 0, 0 }; + unsigned char* data = reinterpret_cast(&datai[0]); + datai[0] = static_cast(exr_headers[i]->tile_size_x); + datai[1] = static_cast(exr_headers[i]->tile_size_y); + data[8] = tile_mode; + swap4(reinterpret_cast(&data[0])); + swap4(reinterpret_cast(&data[4])); + WriteAttributeToMemory( + &memory, "tiles", "tiledesc", + reinterpret_cast(data), 9); + } + + // must be present for multi-part files - according to spec. + if (num_parts > 1) { + // name + { + size_t len = 0; + if ((len = strlen(exr_headers[i]->name)) > 0) { +#if TINYEXR_HAS_CXX11 + partnames.emplace(exr_headers[i]->name); +#else + partnames.insert(std::string(exr_headers[i]->name)); +#endif + if (partnames.size() != i + 1) { + SetErrorMessage("'name' attributes must be unique for a multi-part file", err); + return 0; + } + WriteAttributeToMemory( + &memory, "name", "string", + reinterpret_cast(exr_headers[i]->name), + static_cast(len)); + } else { + SetErrorMessage("Invalid 'name' attribute for a multi-part file", err); + return 0; + } + } + // type + { + const char* type = "scanlineimage"; + if (exr_images[i].tiles) type = "tiledimage"; + WriteAttributeToMemory( + &memory, "type", "string", + reinterpret_cast(type), + static_cast(strlen(type))); + } + // chunkCount + { + WriteAttributeToMemory( + &memory, "chunkCount", "int", + reinterpret_cast(&chunk_count[i]), + 4); + } + } + + // Custom attributes + if (exr_headers[i]->num_custom_attributes > 0) { + for (int j = 0; j < exr_headers[i]->num_custom_attributes; 
j++) { + tinyexr::WriteAttributeToMemory( + &memory, exr_headers[i]->custom_attributes[j].name, + exr_headers[i]->custom_attributes[j].type, + reinterpret_cast( + exr_headers[i]->custom_attributes[j].value), + exr_headers[i]->custom_attributes[j].size); + } + } + + { // end of header + memory.push_back(0); + } + } + } + if (num_parts > 1) { + // end of header list + memory.push_back(0); + } + + tinyexr_uint64 chunk_offset = memory.size() + size_t(total_chunk_count) * sizeof(tinyexr_uint64); + + tinyexr_uint64 total_size = 0; + std::vector< std::vector< std::vector > > data_lists(num_parts); + for (unsigned int i = 0; i < num_parts; ++i) { + std::string e; + int ret = EncodeChunk(&exr_images[i], exr_headers[i], + channels[i], + chunk_count[i], + // starting offset of current chunk after part-number + chunk_offset, + num_parts > 1, + offset_data[i], // output: block offsets, must be initialized + data_lists[i], // output + total_size, // output + &e); + if (ret != TINYEXR_SUCCESS) { + if (!e.empty()) { + tinyexr::SetErrorMessage(e, err); + } + return 0; + } + chunk_offset = total_size; + } + + // Allocating required memory + if (total_size == 0) { // something went wrong + tinyexr::SetErrorMessage("Output memory size is zero", err); + return (size_t)TINYEXR_ERROR_INVALID_DATA; + } + (*memory_out) = static_cast(malloc(size_t(total_size))); + + // Writing header + memcpy((*memory_out), &memory[0], memory.size()); + unsigned char* memory_ptr = *memory_out + memory.size(); + size_t sum = memory.size(); + + // Writing offset data for chunks + for (unsigned int i = 0; i < num_parts; ++i) { + if (exr_images[i].tiles) { + const EXRImage* level_image = &exr_images[i]; + int num_levels = (exr_headers[i]->tile_level_mode != TINYEXR_TILE_RIPMAP_LEVELS) ? 
+ offset_data[i].num_x_levels : (offset_data[i].num_x_levels * offset_data[i].num_y_levels); + for (int level_index = 0; level_index < num_levels; ++level_index) { + for (size_t j = 0; j < offset_data[i].offsets[level_index].size(); ++j) { + size_t num_bytes = sizeof(tinyexr_uint64) * offset_data[i].offsets[level_index][j].size(); + sum += num_bytes; + if (sum > total_size) { + tinyexr::SetErrorMessage("Invalid offset bytes in Tiled Part image.", err); + return (size_t)TINYEXR_ERROR_INVALID_DATA; + } + + memcpy(memory_ptr, + reinterpret_cast(&offset_data[i].offsets[level_index][j][0]), + num_bytes); + memory_ptr += num_bytes; + } + level_image = level_image->next_level; + } + } else { + size_t num_bytes = sizeof(tinyexr::tinyexr_uint64) * static_cast(chunk_count[i]); + sum += num_bytes; + if (sum > total_size) { + tinyexr::SetErrorMessage("Invalid offset bytes in Part image.", err); + return (size_t)TINYEXR_ERROR_INVALID_DATA; + } + std::vector& offsets = offset_data[i].offsets[0][0]; + memcpy(memory_ptr, reinterpret_cast(&offsets[0]), num_bytes); + memory_ptr += num_bytes; + } + } + + // Writing chunk data + for (unsigned int i = 0; i < num_parts; ++i) { + for (size_t j = 0; j < static_cast(chunk_count[i]); ++j) { + if (num_parts > 1) { + sum += 4; + if (sum > total_size) { + tinyexr::SetErrorMessage("Buffer overrun in reading Part image chunk data.", err); + return (size_t)TINYEXR_ERROR_INVALID_DATA; + } + unsigned int part_number = i; + swap4(&part_number); + memcpy(memory_ptr, &part_number, 4); + memory_ptr += 4; + } + sum += data_lists[i][j].size(); + if (sum > total_size) { + tinyexr::SetErrorMessage("Buffer overrun in reading Part image chunk data.", err); + return (size_t)TINYEXR_ERROR_INVALID_DATA; + } + memcpy(memory_ptr, &data_lists[i][j][0], data_lists[i][j].size()); + memory_ptr += data_lists[i][j].size(); + } + } + + if (sum != total_size) { + tinyexr::SetErrorMessage("Corrupted Part image chunk data.", err); + return 
(size_t)TINYEXR_ERROR_INVALID_DATA; + } + + return size_t(total_size); // OK +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +} // tinyexr + +size_t SaveEXRImageToMemory(const EXRImage* exr_image, + const EXRHeader* exr_header, + unsigned char** memory_out, const char** err) { + return tinyexr::SaveEXRNPartImageToMemory(exr_image, &exr_header, 1, memory_out, err); +} + +int SaveEXRImageToFile(const EXRImage *exr_image, const EXRHeader *exr_header, + const char *filename, const char **err) { + if (exr_image == NULL || filename == NULL || + exr_header->compression_type < 0) { + tinyexr::SetErrorMessage("Invalid argument for SaveEXRImageToFile", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +#if !TINYEXR_USE_PIZ + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + tinyexr::SetErrorMessage("PIZ compression is not supported in this build", + err); + return TINYEXR_ERROR_UNSUPPORTED_FEATURE; + } +#endif + +#if !TINYEXR_USE_ZFP + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + tinyexr::SetErrorMessage("ZFP compression is not supported in this build", + err); + return TINYEXR_ERROR_UNSUPPORTED_FEATURE; + } +#endif + + FILE *fp = NULL; +#ifdef _WIN32 +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang + errno_t errcode = + _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"wb"); + if (errcode != 0) { + tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename), + err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } +#else + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. 
+ fp = fopen(filename, "wb"); +#endif +#else + fp = fopen(filename, "wb"); +#endif + if (!fp) { + tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename), + err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } + + unsigned char *mem = NULL; + size_t mem_size = SaveEXRImageToMemory(exr_image, exr_header, &mem, err); + if (mem_size == 0) { + fclose(fp); + return TINYEXR_ERROR_SERIALIZATION_FAILED; + } + + size_t written_size = 0; + if ((mem_size > 0) && mem) { + written_size = fwrite(mem, 1, mem_size, fp); + } + free(mem); + + fclose(fp); + + if (written_size != mem_size) { + tinyexr::SetErrorMessage("Cannot write a file", err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } + + return TINYEXR_SUCCESS; +} + +size_t SaveEXRMultipartImageToMemory(const EXRImage* exr_images, + const EXRHeader** exr_headers, + unsigned int num_parts, + unsigned char** memory_out, const char** err) { + if (exr_images == NULL || exr_headers == NULL || num_parts < 2 || + memory_out == NULL) { + tinyexr::SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory", + err); + return 0; + } + return tinyexr::SaveEXRNPartImageToMemory(exr_images, exr_headers, num_parts, memory_out, err); +} + +int SaveEXRMultipartImageToFile(const EXRImage* exr_images, + const EXRHeader** exr_headers, + unsigned int num_parts, + const char* filename, + const char** err) { + if (exr_images == NULL || exr_headers == NULL || num_parts < 2) { + tinyexr::SetErrorMessage("Invalid argument for SaveEXRMultipartImageToFile", + err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + FILE *fp = NULL; +#ifdef _WIN32 +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang. 
+ errno_t errcode = + _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"wb"); + if (errcode != 0) { + tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename), + err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } +#else + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. + fp = fopen(filename, "wb"); +#endif +#else + fp = fopen(filename, "wb"); +#endif + if (!fp) { + tinyexr::SetErrorMessage("Cannot write a file: " + std::string(filename), + err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } + + unsigned char *mem = NULL; + size_t mem_size = SaveEXRMultipartImageToMemory(exr_images, exr_headers, num_parts, &mem, err); + if (mem_size == 0) { + fclose(fp); + return TINYEXR_ERROR_SERIALIZATION_FAILED; + } + + size_t written_size = 0; + if ((mem_size > 0) && mem) { + written_size = fwrite(mem, 1, mem_size, fp); + } + free(mem); + + fclose(fp); + + if (written_size != mem_size) { + tinyexr::SetErrorMessage("Cannot write a file", err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } + + return TINYEXR_SUCCESS; +} + +int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { + if (deep_image == NULL) { + tinyexr::SetErrorMessage("Invalid argument for LoadDeepEXR", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + MemoryMappedFile file(filename); + if (!file.valid()) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + if (file.size == 0) { + tinyexr::SetErrorMessage("File size is zero : " + std::string(filename), + err); + return TINYEXR_ERROR_INVALID_FILE; + } + + const char *head = reinterpret_cast(file.data); + const char *marker = reinterpret_cast(file.data); + + // Header check. + { + const char header[] = {0x76, 0x2f, 0x31, 0x01}; + + if (memcmp(marker, header, 4) != 0) { + tinyexr::SetErrorMessage("Invalid magic number", err); + return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; + } + marker += 4; + } + + // Version, scanline. 
+ { + // ver 2.0, scanline, deep bit on(0x800) + // must be [2, 0, 0, 0] + if (marker[0] != 2 || marker[1] != 8 || marker[2] != 0 || marker[3] != 0) { + tinyexr::SetErrorMessage("Unsupported version or scanline", err); + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + marker += 4; + } + + int dx = -1; + int dy = -1; + int dw = -1; + int dh = -1; + int num_scanline_blocks = 1; // 16 for ZIP compression. + int compression_type = -1; + int num_channels = -1; + std::vector channels; + + // Read attributes + size_t size = file.size - tinyexr::kEXRVersionSize; + for (;;) { + if (0 == size) { + return TINYEXR_ERROR_INVALID_DATA; + } else if (marker[0] == '\0') { + marker++; + size--; + break; + } + + std::string attr_name; + std::string attr_type; + std::vector data; + size_t marker_size; + if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size, + marker, size)) { + std::stringstream ss; + ss << "Failed to parse attribute\n"; + tinyexr::SetErrorMessage(ss.str(), err); + return TINYEXR_ERROR_INVALID_DATA; + } + marker += marker_size; + size -= marker_size; + + if (attr_name.compare("compression") == 0) { + compression_type = data[0]; + if (compression_type > TINYEXR_COMPRESSIONTYPE_PIZ) { + std::stringstream ss; + ss << "Unsupported compression type : " << compression_type; + tinyexr::SetErrorMessage(ss.str(), err); + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanline_blocks = 16; + } + + } else if (attr_name.compare("channels") == 0) { + // name: zero-terminated string, from 1 to 255 bytes long + // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2 + // pLinear: unsigned char, possible values are 0 and 1 + // reserved: three chars, should be zero + // xSampling: int + // ySampling: int + + if (!tinyexr::ReadChannelInfo(channels, data)) { + tinyexr::SetErrorMessage("Failed to parse channel info", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + num_channels = 
static_cast(channels.size()); + + if (num_channels < 1) { + tinyexr::SetErrorMessage("Invalid channels format", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + } else if (attr_name.compare("dataWindow") == 0) { + memcpy(&dx, &data.at(0), sizeof(int)); + memcpy(&dy, &data.at(4), sizeof(int)); + memcpy(&dw, &data.at(8), sizeof(int)); + memcpy(&dh, &data.at(12), sizeof(int)); + tinyexr::swap4(&dx); + tinyexr::swap4(&dy); + tinyexr::swap4(&dw); + tinyexr::swap4(&dh); + + } else if (attr_name.compare("displayWindow") == 0) { + int x; + int y; + int w; + int h; + memcpy(&x, &data.at(0), sizeof(int)); + memcpy(&y, &data.at(4), sizeof(int)); + memcpy(&w, &data.at(8), sizeof(int)); + memcpy(&h, &data.at(12), sizeof(int)); + tinyexr::swap4(&x); + tinyexr::swap4(&y); + tinyexr::swap4(&w); + tinyexr::swap4(&h); + } + } + + TINYEXR_CHECK_AND_RETURN_C(dx >= 0, TINYEXR_ERROR_INVALID_DATA); + TINYEXR_CHECK_AND_RETURN_C(dy >= 0, TINYEXR_ERROR_INVALID_DATA); + TINYEXR_CHECK_AND_RETURN_C(dw >= 0, TINYEXR_ERROR_INVALID_DATA); + TINYEXR_CHECK_AND_RETURN_C(dh >= 0, TINYEXR_ERROR_INVALID_DATA); + TINYEXR_CHECK_AND_RETURN_C(num_channels >= 1, TINYEXR_ERROR_INVALID_DATA); + + int data_width = dw - dx + 1; + int data_height = dh - dy + 1; + + // Read offset tables. 
+ int num_blocks = data_height / num_scanline_blocks; + if (num_blocks * num_scanline_blocks < data_height) { + num_blocks++; + } + + std::vector offsets(static_cast(num_blocks)); + + for (size_t y = 0; y < static_cast(num_blocks); y++) { + tinyexr::tinyexr_int64 offset; + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_int64)); + tinyexr::swap8(reinterpret_cast(&offset)); + marker += sizeof(tinyexr::tinyexr_int64); // = 8 + offsets[y] = offset; + } + +#if TINYEXR_USE_PIZ + if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) || + (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ)) { +#else + if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { +#endif + // OK + } else { + tinyexr::SetErrorMessage("Unsupported compression format", err); + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + deep_image->image = static_cast( + malloc(sizeof(float **) * static_cast(num_channels))); + for (int c = 0; c < num_channels; c++) { + deep_image->image[c] = static_cast( + malloc(sizeof(float *) * static_cast(data_height))); + for (int y = 0; y < data_height; y++) { + } + } + + deep_image->offset_table = static_cast( + malloc(sizeof(int *) * static_cast(data_height))); + for (int y = 0; y < data_height; y++) { + deep_image->offset_table[y] = static_cast( + malloc(sizeof(int) * static_cast(data_width))); + } + + for (size_t y = 0; y < static_cast(num_blocks); y++) { + const unsigned char *data_ptr = + reinterpret_cast(head + offsets[y]); + + // int: y coordinate + // int64: packed size of pixel offset table + // int64: packed size of sample data + // int64: unpacked size of sample data + // compressed pixel offset table + // compressed sample data + 
int line_no; + tinyexr::tinyexr_int64 packedOffsetTableSize; + tinyexr::tinyexr_int64 packedSampleDataSize; + tinyexr::tinyexr_int64 unpackedSampleDataSize; + memcpy(&line_no, data_ptr, sizeof(int)); + memcpy(&packedOffsetTableSize, data_ptr + 4, + sizeof(tinyexr::tinyexr_int64)); + memcpy(&packedSampleDataSize, data_ptr + 12, + sizeof(tinyexr::tinyexr_int64)); + memcpy(&unpackedSampleDataSize, data_ptr + 20, + sizeof(tinyexr::tinyexr_int64)); + + tinyexr::swap4(&line_no); + tinyexr::swap8( + reinterpret_cast(&packedOffsetTableSize)); + tinyexr::swap8( + reinterpret_cast(&packedSampleDataSize)); + tinyexr::swap8( + reinterpret_cast(&unpackedSampleDataSize)); + + std::vector pixelOffsetTable(static_cast(data_width)); + + // decode pixel offset table. + { + unsigned long dstLen = + static_cast(pixelOffsetTable.size() * sizeof(int)); + if (!tinyexr::DecompressZip( + reinterpret_cast(&pixelOffsetTable.at(0)), + &dstLen, data_ptr + 28, + static_cast(packedOffsetTableSize))) { + return false; + } + + TINYEXR_CHECK_AND_RETURN_C(dstLen == pixelOffsetTable.size() * sizeof(int), TINYEXR_ERROR_INVALID_DATA); + for (size_t i = 0; i < static_cast(data_width); i++) { + deep_image->offset_table[y][i] = pixelOffsetTable[i]; + } + } + + std::vector sample_data( + static_cast(unpackedSampleDataSize)); + + // decode sample data. 
+ { + unsigned long dstLen = static_cast(unpackedSampleDataSize); + if (dstLen) { + if (!tinyexr::DecompressZip( + reinterpret_cast(&sample_data.at(0)), &dstLen, + data_ptr + 28 + packedOffsetTableSize, + static_cast(packedSampleDataSize))) { + return false; + } + TINYEXR_CHECK_AND_RETURN_C(dstLen == static_cast(unpackedSampleDataSize), TINYEXR_ERROR_INVALID_DATA); + } + } + + // decode sample + int sampleSize = -1; + std::vector channel_offset_list(static_cast(num_channels)); + { + int channel_offset = 0; + for (size_t i = 0; i < static_cast(num_channels); i++) { + channel_offset_list[i] = channel_offset; + if (channels[i].pixel_type == TINYEXR_PIXELTYPE_UINT) { // UINT + channel_offset += 4; + } else if (channels[i].pixel_type == TINYEXR_PIXELTYPE_HALF) { // half + channel_offset += 2; + } else if (channels[i].pixel_type == + TINYEXR_PIXELTYPE_FLOAT) { // float + channel_offset += 4; + } else { + tinyexr::SetErrorMessage("Invalid pixel_type in chnnels.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + sampleSize = channel_offset; + } + TINYEXR_CHECK_AND_RETURN_C(sampleSize >= 2, TINYEXR_ERROR_INVALID_DATA); + + TINYEXR_CHECK_AND_RETURN_C(static_cast( + pixelOffsetTable[static_cast(data_width - 1)] * + sampleSize) == sample_data.size(), TINYEXR_ERROR_INVALID_DATA); + int samples_per_line = static_cast(sample_data.size()) / sampleSize; + + // + // Alloc memory + // + + // + // pixel data is stored as image[channels][pixel_samples] + // + { + tinyexr::tinyexr_uint64 data_offset = 0; + for (size_t c = 0; c < static_cast(num_channels); c++) { + deep_image->image[c][y] = static_cast( + malloc(sizeof(float) * static_cast(samples_per_line))); + + if (channels[c].pixel_type == 0) { // UINT + for (size_t x = 0; x < static_cast(samples_per_line); x++) { + unsigned int ui; + unsigned int *src_ptr = reinterpret_cast( + &sample_data.at(size_t(data_offset) + x * sizeof(int))); + tinyexr::cpy4(&ui, src_ptr); + deep_image->image[c][y][x] = static_cast(ui); // @fixme + } + 
data_offset += + sizeof(unsigned int) * static_cast(samples_per_line); + } else if (channels[c].pixel_type == 1) { // half + for (size_t x = 0; x < static_cast(samples_per_line); x++) { + tinyexr::FP16 f16; + const unsigned short *src_ptr = reinterpret_cast( + &sample_data.at(size_t(data_offset) + x * sizeof(short))); + tinyexr::cpy2(&(f16.u), src_ptr); + tinyexr::FP32 f32 = half_to_float(f16); + deep_image->image[c][y][x] = f32.f; + } + data_offset += sizeof(short) * static_cast(samples_per_line); + } else { // float + for (size_t x = 0; x < static_cast(samples_per_line); x++) { + float f; + const float *src_ptr = reinterpret_cast( + &sample_data.at(size_t(data_offset) + x * sizeof(float))); + tinyexr::cpy4(&f, src_ptr); + deep_image->image[c][y][x] = f; + } + data_offset += sizeof(float) * static_cast(samples_per_line); + } + } + } + } // y + + deep_image->width = data_width; + deep_image->height = data_height; + + deep_image->channel_names = static_cast( + malloc(sizeof(const char *) * static_cast(num_channels))); + for (size_t c = 0; c < static_cast(num_channels); c++) { +#ifdef _WIN32 + deep_image->channel_names[c] = _strdup(channels[c].name.c_str()); +#else + deep_image->channel_names[c] = strdup(channels[c].name.c_str()); +#endif + } + deep_image->num_channels = num_channels; + + return TINYEXR_SUCCESS; +} + +void InitEXRImage(EXRImage *exr_image) { + if (exr_image == NULL) { + return; + } + + exr_image->width = 0; + exr_image->height = 0; + exr_image->num_channels = 0; + + exr_image->images = NULL; + exr_image->tiles = NULL; + exr_image->next_level = NULL; + exr_image->level_x = 0; + exr_image->level_y = 0; + + exr_image->num_tiles = 0; +} + +void FreeEXRErrorMessage(const char *msg) { + if (msg) { + free(reinterpret_cast(const_cast(msg))); + } + return; +} + +void InitEXRHeader(EXRHeader *exr_header) { + if (exr_header == NULL) { + return; + } + + memset(exr_header, 0, sizeof(EXRHeader)); +} + +int FreeEXRHeader(EXRHeader *exr_header) { + if (exr_header == 
NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (exr_header->channels) { + free(exr_header->channels); + } + + if (exr_header->pixel_types) { + free(exr_header->pixel_types); + } + + if (exr_header->requested_pixel_types) { + free(exr_header->requested_pixel_types); + } + + for (int i = 0; i < exr_header->num_custom_attributes; i++) { + if (exr_header->custom_attributes[i].value) { + free(exr_header->custom_attributes[i].value); + } + } + + if (exr_header->custom_attributes) { + free(exr_header->custom_attributes); + } + + EXRSetNameAttr(exr_header, NULL); + + return TINYEXR_SUCCESS; +} + +void EXRSetNameAttr(EXRHeader* exr_header, const char* name) { + if (exr_header == NULL) { + return; + } + memset(exr_header->name, 0, 256); + if (name != NULL) { + size_t len = std::min(strlen(name), size_t(255)); + if (len) { + memcpy(exr_header->name, name, len); + } + } +} + +int EXRNumLevels(const EXRImage* exr_image) { + if (exr_image == NULL) return 0; + if(exr_image->images) return 1; // scanlines + int levels = 1; + const EXRImage* level_image = exr_image; + +#if 0 + while ((level_image = level_image->next_level)) + ++levels; +#else + for (; ;) + { + level_image = level_image->next_level; + if (!level_image) + break; + ++levels; + } +#endif + + return levels; +} + +int FreeEXRImage(EXRImage *exr_image) { + if (exr_image == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (exr_image->next_level) { + FreeEXRImage(exr_image->next_level); + delete exr_image->next_level; + } + + for (int i = 0; i < exr_image->num_channels; i++) { + if (exr_image->images && exr_image->images[i]) { + free(exr_image->images[i]); + } + } + + if (exr_image->images) { + free(exr_image->images); + } + + if (exr_image->tiles) { + for (int tid = 0; tid < exr_image->num_tiles; tid++) { + for (int i = 0; i < exr_image->num_channels; i++) { + if (exr_image->tiles[tid].images && exr_image->tiles[tid].images[i]) { + free(exr_image->tiles[tid].images[i]); + } + } + if 
(exr_image->tiles[tid].images) { + free(exr_image->tiles[tid].images); + } + } + free(exr_image->tiles); + } + + return TINYEXR_SUCCESS; +} + +int ParseEXRHeaderFromFile(EXRHeader *exr_header, const EXRVersion *exr_version, + const char *filename, const char **err) { + if (exr_header == NULL || exr_version == NULL || filename == NULL) { + tinyexr::SetErrorMessage("Invalid argument for ParseEXRHeaderFromFile", + err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + MemoryMappedFile file(filename); + if (!file.valid()) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + return ParseEXRHeaderFromMemory(exr_header, exr_version, file.data, file.size, + err); +} + +int ParseEXRMultipartHeaderFromMemory(EXRHeader ***exr_headers, + int *num_headers, + const EXRVersion *exr_version, + const unsigned char *memory, size_t size, + const char **err) { + if (memory == NULL || exr_headers == NULL || num_headers == NULL || + exr_version == NULL) { + // Invalid argument + tinyexr::SetErrorMessage( + "Invalid argument for ParseEXRMultipartHeaderFromMemory", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (size < tinyexr::kEXRVersionSize) { + tinyexr::SetErrorMessage("Data size too short", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + const unsigned char *marker = memory + tinyexr::kEXRVersionSize; + size_t marker_size = size - tinyexr::kEXRVersionSize; + + std::vector infos; + + for (;;) { + tinyexr::HeaderInfo info; + info.clear(); + + std::string err_str; + bool empty_header = false; + int ret = ParseEXRHeader(&info, &empty_header, exr_version, &err_str, + marker, marker_size); + + if (ret != TINYEXR_SUCCESS) { + + // Free malloc-allocated memory here. 
+ for (size_t i = 0; i < info.attributes.size(); i++) { + if (info.attributes[i].value) { + free(info.attributes[i].value); + } + } + + tinyexr::SetErrorMessage(err_str, err); + return ret; + } + + if (empty_header) { + marker += 1; // skip '\0' + break; + } + + // `chunkCount` must exist in the header. + if (info.chunk_count == 0) { + + // Free malloc-allocated memory here. + for (size_t i = 0; i < info.attributes.size(); i++) { + if (info.attributes[i].value) { + free(info.attributes[i].value); + } + } + + tinyexr::SetErrorMessage( + "`chunkCount' attribute is not found in the header.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + infos.push_back(info); + + // move to next header. + marker += info.header_len; + size -= info.header_len; + } + + // allocate memory for EXRHeader and create array of EXRHeader pointers. + (*exr_headers) = + static_cast(malloc(sizeof(EXRHeader *) * infos.size())); + + + int retcode = TINYEXR_SUCCESS; + + for (size_t i = 0; i < infos.size(); i++) { + EXRHeader *exr_header = static_cast(malloc(sizeof(EXRHeader))); + memset(exr_header, 0, sizeof(EXRHeader)); + + std::string warn; + std::string _err; + if (!ConvertHeader(exr_header, infos[i], &warn, &_err)) { + + // Free malloc-allocated memory here. + for (size_t k = 0; k < infos[i].attributes.size(); k++) { + if (infos[i].attributes[k].value) { + free(infos[i].attributes[k].value); + } + } + + if (!_err.empty()) { + tinyexr::SetErrorMessage( + _err, err); + } + // continue to converting headers + retcode = TINYEXR_ERROR_INVALID_HEADER; + } + + exr_header->multipart = exr_version->multipart ? 
1 : 0; + + (*exr_headers)[i] = exr_header; + } + + (*num_headers) = static_cast(infos.size()); + + return retcode; +} + +int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers, + const EXRVersion *exr_version, + const char *filename, const char **err) { + if (exr_headers == NULL || num_headers == NULL || exr_version == NULL || + filename == NULL) { + tinyexr::SetErrorMessage( + "Invalid argument for ParseEXRMultipartHeaderFromFile()", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + MemoryMappedFile file(filename); + if (!file.valid()) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + return ParseEXRMultipartHeaderFromMemory( + exr_headers, num_headers, exr_version, file.data, file.size, err); +} + +int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory, + size_t size) { + if (version == NULL || memory == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (size < tinyexr::kEXRVersionSize) { + return TINYEXR_ERROR_INVALID_DATA; + } + + const unsigned char *marker = memory; + + // Header check. + { + const char header[] = {0x76, 0x2f, 0x31, 0x01}; + + if (memcmp(marker, header, 4) != 0) { + return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; + } + marker += 4; + } + + version->tiled = false; + version->long_name = false; + version->non_image = false; + version->multipart = false; + + // Parse version header. 
+ { + // must be 2 + if (marker[0] != 2) { + return TINYEXR_ERROR_INVALID_EXR_VERSION; + } + + if (version == NULL) { + return TINYEXR_SUCCESS; // May OK + } + + version->version = 2; + + if (marker[1] & 0x2) { // 9th bit + version->tiled = true; + } + if (marker[1] & 0x4) { // 10th bit + version->long_name = true; + } + if (marker[1] & 0x8) { // 11th bit + version->non_image = true; // (deep image) + } + if (marker[1] & 0x10) { // 12th bit + version->multipart = true; + } + } + + return TINYEXR_SUCCESS; +} + +int ParseEXRVersionFromFile(EXRVersion *version, const char *filename) { + if (filename == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + FILE *fp = NULL; +#ifdef _WIN32 +#if defined(_MSC_VER) || (defined(MINGW_HAS_SECURE_API) && MINGW_HAS_SECURE_API) // MSVC, MinGW GCC, or Clang. + errno_t err = _wfopen_s(&fp, tinyexr::UTF8ToWchar(filename).c_str(), L"rb"); + if (err != 0) { + // TODO(syoyo): return wfopen_s erro code + return TINYEXR_ERROR_CANT_OPEN_FILE; + } +#else + // Unknown compiler or MinGW without MINGW_HAS_SECURE_API. + fp = fopen(filename, "rb"); +#endif +#else + fp = fopen(filename, "rb"); +#endif + if (!fp) { + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + // Try to read kEXRVersionSize bytes; if the file is shorter than + // kEXRVersionSize, this will produce an error. This avoids a call to + // fseek(fp, 0, SEEK_END), which is not required to be supported by C + // implementations. 
+ unsigned char buf[tinyexr::kEXRVersionSize]; + size_t ret = fread(&buf[0], 1, tinyexr::kEXRVersionSize, fp); + fclose(fp); + + if (ret != tinyexr::kEXRVersionSize) { + return TINYEXR_ERROR_INVALID_FILE; + } + + return ParseEXRVersionFromMemory(version, buf, tinyexr::kEXRVersionSize); +} + +int LoadEXRMultipartImageFromMemory(EXRImage *exr_images, + const EXRHeader **exr_headers, + unsigned int num_parts, + const unsigned char *memory, + const size_t size, const char **err) { + if (exr_images == NULL || exr_headers == NULL || num_parts == 0 || + memory == NULL || (size <= tinyexr::kEXRVersionSize)) { + tinyexr::SetErrorMessage( + "Invalid argument for LoadEXRMultipartImageFromMemory()", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + // compute total header size. + size_t total_header_size = 0; + for (unsigned int i = 0; i < num_parts; i++) { + if (exr_headers[i]->header_len == 0) { + tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + total_header_size += exr_headers[i]->header_len; + } + + const char *marker = reinterpret_cast( + memory + total_header_size + 4 + + 4); // +8 for magic number and version header. + + marker += 1; // Skip empty header. + + // NOTE 1: + // In multipart image, There is 'part number' before chunk data. + // 4 byte : part number + // 4+ : chunk + // + // NOTE 2: + // EXR spec says 'part number' is 'unsigned long' but actually this is + // 'unsigned int(4 bytes)' in OpenEXR implementation... + // http://www.openexr.com/openexrfilelayout.pdf + + // Load chunk offset table. 
+ std::vector chunk_offset_table_list; + chunk_offset_table_list.reserve(num_parts); + for (size_t i = 0; i < static_cast(num_parts); i++) { + chunk_offset_table_list.resize(chunk_offset_table_list.size() + 1); + tinyexr::OffsetData& offset_data = chunk_offset_table_list.back(); + if (!exr_headers[i]->tiled || exr_headers[i]->tile_level_mode == TINYEXR_TILE_ONE_LEVEL) { + tinyexr::InitSingleResolutionOffsets(offset_data, size_t(exr_headers[i]->chunk_count)); + std::vector& offset_table = offset_data.offsets[0][0]; + + for (size_t c = 0; c < offset_table.size(); c++) { + tinyexr::tinyexr_uint64 offset; + memcpy(&offset, marker, 8); + tinyexr::swap8(&offset); + + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset size in EXR header chunks.", + err); + return TINYEXR_ERROR_INVALID_DATA; + } + + offset_table[c] = offset + 4; // +4 to skip 'part number' + marker += 8; + } + } else { + { + std::vector num_x_tiles, num_y_tiles; + if (!tinyexr::PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_headers[i])) { + tinyexr::SetErrorMessage("Invalid tile info.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + int num_blocks = InitTileOffsets(offset_data, exr_headers[i], num_x_tiles, num_y_tiles); + if (num_blocks != exr_headers[i]->chunk_count) { + tinyexr::SetErrorMessage("Invalid offset table size.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) { + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) { + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) { + tinyexr::tinyexr_uint64 offset; + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); + tinyexr::swap8(&offset); + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset size in EXR header chunks.", + err); + return TINYEXR_ERROR_INVALID_DATA; + } + offset_data.offsets[l][dy][dx] = offset + 4; // +4 to skip 'part number' + marker += sizeof(tinyexr::tinyexr_uint64); // = 8 + } + } + } + } 
+ } + + // Decode image. + for (size_t i = 0; i < static_cast(num_parts); i++) { + tinyexr::OffsetData &offset_data = chunk_offset_table_list[i]; + + // First check 'part number' is identical to 'i' + for (unsigned int l = 0; l < offset_data.offsets.size(); ++l) + for (unsigned int dy = 0; dy < offset_data.offsets[l].size(); ++dy) + for (unsigned int dx = 0; dx < offset_data.offsets[l][dy].size(); ++dx) { + + const unsigned char *part_number_addr = + memory + offset_data.offsets[l][dy][dx] - 4; // -4 to move to 'part number' field. + unsigned int part_no; + memcpy(&part_no, part_number_addr, sizeof(unsigned int)); // 4 + tinyexr::swap4(&part_no); + + if (part_no != i) { + tinyexr::SetErrorMessage("Invalid `part number' in EXR header chunks.", + err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + + std::string e; + int ret = tinyexr::DecodeChunk(&exr_images[i], exr_headers[i], offset_data, + memory, size, &e); + if (ret != TINYEXR_SUCCESS) { + if (!e.empty()) { + tinyexr::SetErrorMessage(e, err); + } + return ret; + } + } + + return TINYEXR_SUCCESS; +} + +int LoadEXRMultipartImageFromFile(EXRImage *exr_images, + const EXRHeader **exr_headers, + unsigned int num_parts, const char *filename, + const char **err) { + if (exr_images == NULL || exr_headers == NULL || num_parts == 0) { + tinyexr::SetErrorMessage( + "Invalid argument for LoadEXRMultipartImageFromFile", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + MemoryMappedFile file(filename); + if (!file.valid()) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + return LoadEXRMultipartImageFromMemory(exr_images, exr_headers, num_parts, + file.data, file.size, err); +} + +int SaveEXRToMemory(const float *data, int width, int height, int components, + const int save_as_fp16, const unsigned char **outbuf, const char **err) { + + if ((components == 1) || components == 3 || components == 4) { + // OK + } else { + std::stringstream ss; + ss 
<< "Unsupported component value : " << components << std::endl; + + tinyexr::SetErrorMessage(ss.str(), err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + EXRHeader header; + InitEXRHeader(&header); + + if ((width < 16) && (height < 16)) { + // No compression for small image. + header.compression_type = TINYEXR_COMPRESSIONTYPE_NONE; + } else { + header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP; + } + + EXRImage image; + InitEXRImage(&image); + + image.num_channels = components; + + std::vector images[4]; + + if (components == 1) { + images[0].resize(static_cast(width * height)); + memcpy(images[0].data(), data, sizeof(float) * size_t(width * height)); + } else { + images[0].resize(static_cast(width * height)); + images[1].resize(static_cast(width * height)); + images[2].resize(static_cast(width * height)); + images[3].resize(static_cast(width * height)); + + // Split RGB(A)RGB(A)RGB(A)... into R, G and B(and A) layers + for (size_t i = 0; i < static_cast(width * height); i++) { + images[0][i] = data[static_cast(components) * i + 0]; + images[1][i] = data[static_cast(components) * i + 1]; + images[2][i] = data[static_cast(components) * i + 2]; + if (components == 4) { + images[3][i] = data[static_cast(components) * i + 3]; + } + } + } + + float *image_ptr[4] = {0, 0, 0, 0}; + if (components == 4) { + image_ptr[0] = &(images[3].at(0)); // A + image_ptr[1] = &(images[2].at(0)); // B + image_ptr[2] = &(images[1].at(0)); // G + image_ptr[3] = &(images[0].at(0)); // R + } else if (components == 3) { + image_ptr[0] = &(images[2].at(0)); // B + image_ptr[1] = &(images[1].at(0)); // G + image_ptr[2] = &(images[0].at(0)); // R + } else if (components == 1) { + image_ptr[0] = &(images[0].at(0)); // A + } + + image.images = reinterpret_cast(image_ptr); + image.width = width; + image.height = height; + + header.num_channels = components; + header.channels = static_cast(malloc( + sizeof(EXRChannelInfo) * static_cast(header.num_channels))); + // Must be (A)BGR order, 
since most of EXR viewers expect this channel order. + if (components == 4) { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "A", 255); + strncpy_s(header.channels[1].name, "B", 255); + strncpy_s(header.channels[2].name, "G", 255); + strncpy_s(header.channels[3].name, "R", 255); +#else + strncpy(header.channels[0].name, "A", 255); + strncpy(header.channels[1].name, "B", 255); + strncpy(header.channels[2].name, "G", 255); + strncpy(header.channels[3].name, "R", 255); +#endif + header.channels[0].name[strlen("A")] = '\0'; + header.channels[1].name[strlen("B")] = '\0'; + header.channels[2].name[strlen("G")] = '\0'; + header.channels[3].name[strlen("R")] = '\0'; + } else if (components == 3) { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "B", 255); + strncpy_s(header.channels[1].name, "G", 255); + strncpy_s(header.channels[2].name, "R", 255); +#else + strncpy(header.channels[0].name, "B", 255); + strncpy(header.channels[1].name, "G", 255); + strncpy(header.channels[2].name, "R", 255); +#endif + header.channels[0].name[strlen("B")] = '\0'; + header.channels[1].name[strlen("G")] = '\0'; + header.channels[2].name[strlen("R")] = '\0'; + } else { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "A", 255); +#else + strncpy(header.channels[0].name, "A", 255); +#endif + header.channels[0].name[strlen("A")] = '\0'; + } + + header.pixel_types = static_cast( + malloc(sizeof(int) * static_cast(header.num_channels))); + header.requested_pixel_types = static_cast( + malloc(sizeof(int) * static_cast(header.num_channels))); + for (int i = 0; i < header.num_channels; i++) { + header.pixel_types[i] = + TINYEXR_PIXELTYPE_FLOAT; // pixel type of input image + + if (save_as_fp16 > 0) { + header.requested_pixel_types[i] = + TINYEXR_PIXELTYPE_HALF; // save with half(fp16) pixel format + } else { + header.requested_pixel_types[i] = + TINYEXR_PIXELTYPE_FLOAT; // save with float(fp32) pixel format(i.e. 
+ // no precision reduction) + } + } + + + unsigned char *mem_buf; + size_t mem_size = SaveEXRImageToMemory(&image, &header, &mem_buf, err); + + if (mem_size == 0) { + return TINYEXR_ERROR_SERIALIZATION_FAILED; + } + + free(header.channels); + free(header.pixel_types); + free(header.requested_pixel_types); + + if (mem_size > size_t(std::numeric_limits::max())) { + free(mem_buf); + return TINYEXR_ERROR_DATA_TOO_LARGE; + } + + (*outbuf) = mem_buf; + + return int(mem_size); +} + +int SaveEXR(const float *data, int width, int height, int components, + const int save_as_fp16, const char *outfilename, const char **err) { + if ((components == 1) || components == 3 || components == 4) { + // OK + } else { + std::stringstream ss; + ss << "Unsupported component value : " << components << std::endl; + + tinyexr::SetErrorMessage(ss.str(), err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + EXRHeader header; + InitEXRHeader(&header); + + if ((width < 16) && (height < 16)) { + // No compression for small image. + header.compression_type = TINYEXR_COMPRESSIONTYPE_NONE; + } else { + header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP; + } + + EXRImage image; + InitEXRImage(&image); + + image.num_channels = components; + + std::vector images[4]; + const size_t pixel_count = + static_cast(width) * static_cast(height); + + if (components == 1) { + images[0].resize(pixel_count); + memcpy(images[0].data(), data, sizeof(float) * pixel_count); + } else { + images[0].resize(pixel_count); + images[1].resize(pixel_count); + images[2].resize(pixel_count); + images[3].resize(pixel_count); + + // Split RGB(A)RGB(A)RGB(A)... 
into R, G and B(and A) layers + for (size_t i = 0; i < pixel_count; i++) { + images[0][i] = data[static_cast(components) * i + 0]; + images[1][i] = data[static_cast(components) * i + 1]; + images[2][i] = data[static_cast(components) * i + 2]; + if (components == 4) { + images[3][i] = data[static_cast(components) * i + 3]; + } + } + } + + float *image_ptr[4] = {0, 0, 0, 0}; + if (components == 4) { + image_ptr[0] = &(images[3].at(0)); // A + image_ptr[1] = &(images[2].at(0)); // B + image_ptr[2] = &(images[1].at(0)); // G + image_ptr[3] = &(images[0].at(0)); // R + } else if (components == 3) { + image_ptr[0] = &(images[2].at(0)); // B + image_ptr[1] = &(images[1].at(0)); // G + image_ptr[2] = &(images[0].at(0)); // R + } else if (components == 1) { + image_ptr[0] = &(images[0].at(0)); // A + } + + image.images = reinterpret_cast(image_ptr); + image.width = width; + image.height = height; + + header.num_channels = components; + header.channels = static_cast(malloc( + sizeof(EXRChannelInfo) * static_cast(header.num_channels))); + // Must be (A)BGR order, since most of EXR viewers expect this channel order. 
+ if (components == 4) { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "A", 255); + strncpy_s(header.channels[1].name, "B", 255); + strncpy_s(header.channels[2].name, "G", 255); + strncpy_s(header.channels[3].name, "R", 255); +#else + strncpy(header.channels[0].name, "A", 255); + strncpy(header.channels[1].name, "B", 255); + strncpy(header.channels[2].name, "G", 255); + strncpy(header.channels[3].name, "R", 255); +#endif + header.channels[0].name[strlen("A")] = '\0'; + header.channels[1].name[strlen("B")] = '\0'; + header.channels[2].name[strlen("G")] = '\0'; + header.channels[3].name[strlen("R")] = '\0'; + } else if (components == 3) { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "B", 255); + strncpy_s(header.channels[1].name, "G", 255); + strncpy_s(header.channels[2].name, "R", 255); +#else + strncpy(header.channels[0].name, "B", 255); + strncpy(header.channels[1].name, "G", 255); + strncpy(header.channels[2].name, "R", 255); +#endif + header.channels[0].name[strlen("B")] = '\0'; + header.channels[1].name[strlen("G")] = '\0'; + header.channels[2].name[strlen("R")] = '\0'; + } else { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "A", 255); +#else + strncpy(header.channels[0].name, "A", 255); +#endif + header.channels[0].name[strlen("A")] = '\0'; + } + + header.pixel_types = static_cast( + malloc(sizeof(int) * static_cast(header.num_channels))); + header.requested_pixel_types = static_cast( + malloc(sizeof(int) * static_cast(header.num_channels))); + for (int i = 0; i < header.num_channels; i++) { + header.pixel_types[i] = + TINYEXR_PIXELTYPE_FLOAT; // pixel type of input image + + if (save_as_fp16 > 0) { + header.requested_pixel_types[i] = + TINYEXR_PIXELTYPE_HALF; // save with half(fp16) pixel format + } else { + header.requested_pixel_types[i] = + TINYEXR_PIXELTYPE_FLOAT; // save with float(fp32) pixel format(i.e. 
+ // no precision reduction) + } + } + + int ret = SaveEXRImageToFile(&image, &header, outfilename, err); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + free(header.channels); + free(header.pixel_types); + free(header.requested_pixel_types); + + return ret; +} + +#ifdef __clang__ +// zero-as-null-pointer-constant +#pragma clang diagnostic pop +#endif + +#endif // TINYEXR_IMPLEMENTATION_DEFINED +#endif // TINYEXR_IMPLEMENTATION diff --git a/thirdparty/basisu/encoder/basisu_astc_hdr_6x6_enc.cpp b/thirdparty/basisu/encoder/basisu_astc_hdr_6x6_enc.cpp new file mode 100644 index 000000000..3da89d412 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_astc_hdr_6x6_enc.cpp @@ -0,0 +1,7027 @@ +// File: basisu_astc_hdr_6x6_enc.cpp +#include "basisu_astc_hdr_6x6_enc.h" +#include "basisu_enc.h" +#include "basisu_astc_hdr_common.h" +#include "basisu_math.h" +#include "basisu_resampler.h" +#include "basisu_resampler_filters.h" + +#define MINIZ_HEADER_FILE_ONLY +#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES +#include "basisu_miniz.h" + +#include "3rdparty/android_astc_decomp.h" + +#include + +using namespace basisu; +using namespace buminiz; +using namespace basist::astc_6x6_hdr; + +namespace astc_6x6_hdr +{ + +static void atomic_max(std::atomic& atomic_var, uint32_t new_value) +{ + uint32_t current = atomic_var.load(std::memory_order_relaxed); + for ( ; ; ) + { + uint32_t new_max = std::max(current, new_value); + if (atomic_var.compare_exchange_weak(current, new_max, std::memory_order_relaxed, std::memory_order_relaxed)) + break; + } +} + +void astc_hdr_6x6_global_config::set_user_level(int level) +{ + level = basisu::clamp(level, 0, ASTC_HDR_6X6_MAX_USER_COMP_LEVEL); + + m_master_comp_level = 0; + m_highest_comp_level = 0; + m_num_reuse_xy_deltas = NUM_REUSE_XY_DELTAS; + m_extra_patterns_flag = false; + m_brute_force_partition_matching = false; + + switch (level) + { + case 0: + { + // Both reduce compression a lot when lambda>0 + m_favor_higher_compression = false; + 
m_num_reuse_xy_deltas = NUM_REUSE_XY_DELTAS / 2; + break; + } + case 1: + { + m_master_comp_level = 0; + m_highest_comp_level = 0; + break; + } + case 2: + { + m_master_comp_level = 0; + m_highest_comp_level = 1; + break; + } + case 3: + { + m_master_comp_level = 1; + m_highest_comp_level = 1; + break; + } + case 4: + { + m_master_comp_level = 1; + m_highest_comp_level = 2; + break; + } + case 5: + { + m_master_comp_level = 1; + m_highest_comp_level = 3; + break; + } + case 6: + { + m_master_comp_level = 1; + m_highest_comp_level = 4; + break; + } + case 7: + { + m_master_comp_level = 2; + m_highest_comp_level = 2; + break; + } + case 8: + { + m_master_comp_level = 2; + m_highest_comp_level = 3; + break; + } + case 9: + { + m_master_comp_level = 2; + m_highest_comp_level = 4; + break; + } + case 10: + { + m_master_comp_level = 3; + m_highest_comp_level = 3; + break; + } + case 11: + { + m_master_comp_level = 3; + m_highest_comp_level = 4; + break; + } + case 12: + default: + { + m_master_comp_level = 4; + m_highest_comp_level = 4; + m_extra_patterns_flag = true; + m_brute_force_partition_matching = true; + break; + } + } +} + +const float m1 = 0.1593017578125f; // (2610 / 2^14) * (1/100) +const float m2 = 78.84375f; // (2523 / 32) * (1/100) +const float c1 = 0.8359375f; // 3424 / (2^12) +const float c2 = 18.8515625f; // (2413 / 128) +const float c3 = 18.6875f; // (2392 / 128) + +static float forwardPQ(float Y) +{ + // 10,000 here is an absolute scale - it's in nits (cd per square meter) + float L = Y * (1.0f / 10000.0f); + + float num = powf(L, m1); + float N = powf((c1 + c2 * num) / (1 + c3 * num), m2); + + return N; +} + +#if 0 +static float inversePQ(float E) +{ + float N = powf(E, 1.0f / m2); + + float num = basisu::maximum((N - c1), 0.0f) / (c2 - c3 * N); + float L = powf(num, 1.0f / m1); + + return L * 10000.0f; +} +#endif + +// PQ function approximation: convert input to bfloat16, look up in tables, bilinear interpolation between table entries. 
+// max_er: 0.000023007392883, max_rel_er: 0.000023472490284, avg_er: 0.000004330495689, 6-7x faster on x86 +// Highest error is for values less than SMALLEST_PQ_VAL_IN. +// +// Approximation is round trip lossless for 10-12 bits at [0,10000] nits: +// for x [0,1024] (SCALE=1023) or for x [0,4095] (SCALE=4096): +// round(forwardPQTab(inversePQ(x / SCALE)) * SCALE) == x +// +// bfloat16 has enough precision to handle 8-bit sRGB to linear conversions: +// round(linear_to_srgb(bfloat16_to_float(float_to_bfloat16(srgb_to_linear(isRGB/255.0f))))*255.0) is lossless + +const int PQ_APPROX_MIN_EXP = -16, PQ_APPROX_MAX_EXP = 16; +const int PQ_APPROX_EXP_RANGE = (PQ_APPROX_MAX_EXP - PQ_APPROX_MIN_EXP + 1); + +const float SMALLEST_PQ_VAL_IN = 0.000015258829080f; +const float SMALLEST_PQ_VAL = 0.000551903737f; // forwardPQ(SMALLEST_PQ_VAL_IN) + +const float LARGEST_PQ_VAL = 1.251312f; + +float g_pq_approx_tabs[PQ_APPROX_EXP_RANGE][128]; + +static void init_pq_tables() +{ + for (int exp = PQ_APPROX_MIN_EXP; exp <= PQ_APPROX_MAX_EXP; exp++) + { + for (int mant = 0; mant < 128; mant++) + { + bfloat16 b = bfloat16_init(1, exp, mant); + float bf = bfloat16_to_float(b); + + float pq = forwardPQ(bf); + + g_pq_approx_tabs[exp - PQ_APPROX_MIN_EXP][mant] = pq; + } + } + + //fmt_printf("{.15} {.15}\n", g_pq_approx_tabs[0][0], inversePQ(g_pq_approx_tabs[0][0])); + //fmt_printf("{.15}\n", forwardPQ(SMALLEST_PQ_VAL_IN)); +} + +static inline float forwardPQTab(float v) +{ + assert(g_pq_approx_tabs[0][0]); + + assert(v >= 0.0f); + if (v == 0.0f) + return 0.0f; + + bfloat16 bf = float_to_bfloat16(v, false); + assert(v >= bfloat16_to_float(bf)); + + int exp = bfloat16_get_exp(bf); + + if (exp < PQ_APPROX_MIN_EXP) + { + // not accurate but should be good enough for our uses + return lerp(0.0f, SMALLEST_PQ_VAL, minimum(1.0f, v / SMALLEST_PQ_VAL_IN)); + } + else if (exp > PQ_APPROX_MAX_EXP) + return LARGEST_PQ_VAL; + + int mant = bfloat16_get_mantissa(bf); + + float a = g_pq_approx_tabs[exp - 
PQ_APPROX_MIN_EXP][mant]; + float bf_f32 = bfloat16_to_float(bf); + + int next_mant = mant + 1; + int next_exp = exp; + if (next_mant == 128) + { + next_mant = 0; + next_exp++; + if (next_exp > PQ_APPROX_MAX_EXP) + return a; + } + + float b = g_pq_approx_tabs[next_exp - PQ_APPROX_MIN_EXP][next_mant]; + + bfloat16 next_bf = bfloat16_init(1, next_exp, next_mant); + float next_bf_f32 = bfloat16_to_float(next_bf); + assert(v <= next_bf_f32); + + float lerp_factor = (v - bf_f32) / (next_bf_f32 - bf_f32); + assert((lerp_factor >= 0) && (lerp_factor <= 1.0f)); + + return lerp(a, b, lerp_factor); +} + +// 100 nits = ~.5 i +// This converts absolute linear RGB light in either REC 709 or REC2020/BT2100 color gamut to ICtCp, a coding space where Ct is scaled by 2. +// To convert to perceptual ITP for error/distance calculations, multiply the result Ct by .5 (or set itp_flag to true). +// Assumes REC 709 input, or REC 2020/BT.2100 RGB input if rec2020_bt2100_color_gamut is true. +// +// ITP info: +// https://www.portrait.com/resource-center/ictcp-color-difference-metric/ +// https://professional.dolby.com/siteassets/pdfs/measuringperceptualcolorvolume_v07.253.pdf (see scale to JND's) +// This also converts from a ICtCp coding space to threshold or perceptually uniform space ITP. 
+// +// Linear REC709 to REC2020/BT.2100 gamut conversion: +// rgb_2100[0] = rgb_in[0] * 0.6274f + rgb_in[1] * 0.3293f + rgb_in[2] * 0.0433f; +// rgb_2100[1] = rgb_in[0] * 0.0691f + rgb_in[1] * 0.9195f + rgb_in[2] * 0.0114f; +// rgb_2100[2] = rgb_in[0] * 0.0164f + rgb_in[1] * 0.0880f + rgb_in[2] * 0.8956f; +// const float S = 1.0f / 4096.0f; +// l = (1688.0f * S) * rgb_2100[0] + (2146.0f * S) * rgb_2100[1] + (262.0f * S) * rgb_2100[2]; +// m = (683.0f * S) * rgb_2100[0] + (2951.0f * S) * rgb_2100[1] + (462.0f * S) * rgb_2100[2]; +// s = (99.0f * S) * rgb_2100[0] + (309.0f * S) * rgb_2100[1] + (3688.0f * S) * rgb_2100[2]; +static void linear_rgb_to_ictcp(const vec3F& rgb_in, vec3F& ictcp, bool itp_flag = false, bool rec2020_bt2100_color_gamut = false) +{ + vec3F rgb_2100(rgb_in); + + float l, m, s; + if (!rec2020_bt2100_color_gamut) + { + // Assume REC 709 input color gamut + // (REC2020_to_LMS * REC709_to_2020) * input_color + l = rgb_2100[0] * 0.2958097f + rgb_2100[1] * 0.6230863f + rgb_2100[2] * 0.0811040f; + m = rgb_2100[0] * 0.1562512f + rgb_2100[1] * 0.7272980f + rgb_2100[2] * 0.1164508f; + s = rgb_2100[0] * 0.0351435f + rgb_2100[1] * 0.1565601f + rgb_2100[2] * 0.8082964f; + } + else + { + // Assumes REC2020/BT.2100 input color gamut (this is from the spec) + l = 0.412109375f * rgb_2100[0] + 0.52392578125f * rgb_2100[1] + 0.06396484375f * rgb_2100[2]; + m = 0.166748046875f * rgb_2100[0] + 0.720458984375f * rgb_2100[1] + 0.11279296875f * rgb_2100[2]; + s = 0.024169921875f * rgb_2100[0] + 0.075439453125f * rgb_2100[1] + 0.900390625f * rgb_2100[2]; + } + + float ld = forwardPQTab(l); + float md = forwardPQTab(m); + float sd = forwardPQTab(s); + + ictcp[0] = .5f * ld + .5f * md; + + // if ITP scale Ct by .5 (the ICtCp spec scaled Ct to better exploit the full scaled output, which is not perceptually linear) + if (itp_flag) + ictcp[1] = 0.806884765625f * ld + -1.6617431640625f * md + 0.8548583984375f * sd; + else + ictcp[1] = 1.61376953125f * ld + -3.323486328125f 
* md + 1.709716796875f * sd; + + ictcp[2] = 4.378173828125f * ld + -4.24560546875f * md + -0.132568359375f * sd; +} + +static inline void linear_rgb_to_itp(const vec3F& rgb_in, vec3F& itp, const astc_hdr_6x6_global_config &cfg) +{ + linear_rgb_to_ictcp(rgb_in, itp, true, cfg.m_rec2020_bt2100_color_gamut); +} + +#if 0 +// Outputs rec2020/bt2100 color gamut (i.e. this doesn't convert back to REC709 gamut). +static void ictcp_to_linear_rgb(const vec3F& ictcp, vec3F& rgb, bool itp_flag = false) +{ + float ct = ictcp[1]; + + if (itp_flag) + ct *= 2.0f; + + float ld = ictcp[0] + ct * 0.008609037037932726f + ictcp[2] * 0.11102962500302596f; + float md = ictcp[0] + ct * -0.008609037037932726f + ictcp[2] * -0.11102962500302596f; + float sd = ictcp[0] + ct * 0.5600313357106792f + ictcp[2] * -0.32062717498731885f; + + float l = inversePQ(ld); + float m = inversePQ(md); + float s = inversePQ(sd); + + rgb[0] = l * 3.436606694333079f + m * -2.5064521186562705f + s * 0.06984542432319149f; + rgb[1] = l * -0.7913295555989289f + m * 1.983600451792291f + s * -0.192270896193362f; + rgb[2] = l * -0.025949899690592672f + m * -0.09891371471172646f + s * 1.1248636144023192f; +} +#endif + +struct half_vec3 +{ + basist::half_float m_vals[3]; + + inline half_vec3() { } + + inline half_vec3(basist::half_float x, basist::half_float y, basist::half_float z) + { + m_vals[0] = x; + m_vals[1] = y; + m_vals[2] = z; + } + + inline half_vec3(const half_vec3& other) + { + *this = other; + } + + inline half_vec3& operator= (const half_vec3& rhs) + { + m_vals[0] = rhs.m_vals[0]; + m_vals[1] = rhs.m_vals[1]; + m_vals[2] = rhs.m_vals[2]; + return *this; + } + + inline void clear() + { + clear_obj(m_vals); + } + + inline half_vec3 &set(basist::half_float x, basist::half_float y, basist::half_float z) + { + m_vals[0] = x; + m_vals[1] = y; + m_vals[2] = z; + return *this; + } + + inline half_vec3& set(float x, float y, float z) + { + m_vals[0] = basist::float_to_half(x); + m_vals[1] = 
basist::float_to_half(y); + m_vals[2] = basist::float_to_half(z); + return *this; + } + + template + inline half_vec3& set_vec(const T& vec) + { + m_vals[0] = basist::float_to_half(vec[0]); + m_vals[1] = basist::float_to_half(vec[1]); + m_vals[2] = basist::float_to_half(vec[2]); + return *this; + } + + template + inline T get_vec() const + { + return T(basist::half_to_float(m_vals[0]), basist::half_to_float(m_vals[1]), basist::half_to_float(m_vals[2])); + } + + inline basist::half_float operator[] (uint32_t c) const { assert(c < 3); return m_vals[c]; } + inline basist::half_float& operator[] (uint32_t c) { assert(c < 3); return m_vals[c]; } + + float get_float_comp(uint32_t c) const + { + assert(c < 3); + return basist::half_to_float(m_vals[c]); + } + + half_vec3& set_float_comp(uint32_t c, float v) + { + assert(c < 3); + m_vals[c] = basist::float_to_half(v); + return *this; + } +}; + +struct half_vec4 +{ + basist::half_float m_vals[4]; + + inline half_vec4() { } + + inline half_vec4(basist::half_float x, basist::half_float y, basist::half_float z, basist::half_float w) + { + m_vals[0] = x; + m_vals[1] = y; + m_vals[2] = z; + m_vals[3] = w; + } + + inline half_vec4(const half_vec4& other) + { + *this = other; + } + + inline half_vec4& operator= (const half_vec4& rhs) + { + m_vals[0] = rhs.m_vals[0]; + m_vals[1] = rhs.m_vals[1]; + m_vals[2] = rhs.m_vals[2]; + m_vals[3] = rhs.m_vals[3]; + return *this; + } + + inline void clear() + { + clear_obj(m_vals); + } + + inline half_vec4& set(basist::half_float x, basist::half_float y, basist::half_float z, basist::half_float w) + { + m_vals[0] = x; + m_vals[1] = y; + m_vals[2] = z; + m_vals[3] = w; + return *this; + } + + inline half_vec4& set(float x, float y, float z, float w) + { + m_vals[0] = basist::float_to_half(x); + m_vals[1] = basist::float_to_half(y); + m_vals[2] = basist::float_to_half(z); + m_vals[3] = basist::float_to_half(w); + return *this; + } + + template + inline half_vec4& set_vec(const T& vec) + { + 
m_vals[0] = basist::float_to_half(vec[0]); + m_vals[1] = basist::float_to_half(vec[1]); + m_vals[2] = basist::float_to_half(vec[2]); + m_vals[3] = basist::float_to_half(vec[3]); + return *this; + } + + template + inline T get_vec() const + { + return T(basist::half_to_float(m_vals[0]), basist::half_to_float(m_vals[1]), basist::half_to_float(m_vals[2]), basist::half_to_float(m_vals[3])); + } + + inline basist::half_float operator[] (uint32_t c) const { assert(c < 4); return m_vals[c]; } + inline basist::half_float &operator[] (uint32_t c) { assert(c < 4); return m_vals[c]; } + + float get_float_comp(uint32_t c) const + { + assert(c < 4); + return basist::half_to_float(m_vals[c]); + } + + half_vec4& set_float_comp(uint32_t c, float v) + { + assert(c < 4); + m_vals[c] = basist::float_to_half(v); + return *this; + } +}; + +const uint32_t MAX_BLOCK_W = 6, MAX_BLOCK_H = 6; + +struct trial_result +{ + astc_helpers::log_astc_block m_log_blk; + double m_err; + bool m_valid; +}; + +//---------------------------------------------------------- + +const uint32_t NUM_PART3_MAPPINGS = 6; +static uint8_t g_part3_mapping[NUM_PART3_MAPPINGS][3] = +{ + { 0, 1, 2 }, + { 1, 2, 0 }, + { 2, 0, 1 }, + { 0, 2, 1 }, + { 1, 0, 2 }, + { 2, 1, 0 } +}; + +struct partition_pattern_vec +{ + uint8_t m_parts[6 * 6]; + + partition_pattern_vec() + { + clear(); + } + + partition_pattern_vec(const partition_pattern_vec& other) + { + *this = other; + } + + void clear() + { + memset(m_parts, 0, sizeof(m_parts)); + } + + partition_pattern_vec& operator= (const partition_pattern_vec& rhs) + { + if (this == &rhs) + return *this; + memcpy(m_parts, rhs.m_parts, 36); + return *this; + } + + uint8_t operator[] (uint32_t i) const { assert(i < 36); return m_parts[i]; } + uint8_t& operator[] (uint32_t i) { assert(i < 36); return m_parts[i]; } + + uint8_t operator() (uint32_t x, uint32_t y) const { assert((x < 6) && (y < 6)); return m_parts[x + y * 6]; } + uint8_t& operator() (uint32_t x, uint32_t y) { assert((x < 
6) && (y < 6)); return m_parts[x + y * 6]; } + + int get_squared_distance(const partition_pattern_vec& other) const + { + int total_dist = 0; + for (uint32_t i = 0; i < 36; i++) + total_dist += iabs((int)m_parts[i] - (int)other.m_parts[i]); + return total_dist; + } + + float get_distance(const partition_pattern_vec& other) const + { + return sqrtf((float)get_squared_distance(other)); + } + + partition_pattern_vec get_permuted2(uint32_t permute_index) const + { + assert(permute_index <= 1); + + partition_pattern_vec res; + for (uint32_t i = 0; i < 36; i++) + { + assert(m_parts[i] <= 1); + res.m_parts[i] = (uint8_t)(m_parts[i] ^ permute_index); + } + + return res; + } + + partition_pattern_vec get_permuted3(uint32_t permute_index) const + { + assert(permute_index <= 5); + + partition_pattern_vec res; + for (uint32_t i = 0; i < 36; i++) + { + assert(m_parts[i] <= 2); + res.m_parts[i] = g_part3_mapping[permute_index][m_parts[i]]; + } + + return res; + } + + partition_pattern_vec get_canonicalized() const + { + partition_pattern_vec res; + + int new_labels[3] = { -1, -1, -1 }; + uint32_t next_index = 0; + for (uint32_t i = 0; i < 36; i++) + { + uint32_t p = m_parts[i]; + if (new_labels[p] == -1) + new_labels[p] = next_index++; + + res.m_parts[i] = (uint8_t)new_labels[p]; + } + + return res; + } + + bool operator== (const partition_pattern_vec& rhs) const + { + return memcmp(m_parts, rhs.m_parts, sizeof(m_parts)) == 0; + } + + operator size_t() const + { + return basisu::hash_hsieh(m_parts, sizeof(m_parts)); + } +}; + +struct vp_tree_node +{ + partition_pattern_vec m_vantage_point; + uint32_t m_point_index; + float m_dist; + + int m_inner_node, m_outer_node; +}; + +#define BRUTE_FORCE_PART_SEARCH (0) + +class vp_tree +{ +public: + vp_tree() + { + } + + void clear() + { + m_nodes.clear(); + } + + // This requires no redundant patterns, i.e. all must be unique. 
+ bool init(uint32_t n, const partition_pattern_vec* pUnique_pats) + { + clear(); + + uint_vec pat_indices(n); + for (uint32_t i = 0; i < n; i++) + pat_indices[i] = i; + + std::pair root_idx = find_best_vantage_point(n, pUnique_pats, pat_indices); + + if (root_idx.first == -1) + return false; + + m_nodes.resize(1); + m_nodes[0].m_vantage_point = pUnique_pats[root_idx.first]; + m_nodes[0].m_point_index = root_idx.first; + m_nodes[0].m_dist = root_idx.second; + m_nodes[0].m_inner_node = -1; + m_nodes[0].m_outer_node = -1; + + uint_vec inner_list, outer_list; + + inner_list.reserve(n / 2); + outer_list.reserve(n / 2); + + for (uint32_t pat_index = 0; pat_index < n; pat_index++) + { + if ((int)pat_index == root_idx.first) + continue; + + const float dist = m_nodes[0].m_vantage_point.get_distance(pUnique_pats[pat_index]); + + if (dist <= root_idx.second) + inner_list.push_back(pat_index); + else + outer_list.push_back(pat_index); + } + + if (inner_list.size()) + { + m_nodes[0].m_inner_node = create_node(n, pUnique_pats, inner_list); + if (m_nodes[0].m_inner_node < 0) + return false; + } + + if (outer_list.size()) + { + m_nodes[0].m_outer_node = create_node(n, pUnique_pats, outer_list); + if (m_nodes[0].m_outer_node < 0) + return false; + } + + return true; + } + + struct result + { + uint32_t m_pat_index; + uint32_t m_mapping_index; + float m_dist; + + bool operator< (const result& rhs) const { return m_dist < rhs.m_dist; } + bool operator> (const result& rhs) const { return m_dist > rhs.m_dist; } + }; + + class result_queue + { + enum { MaxSupportedSize = 256 + 1 }; + + public: + result_queue() : + m_cur_size(0) + { + } + + size_t get_size() const + { + return m_cur_size; + } + + bool empty() const + { + return !m_cur_size; + } + + typedef std::array result_array_type; + + const result_array_type& get_elements() const { return m_elements; } + result_array_type& get_elements() { return m_elements; } + + void clear() + { + m_cur_size = 0; + } + + void reserve(uint32_t n) 
+ { + BASISU_NOTE_UNUSED(n); + } + + const result& top() const + { + assert(m_cur_size); + return m_elements[1]; + } + + bool insert(const result& val, uint32_t max_size) + { + assert(max_size < MaxSupportedSize); + + if (m_cur_size >= MaxSupportedSize) + return false; + + m_elements[++m_cur_size] = val; + up_heap(m_cur_size); + + if (m_cur_size > max_size) + pop(); + + return true; + } + + bool pop() + { + if (m_cur_size == 0) + return false; + + m_elements[1] = m_elements[m_cur_size--]; + down_heap(1); + return true; + } + + float get_highest_dist() const + { + if (!m_cur_size) + return 0.0f; + + return top().m_dist; + } + + private: + result_array_type m_elements; + size_t m_cur_size; + + void up_heap(size_t index) + { + while ((index > 1) && (m_elements[index] > m_elements[index >> 1])) + { + std::swap(m_elements[index], m_elements[index >> 1]); + index >>= 1; + } + } + + void down_heap(size_t index) + { + for ( ; ; ) + { + size_t largest = index, left_child = 2 * index, right_child = 2 * index + 1; + + if ((left_child <= m_cur_size) && (m_elements[left_child] > m_elements[largest])) + largest = left_child; + + if ((right_child <= m_cur_size) && (m_elements[right_child] > m_elements[largest])) + largest = right_child; + + if (largest == index) + break; + + std::swap(m_elements[index], m_elements[largest]); + index = largest; + } + } + }; + + void find_nearest(uint32_t num_subsets, const partition_pattern_vec& desired_pat, result_queue& results, uint32_t max_results) + { + assert((num_subsets >= 2) && (num_subsets <= 3)); + + results.clear(); + + if (!m_nodes.size()) + return; + + uint32_t num_desired_pats; + partition_pattern_vec desired_pats[NUM_PART3_MAPPINGS]; + + if (num_subsets == 2) + { + num_desired_pats = 2; + for (uint32_t i = 0; i < 2; i++) + desired_pats[i] = desired_pat.get_permuted2(i); + } + else + { + num_desired_pats = NUM_PART3_MAPPINGS; + for (uint32_t i = 0; i < NUM_PART3_MAPPINGS; i++) + desired_pats[i] = desired_pat.get_permuted3(i); + } + 
+#if 0 + find_nearest_at_node(0, num_desired_pats, desired_pats, results, max_results); +#else + find_nearest_at_node_non_recursive(0, num_desired_pats, desired_pats, results, max_results); +#endif + } + +private: + basisu::vector m_nodes; + + void find_nearest_at_node(int node_index, uint32_t num_desired_pats, const partition_pattern_vec* pDesired_pats, result_queue& results, uint32_t max_results) + { + float best_dist_to_vantage = BIG_FLOAT_VAL; + uint32_t best_mapping = 0; + for (uint32_t i = 0; i < num_desired_pats; i++) + { + float dist = pDesired_pats[i].get_distance(m_nodes[node_index].m_vantage_point); + if (dist < best_dist_to_vantage) + { + best_dist_to_vantage = dist; + best_mapping = i; + } + } + + result r; + r.m_dist = best_dist_to_vantage; + r.m_mapping_index = best_mapping; + r.m_pat_index = m_nodes[node_index].m_point_index; + + results.insert(r, max_results); + + if (best_dist_to_vantage <= m_nodes[node_index].m_dist) + { + // inner first + if (m_nodes[node_index].m_inner_node >= 0) + find_nearest_at_node(m_nodes[node_index].m_inner_node, num_desired_pats, pDesired_pats, results, max_results); + + if (m_nodes[node_index].m_outer_node >= 0) + { + if ( (results.get_size() < max_results) || + ((m_nodes[node_index].m_dist - best_dist_to_vantage) <= results.get_highest_dist()) + ) + { + find_nearest_at_node(m_nodes[node_index].m_outer_node, num_desired_pats, pDesired_pats, results, max_results); + } + } + } + else + { + // outer first + if (m_nodes[node_index].m_outer_node >= 0) + find_nearest_at_node(m_nodes[node_index].m_outer_node, num_desired_pats, pDesired_pats, results, max_results); + + if (m_nodes[node_index].m_inner_node >= 0) + { + if ( (results.get_size() < max_results) || + ((best_dist_to_vantage - m_nodes[node_index].m_dist) <= results.get_highest_dist()) + ) + { + find_nearest_at_node(m_nodes[node_index].m_inner_node, num_desired_pats, pDesired_pats, results, max_results); + } + } + } + } + + void find_nearest_at_node_non_recursive(int 
init_node_index, uint32_t num_desired_pats, const partition_pattern_vec* pDesired_pats, result_queue& results, uint32_t max_results) + { + uint_vec node_stack; + node_stack.reserve(16); + node_stack.push_back(init_node_index); + + do + { + const uint32_t node_index = node_stack.back(); + node_stack.pop_back(); + + float best_dist_to_vantage = BIG_FLOAT_VAL; + uint32_t best_mapping = 0; + for (uint32_t i = 0; i < num_desired_pats; i++) + { + float dist = pDesired_pats[i].get_distance(m_nodes[node_index].m_vantage_point); + if (dist < best_dist_to_vantage) + { + best_dist_to_vantage = dist; + best_mapping = i; + } + } + + result r; + r.m_dist = best_dist_to_vantage; + r.m_mapping_index = best_mapping; + r.m_pat_index = m_nodes[node_index].m_point_index; + + results.insert(r, max_results); + + if (best_dist_to_vantage <= m_nodes[node_index].m_dist) + { + if (m_nodes[node_index].m_outer_node >= 0) + { + if ((results.get_size() < max_results) || + ((m_nodes[node_index].m_dist - best_dist_to_vantage) <= results.get_highest_dist()) + ) + { + node_stack.push_back(m_nodes[node_index].m_outer_node); + } + } + + // inner first + if (m_nodes[node_index].m_inner_node >= 0) + { + node_stack.push_back(m_nodes[node_index].m_inner_node); + } + } + else + { + if (m_nodes[node_index].m_inner_node >= 0) + { + if ((results.get_size() < max_results) || + ((best_dist_to_vantage - m_nodes[node_index].m_dist) <= results.get_highest_dist()) + ) + { + node_stack.push_back(m_nodes[node_index].m_inner_node); + } + } + + // outer first + if (m_nodes[node_index].m_outer_node >= 0) + { + node_stack.push_back(m_nodes[node_index].m_outer_node); + } + } + + } while (!node_stack.empty()); + } + + // returns the index of the new node, or -1 on error + int create_node(uint32_t n, const partition_pattern_vec* pUnique_pats, const uint_vec& pat_indices) + { + std::pair root_idx = find_best_vantage_point(n, pUnique_pats, pat_indices); + + if (root_idx.first < 0) + return -1; + + 
m_nodes.resize(m_nodes.size() + 1); + const uint32_t new_node_index = m_nodes.size_u32() - 1; + + m_nodes[new_node_index].m_vantage_point = pUnique_pats[root_idx.first]; + m_nodes[new_node_index].m_point_index = root_idx.first; + m_nodes[new_node_index].m_dist = root_idx.second; + m_nodes[new_node_index].m_inner_node = -1; + m_nodes[new_node_index].m_outer_node = -1; + + uint_vec inner_list, outer_list; + + inner_list.reserve(pat_indices.size_u32() / 2); + outer_list.reserve(pat_indices.size_u32() / 2); + + for (uint32_t pat_indices_iter = 0; pat_indices_iter < pat_indices.size(); pat_indices_iter++) + { + const uint32_t pat_index = pat_indices[pat_indices_iter]; + + if ((int)pat_index == root_idx.first) + continue; + + const float dist = m_nodes[new_node_index].m_vantage_point.get_distance(pUnique_pats[pat_index]); + + if (dist <= root_idx.second) + inner_list.push_back(pat_index); + else + outer_list.push_back(pat_index); + } + + if (inner_list.size()) + m_nodes[new_node_index].m_inner_node = create_node(n, pUnique_pats, inner_list); + + if (outer_list.size()) + m_nodes[new_node_index].m_outer_node = create_node(n, pUnique_pats, outer_list); + + return new_node_index; + } + + // returns the pattern index of the vantage point (-1 on error), and the optimal split distance + std::pair find_best_vantage_point(uint32_t num_unique_pats, const partition_pattern_vec* pUnique_pats, const uint_vec &pat_indices) + { + BASISU_NOTE_UNUSED(num_unique_pats); + + const uint32_t n = pat_indices.size_u32(); + + assert(n); + if (n == 1) + return std::make_pair(pat_indices[0], 0.0f); + + float best_split_metric = -1.0f; + int best_split_pat = -1; + float best_split_dist = 0.0f; + float best_split_var = 0.0f; + + basisu::vector< std::pair > dists; + dists.reserve(n); + + float_vec float_dists; + float_dists.reserve(n); + + for (uint32_t pat_indices_iter = 0; pat_indices_iter < n; pat_indices_iter++) + { + const uint32_t split_pat_index = pat_indices[pat_indices_iter]; + 
assert(split_pat_index < num_unique_pats); + + const partition_pattern_vec& trial_vantage = pUnique_pats[split_pat_index]; + + dists.resize(0); + float_dists.resize(0); + + for (uint32_t j = 0; j < n; j++) + { + const uint32_t pat_index = pat_indices[j]; + assert(pat_index < num_unique_pats); + + if (pat_index == split_pat_index) + continue; + + float dist = trial_vantage.get_distance(pUnique_pats[pat_index]); + dists.emplace_back(std::make_pair(dist, pat_index)); + + float_dists.push_back(dist); + } + + stats s; + s.calc(float_dists.size_u32(), float_dists.data()); + + std::sort(dists.begin(), dists.end(), [](const auto &a, const auto &b) { + return a.first < b.first; + }); + + const uint32_t num_dists = dists.size_u32(); + float split_dist = dists[num_dists / 2].first; + if ((num_dists & 1) == 0) + split_dist = (split_dist + dists[(num_dists / 2) - 1].first) * .5f; + + uint32_t total_inner = 0, total_outer = 0; + + for (uint32_t j = 0; j < n; j++) + { + const uint32_t pat_index = pat_indices[j]; + if (pat_index == split_pat_index) + continue; + + float dist = trial_vantage.get_distance(pUnique_pats[pat_index]); + + if (dist <= split_dist) + total_inner++; + else + total_outer++; + } + + float split_metric = (float)minimum(total_inner, total_outer) / (float)maximum(total_inner, total_outer); + + if ( (split_metric > best_split_metric) || + ((split_metric == best_split_metric) && (s.m_var > best_split_var)) ) + { + best_split_metric = split_metric; + best_split_dist = split_dist; + best_split_pat = split_pat_index; + best_split_var = (float)s.m_var; + } + } + + return std::make_pair(best_split_pat, best_split_dist); + } +}; + +struct partition +{ + uint64_t m_p; + + inline partition() : + m_p(0) + { + } + + inline partition(uint64_t p) : + m_p(p) + { + assert(p < (1ULL << 36)); + } + + inline partition& operator=(uint64_t p) + { + assert(p < (1ULL << 36)); + m_p = p; + return *this; + } + + inline bool operator< (const partition& p) const + { + return m_p < p.m_p; 
+ } + + inline bool operator== (const partition& p) const + { + return m_p == p.m_p; + } + + inline operator size_t() const + { + return hash_hsieh((const uint8_t *)&m_p, sizeof(m_p)); + } +}; + +partition_pattern_vec g_partitions2[NUM_UNIQUE_PARTITIONS2]; +int g_part2_seed_to_unique_index[1024]; +vp_tree g_part2_vp_tree; + +static inline vec3F vec3F_norm_approx(vec3F axis) +{ + float l = axis.norm(); + axis = (fabs(l) >= SMALL_FLOAT_VAL) ? (axis * bu_math::inv_sqrt(l)) : vec3F(0.577350269f); + return axis; +} + +static void init_partitions2_6x6() +{ +#if 0 + // makes pattern bits to the 10-bit ASTC seed index + typedef basisu::hash_map partition2_hash_map; + partition2_hash_map phash; + phash.reserve(1024); + + for (uint32_t i = 0; i < 1024; i++) + { + uint64_t p_bits = 0; + uint64_t p_bits_inv = 0; + + for (uint32_t y = 0; y < 6; y++) + { + for (uint32_t x = 0; x < 6; x++) + { + uint64_t p = astc_helpers::compute_texel_partition(i, x, y, 0, 2, false); + assert(p < 2); + + p_bits |= (p << (x + y * 6)); + p_bits_inv |= ((1 - p) << (x + y * 6)); + } + } + + if (!p_bits) + continue; + if (p_bits == ((1ULL << 36) - 1)) + continue; + + assert(p_bits < (1ULL << 36)); + assert(p_bits_inv < (1ULL << 36)); + + if (phash.contains(p_bits)) + { + } + else if (phash.contains(p_bits_inv)) + { + } + else + { + auto res = phash.insert(p_bits, i); + assert(res.second); + BASISU_NOTE_UNUSED(res); + } + } + + uint32_t num_unique_partitions2 = 0; + + for (const auto& r : phash) + { + assert(r.second < 1024); + + const uint32_t unique_index = num_unique_partitions2; + assert(unique_index < NUM_UNIQUE_PARTITIONS2); + + partition_pattern_vec pat_vec; + for (uint32_t i = 0; i < 36; i++) + pat_vec[i] = (uint8_t)((r.first >> i) & 1); + + g_partitions2[unique_index] = pat_vec; + + assert(g_part2_unique_index_to_seed[unique_index] == r.second); + g_part2_seed_to_unique_index[r.second] = unique_index; + + num_unique_partitions2++; + } + assert(num_unique_partitions2 == 
NUM_UNIQUE_PARTITIONS2); +#else + for (uint32_t unique_index = 0; unique_index < NUM_UNIQUE_PARTITIONS2; unique_index++) + { + const uint32_t seed_index = g_part2_unique_index_to_seed[unique_index]; + assert(seed_index < 1024); + + assert(g_part2_seed_to_unique_index[seed_index] == 0); + g_part2_seed_to_unique_index[seed_index] = unique_index; + + partition_pattern_vec& pat_vec = g_partitions2[unique_index]; + + for (uint32_t y = 0; y < 6; y++) + { + for (uint32_t x = 0; x < 6; x++) + { + uint8_t p = (uint8_t)astc_helpers::compute_texel_partition(seed_index, x, y, 0, 2, false); + assert(p < 2); + + pat_vec[x + y * 6] = p; + } + } + } +#endif + + g_part2_vp_tree.init(NUM_UNIQUE_PARTITIONS2, g_partitions2); +} + +static bool estimate_partition2_6x6( + const basist::half_float pBlock_pixels_half[][3], + int* pBest_parts, uint32_t num_best_parts) +{ + const uint32_t BLOCK_W = 6, BLOCK_H = 6, BLOCK_T = BLOCK_W * BLOCK_H; + + vec3F training_vecs[BLOCK_T], mean(0.0f); + + for (uint32_t i = 0; i < BLOCK_T; i++) + { + vec3F& v = training_vecs[i]; + + v[0] = (float)pBlock_pixels_half[i][0]; + v[1] = (float)pBlock_pixels_half[i][1]; + v[2] = (float)pBlock_pixels_half[i][2]; + + mean += v; + } + mean *= (1.0f / (float)BLOCK_T); + + vec3F max_vals(-BIG_FLOAT_VAL); + + for (uint32_t i = 0; i < BLOCK_T; i++) + { + vec3F& v = training_vecs[i]; + max_vals = vec3F::component_max(max_vals, v); + } + + // Initialize principle axis approximation + vec3F axis(max_vals - mean); + + // Incremental approx. PCA - only viable if we have a reasonably fast approximation for 1.0/sqrt(x). 
+ for (uint32_t i = 0; i < BLOCK_T; i++) + { + axis = vec3F_norm_approx(axis); + + vec3F color(training_vecs[i] - mean); + + float d = color.dot(axis); + + axis += color * d; + } + + if (axis.norm() < SMALL_FLOAT_VAL) + axis.set(0.57735027f); + else + axis.normalize_in_place(); + +#if BRUTE_FORCE_PART_SEARCH + int desired_parts[BLOCK_H][BLOCK_W]; // [y][x] + for (uint32_t i = 0; i < BLOCK_T; i++) + { + float proj = (training_vecs[i] - mean).dot(axis); + + desired_parts[i / BLOCK_W][i % BLOCK_W] = proj < 0.0f; + } +#else + partition_pattern_vec desired_part; + + for (uint32_t i = 0; i < BLOCK_T; i++) + { + float proj = (training_vecs[i] - mean).dot(axis); + + desired_part.m_parts[i] = proj < 0.0f; + } +#endif + + //interval_timer tm; + //tm.start(); + +#if BRUTE_FORCE_PART_SEARCH + uint32_t part_similarity[NUM_UNIQUE_PARTITIONS2]; + + for (uint32_t part_index = 0; part_index < NUM_UNIQUE_PARTITIONS2; part_index++) + { + const partition_pattern_vec &pat_vec = g_partitions2[part_index]; + + int total_sim_non_inv = 0; + int total_sim_inv = 0; + + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + int part = pat_vec[x + y * 6]; + + if (part == desired_parts[y][x]) + total_sim_non_inv++; + + if ((part ^ 1) == desired_parts[y][x]) + total_sim_inv++; + } + } + + int total_sim = maximum(total_sim_non_inv, total_sim_inv); + + part_similarity[part_index] = (total_sim << 16) | part_index; + + } // part_index; + + std::sort(part_similarity, part_similarity + NUM_UNIQUE_PARTITIONS2); + + for (uint32_t i = 0; i < num_best_parts; i++) + pBest_parts[i] = part_similarity[(NUM_UNIQUE_PARTITIONS2 - 1) - i] & 0xFFFF; +#else + vp_tree::result_queue results; + results.reserve(num_best_parts); + g_part2_vp_tree.find_nearest(2, desired_part, results, num_best_parts); + + assert(results.get_size() == num_best_parts); + + const auto& elements = results.get_elements(); + + for (uint32_t i = 0; i < results.get_size(); i++) + pBest_parts[i] = elements[1 + 
i].m_pat_index; +#endif + + //fmt_printf("{} ", tm.get_elapsed_ms()); + + return true; +} + +const uint32_t MIN_REFINE_LEVEL = 0; + +static bool encode_block_2_subsets( + trial_result res[2], + uint32_t grid_w, uint32_t grid_h, + uint32_t cem, + uint32_t weights_ise_range, uint32_t endpoints_ise_range, + const half_vec3* pBlock_pixels_half, const vec4F* pBlock_pixels_q16, + astc_hdr_codec_base_options& coptions, + bool uber_mode_flag, + int unique_pat_index, + uint32_t comp_level, + opt_mode_t mode11_opt_mode, + bool refine_endpoints_flag) +{ + const uint32_t num_endpoint_vals = (cem == 11) ? basist::NUM_MODE11_ENDPOINTS : basist::NUM_MODE7_ENDPOINTS; + + res[0].m_valid = false; + res[1].m_valid = false; + + const uint32_t BLOCK_W = 6, BLOCK_H = 6; + + astc_helpers::log_astc_block best_log_blk; + clear_obj(best_log_blk); + + best_log_blk.m_num_partitions = 2; + best_log_blk.m_color_endpoint_modes[0] = (uint8_t)cem; + best_log_blk.m_color_endpoint_modes[1] = (uint8_t)cem; + best_log_blk.m_grid_width = (uint8_t)grid_w; + best_log_blk.m_grid_height = (uint8_t)grid_h; + + best_log_blk.m_weight_ise_range = (uint8_t)weights_ise_range; + best_log_blk.m_endpoint_ise_range = (uint8_t)endpoints_ise_range; + + partition_pattern_vec* pPat = &g_partitions2[unique_pat_index]; + const uint32_t p_seed = g_part2_unique_index_to_seed[unique_pat_index]; + + vec4F part_pixels_q16[2][64]; + half_vec3 part_half_pixels[2][64]; + uint8_t part_pixel_index[2][64]; + uint32_t part_total_pixels[2] = { 0 }; + + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + uint32_t part_index = (*pPat)[x + y * BLOCK_W]; + + uint32_t l = part_total_pixels[part_index]; + + part_pixels_q16[part_index][l] = pBlock_pixels_q16[x + y * BLOCK_W]; + part_half_pixels[part_index][l] = pBlock_pixels_half[x + y * BLOCK_W]; + part_pixel_index[part_index][l] = (uint8_t)(x + y * BLOCK_W); + + part_total_pixels[part_index] = l + 1; + } // x + } // y + + uint8_t 
blk_endpoints[2][basist::NUM_MODE11_ENDPOINTS]; + uint8_t blk_weights[2][BLOCK_W * BLOCK_H]; + uint32_t best_submode[2]; + + for (uint32_t part_iter = 0; part_iter < 2; part_iter++) + { + assert(part_total_pixels[part_iter]); + + double e; + if (cem == 7) + { + e = encode_astc_hdr_block_mode_7( + part_total_pixels[part_iter], + (basist::half_float(*)[3])part_half_pixels[part_iter], (vec4F*)part_pixels_q16[part_iter], + best_log_blk.m_weight_ise_range, + best_submode[part_iter], + BIG_FLOAT_VAL, + blk_endpoints[part_iter], + blk_weights[part_iter], + coptions, + best_log_blk.m_endpoint_ise_range); + } + else + { + assert(cem == 11); + + e = encode_astc_hdr_block_mode_11( + part_total_pixels[part_iter], + (basist::half_float(*)[3])part_half_pixels[part_iter], (vec4F*)part_pixels_q16[part_iter], + best_log_blk.m_weight_ise_range, + best_submode[part_iter], + BIG_FLOAT_VAL, + blk_endpoints[part_iter], + blk_weights[part_iter], + coptions, + false, + best_log_blk.m_endpoint_ise_range, uber_mode_flag, false, -1, 7, false, + mode11_opt_mode); + } + + if (e == BIG_FLOAT_VAL) + return false; + + } // part_iter + + uint8_t ise_weights[BLOCK_W * BLOCK_H]; + + uint32_t src_pixel_index[2] = { 0, 0 }; + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + uint32_t part_index = (*pPat)[x + y * BLOCK_W]; + ise_weights[x + y * BLOCK_W] = blk_weights[part_index][src_pixel_index[part_index]]; + src_pixel_index[part_index]++; + } // x + } // y + + if ((grid_w == BLOCK_W) && (grid_h == BLOCK_H)) + { + best_log_blk.m_partition_id = (uint16_t)p_seed; + + memcpy(best_log_blk.m_endpoints, blk_endpoints[0], num_endpoint_vals); + memcpy(best_log_blk.m_endpoints + num_endpoint_vals, blk_endpoints[1], num_endpoint_vals); + memcpy(best_log_blk.m_weights, ise_weights, BLOCK_W * BLOCK_H); + + res[0].m_valid = true; + res[0].m_log_blk = best_log_blk; + } + else + { + uint8_t desired_weights[BLOCK_H * BLOCK_W]; + + const auto& dequant_tab = 
astc_helpers::g_dequant_tables.get_weight_tab(weights_ise_range).m_ISE_to_val; + + for (uint32_t by = 0; by < BLOCK_H; by++) + for (uint32_t bx = 0; bx < BLOCK_W; bx++) + desired_weights[bx + by * BLOCK_W] = dequant_tab[ise_weights[bx + by * BLOCK_W]]; + + uint8_t downsampled_weights[BLOCK_H * BLOCK_W]; + + const float* pDownsample_matrix = get_6x6_downsample_matrix(grid_w, grid_h); + if (!pDownsample_matrix) + { + assert(0); + return false; + } + + downsample_weight_grid( + pDownsample_matrix, + BLOCK_W, BLOCK_H, // source/from dimension (block size) + grid_w, grid_h, // dest/to dimension (grid size) + desired_weights, // these are dequantized weights, NOT ISE symbols, [by][bx] + downsampled_weights); // [wy][wx] + + best_log_blk.m_partition_id = (uint16_t)p_seed; + memcpy(best_log_blk.m_endpoints, blk_endpoints[0], num_endpoint_vals); + memcpy(best_log_blk.m_endpoints + num_endpoint_vals, blk_endpoints[1], num_endpoint_vals); + + const auto& weight_to_ise = astc_helpers::g_dequant_tables.get_weight_tab(weights_ise_range).m_val_to_ise; + + for (uint32_t gy = 0; gy < grid_h; gy++) + for (uint32_t gx = 0; gx < grid_w; gx++) + best_log_blk.m_weights[gx + gy * grid_w] = weight_to_ise[downsampled_weights[gx + gy * grid_w]]; + + res[0].m_valid = true; + res[0].m_log_blk = best_log_blk; + + if ((refine_endpoints_flag) && (comp_level >= MIN_REFINE_LEVEL) && ((grid_w < 6) || (grid_h < 6))) + { + bool any_refined = false; + + for (uint32_t part_iter = 0; part_iter < 2; part_iter++) + { + bool refine_status = refine_endpoints( + cem, + endpoints_ise_range, + best_log_blk.m_endpoints + part_iter * num_endpoint_vals, // the endpoints to optimize + BLOCK_W, BLOCK_H, // block dimensions + grid_w, grid_h, best_log_blk.m_weights, weights_ise_range, // weight grid + part_total_pixels[part_iter], (basist::half_float(*)[3])part_half_pixels[part_iter], (vec4F*)part_pixels_q16[part_iter], + &part_pixel_index[part_iter][0], // maps this subset's pixels to block offsets + coptions, 
mode11_opt_mode); + + if (refine_status) + any_refined = true; + } + + if (any_refined) + { + res[1].m_valid = true; + res[1].m_log_blk = best_log_blk; + } + } + } + + return true; +} + +typedef basisu::hash_map > partition3_hash_map; + +partition_pattern_vec g_partitions3[NUM_UNIQUE_PARTITIONS3]; +int g_part3_seed_to_unique_index[1024]; +vp_tree g_part3_vp_tree; + +static void init_partitions3_6x6() +{ + uint32_t t = 0; + + for (uint32_t i = 0; i < 1024; i++) + g_part3_seed_to_unique_index[i] = -1; + + partition3_hash_map part3_hash; + part3_hash.reserve(512); + + for (uint32_t seed_index = 0; seed_index < 1024; seed_index++) + { + partition_pattern_vec p3; + uint32_t part_hist[3] = { 0 }; + + for (uint32_t y = 0; y < 6; y++) + { + for (uint32_t x = 0; x < 6; x++) + { + uint64_t p = astc_helpers::compute_texel_partition(seed_index, x, y, 0, 3, false); + assert(p < 3); + + p3.m_parts[x + y * 6] = (uint8_t)p; + part_hist[p]++; + } + } + + if (!part_hist[0] || !part_hist[1] || !part_hist[2]) + continue; + + uint32_t j; + for (j = 0; j < NUM_PART3_MAPPINGS; j++) + { + partition_pattern_vec temp_part3(p3.get_permuted3(j)); + + if (part3_hash.contains(temp_part3)) + break; + } + if (j < NUM_PART3_MAPPINGS) + continue; + + part3_hash.insert(p3, std::make_pair(seed_index, t) ); + + assert(g_part3_unique_index_to_seed[t] == seed_index); + g_part3_seed_to_unique_index[seed_index] = t; + g_partitions3[t] = p3; + + t++; + } + + g_part3_vp_tree.init(NUM_UNIQUE_PARTITIONS3, g_partitions3); +} + +static bool estimate_partition3_6x6( + const basist::half_float pBlock_pixels_half[][3], + int* pBest_parts, uint32_t num_best_parts) +{ + const uint32_t BLOCK_W = 6, BLOCK_H = 6, BLOCK_T = BLOCK_W * BLOCK_H, NUM_SUBSETS = 3; + + assert(num_best_parts && (num_best_parts <= NUM_UNIQUE_PARTITIONS3)); + + vec3F training_vecs[BLOCK_T], mean(0.0f); + + float brightest_inten = 0.0f, darkest_inten = BIG_FLOAT_VAL; + vec3F cluster_centroids[NUM_SUBSETS]; + + for (uint32_t i = 0; i < BLOCK_T; 
i++) + { + vec3F& v = training_vecs[i]; + + v.set((float)pBlock_pixels_half[i][0], (float)pBlock_pixels_half[i][1], (float)pBlock_pixels_half[i][2]); + + float inten = v.dot(vec3F(1.0f)); + if (inten < darkest_inten) + { + darkest_inten = inten; + cluster_centroids[0] = v; + } + + if (inten > brightest_inten) + { + brightest_inten = inten; + cluster_centroids[1] = v; + } + } + + if (cluster_centroids[0] == cluster_centroids[1]) + return false; + + float furthest_dist2 = 0.0f; + for (uint32_t i = 0; i < BLOCK_T; i++) + { + vec3F& v = training_vecs[i]; + + float dist_a = v.squared_distance(cluster_centroids[0]); + if (dist_a == 0.0f) + continue; + + float dist_b = v.squared_distance(cluster_centroids[1]); + if (dist_b == 0.0f) + continue; + + float dist2 = dist_a + dist_b; + if (dist2 > furthest_dist2) + { + furthest_dist2 = dist2; + cluster_centroids[2] = v; + } + } + + if ((cluster_centroids[0] == cluster_centroids[2]) || (cluster_centroids[1] == cluster_centroids[2])) + return false; + + uint32_t cluster_pixels[NUM_SUBSETS][BLOCK_T]; + uint32_t num_cluster_pixels[NUM_SUBSETS]; + vec3F new_cluster_means[NUM_SUBSETS]; + + const uint32_t NUM_ITERS = 4; + + for (uint32_t s = 0; s < NUM_ITERS; s++) + { + memset(num_cluster_pixels, 0, sizeof(num_cluster_pixels)); + memset(new_cluster_means, 0, sizeof(new_cluster_means)); + + for (uint32_t i = 0; i < BLOCK_T; i++) + { + float d[NUM_SUBSETS] = { + training_vecs[i].squared_distance(cluster_centroids[0]), + training_vecs[i].squared_distance(cluster_centroids[1]), + training_vecs[i].squared_distance(cluster_centroids[2]) }; + + float min_d = d[0]; + uint32_t min_idx = 0; + for (uint32_t j = 1; j < NUM_SUBSETS; j++) + { + if (d[j] < min_d) + { + min_d = d[j]; + min_idx = j; + } + } + + cluster_pixels[min_idx][num_cluster_pixels[min_idx]] = i; + new_cluster_means[min_idx] += training_vecs[i]; + num_cluster_pixels[min_idx]++; + } // i + + for (uint32_t j = 0; j < NUM_SUBSETS; j++) + { + if (!num_cluster_pixels[j]) + return 
false; + + cluster_centroids[j] = new_cluster_means[j] / (float)num_cluster_pixels[j]; + } + } // s + + partition_pattern_vec desired_part; + for (uint32_t p = 0; p < NUM_SUBSETS; p++) + { + for (uint32_t i = 0; i < num_cluster_pixels[p]; i++) + { + const uint32_t pix_index = cluster_pixels[p][i]; + desired_part[pix_index] = (uint8_t)p; + } + } + +#if BRUTE_FORCE_PART_SEARCH + partition_pattern_vec desired_parts[NUM_PART3_MAPPINGS]; + for (uint32_t j = 0; j < NUM_PART3_MAPPINGS; j++) + desired_parts[j] = desired_part.get_permuted3(j); + + uint32_t part_similarity[NUM_UNIQUE_PARTITIONS3]; + + for (uint32_t part_index = 0; part_index < NUM_UNIQUE_PARTITIONS3; part_index++) + { + const partition_pattern_vec& pat = g_partitions3[part_index]; + + uint32_t lowest_pat_dist = UINT32_MAX; + for (uint32_t p = 0; p < NUM_PART3_MAPPINGS; p++) + { + uint32_t dist = pat.get_squared_distance(desired_parts[p]); + if (dist < lowest_pat_dist) + lowest_pat_dist = dist; + } + + part_similarity[part_index] = (lowest_pat_dist << 16) | part_index; + + } // part_index; + + std::sort(part_similarity, part_similarity + NUM_UNIQUE_PARTITIONS3); + + for (uint32_t i = 0; i < num_best_parts; i++) + pBest_parts[i] = part_similarity[i] & 0xFFFF; +#else + vp_tree::result_queue results; + results.reserve(num_best_parts); + g_part3_vp_tree.find_nearest(3, desired_part, results, num_best_parts); + + assert(results.get_size() == num_best_parts); + + const auto& elements = results.get_elements(); + + for (uint32_t i = 0; i < results.get_size(); i++) + pBest_parts[i] = elements[1 + i].m_pat_index; +#endif + + return true; +} + +static bool encode_block_3_subsets( + trial_result& res, + uint32_t cem, + uint32_t grid_w, uint32_t grid_h, + uint32_t weights_ise_range, uint32_t endpoints_ise_range, + const half_vec3* pBlock_pixels_half, const vec4F* pBlock_pixels_q16, + astc_hdr_codec_base_options& coptions, + bool uber_mode_flag, + const int* pEst_patterns, int num_est_patterns, + uint32_t comp_level, + 
opt_mode_t mode11_opt_mode) +{ + BASISU_NOTE_UNUSED(uber_mode_flag); + const uint32_t BLOCK_W = 6, BLOCK_H = 6, NUM_SUBSETS = 3; + const uint32_t num_endpoint_vals = astc_helpers::get_num_cem_values(cem); + + res.m_valid = false; + + double best_e = BIG_FLOAT_VAL; + + astc_helpers::log_astc_block best_log_blk; + clear_obj(best_log_blk); + + best_log_blk.m_num_partitions = NUM_SUBSETS; + best_log_blk.m_color_endpoint_modes[0] = (uint8_t)cem; + best_log_blk.m_color_endpoint_modes[1] = (uint8_t)cem; + best_log_blk.m_color_endpoint_modes[2] = (uint8_t)cem; + best_log_blk.m_grid_width = (uint8_t)grid_w; + best_log_blk.m_grid_height = (uint8_t)grid_h; + + best_log_blk.m_weight_ise_range = (uint8_t)weights_ise_range; + best_log_blk.m_endpoint_ise_range = (uint8_t)endpoints_ise_range; + + const uint32_t n = num_est_patterns ? num_est_patterns : NUM_UNIQUE_PARTITIONS3; + + for (uint32_t unique_p_iter = 0; unique_p_iter < n; unique_p_iter++) + { + const uint32_t unique_part_index = num_est_patterns ? 
pEst_patterns[unique_p_iter] : unique_p_iter; + assert(unique_part_index < NUM_UNIQUE_PARTITIONS3); + const partition_pattern_vec*pPart = &g_partitions3[unique_part_index]; + + vec4F part_pixels_q16[NUM_SUBSETS][64]; + half_vec3 part_half_pixels[NUM_SUBSETS][64]; + uint8_t part_pixel_index[NUM_SUBSETS][64]; + uint32_t part_total_pixels[NUM_SUBSETS] = { 0 }; + + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + const uint32_t part_index = pPart->m_parts[x + y * BLOCK_W]; + + uint32_t l = part_total_pixels[part_index]; + + part_pixels_q16[part_index][l] = pBlock_pixels_q16[x + y * BLOCK_W]; + part_half_pixels[part_index][l] = pBlock_pixels_half[x + y * BLOCK_W]; + part_pixel_index[part_index][l] = (uint8_t)(x + y * BLOCK_W); + + part_total_pixels[part_index] = l + 1; + } // x + } // y + + uint8_t blk_endpoints[NUM_SUBSETS][basist::NUM_MODE11_ENDPOINTS]; + uint8_t blk_weights[NUM_SUBSETS][BLOCK_W * BLOCK_H]; + uint32_t best_submode[NUM_SUBSETS]; + + double e = 0.0f; + for (uint32_t part_iter = 0; part_iter < NUM_SUBSETS; part_iter++) + { + assert(part_total_pixels[part_iter]); + + if (cem == 7) + { + e += encode_astc_hdr_block_mode_7( + part_total_pixels[part_iter], + (basist::half_float(*)[3])part_half_pixels[part_iter], (vec4F*)part_pixels_q16[part_iter], + best_log_blk.m_weight_ise_range, + best_submode[part_iter], + BIG_FLOAT_VAL, + blk_endpoints[part_iter], + blk_weights[part_iter], + coptions, + best_log_blk.m_endpoint_ise_range); + } + else + { + assert(cem == 11); + + e += encode_astc_hdr_block_mode_11( + part_total_pixels[part_iter], + (basist::half_float(*)[3])part_half_pixels[part_iter], (vec4F*)part_pixels_q16[part_iter], + best_log_blk.m_weight_ise_range, + best_submode[part_iter], + BIG_FLOAT_VAL, + blk_endpoints[part_iter], + blk_weights[part_iter], + coptions, + false, best_log_blk.m_endpoint_ise_range, uber_mode_flag, false, + FIRST_MODE11_SUBMODE_INDEX, MAX_MODE11_SUBMODE_INDEX, false, mode11_opt_mode); + } + + 
} // part_iter + + uint8_t ise_weights[BLOCK_W * BLOCK_H]; + + uint32_t src_pixel_index[NUM_SUBSETS] = { 0 }; + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + const uint32_t part_index = pPart->m_parts[x + y * BLOCK_W]; + + ise_weights[x + y * BLOCK_W] = blk_weights[part_index][src_pixel_index[part_index]]; + src_pixel_index[part_index]++; + } // x + } // y + + if ((grid_w == BLOCK_W) && (grid_h == BLOCK_H)) + { + if (e < best_e) + { + best_e = e; + best_log_blk.m_partition_id = (uint16_t)g_part3_unique_index_to_seed[unique_part_index]; + + for (uint32_t p = 0; p < NUM_SUBSETS; p++) + memcpy(best_log_blk.m_endpoints + num_endpoint_vals * p, blk_endpoints[p], num_endpoint_vals); + + memcpy(best_log_blk.m_weights, ise_weights, BLOCK_W * BLOCK_H); + } + } + else + { + uint8_t desired_weights[BLOCK_H * BLOCK_W]; + + const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(weights_ise_range).m_ISE_to_val; + + for (uint32_t by = 0; by < BLOCK_H; by++) + for (uint32_t bx = 0; bx < BLOCK_W; bx++) + desired_weights[bx + by * BLOCK_W] = dequant_tab[ise_weights[bx + by * BLOCK_W]]; + + uint8_t downsampled_weights[BLOCK_H * BLOCK_W]; + + const float* pDownsample_matrix = get_6x6_downsample_matrix(grid_w, grid_h); + if (!pDownsample_matrix) + { + assert(0); + return false; + } + + downsample_weight_grid( + pDownsample_matrix, + BLOCK_W, BLOCK_H, // source/from dimension (block size) + grid_w, grid_h, // dest/to dimension (grid size) + desired_weights, // these are dequantized weights, NOT ISE symbols, [by][bx] + downsampled_weights); // [wy][wx] + + astc_helpers::log_astc_block trial_blk(best_log_blk); + + trial_blk.m_partition_id = (uint16_t)g_part3_unique_index_to_seed[unique_part_index]; + + for (uint32_t p = 0; p < NUM_SUBSETS; p++) + memcpy(trial_blk.m_endpoints + num_endpoint_vals * p, blk_endpoints[p], num_endpoint_vals); + + const auto& weight_to_ise = 
astc_helpers::g_dequant_tables.get_weight_tab(weights_ise_range).m_val_to_ise; + + for (uint32_t gy = 0; gy < grid_h; gy++) + for (uint32_t gx = 0; gx < grid_w; gx++) + trial_blk.m_weights[gx + gy * grid_w] = weight_to_ise[downsampled_weights[gx + gy * grid_w]]; + + if ((comp_level >= MIN_REFINE_LEVEL) && ((grid_w < 6) || (grid_h < 6))) + { + for (uint32_t part_iter = 0; part_iter < NUM_SUBSETS; part_iter++) + { + bool refine_status = refine_endpoints( + cem, + endpoints_ise_range, + trial_blk.m_endpoints + part_iter * num_endpoint_vals, // the endpoints to optimize + BLOCK_W, BLOCK_H, // block dimensions + grid_w, grid_h, trial_blk.m_weights, weights_ise_range, // weight grid + part_total_pixels[part_iter], (basist::half_float(*)[3])part_half_pixels[part_iter], (vec4F*)part_pixels_q16[part_iter], + &part_pixel_index[part_iter][0], // maps this subset's pixels to block offsets + coptions, mode11_opt_mode); + + BASISU_NOTE_UNUSED(refine_status); + } + } + + half_vec4 decoded_pixels_half4[BLOCK_H][BLOCK_W]; // [y][x] + bool status = astc_helpers::decode_block(trial_blk, decoded_pixels_half4, BLOCK_W, BLOCK_H, astc_helpers::cDecodeModeHDR16); + assert(status); + if (!status) + return false; + + half_vec3 decoded_pixels_half3[BLOCK_H][BLOCK_W]; + for (uint32_t y = 0; y < BLOCK_H; y++) + for (uint32_t x = 0; x < BLOCK_W; x++) + decoded_pixels_half3[y][x].set(decoded_pixels_half4[y][x][0], decoded_pixels_half4[y][x][1], decoded_pixels_half4[y][x][2]); + + double trial_err = compute_block_error(BLOCK_W * BLOCK_H, (const basist::half_float*)pBlock_pixels_half, (const basist::half_float*)decoded_pixels_half3, coptions); + if (trial_err < best_e) + { + best_e = trial_err; + best_log_blk = trial_blk; + } + } + + } // unique_p_iter + + if (best_e < BIG_FLOAT_VAL) + { + res.m_log_blk = best_log_blk; + res.m_valid = true; + res.m_err = best_e; + } + else + { + res.m_valid = false; + } + + return res.m_valid; +} + +static uint32_t encode_values(bitwise_coder &coder, uint32_t 
total_values, const uint8_t *pVals, uint32_t endpoint_range)
{
	// Emits 'total_values' ISE symbols (already quantized/ISE-coded) to 'coder' using
	// ASTC's Bounded Integer Sequence Encoding: the low bits of each symbol are written
	// verbatim, while the trit/quint digits are packed in groups (5 trits per 8 bits,
	// 3 quints per 7 bits). Returns the total number of bits written.
	const uint32_t MAX_VALS = 64;
	uint32_t bit_values[MAX_VALS], tq_values[(MAX_VALS + 2) / 3];
	uint32_t total_tq_values = 0, tq_accum = 0, tq_mul = 1;

	assert((total_values) && (total_values <= MAX_VALS));

	// ISE range descriptor: [0]=raw bits per value, [1]=trits flag, [2]=quints flag.
	const uint32_t ep_bits = astc_helpers::g_ise_range_table[endpoint_range][0];
	const uint32_t ep_trits = astc_helpers::g_ise_range_table[endpoint_range][1];
	const uint32_t ep_quints = astc_helpers::g_ise_range_table[endpoint_range][2];

	for (uint32_t i = 0; i < total_values; i++)
	{
		uint32_t val = pVals[i];

		// Split each symbol into its raw low bits and its trit/quint digit.
		uint32_t bits = val & ((1 << ep_bits) - 1);
		uint32_t tq = val >> ep_bits;

		bit_values[i] = bits;

		if (ep_trits)
		{
			assert(tq < 3);
			tq_accum += tq * tq_mul;
			tq_mul *= 3;
			if (tq_mul == 243)
			{
				// 5 trits accumulated (3^5 == 243) - flush one packed group.
				assert(total_tq_values < BASISU_ARRAY_SIZE(tq_values));
				tq_values[total_tq_values++] = tq_accum;
				tq_accum = 0;
				tq_mul = 1;
			}
		}
		else if (ep_quints)
		{
			assert(tq < 5);
			tq_accum += tq * tq_mul;
			tq_mul *= 5;
			if (tq_mul == 125)
			{
				// 3 quints accumulated (5^3 == 125) - flush one packed group.
				assert(total_tq_values < BASISU_ARRAY_SIZE(tq_values));
				tq_values[total_tq_values++] = tq_accum;
				tq_accum = 0;
				tq_mul = 1;
			}
		}
	}

	uint32_t total_bits_output = 0;

	// Full groups: a 5-trit group costs 8 bits, a 3-quint group costs 7 bits.
	for (uint32_t i = 0; i < total_tq_values; i++)
	{
		const uint32_t num_bits = ep_trits ? 8 : 7;
		coder.put_bits(tq_values[i], num_bits);
		total_bits_output += num_bits;
	}

	// Flush any trailing partial trit/quint group with the minimal bit count for the
	// number of digits actually accumulated (tq_mul tracks 3^n or 5^n at this point).
	if (tq_mul > 1)
	{
		uint32_t num_bits;
		if (ep_trits)
		{
			if (tq_mul == 3)
				num_bits = 2;
			else if (tq_mul == 9)
				num_bits = 4;
			else if (tq_mul == 27)
				num_bits = 5;
			else //if (tq_mul == 81)
				num_bits = 7;
		}
		else
		{
			if (tq_mul == 5)
				num_bits = 3;
			else //if (tq_mul == 25)
				num_bits = 5;
		}
		coder.put_bits(tq_accum, num_bits);
		total_bits_output += num_bits;
	}

	// Finally the raw low bits of every value, in order.
	for (uint32_t i = 0; i < total_values; i++)
	{
		coder.put_bits(bit_values[i], ep_bits);
		total_bits_output += ep_bits;
	}

	return total_bits_output;
}

// Number of ISE endpoint values for the two supported color endpoint modes (CEM 7 or 11).
static inline uint32_t get_num_endpoint_vals(uint32_t cem)
{
	assert((cem == 7) || (cem == 11));
	return (cem == 11) ? basist::NUM_MODE11_ENDPOINTS : basist::NUM_MODE7_ENDPOINTS;
}

// Serializes one encoded block: truncated-binary block mode and endpoint mode headers,
// then either endpoint deltas (left/upper-neighbor prediction) or raw endpoints
// (preceded, for 2/3-subset blocks, by the canonical unique partition index).
static void code_block(bitwise_coder& coder,
	const astc_helpers::log_astc_block& log_blk,
	block_mode block_mode_index,
	endpoint_mode em, const uint8_t *pEP_deltas)
{
	coder.put_truncated_binary((uint32_t)block_mode_index, (uint32_t)block_mode::cBMTotalModes);
	coder.put_truncated_binary((uint32_t)em, (uint32_t)endpoint_mode::cTotal);

	const uint32_t num_endpoint_vals = get_num_endpoint_vals(log_blk.m_color_endpoint_modes[0]);

	if ((em == endpoint_mode::cUseLeftDelta) || (em == endpoint_mode::cUseUpperDelta))
	{
		// Delta-coded endpoints are only produced for single-subset blocks.
		assert(log_blk.m_num_partitions == 1);

		for (uint32_t i = 0; i < num_endpoint_vals; i++)
			coder.put_bits(pEP_deltas[i], NUM_ENDPOINT_DELTA_BITS);
	}
	else if (em == endpoint_mode::cRaw)
	{
		if (log_blk.m_num_partitions == 2)
		{
			// Code the canonical (unique) partition index, not the raw 10-bit ASTC seed.
			const int unique_partition_index = g_part2_seed_to_unique_index[log_blk.m_partition_id];
			assert(unique_partition_index != -1);

			coder.put_truncated_binary(unique_partition_index, NUM_UNIQUE_PARTITIONS2);
		}
		else if (log_blk.m_num_partitions == 3)
		{
			const int unique_partition_index = g_part3_seed_to_unique_index[log_blk.m_partition_id];
			assert(unique_partition_index != -1);

			coder.put_truncated_binary(unique_partition_index, NUM_UNIQUE_PARTITIONS3);
		}

		// Raw (unpredicted) endpoints for all subsets, BISE-coded.
		encode_values(coder, num_endpoint_vals * log_blk.m_num_partitions, log_blk.m_endpoints, log_blk.m_endpoint_ise_range);
	}

	// Weight grid (doubled for dual-plane blocks), BISE-coded.
	encode_values(coder, log_blk.m_grid_width * log_blk.m_grid_height * (log_blk.m_dual_plane ? 2 : 1), log_blk.m_weights, log_blk.m_weight_ise_range);
}

// Tuning knobs for the perceptual "smooth region" MSE-scaling map built by
// create_smooth_maps2(). Each (std-dev threshold, max scale) pair controls how strongly
// error is amplified in progressively larger low-activity neighborhoods.
struct smooth_map_params
{
	bool m_no_mse_scaling;              // if true, every pixel gets a scale of 1.0

	float m_max_smooth_std_dev;         // small-neighborhood activity threshold
	float m_smooth_max_mse_scale;       // max error amplification for smooth small areas

	float m_max_med_smooth_std_dev;     // medium-neighborhood threshold
	float m_med_smooth_max_mse_scale;

	float m_max_ultra_smooth_std_dev;   // large-neighborhood threshold
	float m_ultra_smooth_max_mse_scale;

	bool m_debug_images;                // dump dbg_*.png / scale_vis.png visualizations

	smooth_map_params()
	{
		clear();
	}

	void clear()
	{
		m_no_mse_scaling = false;

		// 3x3 region
		m_max_smooth_std_dev = 100.0f;
		m_smooth_max_mse_scale = 13000.0f;

		// 7x7 region
		m_max_med_smooth_std_dev = 9.0f;
		m_med_smooth_max_mse_scale = 15000.0f;

		// 11x11 region
		m_max_ultra_smooth_std_dev = 4.0f;
		// Earlier tunings, kept for reference:
		//m_ultra_smooth_max_mse_scale = 4500.0f;
		//m_ultra_smooth_max_mse_scale = 10000.0f;
		//m_ultra_smooth_max_mse_scale = 50000.0f;
		//m_ultra_smooth_max_mse_scale = 100000.0f;
		//m_ultra_smooth_max_mse_scale = 400000.0f;
		//m_ultra_smooth_max_mse_scale = 800000.0f;
		m_ultra_smooth_max_mse_scale = 2000000.0f;

		m_debug_images = true;
	}
};

// Per-destination-width resampling contributor lists (gaussian_filter evaluated over the
// Bell filter's support) used to downsample a 6x6 grid to grid_w/grid_h.
// Index 0 is unused; valid indices are 1-6.
Resampler::Contrib_List* g_contrib_lists[7]; // 1-6

static void init_contrib_lists()
{
	for (uint32_t dst_width = 1; dst_width <= 6; dst_width++)
		//g_contrib_lists[dst_width] = Resampler::make_clist(6, 6, basisu::Resampler::BOUNDARY_CLAMP, gaussian_filter, BASISU_GAUSSIAN_FILTER_SUPPORT, 6.0f / (float)dst_width, 0.0f);
		g_contrib_lists[dst_width] = Resampler::make_clist(6, 6, basisu::Resampler::BOUNDARY_CLAMP, gaussian_filter, BASISU_BELL_FILTER_SUPPORT, 6.0f / (float)dst_width, 0.0f);
}

#if 0
// Disabled variant: separably low-pass filters a 6x6 vec3F block (strength set by the
// target grid_x/grid_y), emitting both half-float and qlog16 outputs.
static void filter_block(uint32_t grid_x, uint32_t grid_y, const vec3F* pSrc_block, half_vec3 *pDst_block_half3, vec4F *pDst_block_q16)
{
	vec3F temp_block[6][6]; // [y][x]

	// first filter rows to temp_block
	if (grid_x == 6)
	{
		memcpy(temp_block, pSrc_block, sizeof(vec3F) * 6 * 6);
	}
	else
	{
		Resampler::Contrib_List* pRow_lists = g_contrib_lists[grid_x];

		for (uint32_t y = 0; y < 6; y++)
		{
			for (uint32_t x = 0; x < 6; x++)
			{
				vec3F p(0.0f);

				for (uint32_t i = 0; i < pRow_lists[x].n; i++)
					p += pSrc_block[y * 6 + pRow_lists[x].p[i].pixel] * pRow_lists[x].p[i].weight;

				p.clamp(0.0f, basist::ASTC_HDR_MAX_VAL);

				temp_block[y][x] = p;
			} // x
		} // y
	}

	// filter columns
	if (grid_y == 6)
	{
		for (uint32_t y = 0; y < 6; y++)
		{
			for (uint32_t x = 0; x < 6; x++)
			{
				for (uint32_t c = 0; c < 3; c++)
				{
					const basist::half_float h = basist::float_to_half(temp_block[y][x][c]);

					pDst_block_half3[x + y * 6][c] = h;
					pDst_block_q16[x + y * 6][c] = (float)half_to_qlog16(h);
				}

				pDst_block_q16[x + y * 6][3] = 0.0f;
			} // x
		} // y
	}
	else
	{
		Resampler::Contrib_List* pCol_lists = g_contrib_lists[grid_y];

		for (uint32_t x = 0; x < 6; x++)
		{
			for (uint32_t y = 0; y < 6; y++)
			{
				vec3F p(0.0f);

				for (uint32_t i = 0; i < pCol_lists[y].n; i++)
					p += temp_block[pCol_lists[y].p[i].pixel][x] * pCol_lists[y].p[i].weight;

				p.clamp(0.0f, basist::ASTC_HDR_MAX_VAL);

				for (uint32_t c = 0; c < 3; c++)
				{
					const basist::half_float h = basist::float_to_half(p[c]);

					pDst_block_half3[x + y * 6][c] = h;
					pDst_block_q16[x + y * 6][c] = (float)half_to_qlog16(h);
				}

				pDst_block_q16[x + y * 6][3] = 0.0f;

			} // y (inner loop)
		} // x (outer loop)
	}
}
#endif

// Separably low-pass filters a 6x6 vec4F block in place of a true downsample/upsample
// round trip; filter strength is selected by the target weight-grid size grid_x/grid_y
// (6 == identity along that axis). Only RGB is filtered/written.
static void filter_block(uint32_t grid_x, uint32_t grid_y, const vec4F* pSrc_block, vec4F* pDst_block)
{
	vec4F temp_block[6][6]; // [y][x]

	// first filter rows to temp_block
	if (grid_x == 6)
	{
		memcpy(temp_block, pSrc_block, sizeof(vec4F) * 6 * 6);
	}
	else
	{
		Resampler::Contrib_List* pRow_lists = g_contrib_lists[grid_x];

		for (uint32_t y = 0; y < 6; y++)
		{
			for (uint32_t x = 0; x < 6; x++)
			{
				vec3F p(0.0f);

				for (uint32_t i = 0; i < pRow_lists[x].n; i++)
					p += vec3F(pSrc_block[y * 6 + pRow_lists[x].p[i].pixel]) * pRow_lists[x].p[i].weight;

				p.clamp(0.0f, basist::ASTC_HDR_MAX_VAL);

				temp_block[y][x] = p;
			} // x
		} // y
	}

	// filter columns
	if (grid_y == 6)
	{
		for (uint32_t y = 0; y < 6; y++)
		{
			for (uint32_t x = 0; x < 6; x++)
			{
				for (uint32_t c = 0; c < 3; c++)
					pDst_block[x + y * 6][c] = temp_block[y][x][c];
			} // x
		} // y
	}
	else
	{
		Resampler::Contrib_List* pCol_lists = g_contrib_lists[grid_y];

		for (uint32_t x = 0; x < 6; x++)
		{
			for (uint32_t y = 0; y < 6; y++)
			{
				vec3F p(0.0f);

				for (uint32_t i = 0; i < pCol_lists[y].n; i++)
					p += temp_block[pCol_lists[y].p[i].pixel][x] * pCol_lists[y].p[i].weight;

				p.clamp(0.0f, basist::ASTC_HDR_MAX_VAL);

				pDst_block[x + y * 6] = p;

			} // y (inner loop)
		} // x (outer loop)
	}
}

// vec3F overload of the separable block filter above.
// NOTE(review): unlike the vec4F overload, this version never clamps to
// [0, ASTC_HDR_MAX_VAL] - presumably intentional for its callers, but confirm.
static void filter_block(uint32_t grid_x, uint32_t grid_y, const vec3F* pSrc_block, vec3F* pDst_block)
{
	vec3F temp_block[6][6]; // [y][x]

	// first filter rows to temp_block
	if (grid_x == 6)
	{
		memcpy(temp_block, pSrc_block, sizeof(vec3F) * 6 * 6);
	}
	else
	{
		Resampler::Contrib_List* pRow_lists = g_contrib_lists[grid_x];

		for (uint32_t y = 0; y < 6; y++)
		{
			for (uint32_t x = 0; x < 6; x++)
			{
				vec3F p(0.0f);

				for (uint32_t i = 0; i < pRow_lists[x].n; i++)
					p += vec3F(pSrc_block[y * 6 + pRow_lists[x].p[i].pixel]) * pRow_lists[x].p[i].weight;

				temp_block[y][x] = p;
			} // x
		} // y
	}

	// filter columns
	if (grid_y == 6)
	{
		memcpy((void *)pDst_block, temp_block, sizeof(vec3F) * 6 * 6);
	}
	else
	{
		Resampler::Contrib_List* pCol_lists = g_contrib_lists[grid_y];

		for (uint32_t x = 0; x < 6; x++)
		{
			for (uint32_t y = 0; y < 6; y++)
			{
				vec3F& p = pDst_block[x + y * 6];
				p.set(0.0f);

				for (uint32_t i = 0; i < pCol_lists[y].n; i++)
					p += temp_block[pCol_lists[y].p[i].pixel][x] * pCol_lists[y].p[i].weight;
			} // y (inner loop)
		} // x (outer loop)
	}
}

// Mean squared RGB difference between two 36-pixel (6x6) vec4F blocks (alpha ignored).
static float diff_blocks(const vec4F* pA, const vec4F* pB)
{
	const uint32_t BLOCK_T = 36;

	float diff = 0.0f;
	for (uint32_t i = 0; i < BLOCK_T; i++)
		diff += square(pA[i][0] - pB[i][0]) + square(pA[i][1] - pB[i][1]) + square(pA[i][2] - pB[i][2]);

	return diff * (1.0f / (float)BLOCK_T);
}

// Computes the per-channel standard deviation of the difference image (A - B) over a
// 6x6 block and returns the largest channel's std dev.
static float sub_and_compute_std_dev(const vec3F* pA, const vec3F* pB)
{
	const uint32_t BLOCK_T = 36;

	vec3F mean(0.0f);

	for (uint32_t i = 0; i < BLOCK_T; i++)
	{
		vec3F diff(pA[i] - pB[i]);
		mean += diff;
	}

	mean *= (1.0f / (float)BLOCK_T);

	vec3F diff_sum(0.0f);
	for (uint32_t i = 0; i < BLOCK_T; i++)
	{
		vec3F diff(pA[i] - pB[i]);
		diff -= mean;
		diff_sum += vec3F::component_mul(diff, diff);
	}

	vec3F var(diff_sum * (1.0f / (float)BLOCK_T));

	vec3F std_dev(sqrtf(var[0]), sqrtf(var[1]), sqrtf(var[2]));

	return maximum(std_dev[0], std_dev[1], std_dev[2]);
}

// Builds a per-pixel MSE scale map biasing the encoder to spend more bits on visually
// smooth regions: three nested activity tests (small, medium, large neighborhoods)
// each blend towards their configured max scale as the local per-channel std dev falls
// below the corresponding threshold. Optionally dumps debug visualizations, and returns
// the "ultra smooth" visualization via pUltra_smooth_img.
// NOTE(review): the first parameter's type 'vector2D&' appears to have lost its
// template argument (likely vector2D<float>, given the float scalars stored) - confirm
// against upstream.
static void create_smooth_maps2(
	vector2D& smooth_block_mse_scales,
	const image& orig_img,
	smooth_map_params& params, image* pUltra_smooth_img = nullptr)
{
	const uint32_t width = orig_img.get_width();
	const uint32_t height = orig_img.get_height();
	//const uint32_t total_pixels = orig_img.get_total_pixels();
	const uint32_t num_comps = 3;

	if (params.m_no_mse_scaling)
	{
		smooth_block_mse_scales.set_all(1.0f);
		return;
	}

	smooth_block_mse_scales.resize(width, height);

	image smooth_vis, med_smooth_vis, ultra_smooth_vis;

	if (params.m_debug_images)
	{
		smooth_vis.resize(width, height);
		med_smooth_vis.resize(width, height);
		ultra_smooth_vis.resize(width, height);
	}

	for (uint32_t y = 0; y < height; y++)
	{
		for (uint32_t x = 0; x < width; x++)
		{
			// Pass 1: 3x3 neighborhood activity.
			{
				tracked_stat_dbl comp_stats[4];
				for (int yd = -1; yd <= 1; yd++)
				{
					for (int xd = -1; xd <= 1; xd++)
					{
						const color_rgba& p = orig_img.get_clamped((int)x + xd, (int)y + yd);

						comp_stats[0].update((float)p[0]);
						comp_stats[1].update((float)p[1]);
						comp_stats[2].update((float)p[2]);
					}
				}

				float max_std_dev = 0.0f;
				for (uint32_t i = 0; i < num_comps; i++)
					max_std_dev = basisu::maximum(max_std_dev, (float)comp_stats[i].get_std_dev());

				float yl = clampf(max_std_dev / params.m_max_smooth_std_dev, 0.0f, 1.0f);
				//yl = powf(yl, 2.0f);
				yl = powf(yl, 1.0f / 2.0f); // substantially less bits

				smooth_block_mse_scales(x, y) = lerp(params.m_smooth_max_mse_scale, 1.0f, yl);

				if (params.m_debug_images)
				{
					//smooth_vis(x, y).set(clamp((int)((smooth_block_mse_scales(x, y) - 1.0f) / (params.m_smooth_max_mse_scale - 1.0f) * 255.0f + .5f), 0, 255));
					// white=high local activity (edges/detail)
					// black=low local activity (smooth - error is amplified)
					smooth_vis(x, y).set(clamp((int)((yl * 255.0f) + .5f), 0, 255));
				}
			}

			// Pass 2: medium neighborhood. NOTE(review): the tuning comments call this
			// the "7x7 region", but the [-S, S) loop bounds below cover a 6x6 window -
			// possibly intentional; confirm against upstream.
			{
				tracked_stat_dbl comp_stats[4];

				const int S = 3;
				for (int yd = -S; yd < S; yd++)
				{
					for (int xd = -S; xd < S; xd++)
					{
						const color_rgba& p = orig_img.get_clamped((int)x + xd, (int)y + yd);

						comp_stats[0].update((float)p[0]);
						comp_stats[1].update((float)p[1]);
						comp_stats[2].update((float)p[2]);
					}
				}

				float max_std_dev = 0.0f;
				for (uint32_t i = 0; i < num_comps; i++)
					max_std_dev = basisu::maximum(max_std_dev, (float)comp_stats[i].get_std_dev());

				float yl = clampf(max_std_dev / params.m_max_med_smooth_std_dev, 0.0f, 1.0f);
				//yl = powf(yl, 2.0f);

				smooth_block_mse_scales(x, y) = lerp(params.m_med_smooth_max_mse_scale, smooth_block_mse_scales(x, y), yl);

				if (params.m_debug_images)
					med_smooth_vis(x, y).set((int)std::round(yl * 255.0f));
			}

			// Pass 3: large neighborhood ("11x11 region" per tuning comments; the
			// [-S, S) bounds actually give a 10x10 window - see note above).
			{
				tracked_stat_dbl comp_stats[4];

				const int S = 5;
				for (int yd = -S; yd < S; yd++)
				{
					for (int xd = -S; xd < S; xd++)
					{
						const color_rgba& p = orig_img.get_clamped((int)x + xd, (int)y + yd);

						comp_stats[0].update((float)p[0]);
						comp_stats[1].update((float)p[1]);
						comp_stats[2].update((float)p[2]);
					}
				}

				float max_std_dev = 0.0f;
				for (uint32_t i = 0; i < num_comps; i++)
					max_std_dev = basisu::maximum(max_std_dev, (float)comp_stats[i].get_std_dev());

				float yl = clampf(max_std_dev / params.m_max_ultra_smooth_std_dev, 0.0f, 1.0f);
				yl = powf(yl, 2.0f);

				smooth_block_mse_scales(x, y) = lerp(params.m_ultra_smooth_max_mse_scale, smooth_block_mse_scales(x, y), yl);

				if (params.m_debug_images)
					ultra_smooth_vis(x, y).set((int)std::round(yl * 255.0f));
			}

		}
	}

	if (params.m_debug_images)
	{
		save_png("dbg_smooth_vis.png", smooth_vis);
		save_png("dbg_med_smooth_vis.png", med_smooth_vis);
		save_png("dbg_ultra_smooth_vis.png", ultra_smooth_vis);

		image vis_img(width, height);

		float max_scale = 0.0f;
		for (uint32_t y = 0; y < height; y++)
			for (uint32_t x = 0; x < width; x++)
				max_scale = basisu::maximumf(max_scale, smooth_block_mse_scales(x, y));

		for (uint32_t y = 0; y < height; y++)
			for (uint32_t x = 0; x < width; x++)
				vis_img(x, y).set((int)std::round(smooth_block_mse_scales(x, y) * 255.0f / max_scale));

		save_png("scale_vis.png", vis_img);
	}

	// NOTE(review): when m_debug_images is false ultra_smooth_vis was never resized,
	// so the caller receives an empty image here - confirm callers expect that.
	if (pUltra_smooth_img)
		*pUltra_smooth_img = ultra_smooth_vis;
}

// Intensity below which a pixel counts as "really dark" for error amplification.
const float REALLY_DARK_I_THRESHOLD = 0.0625f;
const float REALLY_DARK_MSE_ERR_SCALE = 128.0f;
const float REALLY_DARK_DELTA_ITP_JND_SCALE = 5.0f;

// Squared ITP error between two pixels, optionally amplified on very dark pixels.
static float compute_pixel_mse_itp(const vec3F& orig_pixel_itp, const vec3F& comp_pixel_itp, bool delta_itp_dark_adjustment)
{
	float delta_i = orig_pixel_itp[0] - comp_pixel_itp[0];
	float delta_t = orig_pixel_itp[1] - comp_pixel_itp[1];
	float delta_p = orig_pixel_itp[2] - comp_pixel_itp[2];

	float err = (delta_i * delta_i) + (delta_t * delta_t) + (delta_p * delta_p);

	if (delta_itp_dark_adjustment)
	{
		// We have to process a large range of inputs, including extremely dark inputs.
		// Artificially amplify MSE on very dark pixels - otherwise they'll be overly compressed at higher lambdas.
		// This is to better handle very dark signals which could be explicitly overexposed.
		float s = bu_math::smoothstep(0.0f, REALLY_DARK_I_THRESHOLD, orig_pixel_itp[0]);
		s = lerp(REALLY_DARK_MSE_ERR_SCALE, 1.0f, s);
		err *= s;
	}

	return err;
}

// Mean of compute_pixel_mse_itp() over a block_w x block_h block.
static float compute_block_mse_itp(uint32_t block_w, uint32_t block_h, const vec3F* pOrig_pixels_itp, const vec3F* pComp_pixels_itp, bool delta_itp_dark_adjustment)
{
	float total_mse = 0.0f;

	for (uint32_t y = 0; y < block_h; y++)
	{
		for (uint32_t x = 0; x < block_w; x++)
		{
			total_mse += compute_pixel_mse_itp(pOrig_pixels_itp[x + y * block_w], pComp_pixels_itp[x + y * block_w], delta_itp_dark_adjustment);
		} // x
	} // y

	return total_mse * (1.0f / (float)(block_w * block_h));
}

// SSIM-style similarity over a block of ITP pixels: per-channel luminance, contrast
// and structure terms (k1=.01/k2=.03 stabilization constants), with the luminance
// term weighted more heavily, and the three channel scores multiplied together.
static float compute_block_ssim_itp(uint32_t block_w, uint32_t block_h, const vec3F* pOrig_pixels_itp, const vec3F* pComp_pixels_itp)
{
	const uint32_t n = block_w * block_h;
	assert(n <= 36);

	stats x_stats[3], y_stats[3];
	comparative_stats xy_cov[3];

	// Per-channel means/variances and cross-covariance (stride 3 walks one channel
	// of the interleaved vec3F array).
	for (uint32_t c = 0; c < 3; c++)
	{
		x_stats[c].calc_simplified(n, &pOrig_pixels_itp[0][c], 3);
		y_stats[c].calc_simplified(n, &pComp_pixels_itp[0][c], 3);
	}

	for (uint32_t c = 0; c < 3; c++)
		xy_cov[c].calc_cov(n, &pOrig_pixels_itp[0][c], &pComp_pixels_itp[0][c], 3, 3, &x_stats[c], &y_stats[c]);

	float ssim[3];
	const double d = 1.0f, k1 = .01f, k2 = .03f;

	// weight mean error more highly to reduce blocking
	float ap = 1.5f, bp = 1.0f, cp = 1.0f;

	const double s_c1 = square(k1 * d), s_c2 = square(k2 * d);
	const double s_c3(s_c2 * .5f);

	for (uint32_t c = 0; c < 3; c++)
	{
		float lum = (float)((2.0f * x_stats[c].m_avg * y_stats[c].m_avg + s_c1) / (square(x_stats[c].m_avg) + square(y_stats[c].m_avg) + s_c1));
		lum = saturate(lum);

		float con = (float)((2.0f * x_stats[c].m_std_dev * y_stats[c].m_std_dev + s_c2) / (x_stats[c].m_var + y_stats[c].m_var + s_c2));
		con = saturate(con);

		float str = (float)((xy_cov[c].m_cov + s_c3) / (x_stats[c].m_std_dev * y_stats[c].m_std_dev + s_c3));
		str = saturate(str);

		ssim[c] = powf(lum, ap) * powf(con, bp) * powf(str, cp);
	}

#if 0
	float final_ssim = (ssim[0] * .4f + ssim[1] * .3f + ssim[2] * .3f);
#elif 1
	float final_ssim = ssim[0] * ssim[1] * ssim[2];
#else
	const float LP = .75f;
	float final_ssim = ssim[0] * powf((ssim[1] + ssim[2]) * .5f, LP);
#endif

	return final_ssim;
}

// delta ITP, 1.0 is JND (Rec. ITU-R BT.2124), modified for higher error at low light
static float compute_pixel_delta_itp(const vec3F& a, const vec3F& b, const vec3F& orig, bool delta_itp_dark_adjustment)
{
	float delta_i = a[0] - b[0];
	float delta_t = a[1] - b[1];
	float delta_p = a[2] - b[2];

	// 720x scale per BT.2124's deltaE_ITP definition.
	float err = 720.0f * sqrtf((delta_i * delta_i) + (delta_t * delta_t) + (delta_p * delta_p));

	float s = bu_math::smoothstep(0.0f, REALLY_DARK_I_THRESHOLD, orig[0]);

	if (delta_itp_dark_adjustment)
	{
		// This is to better handle very dark signals which could be explicitly overexposed.
		s = lerp(REALLY_DARK_DELTA_ITP_JND_SCALE, 1.0f, s);
		err *= s;
	}

	return err;
}

// One candidate encoding of a block (solid color, run, or a coded ASTC block),
// along with its decoded pixels, bit cost and RD statistics.
struct candidate_encoding
{
	encoding_type m_encoding_type;

	basist::half_float m_solid_color[3];

	uint32_t m_run_len;

	vec3F m_comp_pixels[MAX_BLOCK_H][MAX_BLOCK_W]; // [y][x]
	vec3F m_comp_pixels_itp[MAX_BLOCK_H][MAX_BLOCK_W]; // [y][x]

	endpoint_mode m_endpoint_mode;
	block_mode m_block_mode;

	bitwise_coder m_coder;

	// The block to code, which may not be valid ASTC. This may have to be transcoded (by requantizing the weights/endpoints) before it's valid ASTC.
	// Note the endpoints may be coded endpoints OR transcoded endpoints, depending on the encoding type.
	astc_helpers::log_astc_block m_coded_log_blk;

	// The block the decoder outputs.
	astc_helpers::log_astc_block m_decomp_log_blk;

	int m_reuse_delta_index;

	float m_t, m_d, m_bits;

	candidate_encoding()
	{
		clear();
	}

	candidate_encoding(const candidate_encoding &other)
	{
		*this = other;
	}

	candidate_encoding(candidate_encoding&& other)
	{
		*this = std::move(other);
	}

	// NOTE(review): both assignment operators skip m_comp_pixels_itp, m_t, m_d and
	// m_bits, and clear() does not reset m_comp_pixels_itp. Presumably these are
	// recomputed after copying - confirm, otherwise copies carry stale data.
	candidate_encoding& operator=(const candidate_encoding& rhs)
	{
		if (this == &rhs)
			return *this;

		m_encoding_type = rhs.m_encoding_type;
		memcpy(m_solid_color, rhs.m_solid_color, sizeof(m_solid_color));
		m_run_len = rhs.m_run_len;
		memcpy(m_comp_pixels, rhs.m_comp_pixels, sizeof(m_comp_pixels));
		m_endpoint_mode = rhs.m_endpoint_mode;
		m_block_mode = rhs.m_block_mode;
		m_coder = rhs.m_coder;
		m_coded_log_blk = rhs.m_coded_log_blk;
		m_decomp_log_blk = rhs.m_decomp_log_blk;
		m_reuse_delta_index = rhs.m_reuse_delta_index;

		return *this;
	}

	candidate_encoding& operator=(candidate_encoding&& rhs)
	{
		if (this == &rhs)
			return *this;

		m_encoding_type = rhs.m_encoding_type;
		memcpy(m_solid_color, rhs.m_solid_color, sizeof(m_solid_color));
		m_run_len = rhs.m_run_len;
		memcpy(m_comp_pixels, rhs.m_comp_pixels, sizeof(m_comp_pixels));
		m_endpoint_mode = rhs.m_endpoint_mode;
		m_block_mode = rhs.m_block_mode;
		m_coder = std::move(rhs.m_coder);
		m_coded_log_blk = rhs.m_coded_log_blk;
		m_decomp_log_blk = rhs.m_decomp_log_blk;
		m_reuse_delta_index = rhs.m_reuse_delta_index;

		return *this;
	}

	void clear()
	{
		m_encoding_type = encoding_type::cInvalid;

		clear_obj(m_solid_color);

		m_run_len = 0;

		clear_obj(m_comp_pixels);

		m_endpoint_mode = endpoint_mode::cInvalid;
		m_block_mode = block_mode::cInvalid;

		m_coder.restart();

		m_coded_log_blk.clear();
		m_decomp_log_blk.clear();

		m_t = 0;
		m_d = 0;
		m_bits = 0;

		m_reuse_delta_index = 0;
	}
};

bool decode_astc_block(uint32_t block_w, uint32_t block_h, astc_helpers::log_astc_block &log_blk, vec3F *pPixels)
{
	assert((block_w <= 6) && (block_h
<= 6)); + + half_vec4 decoded_pixels_half4[6 * 6]; // [y][x] + bool status = astc_helpers::decode_block(log_blk, decoded_pixels_half4, block_w, block_h, astc_helpers::cDecodeModeHDR16); + assert(status); + + if (!status) + return false; + + for (uint32_t y = 0; y < block_h; y++) + { + for (uint32_t x = 0; x < block_w; x++) + { + pPixels[x + y * block_w].set( + basist::half_to_float(decoded_pixels_half4[x + y * block_w][0]), + basist::half_to_float(decoded_pixels_half4[x + y * block_w][1]), + basist::half_to_float(decoded_pixels_half4[x + y * block_w][2])); + } // x + } //y + + return true; +} + +static inline bool validate_log_blk(const astc_helpers::log_astc_block &decomp_blk) +{ + astc_helpers::astc_block phys_blk; + return astc_helpers::pack_astc_block(phys_blk, decomp_blk); +} + +#define SYNC_MARKERS (0) + +static bool decode_file(const uint8_vec& comp_data, vector2D& decoded_blocks, uint32_t &width, uint32_t &height) +{ + interval_timer tm; + tm.start(); + + const uint32_t BLOCK_W = 6, BLOCK_H = 6; + + width = 0; + height = 0; + + if (comp_data.size() <= 2*3) + return false; + + basist::bitwise_decoder decoder; + if (!decoder.init(comp_data.data(), comp_data.size_u32())) + return false; + + if (decoder.get_bits(16) != 0xABCD) + return false; + + width = decoder.get_bits(16); + height = decoder.get_bits(16); + + if (!width || !height || (width > MAX_ASTC_HDR_6X6_DIM) || (height > MAX_ASTC_HDR_6X6_DIM)) + return false; + + const uint32_t num_blocks_x = (width + BLOCK_W - 1) / BLOCK_W; + const uint32_t num_blocks_y = (height + BLOCK_H - 1) / BLOCK_H; + const uint32_t total_blocks = num_blocks_x * num_blocks_y; + + decoded_blocks.resize(num_blocks_x, num_blocks_y); + //memset(decoded_blocks.get_ptr(), 0, decoded_blocks.size_in_bytes()); + + vector2D decoded_log_blocks(num_blocks_x, num_blocks_y); + //memset(decoded_log_blocks.get_ptr(), 0, decoded_log_blocks.size_in_bytes()); + + uint32_t cur_bx = 0, cur_by = 0; + uint32_t step_counter = 0; + 
BASISU_NOTE_UNUSED(step_counter); + + while (cur_by < num_blocks_y) + { + step_counter++; + + //if ((cur_bx == 9) && (cur_by == 13)) + // printf("!"); + +#if SYNC_MARKERS + uint32_t mk = decoder.get_bits(16); + if (mk != 0xDEAD) + { + printf("!"); + assert(0); + return false; + } +#endif + if (decoder.get_bits_remaining() < 1) + return false; + + encoding_type et = encoding_type::cBlock; + + uint32_t b0 = decoder.get_bits(1); + if (!b0) + { + uint32_t b1 = decoder.get_bits(1); + if (b1) + et = encoding_type::cReuse; + else + { + uint32_t b2 = decoder.get_bits(1); + if (b2) + et = encoding_type::cSolid; + else + et = encoding_type::cRun; + } + } + + switch (et) + { + case encoding_type::cRun: + { + if (!cur_bx && !cur_by) + return false; + + const uint32_t run_len = decoder.decode_vlc(5) + 1; + + uint32_t num_blocks_remaining = total_blocks - (cur_bx + cur_by * num_blocks_x); + if (run_len > num_blocks_remaining) + return false; + + uint32_t prev_bx = cur_bx, prev_by = cur_by; + + if (cur_bx) + prev_bx--; + else + { + prev_bx = num_blocks_x - 1; + prev_by--; + } + + const astc_helpers::log_astc_block& prev_log_blk = decoded_log_blocks(prev_bx, prev_by); + const astc_helpers::astc_block& prev_phys_blk = decoded_blocks(prev_bx, prev_by); + + for (uint32_t i = 0; i < run_len; i++) + { + decoded_log_blocks(cur_bx, cur_by) = prev_log_blk; + decoded_blocks(cur_bx, cur_by) = prev_phys_blk; + + cur_bx++; + if (cur_bx == num_blocks_x) + { + cur_bx = 0; + cur_by++; + } + } + + break; + } + case encoding_type::cSolid: + { + const basist::half_float rh = (basist::half_float)decoder.get_bits(15); + const basist::half_float gh = (basist::half_float)decoder.get_bits(15); + const basist::half_float bh = (basist::half_float)decoder.get_bits(15); + + astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, cur_by); + + log_blk.clear(); + log_blk.m_solid_color_flag_hdr = true; + log_blk.m_solid_color[0] = rh; + log_blk.m_solid_color[1] = gh; + log_blk.m_solid_color[2] = bh; 
+ log_blk.m_solid_color[3] = basist::float_to_half(1.0f); + + bool status = astc_helpers::pack_astc_block(decoded_blocks(cur_bx, cur_by), log_blk); + if (!status) + return false; + + cur_bx++; + if (cur_bx == num_blocks_x) + { + cur_bx = 0; + cur_by++; + } + + break; + } + case encoding_type::cReuse: + { + if (!cur_bx && !cur_by) + return false; + + const uint32_t reuse_delta_index = decoder.get_bits(REUSE_XY_DELTA_BITS); + + const int reuse_delta_x = g_reuse_xy_deltas[reuse_delta_index].m_x; + const int reuse_delta_y = g_reuse_xy_deltas[reuse_delta_index].m_y; + + const int prev_bx = cur_bx + reuse_delta_x, prev_by = cur_by + reuse_delta_y; + if ((prev_bx < 0) || (prev_bx >= (int)num_blocks_x)) + return false; + if (prev_by < 0) + return false; + + const astc_helpers::log_astc_block& prev_log_blk = decoded_log_blocks(prev_bx, prev_by); + const astc_helpers::astc_block& prev_phys_blk = decoded_blocks(prev_bx, prev_by); + + if (prev_log_blk.m_solid_color_flag_hdr) + return false; + + astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, cur_by); + astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by); + + log_blk = prev_log_blk; + + const uint32_t total_grid_weights = log_blk.m_grid_width * log_blk.m_grid_height * (log_blk.m_dual_plane ? 
2 : 1); + + bool status = basist::astc_6x6_hdr::decode_values(decoder, total_grid_weights, log_blk.m_weight_ise_range, log_blk.m_weights); + if (!status) + return false; + + astc_helpers::log_astc_block decomp_blk; + status = astc_helpers::unpack_block(&prev_phys_blk, decomp_blk, BLOCK_W, BLOCK_H); + if (!status) + return false; + + uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H * 2]; + basist::astc_6x6_hdr::requantize_astc_weights(total_grid_weights, log_blk.m_weights, log_blk.m_weight_ise_range, transcode_weights, decomp_blk.m_weight_ise_range); + + copy_weight_grid(log_blk.m_dual_plane, log_blk.m_grid_width, log_blk.m_grid_height, transcode_weights, decomp_blk); + + status = astc_helpers::pack_astc_block(phys_blk, decomp_blk); + if (!status) + return false; + + cur_bx++; + if (cur_bx == num_blocks_x) + { + cur_bx = 0; + cur_by++; + } + + break; + } + case encoding_type::cBlock: + { + const block_mode bm = (block_mode)decoder.decode_truncated_binary((uint32_t)block_mode::cBMTotalModes); + const endpoint_mode em = (endpoint_mode)decoder.decode_truncated_binary((uint32_t)endpoint_mode::cTotal); + + switch (em) + { + case endpoint_mode::cUseLeft: + case endpoint_mode::cUseUpper: + { + int neighbor_bx = cur_bx, neighbor_by = cur_by; + + if (em == endpoint_mode::cUseLeft) + neighbor_bx--; + else + neighbor_by--; + + if ((neighbor_bx < 0) || (neighbor_by < 0)) + return false; + + const astc_helpers::log_astc_block& neighbor_blk = decoded_log_blocks(neighbor_bx, neighbor_by); + if (!neighbor_blk.m_color_endpoint_modes[0]) + return false; + + const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)bm]; + const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem); + + if (bmd.m_cem != neighbor_blk.m_color_endpoint_modes[0]) + return false; + + astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, cur_by); + astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by); + + log_blk.clear(); + log_blk.m_num_partitions = 1; + 
log_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem; + log_blk.m_endpoint_ise_range = neighbor_blk.m_endpoint_ise_range; + log_blk.m_weight_ise_range = (uint8_t)bmd.m_weight_ise_range; + log_blk.m_grid_width = (uint8_t)bmd.m_grid_x; + log_blk.m_grid_height = (uint8_t)bmd.m_grid_y; + log_blk.m_dual_plane = (uint8_t)bmd.m_dp; + log_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel; + + memcpy(log_blk.m_endpoints, neighbor_blk.m_endpoints, num_endpoint_values); + + const uint32_t total_grid_weights = bmd.m_grid_x * bmd.m_grid_y * (bmd.m_dp ? 2 : 1); + + bool status = decode_values(decoder, total_grid_weights, bmd.m_weight_ise_range, log_blk.m_weights); + if (!status) + return false; + + astc_helpers::log_astc_block decomp_blk; + decomp_blk.clear(); + + decomp_blk.m_num_partitions = 1; + decomp_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem; + decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range; + decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range; + decomp_blk.m_dual_plane = bmd.m_dp; + decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel; + + basist::astc_6x6_hdr::requantize_ise_endpoints(bmd.m_cem, log_blk.m_endpoint_ise_range, log_blk.m_endpoints, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints); + + uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H * 2]; + basist::astc_6x6_hdr::requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range); + + copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk); + + status = astc_helpers::pack_astc_block(phys_blk, decomp_blk); + if (!status) + return false; + + cur_bx++; + if (cur_bx == num_blocks_x) + { + cur_bx = 0; + cur_by++; + } + + break; + } + case endpoint_mode::cUseLeftDelta: + case endpoint_mode::cUseUpperDelta: + { + int neighbor_bx = cur_bx, neighbor_by = cur_by; + + if (em == endpoint_mode::cUseLeftDelta) + 
neighbor_bx--; + else + neighbor_by--; + + if ((neighbor_bx < 0) || (neighbor_by < 0)) + return false; + + const astc_helpers::log_astc_block& neighbor_blk = decoded_log_blocks(neighbor_bx, neighbor_by); + if (!neighbor_blk.m_color_endpoint_modes[0]) + return false; + + const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)bm]; + const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem); + + if (bmd.m_cem != neighbor_blk.m_color_endpoint_modes[0]) + return false; + + astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, cur_by); + astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by); + + log_blk.clear(); + log_blk.m_num_partitions = 1; + log_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem; + log_blk.m_dual_plane = bmd.m_dp; + log_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel; + + log_blk.m_endpoint_ise_range = (uint8_t)bmd.m_endpoint_ise_range; + basist::astc_6x6_hdr::requantize_ise_endpoints(bmd.m_cem, neighbor_blk.m_endpoint_ise_range, neighbor_blk.m_endpoints, bmd.m_endpoint_ise_range, log_blk.m_endpoints); + + const int total_endpoint_delta_vals = 1 << NUM_ENDPOINT_DELTA_BITS; + const int low_delta_limit = -(total_endpoint_delta_vals / 2); // high_delta_limit = (total_endpoint_delta_vals / 2) - 1; + + const auto& ise_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_ISE_to_rank; + const auto& rank_to_ise = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_rank_to_ISE; + const int total_endpoint_levels = astc_helpers::get_ise_levels(log_blk.m_endpoint_ise_range); + + for (uint32_t i = 0; i < num_endpoint_values; i++) + { + int cur_val = ise_to_rank[log_blk.m_endpoints[i]]; + + int delta = (int)decoder.get_bits(NUM_ENDPOINT_DELTA_BITS) + low_delta_limit; + + cur_val += delta; + if ((cur_val < 0) || (cur_val >= total_endpoint_levels)) + return false; + + log_blk.m_endpoints[i] = rank_to_ise[cur_val]; + } + + log_blk.m_weight_ise_range = 
(uint8_t)bmd.m_weight_ise_range; + log_blk.m_grid_width = (uint8_t)bmd.m_grid_x; + log_blk.m_grid_height = (uint8_t)bmd.m_grid_y; + + const uint32_t total_grid_weights = bmd.m_grid_x * bmd.m_grid_y * (bmd.m_dp ? 2 : 1); + + bool status = decode_values(decoder, total_grid_weights, bmd.m_weight_ise_range, log_blk.m_weights); + if (!status) + return false; + + astc_helpers::log_astc_block decomp_blk; + decomp_blk.clear(); + + decomp_blk.m_num_partitions = 1; + decomp_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem; + decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range; + decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range; + decomp_blk.m_dual_plane = (uint8_t)bmd.m_dp; + decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel; + + basist::astc_6x6_hdr::requantize_ise_endpoints(bmd.m_cem, log_blk.m_endpoint_ise_range, log_blk.m_endpoints, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints); + + uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H * 2]; + basist::astc_6x6_hdr::requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range); + + copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk); + + status = astc_helpers::pack_astc_block(phys_blk, decomp_blk); + if (!status) + return false; + + cur_bx++; + if (cur_bx == num_blocks_x) + { + cur_bx = 0; + cur_by++; + } + + break; + } + case endpoint_mode::cRaw: + { + const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)bm]; + + const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem); + + astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, cur_by); + astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by); + + log_blk.clear(); + log_blk.m_num_partitions = (uint8_t)bmd.m_num_partitions; + + for (uint32_t p = 0; p < bmd.m_num_partitions; p++) + log_blk.m_color_endpoint_modes[p] = (uint8_t)bmd.m_cem; + + 
log_blk.m_endpoint_ise_range = (uint8_t)bmd.m_endpoint_ise_range; + log_blk.m_weight_ise_range = (uint8_t)bmd.m_weight_ise_range; + + log_blk.m_grid_width = (uint8_t)bmd.m_grid_x; + log_blk.m_grid_height = (uint8_t)bmd.m_grid_y; + log_blk.m_dual_plane = (uint8_t)bmd.m_dp; + log_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel; + + if (bmd.m_num_partitions == 2) + { + const uint32_t unique_partition_index = decoder.decode_truncated_binary(NUM_UNIQUE_PARTITIONS2); + log_blk.m_partition_id = (uint16_t)g_part2_unique_index_to_seed[unique_partition_index]; + } + else if (bmd.m_num_partitions == 3) + { + const uint32_t unique_partition_index = decoder.decode_truncated_binary(NUM_UNIQUE_PARTITIONS3); + log_blk.m_partition_id = (uint16_t)g_part3_unique_index_to_seed[unique_partition_index]; + } + + bool status = decode_values(decoder, num_endpoint_values * bmd.m_num_partitions, bmd.m_endpoint_ise_range, log_blk.m_endpoints); + if (!status) + return false; + + const uint32_t total_grid_weights = bmd.m_grid_x * bmd.m_grid_y * (bmd.m_dp ? 
2 : 1); + + status = decode_values(decoder, total_grid_weights, bmd.m_weight_ise_range, log_blk.m_weights); + if (!status) + return false; + + astc_helpers::log_astc_block decomp_blk; + decomp_blk.clear(); + + decomp_blk.m_dual_plane = bmd.m_dp; + decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel; + decomp_blk.m_partition_id = log_blk.m_partition_id; + + decomp_blk.m_num_partitions = (uint8_t)bmd.m_num_partitions; + + for (uint32_t p = 0; p < bmd.m_num_partitions; p++) + decomp_blk.m_color_endpoint_modes[p] = (uint8_t)bmd.m_cem; + + decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range; + decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range; + + for (uint32_t p = 0; p < bmd.m_num_partitions; p++) + basist::astc_6x6_hdr::requantize_ise_endpoints(bmd.m_cem, bmd.m_endpoint_ise_range, log_blk.m_endpoints + num_endpoint_values * p, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints + num_endpoint_values * p); + + uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H * 2]; + basist::astc_6x6_hdr::requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range); + + copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk); + + status = astc_helpers::pack_astc_block(phys_blk, decomp_blk); + if (!status) + return false; + + cur_bx++; + if (cur_bx == num_blocks_x) + { + cur_bx = 0; + cur_by++; + } + + break; + } + default: + { + assert(0); + return false; + } + } + + break; + } + default: + { + assert(0); + return false; + } + } + } + + if (decoder.get_bits(16) != 0xA742) + { + fmt_error_printf("End marker not found!\n"); + return false; + } + + //fmt_printf("Total decode_file() time: {} secs\n", tm.get_elapsed_secs()); + + return true; +} + +static bool unpack_physical_astc_block(const void* pBlock, uint32_t block_width, uint32_t block_height, vec4F* pPixels) +{ + astc_helpers::log_astc_block log_blk; + if 
(!astc_helpers::unpack_block(pBlock, log_blk, block_width, block_height)) + return false; + + basist::half_float half_block[MAX_BLOCK_W * MAX_BLOCK_H][4]; + if (!astc_helpers::decode_block(log_blk, half_block, block_width, block_height, astc_helpers::cDecodeModeHDR16)) + return false; + + const uint32_t total_block_pixels = block_width * block_height; + for (uint32_t p = 0; p < total_block_pixels; p++) + { + pPixels[p][0] = basist::half_to_float(half_block[p][0]); + pPixels[p][1] = basist::half_to_float(half_block[p][1]); + pPixels[p][2] = basist::half_to_float(half_block[p][2]); + pPixels[p][3] = basist::half_to_float(half_block[p][3]); + } + + return true; +} + +static bool unpack_physical_astc_block_google(const void* pBlock, uint32_t block_width, uint32_t block_height, vec4F* pPixels) +{ + return basisu_astc::astc::decompress_hdr((float *)pPixels, (uint8_t*)pBlock, block_width, block_height); +} + +static bool pack_bc6h_image(const imagef &src_img, vector2D &bc6h_blocks, imagef *pPacked_bc6h_img, const fast_bc6h_params &enc_params) +{ + const uint32_t width = src_img.get_width(); + const uint32_t height = src_img.get_height(); + + if (pPacked_bc6h_img) + pPacked_bc6h_img->resize(width, height); + + interval_timer tm; + double total_enc_time = 0.0f; + BASISU_NOTE_UNUSED(total_enc_time); + + const uint32_t num_blocks_x = src_img.get_block_width(4); + const uint32_t num_blocks_y = src_img.get_block_height(4); + + bc6h_blocks.resize(num_blocks_x, num_blocks_y); + + for (uint32_t by = 0; by < num_blocks_y; by++) + { + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + // Extract source image block + vec4F block_pixels[4][4]; // [y][x] + src_img.extract_block_clamped(&block_pixels[0][0], bx * 4, by * 4, 4, 4); + + basist::half_float half_pixels[16 * 3]; // [y][x] + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + for (uint32_t c = 0; c < 3; c++) + { + float v = block_pixels[y][x][c]; + + basist::half_float h = 
basist::float_to_half(v); + + half_pixels[(x + y * 4) * 3 + c] = h; + + } // c + + } // x + } // y + + basist::bc6h_block& bc6h_blk = bc6h_blocks(bx, by); + + tm.start(); + + basist::astc_6x6_hdr::fast_encode_bc6h(half_pixels, &bc6h_blk, enc_params); + + total_enc_time += tm.get_elapsed_secs(); + + if (pPacked_bc6h_img) + { + basist::half_float unpacked_blk[16 * 3]; + bool status = unpack_bc6h(&bc6h_blk, unpacked_blk, false); + assert(status); + if (!status) + { + fmt_error_printf("unpack_bc6h() failed\n"); + return false; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + vec4F p; + + for (uint32_t c = 0; c < 3; c++) + { + float v = basist::half_to_float(unpacked_blk[(x + y * 4) * 3 + c]); + p[c] = v; + + } // c + + p[3] = 1.0f; + + pPacked_bc6h_img->set_clipped(bx * 4 + x, by * 4 + y, p); + } // x + } // y + } + + } // bx + } // by + + //fmt_printf("Total BC6H encode time: {}\n", total_enc_time); + + return true; +} + +static float dist_to_line_squared(const vec3F& p, const vec3F &line_org, const vec3F &line_dir) +{ + vec3F q(p - line_org); + vec3F v(q - q.dot(line_dir) * line_dir); + return v.dot(v); +} + +static void estimate_partitions_mode7_and_11( + uint32_t num_parts, // 2 or 3 partitions + uint32_t num_unique_pats, const partition_pattern_vec* pUnique_pats, // list of all unique, canonicalized patterns + uint32_t num_pats_to_examine, const uint32_t* pUnique_pat_indices_to_examine, // indices of pats to examine + const vec3F *pHalf_pixels_as_floats, // block's half pixel values casted to floats + const astc_hdr_codec_base_options& coptions, // options + uint32_t num_desired_pats, + int *pDesired_pat_indices_mode11, int *pDesired_pat_indices_mode7) // output indices +{ + BASISU_NOTE_UNUSED(coptions); + BASISU_NOTE_UNUSED(num_unique_pats); + + const uint32_t BLOCK_W = 6, BLOCK_H = 6, MAX_PARTS = 3; // BLOCK_T = 6 * 6 + assert(num_parts <= MAX_PARTS); + + struct candidate_res + { + float m_total_sq_dist; + uint32_t m_index; + 
bool operator< (const candidate_res& rhs) const { return m_total_sq_dist < rhs.m_total_sq_dist; } + }; + + const uint32_t MAX_CANDIDATES = 1024; + assert(num_desired_pats && (num_desired_pats <= MAX_CANDIDATES)); + + candidate_res mode11_candidates[MAX_CANDIDATES]; + candidate_res mode7_candidates[MAX_CANDIDATES]; + + const vec3F grayscale_axis(0.5773502691f); + + for (uint32_t examine_iter = 0; examine_iter < num_pats_to_examine; examine_iter++) + { + const uint32_t unique_part_index = pUnique_pat_indices_to_examine[examine_iter]; + assert(unique_part_index < num_unique_pats); + + const partition_pattern_vec* pPat = &pUnique_pats[unique_part_index]; + + vec3F part_means[MAX_PARTS]; + uint32_t part_total_texels[MAX_PARTS] = { 0 }; + + for (uint32_t i = 0; i < num_parts; i++) + part_means[i].clear(); + + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + const uint32_t part_index = (*pPat)(x, y); + assert(part_index < num_parts); + + part_means[part_index] += pHalf_pixels_as_floats[x + y * BLOCK_W]; + part_total_texels[part_index]++; + + } // x + } // y + + for (uint32_t i = 0; i < num_parts; i++) + { + assert(part_total_texels[i]); + part_means[i] /= (float)part_total_texels[i]; + } + + float part_cov[MAX_PARTS][6]; + memset(part_cov, 0, sizeof(part_cov)); + + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + const uint32_t part_index = (*pPat)(x, y); + assert(part_index < num_parts); + + const vec3F p(pHalf_pixels_as_floats[x + y * BLOCK_W] - part_means[part_index]); + + const float r = p[0], g = p[1], b = p[2]; + + part_cov[part_index][0] += r * r; + part_cov[part_index][1] += r * g; + part_cov[part_index][2] += r * b; + part_cov[part_index][3] += g * g; + part_cov[part_index][4] += g * b; + part_cov[part_index][5] += b * b; + + } // x + } // y + + // For each partition compute the total variance of all channels. 
+ float total_variance[MAX_PARTS]; + for (uint32_t part_index = 0; part_index < num_parts; part_index++) + total_variance[part_index] = part_cov[part_index][0] + part_cov[part_index][3] + part_cov[part_index][5]; + + vec3F part_axis[MAX_PARTS]; + float mode11_eigenvalue_est[MAX_PARTS]; // For each partition, compute the variance along the principle axis + float mode7_eigenvalue_est[MAX_PARTS]; // For each partition, compute the variance along the principle axis + + for (uint32_t part_index = 0; part_index < num_parts; part_index++) + { + float* pCov = &part_cov[part_index][0]; + + float xr = .9f, xg = 1.0f, xb = .7f; + + const uint32_t NUM_POWER_ITERS = 4; + for (uint32_t iter = 0; iter < NUM_POWER_ITERS; iter++) + { + float r = xr * pCov[0] + xg * pCov[1] + xb * pCov[2]; + float g = xr * pCov[1] + xg * pCov[3] + xb * pCov[4]; + float b = xr * pCov[2] + xg * pCov[4] + xb * pCov[5]; + + float m = maximumf(maximumf(fabsf(r), fabsf(g)), fabsf(b)); + + if (m >= 1e-10f) + { + m = 1.0f / m; + + r *= m; + g *= m; + b *= m; + } + + xr = r; + xg = g; + xb = b; + } + + float len_sq = xr * xr + xg * xg + xb * xb; + + if (len_sq < 1e-10f) + { + xr = grayscale_axis[0]; + xg = grayscale_axis[0]; + xb = grayscale_axis[0]; + } + else + { + len_sq = 1.0f / sqrtf(len_sq); + + xr *= len_sq; + xg *= len_sq; + xb *= len_sq; + } + + { + // Transform the principle axis by the covariance matrix, which will scale the vector by its eigenvalue (the variance of the dataset projected onto the principle axis). + float r = xr * pCov[0] + xg * pCov[1] + xb * pCov[2]; + float g = xr * pCov[1] + xg * pCov[3] + xb * pCov[4]; + float b = xr * pCov[2] + xg * pCov[4] + xb * pCov[5]; + + // Estimate the principle eigenvalue by computing the magnitude of the transformed vector. + // The result is the variance along the principle axis. 
+ //float z1 = sqrtf(r * r + g * g + b * b); // this works with the principle axis + //float z2 = r * xr + g * xg + b * xb; // compute length projected along xr,xg,xb + + mode11_eigenvalue_est[part_index] = r * xr + g * xg + b * xb; + } + + { + const float yrgb = grayscale_axis[0]; + + // Transform the grayscale axis by the covariance matrix, which will scale the vector by the eigenvalue (which is the variance of the dataset projected onto this vector). + float r = yrgb * pCov[0] + yrgb * pCov[1] + yrgb * pCov[2]; + float g = yrgb * pCov[1] + yrgb * pCov[3] + yrgb * pCov[4]; + float b = yrgb * pCov[2] + yrgb * pCov[4] + yrgb * pCov[5]; + + mode7_eigenvalue_est[part_index] = r * yrgb + g * yrgb + b * yrgb; + } + + } // part_index + + // Compute the total variance (squared error) of the other 2 axes by subtracting the total variance of all channels by the variance of the principle axis. + // TODO: Could also compute the ratio of the principle axis's variance vs. the total variance. + float mode11_total_sq_dist_to_line_alt = 0.0f; + for (uint32_t part_index = 0; part_index < num_parts; part_index++) + { + float d = maximum(0.0f, total_variance[part_index] - mode11_eigenvalue_est[part_index]); + mode11_total_sq_dist_to_line_alt += d; + } + + { +#if 0 + // TODO: This total distance can be computed rapidly. First compute the total variance of each channel (sum the diag entries of the covar matrix), + // then compute the principle eigenvalue, and subtract. The result is the variance of the projection distances. 
+ float total_sq_dist_to_line = 0.0f; + for (uint32_t i = 0; i < BLOCK_T; i++) + { + const uint32_t part_index = (*pPat)[i]; + assert(part_index < num_parts); + + total_sq_dist_to_line += dist_to_line_squared(pHalf_pixels_as_floats[i], part_means[part_index], part_axis[part_index]); + } + + mode11_candidates[examine_iter].m_total_sq_dist = total_sq_dist_to_line; +#else + mode11_candidates[examine_iter].m_total_sq_dist = mode11_total_sq_dist_to_line_alt; +#endif + mode11_candidates[examine_iter].m_index = unique_part_index; + } + + { + float mode7_total_sq_dist_to_line_alt = 0.0f; + for (uint32_t part_index = 0; part_index < num_parts; part_index++) + { + float d = maximum(0.0f, total_variance[part_index] - mode7_eigenvalue_est[part_index]); + mode7_total_sq_dist_to_line_alt += d; + } + + mode7_candidates[examine_iter].m_total_sq_dist = mode7_total_sq_dist_to_line_alt; + mode7_candidates[examine_iter].m_index = unique_part_index; + } + + } // examine_iter + + std::sort(&mode11_candidates[0], &mode11_candidates[num_pats_to_examine]); + std::sort(&mode7_candidates[0], &mode7_candidates[num_pats_to_examine]); + + for (uint32_t i = 0; i < num_desired_pats; i++) + pDesired_pat_indices_mode11[i] = mode11_candidates[i].m_index; + + for (uint32_t i = 0; i < num_desired_pats; i++) + pDesired_pat_indices_mode7[i] = mode7_candidates[i].m_index; +} + +static void estimate_partitions_mode7( + uint32_t num_parts, // 2 or 3 partitions + uint32_t num_unique_pats, const partition_pattern_vec* pUnique_pats, // list of all unique, canonicalized patterns + uint32_t num_pats_to_examine, const uint32_t* pUnique_pat_indices_to_examine, // indices of pats to examine + const vec3F* pHalf_pixels_as_floats, // block's half pixel values casted to floats + const astc_hdr_codec_base_options& coptions, // options + uint32_t num_desired_pats, uint32_t* pDesired_pat_indices) // output indices +{ + BASISU_NOTE_UNUSED(coptions); + BASISU_NOTE_UNUSED(num_unique_pats); + + const uint32_t BLOCK_W = 6, 
BLOCK_H = 6, BLOCK_T = 6 * 6, MAX_PARTS = 3; + assert(num_parts <= MAX_PARTS); + + struct candidate_res + { + float m_total_sq_dist; + uint32_t m_index; + bool operator< (const candidate_res& rhs) const { return m_total_sq_dist < rhs.m_total_sq_dist; } + }; + + const uint32_t MAX_CANDIDATES = 1024; + assert(num_desired_pats && (num_desired_pats <= MAX_CANDIDATES)); + + candidate_res candidates[MAX_CANDIDATES]; + + for (uint32_t examine_iter = 0; examine_iter < num_pats_to_examine; examine_iter++) + { + const uint32_t unique_part_index = pUnique_pat_indices_to_examine[examine_iter]; + assert(unique_part_index < num_unique_pats); + + const partition_pattern_vec* pPat = &pUnique_pats[unique_part_index]; + + vec3F part_means[MAX_PARTS]; + uint32_t part_total_texels[MAX_PARTS] = { 0 }; + + for (uint32_t i = 0; i < num_parts; i++) + part_means[i].clear(); + + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + const uint32_t part_index = (*pPat)(x, y); + assert(part_index < num_parts); + + part_means[part_index] += pHalf_pixels_as_floats[x + y * BLOCK_W]; + part_total_texels[part_index]++; + + } // x + } // y + + for (uint32_t i = 0; i < num_parts; i++) + { + assert(part_total_texels[i]); + part_means[i] /= (float)part_total_texels[i]; + } + + vec3F part_axis(0.5773502691f); + + // TODO: This total distance can be computed rapidly. First compute the total variance of each channel (sum the diag entries of the covar matrix), + // then compute the principle eigenvalue, and subtract. The result is the variance of the projection distances. 
+ float total_sq_dist_to_line = 0.0f; + for (uint32_t i = 0; i < BLOCK_T; i++) + { + const uint32_t part_index = (*pPat)[i]; + assert(part_index < num_parts); + + total_sq_dist_to_line += dist_to_line_squared(pHalf_pixels_as_floats[i], part_means[part_index], part_axis); + } + + candidates[examine_iter].m_total_sq_dist = total_sq_dist_to_line; + + candidates[examine_iter].m_index = unique_part_index; + + } // examine_iter + + std::sort(&candidates[0], &candidates[num_pats_to_examine]); + + for (uint32_t i = 0; i < num_desired_pats; i++) + pDesired_pat_indices[i] = candidates[i].m_index; +} + +static float calc_deblocking_penalty_itp( + uint32_t bx, uint32_t by, uint32_t width, uint32_t height, + const imagef& pass_src_img_itp, const candidate_encoding& candidate) +{ + float total_deblock_penalty = 0.0f; + + float total_orig_mse = 0.0f, total_comp_mse = 0.0f; + uint32_t total_c = 0; + + for (uint32_t b = 0; b < 4; b++) + { + for (uint32_t i = 0; i < 6; i++) + { + int ox = 0, oy = 0, qx = 0, qy = 0; + + switch (b) + { + case 0: + ox = bx * 6 + i; oy = (by - 1) * 6 + 5; + qx = bx * 6 + i; qy = by * 6; + break; + case 1: + ox = bx * 6 + i; oy = (by + 1) * 6; + qx = bx * 6 + i; qy = by * 6 + 5; + break; + case 2: + ox = (bx - 1) * 6 + 5; oy = by * 6 + i; + qx = bx * 6; qy = by * 6 + i; + break; + case 3: + ox = (bx + 1) * 6; oy = by * 6 + i; + qx = bx * 6 + 5; qy = by * 6 + i; + break; + } + + if ((ox < 0) || (oy < 0) || (ox >= (int)width) || (oy >= (int)height)) + continue; + + const vec3F& o_pixel_itp = pass_src_img_itp(ox, oy); + const vec3F& q_pixel_itp = pass_src_img_itp(qx, qy); + + const vec3F &d_pixel_itp = candidate.m_comp_pixels_itp[qy - by * 6][qx - bx * 6]; // compressed block + + vec3F orig_delta_v(o_pixel_itp - q_pixel_itp); + total_orig_mse += square(orig_delta_v[0]) + square(orig_delta_v[1]) + square(orig_delta_v[2]); + + vec3F d_delta_v(o_pixel_itp - d_pixel_itp); + total_comp_mse += square(d_delta_v[0]) + square(d_delta_v[1]) + square(d_delta_v[2]); + 
+ total_c++; + } + } + + if (total_c) + { + total_orig_mse /= (float)total_c; + total_comp_mse /= (float)total_c; + + if (total_orig_mse) + { + total_deblock_penalty = fabsf((total_comp_mse - total_orig_mse) / total_orig_mse); + } + } + + return total_deblock_penalty; +} + +static bool calc_strip_size( + float lambda, + uint32_t num_blocks_y, uint32_t total_threads, bool force_one_strip, + uint32_t& res_total_strips, uint32_t& res_rows_per_strip, astc_hdr_6x6_global_config &global_cfg) +{ + uint32_t total_strips = 1; + + if (lambda == 0.0f) + { + if (!force_one_strip) + { + total_strips = total_threads; + } + } + else + { + const uint32_t MIN_DESIRED_STRIPS = 8; + const uint32_t MAX_TARGET_STRIPS = 32; + const uint32_t TARGET_ASTC_6X6_ROWS_PER_STRIP = 12; + + if (!force_one_strip) + { + total_strips = maximum(1, num_blocks_y / TARGET_ASTC_6X6_ROWS_PER_STRIP); + + if (num_blocks_y >= MIN_DESIRED_STRIPS * 2) + total_strips = maximum(total_strips, MIN_DESIRED_STRIPS); + } + + total_strips = minimum(total_strips, MAX_TARGET_STRIPS); + } + + uint32_t rows_per_strip = 0; + if (total_strips <= 1) + { + rows_per_strip = num_blocks_y; + } + else + { + rows_per_strip = (num_blocks_y / total_strips) & ~1; + + if (rows_per_strip < 2) + rows_per_strip = 2;// num_blocks_y; + } + + assert((rows_per_strip == num_blocks_y) || ((rows_per_strip & 1) == 0)); + + total_strips = (num_blocks_y + rows_per_strip - 1) / rows_per_strip; + + if (global_cfg.m_debug_output) + { + fmt_printf("num_blocks_y: {}, total_threads : {}, Total strips : {}\n", num_blocks_y, total_threads, total_strips); + fmt_printf("ASTC 6x6 block rows per strip: {}\n", rows_per_strip); + fmt_printf("ASTC 6x6 block rows on final strip: {}\n", num_blocks_y - (total_strips - 1) * rows_per_strip); + } + + uint32_t total_rows = 0; + for (uint32_t strip_index = 0; strip_index < total_strips; strip_index++) + { + uint32_t strip_first_by = strip_index * rows_per_strip; + uint32_t strip_last_by = minimum(strip_first_by + 
rows_per_strip - 1, num_blocks_y); + + if (strip_index == (total_strips - 1)) + strip_last_by = num_blocks_y - 1; + + uint32_t num_strip_block_rows = (strip_last_by - strip_first_by) + 1; + total_rows += num_strip_block_rows; + + if (global_cfg.m_debug_output) + fmt_printf("Strip row: {}, total block rows: {}\n", strip_index, num_strip_block_rows); + } + + if (total_rows != num_blocks_y) + { + fmt_error_printf("Strip calc failed\n"); + return false; + } + + res_total_strips = total_strips; + res_rows_per_strip = rows_per_strip; + + return true; +} + +static void convet_rgb_image_to_itp(const imagef &src_img, imagef &dst_img, const astc_hdr_6x6_global_config& cfg) +{ + const uint32_t width = src_img.get_width(), height = src_img.get_height(); + + dst_img.resize(width, height); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + vec3F src_rgb(src_img(x, y)); + + vec3F src_itp; + linear_rgb_to_itp(src_rgb, src_itp, cfg); + + dst_img(x, y) = src_itp; + } + } +} + +const uint32_t BLOCK_W = 6, BLOCK_H = 6; +const uint32_t NUM_BLOCK_PIXELS = BLOCK_W * BLOCK_H; + +const float SOLID_PENALTY = 4.0f; +const float REUSE_PENALTY = 1.0f; +const float RUN_PENALTY = 10.0f; + +const float MSE_WEIGHT = 300000.0f; +const float SSIM_WEIGHT = 200.0f; +const float TWO_LEVEL_PENALTY = 1.425f; +const float SWITCH_TO_GAUSSIAN_FILTERED_THRESH1_D_SSIM = .04f; +const float SWITCH_TO_GAUSSIAN_FILTERED_THRESH2_D_SSIM = .04f; +const float COMPLEX_BLOCK_WEIGHT_GRID_2X2_MSE_PENALTY = 1.5f; +const float COMPLEX_BLOCK_WEIGHT_GRID_3X3_MSE_PENALTY = 1.25f; +const float COMPLEX_BLOCK_WEIGHT_GRID_4X4_MSE_PENALTY = 1.15f; + +struct uastc_hdr_6x6_debug_state +{ + uint32_t m_encoding_type_hist[(uint32_t)encoding_type::cTotal] = { 0 }; + uint32_t m_endpoint_mode_hist[(uint32_t)endpoint_mode::cTotal] = { 0 }; + uint32_t m_block_mode_hist[(uint32_t)block_mode::cBMTotalModes] = { 0 }; + uint64_t m_block_mode_total_bits[(uint32_t)block_mode::cBMTotalModes] = { 0 }; + + 
basisu::vector< basisu::stats > m_block_mode_comp_stats[(uint32_t)block_mode::cBMTotalModes][3];
+ basisu::vector< basisu::comparative_stats > m_block_mode_comparative_stats[(uint32_t)block_mode::cBMTotalModes][3];
+
+ std::atomic<uint32_t> m_total_gaussian1_blocks;
+ std::atomic<uint32_t> m_total_gaussian2_blocks;
+ std::atomic<uint32_t> m_total_filter_horizontal;
+ std::atomic<uint32_t> m_detail_stats[5];
+ std::atomic<uint32_t> m_total_mode7_skips;
+
+ std::atomic<uint32_t> m_total_blocks_compressed;
+
+ std::atomic<uint32_t> m_total_candidates_considered;
+ std::atomic<uint32_t> m_max_candidates_considered;
+
+ std::atomic<uint32_t> m_total_part2_stats[4];
+ std::atomic<uint32_t> m_dp_stats[5];
+
+ std::atomic<uint32_t> m_reuse_num_parts[4];
+ std::atomic<uint32_t> m_reuse_total_dp;
+
+ imagef m_stat_vis;
+ std::mutex m_stat_vis_mutex;
+
+ image m_part_vis;
+ image m_mode_vis;
+ image m_mode_vis2;
+ image m_grid_vis;
+ image m_enc_vis;
+ std::mutex m_vis_image_mutex;
+
+ std::atomic<uint32_t> m_comp_level_hist[ASTC_HDR_6X6_MAX_COMP_LEVEL + 1];
+
+ std::atomic<uint32_t> m_total_jnd_replacements;
+
+ std::mutex m_stats_mutex;
+
+ uastc_hdr_6x6_debug_state()
+ {
+ for (uint32_t i = 0; i < (uint32_t)block_mode::cBMTotalModes; i++)
+ {
+ for (uint32_t j = 0; j < 3; j++)
+ {
+ m_block_mode_comp_stats[i][j].reserve(512);
+ m_block_mode_comparative_stats[i][j].reserve(512);
+ }
+ }
+ }
+
+ void init(uint32_t width, uint32_t height)
+ {
+ m_stat_vis.resize(width, height);
+ m_part_vis.resize(width, height);
+ m_mode_vis.resize(width, height);
+ m_mode_vis2.resize(width, height);
+ m_grid_vis.resize(width, height);
+ m_enc_vis.resize(width, height);
+
+ basisu::clear_obj(m_encoding_type_hist);
+ basisu::clear_obj(m_endpoint_mode_hist);
+ basisu::clear_obj(m_block_mode_hist);
+ basisu::clear_obj(m_block_mode_total_bits);
+
+ for (uint32_t i = 0; i < (uint32_t)block_mode::cBMTotalModes; i++)
+ {
+ for (uint32_t j = 0; j < 3; j++)
+ {
+ m_block_mode_comp_stats[i][j].clear();
+ m_block_mode_comparative_stats[i][j].clear();
+ }
+ }
+
+ m_total_gaussian1_blocks.store(0);
+ m_total_gaussian2_blocks.store(0);
+ 
m_total_filter_horizontal.store(0); + for (uint32_t i = 0; i < std::size(m_detail_stats); i++) + m_detail_stats[i].store(0); + m_total_mode7_skips.store(0); + + for (uint32_t i = 0; i < std::size(m_comp_level_hist); i++) + m_comp_level_hist[i].store(0); + + m_total_blocks_compressed.store(0); + + m_total_candidates_considered.store(0); + m_max_candidates_considered.store(0); + + for (uint32_t i = 0; i < std::size(m_total_part2_stats); i++) + m_total_part2_stats[i].store(0); + + for (uint32_t i = 0; i < std::size(m_dp_stats); i++) + m_dp_stats[i].store(0); + + for (uint32_t i = 0; i < std::size(m_reuse_num_parts); i++) + m_reuse_num_parts[i] .store(0); + + m_reuse_total_dp.store(0); + + m_total_jnd_replacements.store(0); + } + + void print(uint32_t total_blocks) const + { + fmt_printf("Total blocks: {}\n", total_blocks); + fmt_printf("Total JND replacements: {} {3.2}%\n", m_total_jnd_replacements, (float)m_total_jnd_replacements * 100.0f / (float)total_blocks); + fmt_printf("Comp level histogram: {} {} {} {} {}\n", m_comp_level_hist[0], m_comp_level_hist[1], m_comp_level_hist[2], m_comp_level_hist[3], m_comp_level_hist[4]); + fmt_printf("Total gaussian 1 blocks: {} {3.2}%\n", m_total_gaussian1_blocks, (float)m_total_gaussian1_blocks * 100.0f / (float)total_blocks); + fmt_printf("Total gaussian 2 blocks: {} {3.2}%\n", m_total_gaussian2_blocks, (float)m_total_gaussian2_blocks * 100.0f / (float)total_blocks); + fmt_printf("Total filter horizontal: {} {3.2}%\n", m_total_filter_horizontal, (float)m_total_filter_horizontal * 100.0f / (float)total_blocks); + fmt_printf("Detail stats: Detailed block low grid skip: {}, Blurry block skip: {}, Very blurry block skip: {}, NH:{} H:{}\n", m_detail_stats[0], m_detail_stats[1], m_detail_stats[2], m_detail_stats[3], m_detail_stats[4]); + fmt_printf("Total mode7 skips: {}\n", m_total_mode7_skips); + + fmt_printf("Total candidates: {}, {} avg per block\n", m_total_candidates_considered, (float)m_total_candidates_considered / 
(float)total_blocks); + fmt_printf("Max ever candidates: {}\n", m_max_candidates_considered); + + fmt_printf("Part2/3 stats: {} {} {} {}\n", m_total_part2_stats[0], m_total_part2_stats[1], m_total_part2_stats[2], m_total_part2_stats[3]); + fmt_printf("Dual plane stats: {} {} {} {} {}\n", m_dp_stats[0], m_dp_stats[1], m_dp_stats[2], m_dp_stats[3], m_dp_stats[4]); + fmt_printf("Reuse total dual plane: {}\n", m_reuse_total_dp); + fmt_printf("Reuse part stats: {} {} {}\n", m_reuse_num_parts[1], m_reuse_num_parts[2], m_reuse_num_parts[3]); + + fmt_printf("\nEncoding type histogram:\n"); + for (uint32_t i = 0; i < std::size(m_encoding_type_hist); i++) + fmt_printf("{}: {}\n", i, m_encoding_type_hist[i]); + + fmt_printf("\nEndpoint mode histogram:\n"); + for (uint32_t i = 0; i < std::size(m_endpoint_mode_hist); i++) + fmt_printf("{}: {}\n", i, m_endpoint_mode_hist[i]); + + fmt_printf("\nBlock mode histogram:\n"); + + uint32_t total_dp = 0, total_sp = 0; + uint32_t total_mode11 = 0, total_mode7 = 0; + uint32_t part_hist[3] = { 0 }; + uint32_t part2_mode7_total = 0, part2_mode11_total = 0; + uint32_t total_used_modes = 0; + for (uint32_t i = 0; i < std::size(m_block_mode_hist); i++) + { + const auto& bm_desc = g_block_mode_descs[i]; + + const uint32_t total_uses = m_block_mode_hist[i]; + + if (bm_desc.m_dp) + total_dp += total_uses; + else + total_sp += total_uses; + + if (bm_desc.m_cem == 7) + total_mode7 += total_uses; + else + total_mode11 += total_uses; + + part_hist[bm_desc.m_num_partitions - 1] += total_uses; + + if (bm_desc.m_num_partitions == 2) + { + if (bm_desc.m_cem == 7) + part2_mode7_total += total_uses; + else + { + assert(bm_desc.m_cem == 11); + part2_mode11_total += total_uses; + } + } + + float avg_std_dev = 0.0f; + float avg_cross_correlations[3] = { 0 }; + + if (m_block_mode_comp_stats[i][0].size()) + { + const uint32_t num_uses = m_block_mode_comp_stats[i][0].size_u32(); + + for (uint32_t j = 0; j < num_uses; j++) + avg_std_dev += 
(float)maximum(m_block_mode_comp_stats[i][0][j].m_std_dev, m_block_mode_comp_stats[i][1][j].m_std_dev, m_block_mode_comp_stats[i][2][j].m_std_dev); + avg_std_dev /= (float)num_uses; + + for (uint32_t j = 0; j < num_uses; j++) + { + avg_cross_correlations[0] += fabsf((float)m_block_mode_comparative_stats[i][0][j].m_pearson); + avg_cross_correlations[1] += fabsf((float)m_block_mode_comparative_stats[i][1][j].m_pearson); + avg_cross_correlations[2] += fabsf((float)m_block_mode_comparative_stats[i][2][j].m_pearson); + } + + avg_cross_correlations[0] /= (float)num_uses; + avg_cross_correlations[1] /= (float)num_uses; + avg_cross_correlations[2] /= (float)num_uses; + } + + fmt_printf("{ 2}: uses: { 6}, cem: {}, dp: {} chan: {}, parts: {}, grid: {}x{}, endpoint levels: {}, weight levels: {}, Avg bits: {}, Avg Max Std Dev: {}, RG: {} RB: {} GB: {}\n", i, total_uses, + bm_desc.m_cem, + bm_desc.m_dp, bm_desc.m_dp_channel, + bm_desc.m_num_partitions, + bm_desc.m_grid_x, bm_desc.m_grid_y, + astc_helpers::get_ise_levels(bm_desc.m_endpoint_ise_range), + astc_helpers::get_ise_levels(bm_desc.m_weight_ise_range), + total_uses ? 
((double)m_block_mode_total_bits[i] / total_uses) : 0.0f,
+ avg_std_dev, avg_cross_correlations[0], avg_cross_correlations[1], avg_cross_correlations[2]);
+
+ if (total_uses)
+ total_used_modes++;
+ }
+
+ fmt_printf("Total used modes: {}\n", total_used_modes);
+
+ fmt_printf("Total single plane: {}, total dual plane: {}\n", total_sp, total_dp);
+ fmt_printf("Total mode 11: {}, mode 7: {}\n", total_mode11, total_mode7);
+ fmt_printf("Partition histogram: {} {} {}\n", part_hist[0], part_hist[1], part_hist[2]);
+ fmt_printf("2 subset mode 7 uses: {}, mode 11 uses: {}\n", part2_mode7_total, part2_mode11_total);
+ }
+};
+
+struct uastc_hdr_6x6_encode_state
+{
+ astc_hdr_codec_base_options master_coptions;
+
+ imagef src_img;
+
+ imagef src_img_filtered1;
+ imagef src_img_filtered2;
+
+ imagef src_img_itp;
+ imagef src_img_filtered1_itp;
+ imagef src_img_filtered2_itp;
+
+ vector2D smooth_block_mse_scales;
+
+ imagef packed_img;
+
+ basisu::vector<bitwise_coder> strip_bits;
+
+ basisu::vector2D final_astc_blocks;
+
+ vector2D<candidate_encoding> coded_blocks;
+};
+
+static bool compress_strip_task(
+ uint32_t strip_index, uint32_t total_strips, uint32_t strip_first_by, uint32_t strip_last_by,
+ uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t total_blocks, uint32_t width, uint32_t height,
+ astc_hdr_6x6_global_config &global_cfg, uastc_hdr_6x6_debug_state &debug_state, uastc_hdr_6x6_encode_state &enc_state)
+{
+ BASISU_NOTE_UNUSED(num_blocks_y);
+ BASISU_NOTE_UNUSED(total_strips);
+
+ vec3F prev_comp_pixels[BLOCK_H][BLOCK_W]; // [y][x]
+ basisu::clear_obj(prev_comp_pixels);
+
+ uint32_t prev_run_len = 0;
+
+ bitwise_coder prev_encoding;
+ candidate_encoding prev_candidate_encoding; // the previous candidate written, which may have been a run extension
+ candidate_encoding prev_non_run_candidate_encoding; // the previous *non-run* candidate written
+
+ bitwise_coder& strip_coded_bits = enc_state.strip_bits[strip_index];
+
+ const uint32_t CANDIDATES_TO_RESERVE = 1536;
+
+ basisu::vector<candidate_encoding> candidates;
+ 
candidates.reserve(CANDIDATES_TO_RESERVE); + + for (uint32_t by = strip_first_by; by <= strip_last_by; by++) + { + const bool has_upper_neighbor = by > strip_first_by; + + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + //if ((bx == 1) && (by == 2)) + // basisu::fmt_printf("!"); + + for (uint32_t outer_pass = 0; outer_pass < 3; outer_pass++) + { + const bool has_left_neighbor = bx > 0; + //const bool has_prev = has_left_neighbor || has_upper_neighbor; + + // Select either the original source image, or the Gaussian filtered version. + // From here the encoder *must* use these 2 sources. + const imagef& pass_src_img = (outer_pass == 2) ? enc_state.src_img_filtered2 : + ((outer_pass == 1) ? enc_state.src_img_filtered1 : enc_state.src_img); + + const imagef& pass_src_img_itp = (outer_pass == 2) ? enc_state.src_img_filtered2_itp : + ((outer_pass == 1) ? enc_state.src_img_filtered1_itp : enc_state.src_img_itp); + + // Extract source image block + vec4F block_pixels[BLOCK_H][BLOCK_W]; // [y][x] + pass_src_img.extract_block_clamped(&block_pixels[0][0], bx * BLOCK_W, by * BLOCK_H, BLOCK_W, BLOCK_H); + + vec4F block_pixels_itp[BLOCK_H][BLOCK_W]; // [y][x] + pass_src_img_itp.extract_block_clamped(&block_pixels_itp[0][0], bx * BLOCK_W, by * BLOCK_H, BLOCK_W, BLOCK_H); + + half_vec3 half_pixels[BLOCK_H][BLOCK_W]; // [y][x] half-float values + vec3F half_pixels_as_floats[BLOCK_H][BLOCK_W]; // [y][x] half float values, integer bits as floats + vec4F block_pixels_q16[BLOCK_H][BLOCK_W]; // [y][x], q16 space for low-level ASTC encoding + vec3F block_pixels_as_itp[BLOCK_H][BLOCK_W]; // [y][x] input converted to itp space, for faster error calculations + + bool is_grayscale = true; + + candidates.resize(0); + + float block_ly = BIG_FLOAT_VAL, block_hy = 0.0f, block_avg_y = 0.0f; + + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + vec3F rgb_input; + + for (uint32_t c = 0; c < 3; c++) + { + float v = block_pixels[y][x][c]; + + rgb_input[c] 
= v; + + const basist::half_float h = basisu::fast_float_to_half_no_clamp_neg_nan_or_inf(v); + assert(h == basist::float_to_half(v)); + + half_pixels[y][x][c] = h; + + block_pixels_q16[y][x][c] = (float)half_to_qlog16(h); + + half_pixels_as_floats[y][x][c] = (float)h; + + } // c + + float py = rgb_input.dot(vec3F(REC_709_R, REC_709_G, REC_709_B)); + if (py < block_ly) + block_ly = py; + if (py > block_hy) + block_hy = py; + block_avg_y += py; + + //linear_rgb_to_itp(rgb_input, block_pixels_as_itp[y][x]); + + block_pixels_as_itp[y][x] = block_pixels_itp[y][x]; + + block_pixels_q16[y][x][3] = 0.0f; + + if ((half_pixels[y][x][0] != half_pixels[y][x][1]) || (half_pixels[y][x][0] != half_pixels[y][x][2])) + is_grayscale = false; + + } // x + } // y + + block_avg_y *= (1.0f / (float)NUM_BLOCK_PIXELS); + + encode_astc_block_stats enc_block_stats; + enc_block_stats.init(NUM_BLOCK_PIXELS, &block_pixels_q16[0][0]); + + vec4F x_filtered[6][6], y_filtered[6][6]; + + filter_block(3, 6, (vec4F*)block_pixels, (vec4F*)x_filtered); // filter rows (horizontal) + filter_block(6, 3, (vec4F*)block_pixels, (vec4F*)y_filtered); // filter cols (vertically) + + const float filtered_x_err = diff_blocks((vec4F*)block_pixels, (vec4F*)x_filtered); + const float filtered_y_err = diff_blocks((vec4F*)block_pixels, (vec4F*)y_filtered); + const bool filter_horizontally = filtered_x_err < filtered_y_err; + + //const float block_mag_gradient_mag = block_max_gradient_mag(bx, by); + + if (filter_horizontally) + debug_state.m_total_filter_horizontal.fetch_add(1, std::memory_order_relaxed); + + vec3F lowpass_filtered[6][6]; + filter_block(3, 3, &half_pixels_as_floats[0][0], &lowpass_filtered[0][0]); + float lowpass_std_dev = sub_and_compute_std_dev(&lowpass_filtered[0][0], &half_pixels_as_floats[0][0]); + + const bool very_detailed_block = lowpass_std_dev > 350.0f; + const bool very_blurry_block = lowpass_std_dev < 30.0f; + const bool super_blurry_block = lowpass_std_dev < 15.0f; + + basisu::stats 
half_comp_stats[3]; + for (uint32_t c = 0; c < 3; c++) + half_comp_stats[c].calc(NUM_BLOCK_PIXELS, &half_pixels_as_floats[0][0][c], 3); + + const float SINGLE_PART_HALF_THRESH = 256.0f; + const float COMPLEX_HALF_THRESH = 1024.0f; + // HACK HACK + const float VERY_COMPLEX_HALF_THRESH = 1400.0f; // 1536.0f; + + const float max_std_dev = (float)maximum(half_comp_stats[0].m_std_dev, half_comp_stats[1].m_std_dev, half_comp_stats[2].m_std_dev); + + const bool very_simple_block = (max_std_dev < SINGLE_PART_HALF_THRESH); + const bool complex_block = (max_std_dev > COMPLEX_HALF_THRESH); + const bool very_complex_block = (max_std_dev > VERY_COMPLEX_HALF_THRESH); + + // Dynamically choose a comp_level for this block. + astc_hdr_codec_base_options coptions(enc_state.master_coptions); + uint32_t comp_level = global_cfg.m_master_comp_level; + + if (very_complex_block) + comp_level = global_cfg.m_highest_comp_level; + else if (complex_block) + comp_level = (global_cfg.m_master_comp_level + global_cfg.m_highest_comp_level + 1) / 2; + + debug_state.m_comp_level_hist[comp_level].fetch_add(1, std::memory_order_relaxed); + + bool any_2subset_enabled = false, any_2subset_mode11_enabled = false, any_2subset_mode7_enabled = false, any_3subset_enabled = false; + BASISU_NOTE_UNUSED(any_2subset_mode11_enabled); + + for (uint32_t i = 0; i < (uint32_t)block_mode::cBMTotalModes; i++) + { + if (comp_level == 0) + { + if ((g_block_mode_descs[i].m_flags & BASIST_HDR_6X6_LEVEL0) == 0) + continue; + } + else if (comp_level == 1) + { + if ((g_block_mode_descs[i].m_flags & BASIST_HDR_6X6_LEVEL1) == 0) + continue; + } + else if (comp_level == 2) + { + if ((g_block_mode_descs[i].m_flags & BASIST_HDR_6X6_LEVEL2) == 0) + continue; + } + + if (g_block_mode_descs[i].m_num_partitions == 2) + { + any_2subset_enabled = true; + + if (g_block_mode_descs[i].m_cem == 7) + { + any_2subset_mode7_enabled = true; + } + else + { + assert(g_block_mode_descs[i].m_cem == 11); + any_2subset_mode11_enabled = true; + } + } 
+ else if (g_block_mode_descs[i].m_num_partitions == 3) + any_3subset_enabled = true; + } + + coptions.m_mode7_full_s_optimization = (comp_level >= 2); + + const bool uber_mode_flag = (comp_level >= 3); + coptions.m_allow_uber_mode = uber_mode_flag; + + coptions.m_ultra_quant = (comp_level >= 4); + + coptions.m_take_first_non_clamping_mode11_submode = (comp_level <= 2); + coptions.m_take_first_non_clamping_mode7_submode = (comp_level <= 2); + + coptions.m_disable_weight_plane_optimization = (comp_level >= 2); + + // ------------------- + + uint32_t total_used_block_chans = 0; + for (uint32_t i = 0; i < 3; i++) + total_used_block_chans += (half_comp_stats[i].m_range > 0.0f); + + const bool is_solid_block = (total_used_block_chans == 0); + + basisu::comparative_stats half_cross_chan_stats[3]; + + // R vs. G + half_cross_chan_stats[0].calc_pearson(NUM_BLOCK_PIXELS, + &half_pixels_as_floats[0][0][0], &half_pixels_as_floats[0][0][1], + 3, 3, + &half_comp_stats[0], &half_comp_stats[1]); + + // R vs. B + half_cross_chan_stats[1].calc_pearson(NUM_BLOCK_PIXELS, + &half_pixels_as_floats[0][0][0], &half_pixels_as_floats[0][0][2], + 3, 3, + &half_comp_stats[0], &half_comp_stats[2]); + + // G vs. B + half_cross_chan_stats[2].calc_pearson(NUM_BLOCK_PIXELS, + &half_pixels_as_floats[0][0][1], &half_pixels_as_floats[0][0][2], + 3, 3, + &half_comp_stats[1], &half_comp_stats[2]); + + const float rg_corr = fabsf((float)half_cross_chan_stats[0].m_pearson); + const float rb_corr = fabsf((float)half_cross_chan_stats[1].m_pearson); + const float gb_corr = fabsf((float)half_cross_chan_stats[2].m_pearson); + + float min_corr = BIG_FLOAT_VAL, max_corr = -BIG_FLOAT_VAL; + for (uint32_t i = 0; i < 3; i++) + { +#if 0 + // 9/5/2025, wrong metric, we're iterating channels pairs here, not individual channels. + // On 3 active channel blocks this causes no difference. 
+ if (half_comp_stats[i].m_range > 0.0f) +#else + static const uint8_t s_chan_pairs[3][2] = { {0, 1}, {0, 2}, {1, 2} }; + + const uint32_t chanA = s_chan_pairs[i][0]; + const uint32_t chanB = s_chan_pairs[i][1]; + + if ((half_comp_stats[chanA].m_range > 0.0f) && (half_comp_stats[chanB].m_range > 0.0f)) +#endif + { + const float c = fabsf((float)half_cross_chan_stats[i].m_pearson); + min_corr = minimum(min_corr, c); + max_corr = maximum(max_corr, c); + } + } + + bool use_single_subset_mode7 = true; + if (comp_level <= 1) + { + // TODO: could also compute angle between principle axis and the grayscale axis. + // TODO: Transform grayscale axis by covar matrix, compute variance vs. total variance + const float MODE7_MIN_CHAN_CORR = .5f; + const float MODE7_PCA_ANGLE_THRESH = .9f; + use_single_subset_mode7 = is_grayscale || is_solid_block || ((total_used_block_chans == 1) || (min_corr >= MODE7_MIN_CHAN_CORR)); + + if (use_single_subset_mode7) + { + float cos_ang = fabsf(enc_block_stats.m_axis_q16.dot(vec3F(0.5773502691f))); + if (cos_ang < MODE7_PCA_ANGLE_THRESH) + use_single_subset_mode7 = false; + } + } + + const float STRONG_CORR_THRESH = (comp_level <= 1) ? .5f : ((comp_level <= 3) ? 
.75f : .9f); + + int desired_dp_chan = -1; + if (total_used_block_chans <= 1) + { + // no need for dual plane (except possibly 2x2 weight grids for RDO) + } + else + { + if (min_corr >= STRONG_CORR_THRESH) + { + // all channel pairs strongly correlated, no need for dual plane + debug_state.m_dp_stats[0].fetch_add(1, std::memory_order_relaxed); + } + else + { + if (total_used_block_chans == 2) + { + if (half_comp_stats[0].m_range == 0.0f) + { + // r unused, check for strong gb correlation + if (gb_corr < STRONG_CORR_THRESH) + desired_dp_chan = 1; + } + else if (half_comp_stats[1].m_range == 0.0f) + { + // g unused, check for strong rb correlation + if (rb_corr < STRONG_CORR_THRESH) + desired_dp_chan = 0; + } + else + { + // b unused, check for strong rg correlation + if (rg_corr < STRONG_CORR_THRESH) + desired_dp_chan = 0; + } + } + else + { + assert(total_used_block_chans == 3); + + // see if rg/rb is weakly correlated vs. gb + if ((rg_corr < gb_corr) && (rb_corr < gb_corr)) + desired_dp_chan = 0; + // see if gr/gb is weakly correlated vs. rb + else if ((rg_corr < rb_corr) && (gb_corr < rb_corr)) + desired_dp_chan = 1; + // assume b is weakest + else + desired_dp_chan = 2; + } + + if (desired_dp_chan == -1) + debug_state.m_dp_stats[1].fetch_add(1, std::memory_order_relaxed); + else + debug_state.m_dp_stats[2 + desired_dp_chan].fetch_add(1, std::memory_order_relaxed); + } + } + + // 2x2 is special for RDO at higher lambdas - always pick a preferred channel. + int desired_dp_chan_2x2 = 0; + if (total_used_block_chans == 2) + { + if (half_comp_stats[0].m_range == 0.0f) + desired_dp_chan_2x2 = 1; + } + else if (total_used_block_chans == 3) + { + // see if rg/rb is weakly correlated vs. gb + if ((rg_corr < gb_corr) && (rb_corr < gb_corr)) + desired_dp_chan_2x2 = 0; + // see if gr/gb is weakly correlated vs. 
rb + else if ((rg_corr < rb_corr) && (gb_corr < rb_corr)) + desired_dp_chan_2x2 = 1; + // assume b is weakest + else + desired_dp_chan_2x2 = 2; + } + + // Gather all candidate encodings + bool status = false; + + // ---- Run candidate + if ((global_cfg.m_use_runs) && (has_left_neighbor || has_upper_neighbor)) + { + candidate_encoding candidate; + candidate.m_coder.reserve(24); + + candidate.m_encoding_type = encoding_type::cRun; + + candidate.m_decomp_log_blk = prev_non_run_candidate_encoding.m_decomp_log_blk; + candidate.m_coded_log_blk = prev_non_run_candidate_encoding.m_coded_log_blk; + + memcpy(candidate.m_comp_pixels, prev_comp_pixels, sizeof(prev_comp_pixels)); + + if (!prev_run_len) + { + candidate.m_coder.put_bits(RUN_CODE, RUN_CODE_LEN); + candidate.m_coder.put_vlc(0, 5); + } + else + { + // extend current run - compute the # of new bits needed for the extension. + + uint32_t prev_run_bits = prev_encoding.get_total_bits_u32(); + assert(prev_run_bits > 0); + + // We're not actually going to code this, because the previously emitted run code will be extended. 
+ bitwise_coder temp_coder; + temp_coder.put_bits(RUN_CODE, RUN_CODE_LEN); + temp_coder.put_vlc((prev_run_len + 1) - 1, 5); + + uint32_t cur_run_bits = temp_coder.get_total_bits_u32(); + assert(cur_run_bits >= prev_run_bits); + + uint32_t total_new_bits = cur_run_bits - prev_run_bits; + if (total_new_bits > 0) + candidate.m_coder.put_bits(0, total_new_bits); // dummy bits + } + + candidate.m_run_len = prev_run_len + 1; + + candidates.emplace_back(std::move(candidate)); + } + + // ---- Reuse candidate + if ((!is_solid_block) && (global_cfg.m_lambda > 0.0f)) + { + for (uint32_t reuse_delta_index = 0; reuse_delta_index < global_cfg.m_num_reuse_xy_deltas; reuse_delta_index++) + { + const int reuse_delta_x = g_reuse_xy_deltas[reuse_delta_index].m_x; + const int reuse_delta_y = g_reuse_xy_deltas[reuse_delta_index].m_y; + + const int reuse_bx = bx + reuse_delta_x, reuse_by = by + reuse_delta_y; + if ((reuse_bx < 0) || (reuse_bx >= (int)num_blocks_x)) + continue; + if (reuse_by < (int)strip_first_by) + break; + + const candidate_encoding& prev_candidate = enc_state.coded_blocks(reuse_bx, reuse_by); + + // TODO - support this. 
+ if (prev_candidate.m_encoding_type == encoding_type::cSolid) + continue; + assert((prev_candidate.m_encoding_type == encoding_type::cBlock) || (prev_candidate.m_encoding_type == encoding_type::cReuse)); + + candidate_encoding candidate; + candidate.m_coder.reserve(24); + astc_helpers::log_astc_block& coded_log_blk = candidate.m_coded_log_blk; + astc_helpers::log_astc_block& decomp_log_blk = candidate.m_decomp_log_blk; + + const astc_helpers::log_astc_block& prev_coded_log_blk = prev_candidate.m_coded_log_blk; + + const uint32_t grid_x = prev_coded_log_blk.m_grid_width, grid_y = prev_coded_log_blk.m_grid_height; + const bool dual_plane = prev_candidate.m_coded_log_blk.m_dual_plane; + const uint32_t num_grid_samples = grid_x * grid_y; + const uint32_t num_endpoint_vals = get_num_endpoint_vals(prev_coded_log_blk.m_color_endpoint_modes[0]); + + coded_log_blk = prev_candidate.m_coded_log_blk; + decomp_log_blk = prev_candidate.m_decomp_log_blk; + + if (prev_coded_log_blk.m_num_partitions == 1) + { + // Now encode the block using the transcoded endpoints + basist::half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3]; + + if (prev_coded_log_blk.m_color_endpoint_modes[0] == 7) + { + status = get_astc_hdr_mode_7_block_colors(coded_log_blk.m_endpoints, &decoded_half[0][0], nullptr, + astc_helpers::get_ise_levels(coded_log_blk.m_weight_ise_range), coded_log_blk.m_weight_ise_range, coded_log_blk.m_endpoint_ise_range); + } + else + { + status = get_astc_hdr_mode_11_block_colors(coded_log_blk.m_endpoints, &decoded_half[0][0], nullptr, + astc_helpers::get_ise_levels(coded_log_blk.m_weight_ise_range), coded_log_blk.m_weight_ise_range, coded_log_blk.m_endpoint_ise_range); + } + assert(status); + + uint8_t trial_weights0[BLOCK_W * BLOCK_H], trial_weights1[BLOCK_W * BLOCK_H]; + uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H * 2]; + + if (dual_plane) + { + eval_selectors_dual_plane(prev_candidate.m_coded_log_blk.m_color_component_selector, + BLOCK_W * BLOCK_H, trial_weights0, 
trial_weights1, (basist::half_float*)&half_pixels[0][0][0], astc_helpers::get_ise_levels(coded_log_blk.m_weight_ise_range), &decoded_half[0][0], coptions, UINT32_MAX); + + downsample_ise_weights_dual_plane( + coded_log_blk.m_weight_ise_range, coded_log_blk.m_weight_ise_range, + BLOCK_W, BLOCK_H, + grid_x, grid_y, + trial_weights0, trial_weights1, coded_log_blk.m_weights); + + basist::astc_6x6_hdr::requantize_astc_weights(num_grid_samples * 2, coded_log_blk.m_weights, coded_log_blk.m_weight_ise_range, transcode_weights, decomp_log_blk.m_weight_ise_range); + } + else + { + eval_selectors(BLOCK_W * BLOCK_H, trial_weights0, coded_log_blk.m_weight_ise_range, (basist::half_float*)&half_pixels[0][0][0], astc_helpers::get_ise_levels(coded_log_blk.m_weight_ise_range), &decoded_half[0][0], coptions, UINT32_MAX); + + downsample_ise_weights( + coded_log_blk.m_weight_ise_range, coded_log_blk.m_weight_ise_range, + BLOCK_W, BLOCK_H, + grid_x, grid_y, + trial_weights0, coded_log_blk.m_weights); + + basist::astc_6x6_hdr::requantize_astc_weights(num_grid_samples, coded_log_blk.m_weights, coded_log_blk.m_weight_ise_range, transcode_weights, decomp_log_blk.m_weight_ise_range); + } + + // Create the block the decoder would transcode into. 
+ copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_log_blk); + } + else if (prev_coded_log_blk.m_num_partitions == 2) + { + assert(!dual_plane); + + const int unique_pat_index = g_part2_seed_to_unique_index[coded_log_blk.m_partition_id]; + assert((unique_pat_index >= 0) && (unique_pat_index < (int)NUM_UNIQUE_PARTITIONS2)); + + const partition_pattern_vec& pat_vec = g_partitions2[unique_pat_index]; + + vec4F part_pixels_q16[2][64]; + half_vec3 part_half_pixels[2][64]; + uint32_t part_total_pixels[2] = { 0 }; + + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + const uint32_t part_index = pat_vec[x + y * 6]; + + uint32_t l = part_total_pixels[part_index]; + + part_pixels_q16[part_index][l] = block_pixels_q16[y][x]; + part_half_pixels[part_index][l] = half_pixels[y][x]; + + part_total_pixels[part_index] = l + 1; + } // x + } // y + + uint8_t blk_weights[2][BLOCK_W * BLOCK_H]; + + for (uint32_t part_index = 0; part_index < 2; part_index++) + { + basist::half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3]; + + if (prev_coded_log_blk.m_color_endpoint_modes[0] == 7) + { + status = get_astc_hdr_mode_7_block_colors(coded_log_blk.m_endpoints + num_endpoint_vals * part_index, &decoded_half[0][0], nullptr, + astc_helpers::get_ise_levels(coded_log_blk.m_weight_ise_range), coded_log_blk.m_weight_ise_range, coded_log_blk.m_endpoint_ise_range); + } + else + { + status = get_astc_hdr_mode_11_block_colors(coded_log_blk.m_endpoints + num_endpoint_vals * part_index, &decoded_half[0][0], nullptr, + astc_helpers::get_ise_levels(coded_log_blk.m_weight_ise_range), coded_log_blk.m_weight_ise_range, coded_log_blk.m_endpoint_ise_range); + } + assert(status); + + eval_selectors(part_total_pixels[part_index], blk_weights[part_index], coded_log_blk.m_weight_ise_range, + (basist::half_float*)&part_half_pixels[part_index][0][0], astc_helpers::get_ise_levels(coded_log_blk.m_weight_ise_range), &decoded_half[0][0], coptions, 
UINT32_MAX); + + } // part_index + + uint8_t ise_weights[BLOCK_W * BLOCK_H]; + + uint32_t src_pixel_index[2] = { 0, 0 }; + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + const uint32_t part_index = pat_vec[x + y * 6]; + + ise_weights[x + y * BLOCK_W] = blk_weights[part_index][src_pixel_index[part_index]]; + src_pixel_index[part_index]++; + } // x + } // y + + downsample_ise_weights( + coded_log_blk.m_weight_ise_range, coded_log_blk.m_weight_ise_range, + BLOCK_W, BLOCK_H, + grid_x, grid_y, + ise_weights, coded_log_blk.m_weights); + + // Transcode these codable weights to ASTC weights. + uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H]; + basist::astc_6x6_hdr::requantize_astc_weights(num_grid_samples, coded_log_blk.m_weights, coded_log_blk.m_weight_ise_range, transcode_weights, decomp_log_blk.m_weight_ise_range); + + // Create the block the decoder would transcode into. + copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_log_blk); + } + else if (prev_coded_log_blk.m_num_partitions == 3) + { + assert(!dual_plane); + + const int unique_pat_index = g_part3_seed_to_unique_index[coded_log_blk.m_partition_id]; + assert((unique_pat_index >= 0) && (unique_pat_index < (int)NUM_UNIQUE_PARTITIONS3)); + + const partition_pattern_vec& pat = g_partitions3[unique_pat_index]; + + vec4F part_pixels_q16[3][64]; + half_vec3 part_half_pixels[3][64]; + uint32_t part_total_pixels[3] = { 0 }; + + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + const uint32_t part_index = pat.m_parts[x + y * BLOCK_W]; + + uint32_t l = part_total_pixels[part_index]; + + part_pixels_q16[part_index][l] = block_pixels_q16[y][x]; + part_half_pixels[part_index][l] = half_pixels[y][x]; + + part_total_pixels[part_index] = l + 1; + } // x + } // y + + uint8_t blk_weights[3][BLOCK_W * BLOCK_H]; + + for (uint32_t part_index = 0; part_index < 3; part_index++) + { + basist::half_float 
decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3]; + + status = get_astc_hdr_mode_7_block_colors(coded_log_blk.m_endpoints + num_endpoint_vals * part_index, &decoded_half[0][0], nullptr, + astc_helpers::get_ise_levels(coded_log_blk.m_weight_ise_range), coded_log_blk.m_weight_ise_range, coded_log_blk.m_endpoint_ise_range); + assert(status); + + eval_selectors(part_total_pixels[part_index], blk_weights[part_index], coded_log_blk.m_weight_ise_range, + (basist::half_float*)&part_half_pixels[part_index][0][0], astc_helpers::get_ise_levels(coded_log_blk.m_weight_ise_range), &decoded_half[0][0], coptions, UINT32_MAX); + + } // part_index + + uint8_t ise_weights[BLOCK_W * BLOCK_H]; + + uint32_t src_pixel_index[3] = { 0 }; + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + const uint32_t part_index = pat.m_parts[x + y * BLOCK_W]; + + ise_weights[x + y * BLOCK_W] = blk_weights[part_index][src_pixel_index[part_index]]; + src_pixel_index[part_index]++; + } // x + } // y + + downsample_ise_weights( + coded_log_blk.m_weight_ise_range, coded_log_blk.m_weight_ise_range, + BLOCK_W, BLOCK_H, + grid_x, grid_y, + ise_weights, coded_log_blk.m_weights); + + // Transcode these codable weights to ASTC weights. + uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H]; + basist::astc_6x6_hdr::requantize_astc_weights(num_grid_samples, coded_log_blk.m_weights, coded_log_blk.m_weight_ise_range, transcode_weights, decomp_log_blk.m_weight_ise_range); + + // Create the block the decoder would transcode into. 
+ copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_log_blk); + } + + if (!validate_log_blk(decomp_log_blk)) + { + fmt_error_printf("pack_astc_block() failed\n"); + return false; + } + + status = decode_astc_block(BLOCK_W, BLOCK_H, decomp_log_blk, &candidate.m_comp_pixels[0][0]); + if (!status) + { + fmt_error_printf("decode_astc_block() failed\n"); + return false; + } + + candidate.m_coder.put_bits(REUSE_CODE, REUSE_CODE_LEN); + candidate.m_coder.put_bits(reuse_delta_index, REUSE_XY_DELTA_BITS); + encode_values(candidate.m_coder, num_grid_samples * (dual_plane ? 2 : 1), coded_log_blk.m_weights, coded_log_blk.m_weight_ise_range); + + candidate.m_encoding_type = encoding_type::cReuse; + candidate.m_block_mode = prev_candidate.m_block_mode; + candidate.m_endpoint_mode = prev_candidate.m_endpoint_mode; + candidate.m_reuse_delta_index = reuse_delta_index; + + candidates.emplace_back(std::move(candidate)); + + } // reuse_delta_index + } + + // ---- Solid candidate + if (global_cfg.m_use_solid_blocks) + { + candidate_encoding candidate; + candidate.m_coder.reserve(24); + + // solid + candidate.m_encoding_type = encoding_type::cSolid; + + float r = 0.0f, g = 0.0f, b = 0.0f; + const float LOG_BIAS = .125f; + bool solid_block = true; + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + if ((block_pixels[0][0][0] != block_pixels[y][x][0]) || + (block_pixels[0][0][1] != block_pixels[y][x][1]) || + (block_pixels[0][0][2] != block_pixels[y][x][2])) + { + solid_block = false; + } + + r += log2f(block_pixels[y][x][0] + LOG_BIAS); + g += log2f(block_pixels[y][x][1] + LOG_BIAS); + b += log2f(block_pixels[y][x][2] + LOG_BIAS); + } + } + + if (solid_block) + { + r = block_pixels[0][0][0]; + g = block_pixels[0][0][1]; + b = block_pixels[0][0][2]; + } + else + { + r = maximum(0.0f, powf(2.0f, r * (1.0f / (float)NUM_BLOCK_PIXELS)) - LOG_BIAS); + g = maximum(0.0f, powf(2.0f, g * (1.0f / (float)NUM_BLOCK_PIXELS)) - LOG_BIAS); + b 
= maximum(0.0f, powf(2.0f, b * (1.0f / (float)NUM_BLOCK_PIXELS)) - LOG_BIAS); + + r = minimum(r, basist::MAX_HALF_FLOAT); + g = minimum(g, basist::MAX_HALF_FLOAT); + b = minimum(b, basist::MAX_HALF_FLOAT); + } + + basist::half_float rh = float_to_half_non_neg_no_nan_inf(r), gh = float_to_half_non_neg_no_nan_inf(g), bh = float_to_half_non_neg_no_nan_inf(b); + + candidate.m_solid_color[0] = rh; + candidate.m_solid_color[1] = gh; + candidate.m_solid_color[2] = bh; + + candidate.m_coder.put_bits(SOLID_CODE, SOLID_CODE_LEN); + + candidate.m_coder.put_bits(rh, 15); + candidate.m_coder.put_bits(gh, 15); + candidate.m_coder.put_bits(bh, 15); + + vec3F cp(basist::half_to_float(rh), basist::half_to_float(gh), basist::half_to_float(bh)); + + for (uint32_t y = 0; y < BLOCK_H; y++) + for (uint32_t x = 0; x < BLOCK_W; x++) + candidate.m_comp_pixels[y][x] = cp; + + astc_helpers::log_astc_block& log_blk = candidate.m_coded_log_blk; + + log_blk.clear(); + log_blk.m_solid_color_flag_hdr = true; + log_blk.m_solid_color[0] = rh; + log_blk.m_solid_color[1] = gh; + log_blk.m_solid_color[2] = bh; + log_blk.m_solid_color[3] = basist::float_to_half(1.0f); + + candidate.m_decomp_log_blk = log_blk; + + candidates.emplace_back(std::move(candidate)); + } + + if ((!is_solid_block) || (!global_cfg.m_use_solid_blocks)) + { + static uint8_t s_parts2_normal[5] = { 0, 2, 4, 6, 8 }; + static uint8_t s_parts3_normal[5] = { 0, 0, 4, 6, 8 }; + + static uint8_t s_parts2_complex[5] = { 0, 4, 8, 10, 16 }; + static uint8_t s_parts3_complex[5] = { 0, 0, 8, 10, 16 }; + + static uint8_t s_parts2_very_complex[5] = { 0, 8, 12, 14, 20 }; + static uint8_t s_parts3_very_complex[5] = { 0, 0, 12, 14, 20 }; + + uint32_t total_parts2 = 0, total_parts3 = 0; + + assert(comp_level < 5); + if ((very_simple_block) && (comp_level <= 3)) + { + // Block's std dev is so low that 2-3 subsets are unlikely to help much + total_parts2 = 0; + total_parts3 = 0; + + debug_state.m_total_part2_stats[0].fetch_add(1, 
std::memory_order_relaxed); + } + else if (very_complex_block) + { + total_parts2 = s_parts2_very_complex[comp_level]; + total_parts3 = s_parts3_very_complex[comp_level]; + + if (global_cfg.m_extra_patterns_flag) + { + total_parts2 += (comp_level == 4) ? 30 : 20; + total_parts3 += (comp_level == 4) ? 30 : 20; + } + + debug_state.m_total_part2_stats[2].fetch_add(1, std::memory_order_relaxed); + } + else if (complex_block) + { + total_parts2 = s_parts2_complex[comp_level]; + total_parts3 = s_parts3_complex[comp_level]; + + if (global_cfg.m_extra_patterns_flag) + { + total_parts2 += (comp_level == 4) ? 15 : 10; + total_parts3 += (comp_level == 4) ? 15 : 10; + } + + debug_state.m_total_part2_stats[3].fetch_add(1, std::memory_order_relaxed); + } + else + { + // moderate complexity - use defaults + total_parts2 = s_parts2_normal[comp_level]; + total_parts3 = s_parts3_normal[comp_level]; + + if (global_cfg.m_extra_patterns_flag) + { + total_parts2 += 5; + total_parts3 += 5; + } + + debug_state.m_total_part2_stats[1].fetch_add(1, std::memory_order_relaxed); + } + + if (!any_2subset_enabled) + total_parts2 = 0; + + if (!any_3subset_enabled) + total_parts3 = 0; + + int best_parts2_mode11[NUM_UNIQUE_PARTITIONS2], best_parts2_mode7[NUM_UNIQUE_PARTITIONS2]; + bool has_estimated_parts2 = false; + + if (total_parts2) + { + if (global_cfg.m_brute_force_partition_matching) + { + int candidate_pats2[NUM_UNIQUE_PARTITIONS2]; + for (uint32_t i = 0; i < NUM_UNIQUE_PARTITIONS2; i++) + candidate_pats2[i] = i; + + if (any_2subset_enabled) + { + estimate_partitions_mode7_and_11( + 2, + NUM_UNIQUE_PARTITIONS2, g_partitions2, + NUM_UNIQUE_PARTITIONS2, (uint32_t*)candidate_pats2, + &half_pixels_as_floats[0][0], + coptions, + total_parts2, best_parts2_mode11, best_parts2_mode7); + } + + has_estimated_parts2 = true; + } + else + { + if (comp_level >= 1) + { + const uint32_t MAX_CANDIDATES2 = 48; + int candidate_pats2[MAX_CANDIDATES2 * 2]; + + uint32_t num_candidate_pats2 = maximum((total_parts2 
* 3) / 2, very_complex_block ? MAX_CANDIDATES2 : (MAX_CANDIDATES2 / 2)); + num_candidate_pats2 = minimum(num_candidate_pats2, (uint32_t)std::size(candidate_pats2)); + + has_estimated_parts2 = estimate_partition2_6x6((basist::half_float(*)[3])half_pixels, candidate_pats2, num_candidate_pats2); + + if (has_estimated_parts2) + { + estimate_partitions_mode7_and_11( + 2, + NUM_UNIQUE_PARTITIONS2, g_partitions2, + num_candidate_pats2, (uint32_t*)candidate_pats2, + &half_pixels_as_floats[0][0], + coptions, + total_parts2, best_parts2_mode11, best_parts2_mode7); + } + } + else + { + has_estimated_parts2 = estimate_partition2_6x6((basist::half_float(*)[3])half_pixels, best_parts2_mode11, total_parts2); + + if ((has_estimated_parts2) && (any_2subset_mode7_enabled)) + memcpy(best_parts2_mode7, best_parts2_mode11, total_parts2 * sizeof(best_parts2_mode7[0])); + } + } + } + + int best_parts3[NUM_UNIQUE_PARTITIONS3]; + bool has_estimated_parts3 = false; + + if (total_parts3) + { +#if 0 + has_estimated_parts3 = estimate_partition3_6x6((basist::half_float(*)[3])half_pixels, best_parts3, total_parts3); +#elif 1 + if (global_cfg.m_brute_force_partition_matching) + { + int candidate_pats3[NUM_UNIQUE_PARTITIONS3]; + for (uint32_t i = 0; i < NUM_UNIQUE_PARTITIONS3; i++) + candidate_pats3[i] = i; + + estimate_partitions_mode7( + 3, + NUM_UNIQUE_PARTITIONS3, g_partitions3, + NUM_UNIQUE_PARTITIONS3, (uint32_t*)candidate_pats3, + &half_pixels_as_floats[0][0], + coptions, + total_parts3, (uint32_t*)best_parts3); + + has_estimated_parts3 = true; + } + else + { + const uint32_t MAX_CANDIDATES3 = 48; + int candidate_pats3[MAX_CANDIDATES3 * 2]; + + uint32_t num_candidate_pats3 = maximum((total_parts3 * 3) / 2, very_complex_block ? 
MAX_CANDIDATES3 : (MAX_CANDIDATES3 / 2)); + num_candidate_pats3 = minimum(num_candidate_pats3, (uint32_t)std::size(candidate_pats3)); + + has_estimated_parts3 = estimate_partition3_6x6((basist::half_float(*)[3])half_pixels, candidate_pats3, num_candidate_pats3); + + if (has_estimated_parts3) + { + estimate_partitions_mode7( + 3, + NUM_UNIQUE_PARTITIONS3, g_partitions3, + num_candidate_pats3, (uint32_t*)candidate_pats3, + &half_pixels_as_floats[0][0], + coptions, + total_parts3, (uint32_t*)best_parts3); + } + } +#endif + } + + const opt_mode_t mode11_opt_mode = complex_block ? cWeightedLeastSquares : cOrdinaryLeastSquares; + + // ---- Encoded block candidate + for (uint32_t block_mode_iter = 0; block_mode_iter < (uint32_t)block_mode::cBMTotalModes; block_mode_iter++) + { + const block_mode bm = (block_mode)block_mode_iter; + + if (comp_level == 0) + { + if ((g_block_mode_descs[block_mode_iter].m_flags & BASIST_HDR_6X6_LEVEL0) == 0) + continue; + } + else if (comp_level == 1) + { + if ((g_block_mode_descs[block_mode_iter].m_flags & BASIST_HDR_6X6_LEVEL1) == 0) + continue; + } + else if (comp_level == 2) + { + if ((g_block_mode_descs[block_mode_iter].m_flags & BASIST_HDR_6X6_LEVEL2) == 0) + continue; + } + + if (global_cfg.m_block_stat_optimizations_flag) + { + if ((comp_level <= 3) && (g_block_mode_descs[block_mode_iter].m_dp)) + { + if ((global_cfg.m_lambda > 0.0f) && (!complex_block) && (g_block_mode_descs[block_mode_iter].m_grid_x == 2) && (g_block_mode_descs[block_mode_iter].m_grid_y == 2)) + { + if (g_block_mode_descs[block_mode_iter].m_dp_channel != desired_dp_chan_2x2) + continue; + } + else + { + if (g_block_mode_descs[block_mode_iter].m_dp_channel != desired_dp_chan) + continue; + } + } + + if (comp_level <= 3) + { + const uint32_t grid_x = g_block_mode_descs[block_mode_iter].m_grid_x; + const uint32_t grid_y = g_block_mode_descs[block_mode_iter].m_grid_y; + + if (!g_block_mode_descs[block_mode_iter].m_dp) + { + // Minor gain (.5-1% less canidates) + if 
(very_detailed_block) + { + if (grid_x * grid_y <= 12) + { + debug_state.m_detail_stats[0].fetch_add(1, std::memory_order_relaxed); + continue; + } + } + + // Major gains (10-25% less candidates) + if (very_blurry_block) + { + if ((grid_x > 4) || (grid_y > 4) || (g_block_mode_descs[block_mode_iter].m_num_partitions > 1)) + { + debug_state.m_detail_stats[1].fetch_add(1, std::memory_order_relaxed); + continue; + } + } + if (super_blurry_block) + { + if ((grid_x > 3) || (grid_y > 3) || (g_block_mode_descs[block_mode_iter].m_num_partitions > 1)) + { + debug_state.m_detail_stats[2].fetch_add(1, std::memory_order_relaxed); + continue; + } + } + } + + if (grid_x != grid_y) + { + if (grid_x < grid_y) + { + if (!filter_horizontally) + { + debug_state.m_detail_stats[3].fetch_add(1, std::memory_order_relaxed); + continue; + } + } + else + { + if (filter_horizontally) + { + debug_state.m_detail_stats[4].fetch_add(1, std::memory_order_relaxed); + continue; + } + } + } + } + + if (global_cfg.m_lambda == 0.0f) + { + // Rarely useful if lambda=0 + if ((g_block_mode_descs[block_mode_iter].m_grid_x == 2) && (g_block_mode_descs[block_mode_iter].m_grid_y == 2)) + continue; + } + } // block_stat_optimizations_flag + + if ((!use_single_subset_mode7) && + (g_block_mode_descs[block_mode_iter].m_cem == 7) && + (g_block_mode_descs[block_mode_iter].m_num_partitions == 1)) + { + debug_state.m_total_mode7_skips.fetch_add(1, std::memory_order_relaxed); + continue; + } + + for (uint32_t endpoint_mode_iter = 0; endpoint_mode_iter < (uint32_t)endpoint_mode::cTotal; endpoint_mode_iter++) + { + if (global_cfg.m_lambda == 0.0f) + { + // No use trying anything else + if (endpoint_mode_iter != (uint32_t)endpoint_mode::cRaw) + continue; + } + + if (global_cfg.m_disable_delta_endpoint_usage) + { + if ((endpoint_mode_iter == (uint32_t)endpoint_mode::cUseUpperDelta) || (endpoint_mode_iter == (uint32_t)endpoint_mode::cUseLeftDelta)) + continue; + } + + if (!global_cfg.m_favor_higher_compression) + { + if 
(comp_level == 0) + { + if (endpoint_mode_iter == (uint32_t)endpoint_mode::cUseUpperDelta) + continue; + } + + if (comp_level <= 1) + { + if ((endpoint_mode_iter == (uint32_t)endpoint_mode::cUseLeft) || (endpoint_mode_iter == (uint32_t)endpoint_mode::cUseUpper)) + continue; + } + } + + const endpoint_mode em = (endpoint_mode)endpoint_mode_iter; + + switch (em) + { + case endpoint_mode::cUseLeft: + case endpoint_mode::cUseUpper: + { + const block_mode_desc& local_md = g_block_mode_descs[block_mode_iter]; + const uint32_t cem = local_md.m_cem; + + if (local_md.m_num_partitions > 1) + break; + + if ((em == endpoint_mode::cUseLeft) && (!has_left_neighbor)) + break; + else if ((em == endpoint_mode::cUseUpper) && (!has_upper_neighbor)) + break; + + candidate_encoding candidate; + candidate.m_coder.reserve(24); + astc_helpers::log_astc_block& coded_log_blk = candidate.m_coded_log_blk; + + int nx = bx, ny = by; + if (em == endpoint_mode::cUseLeft) + nx--; + else + ny--; + + const candidate_encoding& neighbor_blk = enc_state.coded_blocks(nx, ny); + if (neighbor_blk.m_encoding_type == encoding_type::cSolid) + break; + assert((neighbor_blk.m_encoding_type == encoding_type::cBlock) || (neighbor_blk.m_encoding_type == encoding_type::cReuse)); + + const block_mode_desc& neighbor_md = g_block_mode_descs[(uint32_t)neighbor_blk.m_block_mode]; + + if (neighbor_md.m_cem != cem) + break; + + assert(neighbor_blk.m_coded_log_blk.m_color_endpoint_modes[0] == cem); + + const uint32_t grid_x = local_md.m_grid_x, grid_y = local_md.m_grid_y; + const bool dual_plane = local_md.m_dp; + const uint32_t num_grid_samples = grid_x * grid_y; + const uint32_t num_endpoint_vals = get_num_endpoint_vals(local_md.m_cem); + + coded_log_blk.m_grid_width = (uint8_t)grid_x; + coded_log_blk.m_grid_height = (uint8_t)grid_y; + coded_log_blk.m_dual_plane = (uint8_t)dual_plane; + coded_log_blk.m_color_component_selector = (uint8_t)local_md.m_dp_channel; + coded_log_blk.m_num_partitions = 1; + 
coded_log_blk.m_color_endpoint_modes[0] = (uint8_t)neighbor_md.m_cem; + coded_log_blk.m_weight_ise_range = (uint8_t)local_md.m_weight_ise_range; + + // We're not explictly writing any endpoints, just reusing existing ones. So copy the neighbor's endpoints unchanged (so no loss). + coded_log_blk.m_endpoint_ise_range = neighbor_blk.m_coded_log_blk.m_endpoint_ise_range; + memcpy(coded_log_blk.m_endpoints, neighbor_blk.m_coded_log_blk.m_endpoints, num_endpoint_vals); + + uint8_t transcode_endpoints[basist::NUM_MODE11_ENDPOINTS]; + + // Requantize the neighbor's endpoints to whatever we'll have to transcode into to make a valid ASTC encoding. + basist::astc_6x6_hdr::requantize_ise_endpoints(neighbor_md.m_cem, + neighbor_blk.m_coded_log_blk.m_endpoint_ise_range, neighbor_blk.m_coded_log_blk.m_endpoints, + local_md.m_transcode_endpoint_ise_range, transcode_endpoints); + + // Now encode the block using the transcoded endpoints + basist::half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3]; + + if (cem == 7) + { + status = get_astc_hdr_mode_7_block_colors(transcode_endpoints, &decoded_half[0][0], nullptr, + astc_helpers::get_ise_levels(local_md.m_weight_ise_range), local_md.m_weight_ise_range, local_md.m_transcode_endpoint_ise_range); + } + else + { + status = get_astc_hdr_mode_11_block_colors(transcode_endpoints, &decoded_half[0][0], nullptr, + astc_helpers::get_ise_levels(local_md.m_weight_ise_range), local_md.m_weight_ise_range, local_md.m_transcode_endpoint_ise_range); + } + if (!status) + break; + + uint8_t trial_weights0[BLOCK_W * BLOCK_H], trial_weights1[BLOCK_W * BLOCK_H]; + if (dual_plane) + { + eval_selectors_dual_plane(local_md.m_dp_channel, BLOCK_W * BLOCK_H, trial_weights0, trial_weights1, (basist::half_float*)&half_pixels[0][0][0], astc_helpers::get_ise_levels(local_md.m_weight_ise_range), &decoded_half[0][0], coptions, UINT32_MAX); + + downsample_ise_weights_dual_plane( + local_md.m_weight_ise_range, local_md.m_weight_ise_range, + BLOCK_W, BLOCK_H, + 
grid_x, grid_y, + trial_weights0, trial_weights1, coded_log_blk.m_weights); + } + else + { + eval_selectors(BLOCK_W * BLOCK_H, trial_weights0, local_md.m_weight_ise_range, (basist::half_float*)&half_pixels[0][0][0], astc_helpers::get_ise_levels(local_md.m_weight_ise_range), &decoded_half[0][0], coptions, UINT32_MAX); + + downsample_ise_weights( + local_md.m_weight_ise_range, local_md.m_weight_ise_range, + BLOCK_W, BLOCK_H, + grid_x, grid_y, + trial_weights0, coded_log_blk.m_weights); + } + + // Transcode these codable weights to ASTC weights. + uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H * 2]; + basist::astc_6x6_hdr::requantize_astc_weights(num_grid_samples * (dual_plane ? 2 : 1), coded_log_blk.m_weights, local_md.m_weight_ise_range, transcode_weights, local_md.m_transcode_weight_ise_range); + + // Create the block the decoder would transcode into. + astc_helpers::log_astc_block& decomp_blk = candidate.m_decomp_log_blk; + decomp_blk.clear(); + + decomp_blk.m_color_endpoint_modes[0] = (uint8_t)local_md.m_cem; + decomp_blk.m_dual_plane = local_md.m_dp; + decomp_blk.m_color_component_selector = (uint8_t)local_md.m_dp_channel; + decomp_blk.m_num_partitions = 1; + decomp_blk.m_endpoint_ise_range = (uint8_t)local_md.m_transcode_endpoint_ise_range; + decomp_blk.m_weight_ise_range = (uint8_t)local_md.m_transcode_weight_ise_range; + + memcpy(decomp_blk.m_endpoints, transcode_endpoints, num_endpoint_vals); + + copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_blk); + + if (!validate_log_blk(decomp_blk)) + { + fmt_error_printf("pack_astc_block() failed\n"); + return false; + } + + status = decode_astc_block(BLOCK_W, BLOCK_H, decomp_blk, &candidate.m_comp_pixels[0][0]); + if (!status) + { + fmt_error_printf("decode_astc_block() failed\n"); + return false; + } + + candidate.m_coder.put_bits(BLOCK_CODE, BLOCK_CODE_LEN); + code_block(candidate.m_coder, candidate.m_coded_log_blk, (block_mode)block_mode_iter, em, nullptr); + + candidate.m_encoding_type 
= encoding_type::cBlock; + candidate.m_endpoint_mode = em; + candidate.m_block_mode = bm; + + candidates.emplace_back(std::move(candidate)); + + break; + } + case endpoint_mode::cUseLeftDelta: + case endpoint_mode::cUseUpperDelta: + { + const block_mode_desc& local_md = g_block_mode_descs[block_mode_iter]; + const uint32_t cem = local_md.m_cem; + + if (local_md.m_num_partitions > 1) + break; + + if ((em == endpoint_mode::cUseLeftDelta) && (!has_left_neighbor)) + break; + else if ((em == endpoint_mode::cUseUpperDelta) && (!has_upper_neighbor)) + break; + + candidate_encoding candidate; + candidate.m_coder.reserve(24); + astc_helpers::log_astc_block& coded_log_blk = candidate.m_coded_log_blk; + + int nx = bx, ny = by; + if (em == endpoint_mode::cUseLeftDelta) + nx--; + else + ny--; + + const candidate_encoding& neighbor_blk = enc_state.coded_blocks(nx, ny); + if (neighbor_blk.m_encoding_type == encoding_type::cSolid) + break; + assert((neighbor_blk.m_encoding_type == encoding_type::cBlock) || (neighbor_blk.m_encoding_type == encoding_type::cReuse)); + + const block_mode_desc& neighbor_md = g_block_mode_descs[(uint32_t)neighbor_blk.m_block_mode]; + + if (neighbor_md.m_cem != cem) + break; + + assert(neighbor_md.m_cem == local_md.m_cem); + + const uint32_t grid_x = local_md.m_grid_x, grid_y = local_md.m_grid_y; + const bool dual_plane = local_md.m_dp; + const uint32_t num_grid_samples = grid_x * grid_y; + const uint32_t num_endpoint_vals = get_num_endpoint_vals(local_md.m_cem); + + // Dequantize neighbor's endpoints to ISE 20 + uint8_t neighbor_endpoints_ise20[basist::NUM_MODE11_ENDPOINTS]; + basist::astc_6x6_hdr::requantize_ise_endpoints(neighbor_md.m_cem, + neighbor_blk.m_coded_log_blk.m_endpoint_ise_range, neighbor_blk.m_coded_log_blk.m_endpoints, + astc_helpers::BISE_256_LEVELS, neighbor_endpoints_ise20); + + // Requantize neighbor's endpoints to our local desired coding ISE range + uint8_t neighbor_endpoints_coding_ise_local[basist::NUM_MODE11_ENDPOINTS]; + 
basist::astc_6x6_hdr::requantize_ise_endpoints(neighbor_md.m_cem, astc_helpers::BISE_256_LEVELS, neighbor_endpoints_ise20, local_md.m_endpoint_ise_range, neighbor_endpoints_coding_ise_local); + + uint8_t blk_endpoints[basist::NUM_MODE11_ENDPOINTS]; + uint8_t blk_weights0[NUM_BLOCK_PIXELS], blk_weights1[NUM_BLOCK_PIXELS]; + + // Now try to encode the current block using the neighbor's endpoints submode. + double err = 0.0f; + uint32_t best_submode = 0; + + if (cem == 7) + { + int maj_index, submode_index; + decode_cem_7_config(neighbor_endpoints_ise20, submode_index, maj_index); + + int first_submode = submode_index, last_submode = submode_index; + + err = encode_astc_hdr_block_mode_7( + NUM_BLOCK_PIXELS, + (basist::half_float(*)[3])half_pixels, (vec4F*)block_pixels_q16, + local_md.m_weight_ise_range, + best_submode, + BIG_FLOAT_VAL, + blk_endpoints, blk_weights0, + coptions, + local_md.m_endpoint_ise_range, + first_submode, last_submode, + &enc_block_stats); + } + else + { + int maj_index, submode_index; + decode_cem_11_config(neighbor_endpoints_ise20, submode_index, maj_index); + + int first_submode = -1, last_submode = -1; + if (maj_index == 3) + { + // direct + } + else + { + first_submode = submode_index; + last_submode = submode_index; + } + + if (dual_plane) + { + err = encode_astc_hdr_block_mode_11_dual_plane( + NUM_BLOCK_PIXELS, + (basist::half_float(*)[3])half_pixels, (vec4F*)block_pixels_q16, + local_md.m_dp_channel, + local_md.m_weight_ise_range, + best_submode, + BIG_FLOAT_VAL, + blk_endpoints, blk_weights0, blk_weights1, + coptions, + false, + local_md.m_endpoint_ise_range, + false, //uber_mode_flag, + false, + first_submode, last_submode, true); + } + else + { + err = encode_astc_hdr_block_mode_11( + NUM_BLOCK_PIXELS, + (basist::half_float(*)[3])half_pixels, (vec4F*)block_pixels_q16, + local_md.m_weight_ise_range, + best_submode, + BIG_FLOAT_VAL, + blk_endpoints, blk_weights0, + coptions, + false, + local_md.m_endpoint_ise_range, + false, 
//uber_mode_flag, + false, + first_submode, last_submode, true, + mode11_opt_mode, + &enc_block_stats); + } + } + + if (err == BIG_FLOAT_VAL) + break; + + uint8_t endpoint_deltas[basist::NUM_MODE11_ENDPOINTS]; + + // TODO: For now, just try 5 bits for each endpoint. Can tune later. + // This isn't right, it's computing the deltas in ISE space. + //const uint32_t NUM_ENDPOINT_DELTA_BITS = 5; + const int total_endpoint_delta_vals = 1 << NUM_ENDPOINT_DELTA_BITS; + const int low_delta_limit = -(total_endpoint_delta_vals / 2), high_delta_limit = (total_endpoint_delta_vals / 2) - 1; + + const auto& ise_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(local_md.m_endpoint_ise_range).m_ISE_to_rank; + + bool all_deltas_in_limits = true; + for (uint32_t i = 0; i < num_endpoint_vals; i++) + { + int endpoint_delta = (int)ise_to_rank[blk_endpoints[i]] - (int)ise_to_rank[neighbor_endpoints_coding_ise_local[i]]; + + if ((endpoint_delta < low_delta_limit) || (endpoint_delta > high_delta_limit)) + all_deltas_in_limits = false; + + endpoint_deltas[i] = (uint8_t)(endpoint_delta + -low_delta_limit); + } + + if (all_deltas_in_limits) + { + coded_log_blk.m_grid_width = (uint8_t)grid_x; + coded_log_blk.m_grid_height = (uint8_t)grid_y; + coded_log_blk.m_dual_plane = (uint8_t)dual_plane; + coded_log_blk.m_color_component_selector = (uint8_t)local_md.m_dp_channel; + coded_log_blk.m_num_partitions = 1; + coded_log_blk.m_color_endpoint_modes[0] = (uint8_t)local_md.m_cem; + coded_log_blk.m_weight_ise_range = (uint8_t)local_md.m_weight_ise_range; + coded_log_blk.m_endpoint_ise_range = (uint8_t)local_md.m_endpoint_ise_range; + + memcpy(coded_log_blk.m_endpoints, blk_endpoints, num_endpoint_vals); + + uint8_t transcode_endpoints[basist::NUM_MODE11_ENDPOINTS]; + uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H * 2]; + + basist::astc_6x6_hdr::requantize_ise_endpoints(local_md.m_cem, local_md.m_endpoint_ise_range, blk_endpoints, local_md.m_transcode_endpoint_ise_range, 
transcode_endpoints); + + if (dual_plane) + { + downsample_ise_weights_dual_plane( + local_md.m_weight_ise_range, local_md.m_weight_ise_range, + BLOCK_W, BLOCK_H, + grid_x, grid_y, + blk_weights0, blk_weights1, + coded_log_blk.m_weights); + } + else + { + downsample_ise_weights( + local_md.m_weight_ise_range, local_md.m_weight_ise_range, + BLOCK_W, BLOCK_H, + grid_x, grid_y, + blk_weights0, coded_log_blk.m_weights); + } + + basist::astc_6x6_hdr::requantize_astc_weights(num_grid_samples * (dual_plane ? 2 : 1), coded_log_blk.m_weights, local_md.m_weight_ise_range, transcode_weights, local_md.m_transcode_weight_ise_range); + + // Create the block the decoder would transcode into. + + astc_helpers::log_astc_block& decomp_blk = candidate.m_decomp_log_blk; + decomp_blk.clear(); + + decomp_blk.m_color_endpoint_modes[0] = (uint8_t)local_md.m_cem; + decomp_blk.m_dual_plane = local_md.m_dp; + decomp_blk.m_color_component_selector = (uint8_t)local_md.m_dp_channel; + decomp_blk.m_num_partitions = 1; + decomp_blk.m_endpoint_ise_range = (uint8_t)local_md.m_transcode_endpoint_ise_range; + decomp_blk.m_weight_ise_range = (uint8_t)local_md.m_transcode_weight_ise_range; + + memcpy(decomp_blk.m_endpoints, transcode_endpoints, num_endpoint_vals); + + copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_blk); + + if (!validate_log_blk(decomp_blk)) + { + fmt_error_printf("pack_astc_block() failed\n"); + return false; + } + + status = decode_astc_block(BLOCK_W, BLOCK_H, decomp_blk, &candidate.m_comp_pixels[0][0]); + if (!status) + { + fmt_error_printf("decode_astc_block() failed\n"); + return false; + } + + candidate.m_coder.put_bits(BLOCK_CODE, BLOCK_CODE_LEN); + code_block(candidate.m_coder, candidate.m_coded_log_blk, bm, em, endpoint_deltas); + + candidate.m_encoding_type = encoding_type::cBlock; + candidate.m_endpoint_mode = em; + candidate.m_block_mode = bm; + + candidates.emplace_back(std::move(candidate)); + } + + break; + } + case endpoint_mode::cRaw: + { + //if 
(candidates.size() == 339) + // fmt_printf("!"); + + const auto& mode_desc = g_block_mode_descs[(uint32_t)bm]; + const uint32_t cem = mode_desc.m_cem; + //const uint32_t num_endpoint_vals = get_num_endpoint_vals(cem); + const bool dual_plane = mode_desc.m_dp; + + if ((global_cfg.m_disable_twothree_subsets) && (mode_desc.m_num_partitions >= 2)) + break; + + if (mode_desc.m_num_partitions == 3) + { + assert(!dual_plane); + + if (!has_estimated_parts3) + break; + + assert(mode_desc.m_weight_ise_range == mode_desc.m_transcode_weight_ise_range); + assert(mode_desc.m_endpoint_ise_range == mode_desc.m_transcode_endpoint_ise_range); + + trial_result res; + + status = encode_block_3_subsets( + res, + cem, + mode_desc.m_grid_x, mode_desc.m_grid_y, + mode_desc.m_weight_ise_range, mode_desc.m_endpoint_ise_range, + &half_pixels[0][0], (vec4F*)block_pixels_q16, + coptions, + uber_mode_flag, + best_parts3, total_parts3, comp_level, mode11_opt_mode); + + if (!status) + break; + + assert(res.m_valid); + + candidate_encoding candidate; + candidate.m_coder.reserve(24); + astc_helpers::log_astc_block& coded_log_blk = candidate.m_coded_log_blk; + + coded_log_blk = res.m_log_blk; + + astc_helpers::log_astc_block& decomp_blk = candidate.m_decomp_log_blk; + decomp_blk = res.m_log_blk; + + if (!validate_log_blk(decomp_blk)) + { + fmt_error_printf("pack_astc_block() failed\n"); + return false; + } + + status = decode_astc_block(BLOCK_W, BLOCK_H, decomp_blk, &candidate.m_comp_pixels[0][0]); + if (!status) + { + fmt_error_printf("decode_astc_block() failed\n"); + return false; + } + + candidate.m_coder.put_bits(BLOCK_CODE, BLOCK_CODE_LEN); + code_block(candidate.m_coder, candidate.m_coded_log_blk, bm, em, nullptr); + + candidate.m_encoding_type = encoding_type::cBlock; + candidate.m_endpoint_mode = em; + candidate.m_block_mode = bm; + + candidates.emplace_back(std::move(candidate)); + } + else if (mode_desc.m_num_partitions == 2) + { + assert(!dual_plane); + + if (!has_estimated_parts2) + 
break; + + assert(mode_desc.m_weight_ise_range == mode_desc.m_transcode_weight_ise_range); + assert(mode_desc.m_endpoint_ise_range == mode_desc.m_transcode_endpoint_ise_range); + + for (uint32_t est_part_iter = 0; est_part_iter < total_parts2; est_part_iter++) + { + trial_result results[2]; + + assert(((cem == 11) && any_2subset_mode11_enabled) || ((cem == 7) && any_2subset_mode7_enabled)); + + status = encode_block_2_subsets( + results, + mode_desc.m_grid_x, mode_desc.m_grid_y, + mode_desc.m_cem, + mode_desc.m_weight_ise_range, mode_desc.m_endpoint_ise_range, + &half_pixels[0][0], (vec4F*)block_pixels_q16, + coptions, + uber_mode_flag, + (cem == 11) ? best_parts2_mode11[est_part_iter] : best_parts2_mode7[est_part_iter], + comp_level, + mode11_opt_mode, + true); + + if (!status) + continue; + + for (uint32_t r_iter = 0; r_iter < 2; r_iter++) + { + const trial_result& res = results[r_iter]; + + if (!res.m_valid) + continue; + + candidate_encoding candidate; + candidate.m_coder.reserve(24); + astc_helpers::log_astc_block& coded_log_blk = candidate.m_coded_log_blk; + + coded_log_blk = res.m_log_blk; + + astc_helpers::log_astc_block& decomp_blk = candidate.m_decomp_log_blk; + decomp_blk = res.m_log_blk; + + if (!validate_log_blk(decomp_blk)) + { + fmt_error_printf("pack_astc_block() failed\n"); + return false; + } + + status = decode_astc_block(BLOCK_W, BLOCK_H, decomp_blk, &candidate.m_comp_pixels[0][0]); + if (!status) + { + fmt_error_printf("decode_astc_block() failed\n"); + return false; + } + + candidate.m_coder.put_bits(BLOCK_CODE, BLOCK_CODE_LEN); + code_block(candidate.m_coder, candidate.m_coded_log_blk, bm, em, nullptr); + + candidate.m_encoding_type = encoding_type::cBlock; + candidate.m_endpoint_mode = em; + candidate.m_block_mode = bm; + + candidates.emplace_back(std::move(candidate)); + + } // r_iter + } + } + else + { + // 1 subset + uint8_t blk_weights0[BLOCK_W * BLOCK_H], blk_weights1[BLOCK_W * BLOCK_H]; + uint32_t best_submode = 0; + + 
candidate_encoding candidate; + candidate.m_coder.reserve(24); + astc_helpers::log_astc_block& coded_log_blk = candidate.m_coded_log_blk; + + const uint32_t grid_x = mode_desc.m_grid_x, grid_y = mode_desc.m_grid_y; + const uint32_t num_grid_samples = grid_x * grid_y; + + const half_vec3* pBlock_pixels_half = &half_pixels[0][0]; + const vec4F* pBlock_pixels_q16 = &block_pixels_q16[0][0]; + + const uint32_t num_grid_samples_dp = num_grid_samples * (dual_plane ? 2 : 1); + + uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H * 2]; + + coded_log_blk.m_grid_width = (uint8_t)grid_x; + coded_log_blk.m_grid_height = (uint8_t)grid_y; + coded_log_blk.m_dual_plane = (uint8_t)dual_plane; + coded_log_blk.m_color_component_selector = (uint8_t)mode_desc.m_dp_channel; + coded_log_blk.m_num_partitions = 1; + coded_log_blk.m_color_endpoint_modes[0] = (uint8_t)mode_desc.m_cem; + coded_log_blk.m_weight_ise_range = (uint8_t)mode_desc.m_weight_ise_range; + coded_log_blk.m_endpoint_ise_range = (uint8_t)mode_desc.m_endpoint_ise_range; + + if ((cem == 11) && (!dual_plane) && ((grid_x < BLOCK_W) || (grid_y < BLOCK_H))) + { + double e = encode_astc_hdr_block_downsampled_mode_11( + BLOCK_W, BLOCK_H, grid_x, grid_y, + mode_desc.m_weight_ise_range, mode_desc.m_endpoint_ise_range, + NUM_BLOCK_PIXELS, (basist::half_float(*)[3])pBlock_pixels_half, pBlock_pixels_q16, + BIG_FLOAT_VAL, + FIRST_MODE11_SUBMODE_INDEX, MAX_MODE11_SUBMODE_INDEX, false, mode11_opt_mode, + coded_log_blk.m_endpoints, coded_log_blk.m_weights, best_submode, + coptions, + &enc_block_stats); + + if (e == BIG_FLOAT_VAL) + break; + } + else + { + if (cem == 7) + { + assert(!dual_plane); + + double e = encode_astc_hdr_block_mode_7( + NUM_BLOCK_PIXELS, + (basist::half_float(*)[3])pBlock_pixels_half, pBlock_pixels_q16, + mode_desc.m_weight_ise_range, + best_submode, + BIG_FLOAT_VAL, + coded_log_blk.m_endpoints, + blk_weights0, + coptions, + mode_desc.m_endpoint_ise_range, + 0, MAX_MODE7_SUBMODE_INDEX, + &enc_block_stats); + 
BASISU_NOTE_UNUSED(e); + } + else + { + double e; + + if (dual_plane) + { + e = encode_astc_hdr_block_mode_11_dual_plane( + NUM_BLOCK_PIXELS, + (basist::half_float(*)[3])pBlock_pixels_half, pBlock_pixels_q16, + mode_desc.m_dp_channel, + mode_desc.m_weight_ise_range, + best_submode, + BIG_FLOAT_VAL, + coded_log_blk.m_endpoints, + blk_weights0, blk_weights1, + coptions, + false, + mode_desc.m_endpoint_ise_range, uber_mode_flag, false, -1, 7, false); + } + else + { + e = encode_astc_hdr_block_mode_11( + NUM_BLOCK_PIXELS, + (basist::half_float(*)[3])pBlock_pixels_half, pBlock_pixels_q16, + mode_desc.m_weight_ise_range, + best_submode, + BIG_FLOAT_VAL, + coded_log_blk.m_endpoints, + blk_weights0, + coptions, + false, + mode_desc.m_endpoint_ise_range, uber_mode_flag, false, -1, 7, false, + mode11_opt_mode, + &enc_block_stats); + } + + if (e == BIG_FLOAT_VAL) + break; + } + + if (dual_plane) + { + downsample_ise_weights_dual_plane( + mode_desc.m_weight_ise_range, mode_desc.m_weight_ise_range, + BLOCK_W, BLOCK_H, + grid_x, grid_y, + blk_weights0, blk_weights1, + coded_log_blk.m_weights); + } + else + { + downsample_ise_weights( + mode_desc.m_weight_ise_range, mode_desc.m_weight_ise_range, + BLOCK_W, BLOCK_H, + grid_x, grid_y, + blk_weights0, coded_log_blk.m_weights); + + if ((comp_level >= MIN_REFINE_LEVEL) && ((grid_x < BLOCK_W) || (grid_y < BLOCK_H))) + { + bool refine_status = refine_endpoints(cem, + mode_desc.m_endpoint_ise_range, coded_log_blk.m_endpoints, + 6, 6, mode_desc.m_grid_x, mode_desc.m_grid_y, + coded_log_blk.m_weights, mode_desc.m_weight_ise_range, + BLOCK_W * BLOCK_H, + (basist::half_float(*)[3])pBlock_pixels_half, (vec4F*)pBlock_pixels_q16, + nullptr, + coptions, mode11_opt_mode); + BASISU_NOTE_UNUSED(refine_status); + } + } + } + + basist::astc_6x6_hdr::requantize_astc_weights(num_grid_samples_dp, coded_log_blk.m_weights, mode_desc.m_weight_ise_range, transcode_weights, mode_desc.m_transcode_weight_ise_range); + + // Create the block the decoder would 
transcode into. + astc_helpers::log_astc_block& decomp_blk = candidate.m_decomp_log_blk; + decomp_blk.clear(); + + decomp_blk.m_color_endpoint_modes[0] = (uint8_t)mode_desc.m_cem; + decomp_blk.m_dual_plane = mode_desc.m_dp; + decomp_blk.m_color_component_selector = (uint8_t)mode_desc.m_dp_channel; + decomp_blk.m_num_partitions = 1; + decomp_blk.m_endpoint_ise_range = (uint8_t)mode_desc.m_transcode_endpoint_ise_range; + decomp_blk.m_weight_ise_range = (uint8_t)mode_desc.m_transcode_weight_ise_range; + + basist::astc_6x6_hdr::requantize_ise_endpoints(mode_desc.m_cem, mode_desc.m_endpoint_ise_range, coded_log_blk.m_endpoints, mode_desc.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints); + + copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_blk); + + if (!validate_log_blk(decomp_blk)) + { + fmt_error_printf("pack_astc_block() failed\n"); + return false; + } + + status = decode_astc_block(BLOCK_W, BLOCK_H, decomp_blk, &candidate.m_comp_pixels[0][0]); + if (!status) + { + fmt_error_printf("decode_astc_block() failed\n"); + return false; + } + + candidate.m_coder.put_bits(BLOCK_CODE, BLOCK_CODE_LEN); + code_block(candidate.m_coder, candidate.m_coded_log_blk, bm, em, nullptr); + + candidate.m_encoding_type = encoding_type::cBlock; + candidate.m_endpoint_mode = em; + candidate.m_block_mode = bm; + + candidates.emplace_back(std::move(candidate)); + } + + break; + } + default: + assert(0); + fmt_debug_printf("Invalid endpoint mode\n"); + return false; + + } // switch (em) + + } // endpoint_mode_iter + + } // block_mode_iter + + } // is_solid_block + + //------------------------------------------------ + + debug_state.m_total_candidates_considered.fetch_add(candidates.size_u32(), std::memory_order_relaxed); + atomic_max(debug_state.m_max_candidates_considered, candidates.size_u32()); + + for (uint32_t candidate_iter = 0; candidate_iter < candidates.size_u32(); candidate_iter++) + { + auto& candidate = candidates[candidate_iter]; + + for (uint32_t y = 
0; y < BLOCK_H; y++) + for (uint32_t x = 0; x < BLOCK_W; x++) + linear_rgb_to_itp(candidate.m_comp_pixels[y][x], candidate.m_comp_pixels_itp[y][x], global_cfg); + } + + // Find best overall candidate + double best_t = BIG_FLOAT_VAL; + int best_candidate_index = -1; + + float best_d_ssim = BIG_FLOAT_VAL; + + if (global_cfg.m_lambda == 0.0f) + { + for (uint32_t candidate_iter = 0; candidate_iter < candidates.size_u32(); candidate_iter++) + { + const auto& candidate = candidates[candidate_iter]; + + float candidate_d_ssim = 1.0f - compute_block_ssim_itp(BLOCK_W, BLOCK_H, &block_pixels_as_itp[0][0], &candidate.m_comp_pixels_itp[0][0]); + + if (candidate_d_ssim < best_d_ssim) + best_d_ssim = candidate_d_ssim; + + candidate_d_ssim *= SSIM_WEIGHT; + + float candidate_mse = MSE_WEIGHT * compute_block_mse_itp(BLOCK_W, BLOCK_H, &block_pixels_as_itp[0][0], &candidate.m_comp_pixels_itp[0][0], global_cfg.m_delta_itp_dark_adjustment); + + candidate_mse += candidate_d_ssim; + + float total_deblock_penalty = 0.0f; + if (global_cfg.m_deblocking_flag) + { + total_deblock_penalty = calc_deblocking_penalty_itp(bx, by, width, height, pass_src_img_itp, candidate) * global_cfg.m_deblock_penalty_weight; + } + candidate_mse += total_deblock_penalty * SSIM_WEIGHT; + + if ((candidate.m_encoding_type == encoding_type::cBlock) || (candidate.m_encoding_type == encoding_type::cReuse)) + { + // Bias the encoder away from 2 level blocks on complex blocks + // TODO: Perhaps only do this on large or non-interpolated grids + if (complex_block) + { + if (candidate.m_coded_log_blk.m_weight_ise_range == astc_helpers::BISE_2_LEVELS) + { + candidate_mse *= TWO_LEVEL_PENALTY; + } + } + + // Bias the encoder away from smaller weight grids if the block is very complex + // TODO: Use the DCT to compute an approximation of the block energy/variance retained vs. lost by downsampling. 
+ if (complex_block) + { + if ((candidate.m_coded_log_blk.m_grid_width == 2) && (candidate.m_coded_log_blk.m_grid_height == 2)) + candidate_mse *= COMPLEX_BLOCK_WEIGHT_GRID_2X2_MSE_PENALTY; + else if (minimum(candidate.m_coded_log_blk.m_grid_width, candidate.m_coded_log_blk.m_grid_height) <= 3) + candidate_mse *= COMPLEX_BLOCK_WEIGHT_GRID_3X3_MSE_PENALTY; + else if (minimum(candidate.m_coded_log_blk.m_grid_width, candidate.m_coded_log_blk.m_grid_height) <= 4) + candidate_mse *= COMPLEX_BLOCK_WEIGHT_GRID_4X4_MSE_PENALTY; + } + } + + float candidate_t = candidate_mse; + + if (candidate_t < best_t) + { + best_t = candidate_t; + best_candidate_index = candidate_iter; + } + + } // candidate_iter + + if (global_cfg.m_gaussian1_fallback && (outer_pass == 0) && (very_complex_block) && (best_d_ssim > SWITCH_TO_GAUSSIAN_FILTERED_THRESH1_D_SSIM)) + { + debug_state.m_total_gaussian1_blocks.fetch_add(1, std::memory_order_relaxed); + continue; + } + + const float block_y_contrast_ratio = block_hy / (block_ly + .00000125f); + + if (global_cfg.m_gaussian2_fallback && (comp_level >= 1) && (outer_pass == 1) && (very_complex_block) && (best_d_ssim > SWITCH_TO_GAUSSIAN_FILTERED_THRESH2_D_SSIM) && + (block_hy >= 18.0f) && (block_y_contrast_ratio > 150.0f) && + (block_avg_y >= 1.5f)) + { + debug_state.m_total_gaussian2_blocks.fetch_add(1, std::memory_order_relaxed); + continue; + } + } + else + { + assert(enc_state.smooth_block_mse_scales.get_width() > 0); + + // Compute block's perceptual weighting + float perceptual_scale = 0.0f; + for (uint32_t y = 0; y < BLOCK_H; y++) + for (uint32_t x = 0; x < BLOCK_W; x++) + perceptual_scale = basisu::maximumf(perceptual_scale, enc_state.smooth_block_mse_scales.at_clamped(bx * BLOCK_W + x, by * BLOCK_H + y)); + + // Very roughly normalize the computed distortion vs. bits. 
+ perceptual_scale *= 10.0f; + + for (uint32_t candidate_iter = 0; candidate_iter < candidates.size_u32(); candidate_iter++) + { + auto& candidate = candidates[candidate_iter]; + + float d_ssim = 1.0f - compute_block_ssim_itp(BLOCK_W, BLOCK_H, &block_pixels_as_itp[0][0], &candidate.m_comp_pixels_itp[0][0]); + + if (d_ssim < best_d_ssim) + best_d_ssim = (float)d_ssim; + + d_ssim *= SSIM_WEIGHT; + + float candidate_mse = MSE_WEIGHT * compute_block_mse_itp(BLOCK_W, BLOCK_H, &block_pixels_as_itp[0][0], &candidate.m_comp_pixels_itp[0][0], global_cfg.m_delta_itp_dark_adjustment); + + candidate_mse += d_ssim; + + float total_deblock_penalty = 0.0f; + if (global_cfg.m_deblocking_flag) + { + total_deblock_penalty = calc_deblocking_penalty_itp(bx, by, width, height, pass_src_img_itp, candidate) * global_cfg.m_deblock_penalty_weight; + } + candidate_mse += total_deblock_penalty * SSIM_WEIGHT; + + if ((candidate.m_encoding_type == encoding_type::cBlock) || (candidate.m_encoding_type == encoding_type::cReuse)) + { + // Bias the encoder away from 2 level blocks on complex blocks + if (complex_block) + { + if (candidate.m_coded_log_blk.m_weight_ise_range == astc_helpers::BISE_2_LEVELS) + { + candidate_mse *= TWO_LEVEL_PENALTY; + } + } + + // Bias the encoder away from smaller weight grids if the block is very complex + if (complex_block) + { + if ((candidate.m_coded_log_blk.m_grid_width == 2) && (candidate.m_coded_log_blk.m_grid_height == 2)) + candidate_mse *= COMPLEX_BLOCK_WEIGHT_GRID_2X2_MSE_PENALTY; + else if (minimum(candidate.m_coded_log_blk.m_grid_width, candidate.m_coded_log_blk.m_grid_height) <= 3) + candidate_mse *= COMPLEX_BLOCK_WEIGHT_GRID_3X3_MSE_PENALTY; + else if (minimum(candidate.m_coded_log_blk.m_grid_width, candidate.m_coded_log_blk.m_grid_height) <= 4) + candidate_mse *= COMPLEX_BLOCK_WEIGHT_GRID_4X4_MSE_PENALTY; + } + } + + float mode_penalty = 1.0f; + if (candidate.m_encoding_type == encoding_type::cSolid) + mode_penalty *= SOLID_PENALTY; + else if 
(candidate.m_encoding_type == encoding_type::cReuse) + mode_penalty *= REUSE_PENALTY; + else if (candidate.m_encoding_type == encoding_type::cRun) + mode_penalty *= (complex_block ? RUN_PENALTY * 2.0f : RUN_PENALTY); + + float candidate_bits = (float)candidate.m_coder.get_total_bits(); + float candidate_d = candidate_mse * mode_penalty; + + const float D_POWER = 2.0f; + float candidate_t = perceptual_scale * powf(candidate_d, D_POWER) + candidate_bits * (global_cfg.m_lambda * 1000.0f); + + candidate.m_t = candidate_t; + candidate.m_d = candidate_d; + candidate.m_bits = candidate_bits; + + if (candidate_t < best_t) + { + best_t = candidate_t; + best_candidate_index = candidate_iter; + } + + } // candidate_iter + + if (global_cfg.m_gaussian1_fallback && (outer_pass == 0) && (very_complex_block) && (best_d_ssim > SWITCH_TO_GAUSSIAN_FILTERED_THRESH1_D_SSIM)) + { + debug_state.m_total_gaussian1_blocks.fetch_add(1, std::memory_order_relaxed); + continue; + } + + const float block_y_contrast_ratio = block_hy / (block_ly + .00000125f); + + if (global_cfg.m_gaussian2_fallback && (comp_level >= 1) && (outer_pass == 1) && (very_complex_block) && (best_d_ssim > SWITCH_TO_GAUSSIAN_FILTERED_THRESH2_D_SSIM) && + (block_hy >= 18.0f) && (block_y_contrast_ratio > 150.0f) && + (block_avg_y >= 1.5f)) + { + debug_state.m_total_gaussian2_blocks.fetch_add(1, std::memory_order_relaxed); + continue; + } + + if (global_cfg.m_rdo_candidate_diversity_boost) + { + // candidate diversity boosting - consider candidates along/near the Pareto front + const candidate_encoding& comp_candidate = candidates[best_candidate_index]; + + float best_d = BIG_FLOAT_VAL; + + for (uint32_t candidate_iter = 0; candidate_iter < candidates.size_u32(); candidate_iter++) + { + const auto& candidate = candidates[candidate_iter]; + + if (candidate.m_bits <= comp_candidate.m_bits * global_cfg.m_rdo_candidate_diversity_boost_bit_window_weight) + { + if (candidate.m_d < best_d) + { + best_d = candidate.m_d; + 
best_candidate_index = candidate_iter; + } + } + } + } + + // candidate JND optimization - if there's a cheaper to code candidate that is nearly equivalent visually to the best candidate chose, choose that + if (global_cfg.m_jnd_optimization) + { + const candidate_encoding& cur_comp_candidate = candidates[best_candidate_index]; + + float new_best_candidate_bits = BIG_FLOAT_VAL; + int new_best_candidate_index = -1; + + for (uint32_t candidate_iter = 0; candidate_iter < candidates.size_u32(); candidate_iter++) + { + if ((int)candidate_iter == best_candidate_index) + continue; + + const auto& candidate = candidates[candidate_iter]; + + if (candidate.m_bits >= cur_comp_candidate.m_bits) + continue; + + float max_delta_itp = 0.0f; + for (uint32_t y = 0; y < BLOCK_H; y++) + { + for (uint32_t x = 0; x < BLOCK_W; x++) + { + float delta_itp = compute_pixel_delta_itp(cur_comp_candidate.m_comp_pixels_itp[y][x], candidate.m_comp_pixels_itp[y][x], block_pixels_as_itp[y][x], global_cfg.m_delta_itp_dark_adjustment); + max_delta_itp = maximum(max_delta_itp, delta_itp); + + if (max_delta_itp >= global_cfg.m_jnd_delta_itp_thresh) + goto skip; + } + } + + skip: + if (max_delta_itp >= global_cfg.m_jnd_delta_itp_thresh) + continue; + + if (candidate.m_bits < new_best_candidate_bits) + { + new_best_candidate_bits = candidate.m_bits; + new_best_candidate_index = candidate_iter; + } + } + + if (new_best_candidate_index != -1) + { + best_candidate_index = new_best_candidate_index; + debug_state.m_total_jnd_replacements.fetch_add(1, std::memory_order_relaxed); + } + } + + } // if (lambda == 0.0f) + + if (global_cfg.m_debug_images) + { + std::lock_guard lck(debug_state.m_stat_vis_mutex); + debug_state.m_stat_vis.fill_box(bx * 6, by * 6, 6, 6, vec4F(best_d_ssim, max_std_dev, lowpass_std_dev, 1.0f)); + } + + if (best_candidate_index < 0) + { + assert(best_candidate_index >= 0); + fmt_error_printf("No candidates!\n"); + return false; + } + + const auto& best_candidate = 
candidates[best_candidate_index]; + + assert(best_candidate.m_encoding_type != encoding_type::cInvalid); + + if (best_candidate.m_encoding_type == encoding_type::cRun) + { + if (!prev_run_len) + { + if (prev_encoding.get_total_bits()) + { +#if SYNC_MARKERS + strip_coded_bits.put_bits(0xDEAD, 16); +#endif + + strip_coded_bits.append(prev_encoding); + } + + assert(best_candidate.m_coder.get_total_bits()); + + prev_encoding = best_candidate.m_coder; + + prev_run_len = 1; + } + else + { + prev_run_len++; + + const uint32_t prev_run_bits = prev_encoding.get_total_bits_u32(); + assert(prev_run_bits); + BASISU_NOTE_UNUSED(prev_run_bits); + + const uint32_t num_dummy_bits = best_candidate.m_coder.get_total_bits_u32(); + BASISU_NOTE_UNUSED(num_dummy_bits); + + // Rewrite the previous encoding to extend the run length. + prev_encoding.restart(); + prev_encoding.put_bits(RUN_CODE, RUN_CODE_LEN); + prev_encoding.put_vlc(prev_run_len - 1, 5); + + assert(prev_encoding.get_total_bits() == prev_run_bits + num_dummy_bits); + } + } + else + { + if (prev_encoding.get_total_bits()) + { +#if SYNC_MARKERS + strip_coded_bits.put_bits(0xDEAD, 16); +#endif + + strip_coded_bits.append(prev_encoding); + } + + prev_encoding = best_candidate.m_coder; + prev_run_len = 0; + } + + memcpy(prev_comp_pixels, best_candidate.m_comp_pixels, sizeof(vec3F) * BLOCK_W * BLOCK_H); + + prev_candidate_encoding = best_candidate; + + if (best_candidate.m_encoding_type != encoding_type::cRun) + prev_non_run_candidate_encoding = best_candidate; + + { + std::lock_guard lck(debug_state.m_stats_mutex); + + debug_state.m_encoding_type_hist[(uint32_t)best_candidate.m_encoding_type]++; + + if (best_candidate.m_encoding_type == encoding_type::cBlock) + { + debug_state.m_endpoint_mode_hist[(uint32_t)best_candidate.m_endpoint_mode]++; + } + + if ((best_candidate.m_encoding_type == encoding_type::cReuse) || (best_candidate.m_encoding_type == encoding_type::cBlock)) + { + const uint32_t bm_index = 
(uint32_t)best_candidate.m_block_mode; + assert(bm_index < (uint32_t)block_mode::cBMTotalModes); + + debug_state.m_block_mode_hist[bm_index]++; + debug_state.m_block_mode_total_bits[bm_index] += best_candidate.m_coder.get_total_bits(); + + for (uint32_t i = 0; i < 3; i++) + { + debug_state.m_block_mode_comp_stats[bm_index][i].push_back(half_comp_stats[i]); + debug_state.m_block_mode_comparative_stats[bm_index][i].push_back(half_cross_chan_stats[i]); + } + } + + if (best_candidate.m_encoding_type == encoding_type::cReuse) + { + debug_state.m_reuse_num_parts[best_candidate.m_coded_log_blk.m_num_partitions].fetch_add(1, std::memory_order_relaxed); + + if (best_candidate.m_coded_log_blk.m_dual_plane) + debug_state.m_reuse_total_dp.fetch_add(1, std::memory_order_relaxed); + } + } + + enc_state.coded_blocks(bx, by) = prev_non_run_candidate_encoding; + + // Update decoded image + vec4F decoded_float_pixels[BLOCK_H][BLOCK_W]; + for (uint32_t y = 0; y < BLOCK_H; y++) + for (uint32_t x = 0; x < BLOCK_W; x++) + decoded_float_pixels[y][x] = best_candidate.m_comp_pixels[y][x]; + + enc_state.packed_img.set_block_clipped((vec4F*)decoded_float_pixels, bx * BLOCK_W, by * BLOCK_H, BLOCK_W, BLOCK_H); + + status = astc_helpers::pack_astc_block(enc_state.final_astc_blocks(bx, by), best_candidate.m_decomp_log_blk, nullptr, nullptr); + if (!status) + { + fmt_error_printf("Failed packing block\n"); + return false; + } + + const uint32_t r = debug_state.m_total_blocks_compressed.fetch_add(1, std::memory_order_relaxed); + if ((r & 2047) == 2047) + { + if (global_cfg.m_status_output) + { + basisu::fmt_printf("{} of {} total blocks compressed, {3.2}%\n", r, total_blocks, (r * 100.0f) / total_blocks); + } + } + + if ((global_cfg.m_debug_images) && + ((best_candidate.m_encoding_type != encoding_type::cRun) && (best_candidate.m_encoding_type != encoding_type::cSolid))) + { + std::lock_guard lck(debug_state.m_vis_image_mutex); + + if (best_candidate.m_decomp_log_blk.m_num_partitions == 2) + { + 
const int part2_unique_index = g_part2_seed_to_unique_index[best_candidate.m_decomp_log_blk.m_partition_id]; + assert((part2_unique_index >= 0) && (part2_unique_index < (int)NUM_UNIQUE_PARTITIONS2)); + + const partition_pattern_vec& pat = g_partitions2[part2_unique_index]; + + for (uint32_t y = 0; y < 6; y++) + { + for (uint32_t x = 0; x < 6; x++) + { + const uint32_t p = pat[x + y * 6]; + debug_state.m_part_vis.set_clipped(bx * 6 + x, by * 6 + y, color_rgba(p ? 100 : 0, 128, p ? 100 : 0, 255)); + } // x + } // y + } + else if (best_candidate.m_decomp_log_blk.m_num_partitions == 3) + { + //part_vis.fill_box(bx * 6, by * 6, 6, 6, color_rgba(0, 0, 255, 255)); + + const int part3_unique_index = g_part3_seed_to_unique_index[best_candidate.m_decomp_log_blk.m_partition_id]; + assert((part3_unique_index >= 0) && (part3_unique_index < (int)NUM_UNIQUE_PARTITIONS3)); + + const partition_pattern_vec& pat = g_partitions3[part3_unique_index]; + + for (uint32_t y = 0; y < 6; y++) + { + for (uint32_t x = 0; x < 6; x++) + { + const uint32_t p = pat[x + y * 6]; + color_rgba c(0, 0, 150, 255); + if (p == 1) + c.set(100, 0, 150, 255); + else if (p == 2) + c.set(0, 100, 150, 255); + debug_state.m_part_vis.set_clipped(bx * 6 + x, by * 6 + y, c); + } // x + } // y + } + else if (best_candidate.m_decomp_log_blk.m_dual_plane) + { + debug_state.m_part_vis.fill_box(bx * 6, by * 6, 6, 6, color_rgba(255, 0, 255, 255)); + } + else + { + debug_state.m_part_vis.fill_box(bx * 6, by * 6, 6, 6, color_rgba(255, 0, 0, 255)); + } + + color_rgba c; + c.set((best_candidate.m_coded_log_blk.m_grid_width * best_candidate.m_coded_log_blk.m_grid_height * 255 + 18) / 36); + debug_state.m_grid_vis.fill_box(bx * 6, by * 6, 6, 6, c); + + c.set(0, 0, 0, 255); + if (complex_block) + c[0] = 255; + + if (very_complex_block) + c[1] = 255; + + if (outer_pass == 2) + c[2] = 255; + else if (outer_pass == 1) + c[2] = 128; + + debug_state.m_mode_vis.fill_box(bx * 6, by * 6, 6, 6, c); + + c.set(0, 255, 0, 255); + if 
(best_candidate.m_coded_log_blk.m_color_endpoint_modes[0] == 7) + c.set(255, 0, 0, 255); + debug_state.m_mode_vis2.fill_box(bx * 6, by * 6, 6, 6, c); + + switch (best_candidate.m_encoding_type) + { + case encoding_type::cRun: + c.set(0, 0, 0, 255); + break; + case encoding_type::cSolid: + c.set(128, 128, 128, 255); // dark grey + break; + case encoding_type::cReuse: + c.set(255, 255, 0, 255); // yellow + break; + case encoding_type::cBlock: + { + switch (best_candidate.m_endpoint_mode) + { + case endpoint_mode::cRaw: + c.set(255, 0, 0, 255); // red + break; + case endpoint_mode::cUseLeft: + c.set(0, 0, 255, 255); // blue + break; + case endpoint_mode::cUseUpper: + c.set(0, 0, 192, 255); // darker blue + break; + case endpoint_mode::cUseLeftDelta: + c.set(0, 255, 0, 255); // green + break; + case endpoint_mode::cUseUpperDelta: + c.set(0, 192, 0, 255); // darker green + break; + default: + break; + } + + break; + } + default: + break; + } + + if (filtered_x_err < filtered_y_err) + c[3] = 0; + else + c[3] = 255; + + debug_state.m_enc_vis.fill_box(bx * 6, by * 6, 6, 6, c); + } + + break; + + } // outer_pass + + } // bx + + } // by + + if (prev_encoding.get_total_bits()) + { +#if SYNC_MARKERS + strip_coded_bits.put_bits(0xDEAD, 16); +#endif + + strip_coded_bits.append(prev_encoding); + } + + return true; +} + +bool g_initialized = false; + +void global_init() +{ + if (g_initialized) + return; + + interval_timer tm; + tm.start(); + + init_pq_tables(); + + init_partitions2_6x6(); + init_partitions3_6x6(); + + init_contrib_lists(); + + g_initialized = true; + + //fmt_printf("astc_6x6_hdr::global_init() total time: {}\n", tm.get_elapsed_secs()); +} + +bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_global_config &orig_global_cfg, job_pool *pJob_pool, + basisu::uint8_vec& intermediate_tex_data, basisu::uint8_vec& astc_tex_data, result_metrics& metrics) +{ + assert(g_initialized); + if (!g_initialized) + return false; + + assert(pJob_pool); + + if 
(orig_global_cfg.m_debug_output) + { + fmt_debug_printf("------ astc_6x6_hdr::compress_photo:\n"); + fmt_debug_printf("Source image dimensions: {}x{}\n", orig_src_img.get_width(), orig_src_img.get_height()); + fmt_debug_printf("Job pool total threads: {}\n", (uint64_t)pJob_pool->get_total_threads()); + orig_global_cfg.print(); + } + + if (!orig_src_img.get_width() || !orig_src_img.get_height()) + { + assert(false); + fmt_error_printf("compress_photo: Invalid source image\n"); + return false; + } + + astc_hdr_6x6_global_config global_cfg(orig_global_cfg); + + uastc_hdr_6x6_encode_state enc_state; + enc_state.master_coptions.m_q_log_bias = Q_LOG_BIAS_6x6; + enc_state.src_img = orig_src_img; + + //src_img.crop(256, 256); + + const uint32_t width = enc_state.src_img.get_width(); + const uint32_t height = enc_state.src_img.get_height(); + const uint32_t num_blocks_x = enc_state.src_img.get_block_width(BLOCK_W); + const uint32_t num_blocks_y = enc_state.src_img.get_block_height(BLOCK_H); + const uint32_t total_blocks = num_blocks_x * num_blocks_y; + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + for (uint32_t c = 0; c < 3; c++) + { + float f = enc_state.src_img(x, y)[c]; + + if (std::isinf(f) || std::isnan(f) || (f < 0.0f)) + f = 0; + else if (f > basist::ASTC_HDR_MAX_VAL) + f = basist::ASTC_HDR_MAX_VAL; + + enc_state.src_img(x, y)[c] = f; + + } // c + + } // x + } // y + + if (global_cfg.m_debug_images) + { + write_exr((global_cfg.m_debug_image_prefix + "orig.exr").c_str(), enc_state.src_img, 3, 0); + } + + image src_img_compressed; + tonemap_image_compressive2(src_img_compressed, enc_state.src_img); + + if (global_cfg.m_debug_images) + { + save_png(global_cfg.m_debug_image_prefix + "compressive_tone_map.png", src_img_compressed); + } + + smooth_map_params rp; + rp.m_debug_images = global_cfg.m_debug_images; + + if (global_cfg.m_lambda != 0.0f) + { + if (global_cfg.m_status_output) + fmt_printf("Creating RDO perceptual weighting 
maps\n"); + + create_smooth_maps2(enc_state.smooth_block_mse_scales, src_img_compressed, rp); + } + + if (global_cfg.m_status_output) + fmt_printf("Blurring image\n"); + + enc_state.src_img_filtered1.resize(width, height); + image_resample(enc_state.src_img, enc_state.src_img_filtered1, "gaussian", global_cfg.m_gaussian1_strength); //1.45f); + + enc_state.src_img_filtered2.resize(width, height); + image_resample(enc_state.src_img, enc_state.src_img_filtered2, "gaussian", global_cfg.m_gaussian2_strength); //1.83f); + + if (global_cfg.m_debug_images) + { + write_exr((global_cfg.m_debug_image_prefix + "blurred1.exr").c_str(), enc_state.src_img_filtered1, 3, 0); + write_exr((global_cfg.m_debug_image_prefix + "blurred2.exr").c_str(), enc_state.src_img_filtered2, 3, 0); + } + + if (global_cfg.m_status_output) + fmt_printf("Transforming to ITP\n"); + + enc_state.src_img_itp.resize(width, height); + convet_rgb_image_to_itp(enc_state.src_img, enc_state.src_img_itp, global_cfg); + + enc_state.src_img_filtered1_itp.resize(width, height); + convet_rgb_image_to_itp(enc_state.src_img_filtered1, enc_state.src_img_filtered1_itp, global_cfg); + + enc_state.src_img_filtered2_itp.resize(width, height); + convet_rgb_image_to_itp(enc_state.src_img_filtered2, enc_state.src_img_filtered2_itp, global_cfg); + + if (global_cfg.m_lambda == 0.0f) + global_cfg.m_favor_higher_compression = false; + + uint32_t total_strips = 0, rows_per_strip = 0; + if (!calc_strip_size(global_cfg.m_lambda, num_blocks_y, (uint32_t)pJob_pool->get_total_threads(), global_cfg.m_force_one_strip, total_strips, rows_per_strip, global_cfg)) + { + fmt_error_printf("compress_photo: Failed computing strip sizes\n"); + return false; + } + + if (global_cfg.m_debug_output) + fmt_printf("lambda: {}, comp_level: {}, highest_comp_level: {}, extra patterns: {}\n", global_cfg.m_lambda, global_cfg.m_master_comp_level, global_cfg.m_highest_comp_level, global_cfg.m_extra_patterns_flag); + + 
enc_state.coded_blocks.resize(num_blocks_x, num_blocks_y); + + bitwise_coder coded_bits; + + coded_bits.put_bits(0xABCD, 16); + coded_bits.put_bits(width, 16); + coded_bits.put_bits(height, 16); + + enc_state.packed_img.resize(width, height); + + enc_state.strip_bits.resize(total_strips); + + enc_state.final_astc_blocks.resize(num_blocks_x, num_blocks_y); + + uastc_hdr_6x6_debug_state debug_state; + + if (global_cfg.m_debug_images) + debug_state.init(width, height); + else + debug_state.init(0, 0); + + interval_timer tm; + tm.start(); + + std::atomic_bool any_failed_flag; + any_failed_flag.store(false); + + for (uint32_t strip_index = 0; strip_index < total_strips; strip_index++) + { + const uint32_t strip_first_by = strip_index * rows_per_strip; + + uint32_t strip_last_by = minimum(strip_first_by + rows_per_strip - 1, num_blocks_y); + if (strip_index == (total_strips - 1)) + strip_last_by = num_blocks_y - 1; + + pJob_pool->add_job([&any_failed_flag, &global_cfg, &debug_state, &enc_state, + strip_index, total_strips, strip_first_by, strip_last_by, + num_blocks_x, num_blocks_y, total_blocks, width, height] + { + if (!any_failed_flag) + { + bool status = compress_strip_task( + strip_index, total_strips, strip_first_by, strip_last_by, + num_blocks_x, num_blocks_y, total_blocks, width, height, + global_cfg, debug_state, enc_state); + + if (!status) + { + fmt_error_printf("compress_photo: compress_strip_task() failed\n"); + any_failed_flag.store(true, std::memory_order_relaxed); + } + } + } ); + + if (any_failed_flag) + break; + + } // strip_index + + pJob_pool->wait_for_all(); + + if (any_failed_flag) + { + fmt_error_printf("One or more strips failed during compression\n"); + return false; + } + + if (global_cfg.m_debug_output) + fmt_printf("Encoding time: {} secs\n", tm.get_elapsed_secs()); + + if (global_cfg.m_debug_output) + debug_state.print(total_blocks); + + if (global_cfg.m_debug_images) + { + save_png(global_cfg.m_debug_image_prefix + "part_vis.png", 
debug_state.m_part_vis); + save_png(global_cfg.m_debug_image_prefix + "grid_vis.png", debug_state.m_grid_vis); + save_png(global_cfg.m_debug_image_prefix + "mode_vis.png", debug_state.m_mode_vis); + save_png(global_cfg.m_debug_image_prefix + "mode_vis2.png", debug_state.m_mode_vis2); + save_png(global_cfg.m_debug_image_prefix + "enc_vis.png", debug_state.m_enc_vis); + write_exr((global_cfg.m_debug_image_prefix + "stat_vis.exr").c_str(), debug_state.m_stat_vis, 3, 0); + } + + for (uint32_t i = 0; i < total_strips; i++) + coded_bits.append(enc_state.strip_bits[i]); + + coded_bits.put_bits(0xA742, 16); + + coded_bits.flush(); + + if (global_cfg.m_output_images) + { + write_exr((global_cfg.m_output_image_prefix + "comp.exr").c_str(), enc_state.packed_img, 3, 0); + } + + if (global_cfg.m_debug_output) + fmt_printf("\nTotal intermediate output bits/pixel: {3.4}\n", (float)coded_bits.get_total_bits() / (float)(width * height)); + + vector2D decoded_blocks1; + vector2D decoded_blocks2; + + if (global_cfg.m_debug_output) + fmt_printf("decode_file\n"); + + uint32_t unpacked_width = 0, unpacked_height = 0; + bool status = decode_file(coded_bits.get_bytes(), decoded_blocks1, unpacked_width, unpacked_height); + if (!status) + { + fmt_error_printf("decode_file() failed\n"); + return false; + } + + if (global_cfg.m_debug_output) + fmt_printf("decode_6x6_hdr\n"); + + status = decode_6x6_hdr(coded_bits.get_bytes().get_ptr(), coded_bits.get_bytes().size_in_bytes_u32(), decoded_blocks2, unpacked_width, unpacked_height); + if (!status) + { + fmt_error_printf("decode_6x6_hdr_file() failed\n"); + return false; + } + + if ((enc_state.final_astc_blocks.get_width() != decoded_blocks1.get_width()) || + (enc_state.final_astc_blocks.get_height() != decoded_blocks1.get_height())) + { + fmt_error_printf("Decode size mismatch with decode_file\n"); + return false; + } + + if ((enc_state.final_astc_blocks.get_width() != decoded_blocks2.get_width()) || + (enc_state.final_astc_blocks.get_height() != 
decoded_blocks2.get_height())) + { + fmt_error_printf("Decode size mismatch with decode_6x6_hdr_file\n"); + return false; + } + + if (memcmp(decoded_blocks1.get_ptr(), enc_state.final_astc_blocks.get_ptr(), decoded_blocks1.size_in_bytes()) != 0) + { + fmt_error_printf("Decoded ASTC blocks verification failed\n"); + return false; + } + + if (memcmp(decoded_blocks2.get_ptr(), enc_state.final_astc_blocks.get_ptr(), decoded_blocks2.size_in_bytes()) != 0) + { + fmt_error_printf("Decoded ASTC blocks verification failed\n"); + return false; + } + + if (global_cfg.m_debug_output) + basisu::fmt_printf("Decoded ASTC verification checks succeeded\n"); + + if (global_cfg.m_output_images) + { + if (write_astc_file((global_cfg.m_output_image_prefix + "decoded.astc").c_str(), decoded_blocks1.get_ptr(), BLOCK_W, BLOCK_H, width, height)) + { + basisu::platform_sleep(20); + + uint8_vec astc_file_data; + if (read_file_to_vec((global_cfg.m_output_image_prefix + "decoded.astc").c_str(), astc_file_data)) + { + if (astc_file_data.size() > 16) + { + astc_file_data.erase(0, 16); + + size_t comp_size = 0; + void* pComp_data = tdefl_compress_mem_to_heap(&astc_file_data[0], astc_file_data.size(), &comp_size, TDEFL_MAX_PROBES_MASK); + mz_free(pComp_data); + + if (global_cfg.m_debug_output) + { + fmt_printf(".ASTC file size (less header): {}, bits/pixel: {}, Deflate bits/pixel: {}\n", + (uint64_t)astc_file_data.size(), + (float)astc_file_data.size() * 8.0f / (float)(width * height), + (float)comp_size * 8.0f / (float)(width * height)); + } + } + } + } + } + + // Must decode all the blocks (even padded rows/cols) to match what the transcoder does. 
+ imagef unpacked_astc_img(num_blocks_x * 6, num_blocks_y * 6); + imagef unpacked_astc_google_img(num_blocks_x * 6, num_blocks_y * 6); + + for (uint32_t y = 0; y < decoded_blocks1.get_height(); y++) + { + for (uint32_t x = 0; x < decoded_blocks1.get_width(); x++) + { + const auto& phys_blk = decoded_blocks1(x, y); + + vec4F pixels[MAX_BLOCK_W * MAX_BLOCK_H]; + status = unpack_physical_astc_block(&phys_blk, BLOCK_W, BLOCK_H, pixels); + if (!status) + { + fmt_error_printf("unpack_physical_astc_block() failed\n"); + return false; + } + + unpacked_astc_img.set_block_clipped(pixels, x * BLOCK_W, y * BLOCK_H, BLOCK_W, BLOCK_H); + + vec4F pixels_google[MAX_BLOCK_W * MAX_BLOCK_H]; + status = unpack_physical_astc_block_google(&phys_blk, BLOCK_W, BLOCK_H, pixels_google); + if (!status) + { + fmt_error_printf("unpack_physical_astc_block_google() failed\n"); + return false; + } + + unpacked_astc_google_img.set_block_clipped(pixels_google, x * BLOCK_W, y * BLOCK_H, BLOCK_W, BLOCK_H); + + for (uint32_t i = 0; i < 36; i++) + { + if (pixels[i] != pixels_google[i]) + { + fmt_error_printf("pixel unpack mismatch\n"); + return false; + } + } + } + } + + if (global_cfg.m_debug_output) + fmt_printf("\nUnpack succeeded\n"); + + imagef unpacked_bc6h_img; + + { + vector2D bc6h_blocks; + + fast_bc6h_params enc_params; + + bool pack_status = pack_bc6h_image(unpacked_astc_img, bc6h_blocks, &unpacked_bc6h_img, enc_params); + if (!pack_status) + { + fmt_error_printf("pack_bc6h_image() failed!"); + return false; + } + + unpacked_bc6h_img.crop(width, height); + + if (global_cfg.m_output_images) + { + write_exr((global_cfg.m_output_image_prefix + "unpacked_bc6h.exr").c_str(), unpacked_bc6h_img, 3, 0); + } + } + + unpacked_astc_img.crop(width, height); + unpacked_astc_google_img.crop(width, height); + + if (global_cfg.m_output_images) + { + write_exr((global_cfg.m_output_image_prefix + "unpacked_astc.exr").c_str(), unpacked_astc_img, 3, 0); + write_exr((global_cfg.m_output_image_prefix + 
"unpacked_google_astc.exr").c_str(), unpacked_astc_google_img, 3, 0); + } + + // ASTC metrics + if (global_cfg.m_image_stats) + { + image_metrics im; + + if (global_cfg.m_debug_output) + printf("\nASTC log2 float error metrics:\n"); + + for (uint32_t i = 0; i < 3; i++) + { + im.calc(enc_state.src_img, unpacked_astc_img, i, 1, true, true); + + if (global_cfg.m_debug_output) + { + printf("%c: ", "RGBA"[i]); + im.print_hp(); + } + } + + metrics.m_im_astc_log2.calc(enc_state.src_img, unpacked_astc_img, 0, 3, true, true); + + if (global_cfg.m_debug_output) + { + printf("RGB: "); + metrics.m_im_astc_log2.print_hp(); + + printf("\n"); + } + } + + if (global_cfg.m_image_stats) + { + image_metrics im; + + if (global_cfg.m_debug_output) + printf("ASTC half float space error metrics (a piecewise linear approximation of log2 error):\n"); + + for (uint32_t i = 0; i < 3; i++) + { + im.calc_half(enc_state.src_img, unpacked_astc_img, i, 1, true); + + if (global_cfg.m_debug_output) + { + printf("%c: ", "RGBA"[i]); + im.print_hp(); + } + } + + metrics.m_im_astc_half.calc_half(enc_state.src_img, unpacked_astc_img, 0, 3, true); + + if (global_cfg.m_debug_output) + { + printf("RGB: "); + metrics.m_im_astc_half.print_hp(); + } + } + + // BC6H metrics + if (global_cfg.m_image_stats) + { + image_metrics im; + + if (global_cfg.m_debug_output) + printf("\nBC6H log2 float error metrics:\n"); + + for (uint32_t i = 0; i < 3; i++) + { + im.calc(enc_state.src_img, unpacked_bc6h_img, i, 1, true, true); + + if (global_cfg.m_debug_output) + { + printf("%c: ", "RGBA"[i]); + im.print_hp(); + } + } + + metrics.m_im_bc6h_log2.calc(enc_state.src_img, unpacked_bc6h_img, 0, 3, true, true); + + if (global_cfg.m_debug_output) + { + printf("RGB: "); + metrics.m_im_bc6h_log2.print_hp(); + + printf("\n"); + } + } + + if (global_cfg.m_image_stats) + { + image_metrics im; + + if (global_cfg.m_debug_output) + printf("BC6H half float space error metrics (a piecewise linear approximation of log2 error):\n"); + + 
for (uint32_t i = 0; i < 3; i++) + { + im.calc_half(enc_state.src_img, unpacked_bc6h_img, i, 1, true); + + if (global_cfg.m_debug_output) + { + printf("%c: ", "RGBA"[i]); + im.print_hp(); + } + } + + metrics.m_im_bc6h_half.calc_half(enc_state.src_img, unpacked_bc6h_img, 0, 3, true); + + if (global_cfg.m_debug_output) + { + printf("RGB: "); + metrics.m_im_bc6h_half.print_hp(); + + printf("\n"); + } + } + + intermediate_tex_data.swap(coded_bits.get_bytes()); + + astc_tex_data.resize(decoded_blocks1.size_in_bytes()); + memcpy(astc_tex_data.data(), decoded_blocks1.get_ptr(), decoded_blocks1.size_in_bytes()); + + return true; +} + +} // namespace astc_6x6_hdr diff --git a/thirdparty/basisu/encoder/basisu_astc_hdr_6x6_enc.h b/thirdparty/basisu/encoder/basisu_astc_hdr_6x6_enc.h new file mode 100644 index 000000000..8b82ad8c2 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_astc_hdr_6x6_enc.h @@ -0,0 +1,129 @@ +// File: basisu_astc_hdr_6x6_enc.h +#pragma once +#include "basisu_enc.h" +#include "../transcoder/basisu_astc_hdr_core.h" + +namespace astc_6x6_hdr +{ + const uint32_t ASTC_HDR_6X6_MAX_USER_COMP_LEVEL = 12; + + const uint32_t ASTC_HDR_6X6_MAX_COMP_LEVEL = 4; + + const float LDR_BLACK_BIAS = 0.0f;// .49f; + + // Note: This struct is copied several times, so do not place any heavyweight objects in here. + struct astc_hdr_6x6_global_config + { + // Important: The Delta ITP colorspace error metric we use internally makes several assumptions about the nature of the HDR RGB inputs supplied to the encoder. + // This encoder computes colorspace error in the ICtCp (or more accurately the delta ITP, where CT is scaled by .5 vs. ICtCp to become T) colorspace, so getting this correct is important. + // By default the encoder assumes the input is in absolute luminance (in nits or candela per square meter, cd/m^2), specified as positive-only linear light RGB, using the REC 709 colorspace gamut (but NOT the sRGB transfer function, i.e. linear light). 
+ // If the m_rec2020_bt2100_color_gamut flag is true, the input colorspace is treated as REC 2020/BT.2100 (which is wider than 709). + // For SDR/LDR->HDR upconversion, the REC 709 sRGB input should be converted to linear light (sRGB->linear) and the resulting normalized linear RGB values scaled by either 80 or 100 nits (the luminance of a typical SDR monitor). + // SDR upconversion to normalized [0,1] (i.e. non-absolute) luminances may work but is not supported because ITP errors will not be predicted correctly. + bool m_rec2020_bt2100_color_gamut = false; + + // levels 0-3 normal levels, 4=exhaustive + uint32_t m_master_comp_level = 0; + uint32_t m_highest_comp_level = 1; + + float m_lambda = 0.0f; + + bool m_extra_patterns_flag = false; // def to false, works in comp levels [1,4] + bool m_brute_force_partition_matching = false; // def to false + + bool m_jnd_optimization = false; // defaults to false for HDR inputs, on SDR upconverted images this can default to enabled + float m_jnd_delta_itp_thresh = .75f; + + bool m_force_one_strip = false; + + bool m_gaussian1_fallback = true; // def to true, if this is disabled m_gaussian2_fallback should be disabled too + float m_gaussian1_strength = 1.45f; + + bool m_gaussian2_fallback = true; // def to true, hopefully rarely kicks in + float m_gaussian2_strength = 1.83f; + + // m_disable_delta_endpoint_usage may give a slight increase in RDO ASTC encoding efficiency. It's also faster. + bool m_disable_delta_endpoint_usage = false; + + // Scale up Delta ITP errors for very dark pixels, assuming they will be brightly exposed > 1.0x. + // We don't know if the output will be exposed, or not. If heavily exposed, our JND calculations will not be conservative enough. 
+ bool m_delta_itp_dark_adjustment = true; + + bool m_debug_images = false; + std::string m_debug_image_prefix = "dbg_astc_hdr_6x6_devel_"; + + bool m_output_images = false; + std::string m_output_image_prefix = "dbg_astc_hdr_6x6_output_"; + + bool m_debug_output = false; + bool m_image_stats = false; + bool m_status_output = false; + + //------------------------------------------------------------------------------------- + // Very low level/devel parameters - intended for development. Best not to change them. + //------------------------------------------------------------------------------------- + bool m_deblocking_flag = true; + float m_deblock_penalty_weight = .03f; + bool m_disable_twothree_subsets = false; // def to false + bool m_use_solid_blocks = true; // def to true + bool m_use_runs = true; // def to true + bool m_block_stat_optimizations_flag = true; // def to true + + bool m_rdo_candidate_diversity_boost = true; // def to true + float m_rdo_candidate_diversity_boost_bit_window_weight = 1.2f; + + bool m_favor_higher_compression = true; // utilize all modes + uint32_t m_num_reuse_xy_deltas = basist::astc_6x6_hdr::NUM_REUSE_XY_DELTAS; + + void print() const + { + basisu::fmt_debug_printf("m_master_comp_level: {}, m_highest_comp_level: {}\n", m_master_comp_level, m_highest_comp_level); + basisu::fmt_debug_printf("m_lambda: {}\n", m_lambda); + basisu::fmt_debug_printf("m_rec2020_bt2100_color_gamut: {}\n", m_rec2020_bt2100_color_gamut); + basisu::fmt_debug_printf("m_extra_patterns_flag: {}, m_brute_force_partition_matching: {}\n", m_extra_patterns_flag, m_brute_force_partition_matching); + basisu::fmt_debug_printf("m_jnd_optimization: {}, m_jnd_delta_itp_thresh: {}\n", m_jnd_optimization, m_jnd_delta_itp_thresh); + basisu::fmt_debug_printf("m_force_one_strip: {}\n", m_force_one_strip); + basisu::fmt_debug_printf("m_gaussian1_fallback: {}, m_gaussian1_strength: {}\n", m_gaussian1_fallback, m_gaussian1_strength); + 
basisu::fmt_debug_printf("m_gaussian2_fallback: {}, m_gaussian2_strength: {}\n", m_gaussian2_fallback, m_gaussian2_strength); + basisu::fmt_debug_printf("m_disable_delta_endpoint_usage: {}\n", m_disable_delta_endpoint_usage); + basisu::fmt_debug_printf("m_delta_itp_dark_adjustment: {}\n", m_delta_itp_dark_adjustment); + basisu::fmt_debug_printf("m_debug_images: {}, m_debug_image_prefix: {}\n", m_debug_images, m_debug_image_prefix); + basisu::fmt_debug_printf("m_output_images: {}, m_output_image_prefix: {}\n", m_output_images, m_output_image_prefix); + basisu::fmt_debug_printf("m_image_stats: {}, m_status_output: {}\n", m_image_stats, m_status_output); + basisu::fmt_debug_printf("m_deblocking_flag: {}, m_deblock_penalty_weight: {}\n", m_deblocking_flag, m_deblock_penalty_weight); + basisu::fmt_debug_printf("m_disable_twothree_subsets: {}, m_use_solid_blocks: {}\n", m_disable_twothree_subsets, m_use_solid_blocks); + basisu::fmt_debug_printf("m_use_runs: {}, m_block_stat_optimizations_flag: {}\n", m_use_runs, m_block_stat_optimizations_flag); + basisu::fmt_debug_printf("m_rdo_candidate_diversity_boost: {}, m_rdo_candidate_diversity_boost_bit_window_weight: {}\n", m_rdo_candidate_diversity_boost, m_rdo_candidate_diversity_boost_bit_window_weight); + basisu::fmt_debug_printf("m_favor_higher_compression: {}, m_num_reuse_xy_deltas: {}\n", m_favor_higher_compression, m_num_reuse_xy_deltas); + } + + astc_hdr_6x6_global_config() + { + } + + void clear() + { + astc_hdr_6x6_global_config def; + std::swap(*this, def); + } + + // Max level is ASTC_HDR_6X6_MAX_USER_COMP_LEVEL + void set_user_level(int level); + }; + + void global_init(); + + struct result_metrics + { + basisu::image_metrics m_im_astc_log2; + basisu::image_metrics m_im_astc_half; + + basisu::image_metrics m_im_bc6h_log2; + basisu::image_metrics m_im_bc6h_half; + }; + + // The input image should be unpadded to 6x6 boundaries, i.e. the original unexpanded image. 
+ bool compress_photo(const basisu::imagef& orig_src_img, const astc_hdr_6x6_global_config& global_cfg, basisu::job_pool* pJob_pool, + basisu::uint8_vec& intermediate_tex_data, basisu::uint8_vec& astc_tex_data, result_metrics& metrics); + +} // namespace astc_6x6_hdr diff --git a/thirdparty/basisu/encoder/basisu_astc_hdr_common.cpp b/thirdparty/basisu/encoder/basisu_astc_hdr_common.cpp new file mode 100644 index 000000000..b720e2690 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_astc_hdr_common.cpp @@ -0,0 +1,5357 @@ +// File: basisu_astc_hdr_common.cpp +#include "basisu_enc.h" +#include "basisu_gpu_texture.h" +#include "../transcoder/basisu_astc_helpers.h" +#include "../transcoder/basisu_astc_hdr_core.h" +#include "basisu_astc_hdr_common.h" + +using namespace basist; + +#ifndef __EMSCRIPTEN__ + #define BASISU_MULTITHREADED_INIT (0) +#endif + +namespace basisu +{ + +const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][33] = +{ + { 2, 0, 64 }, // 0, note ise range=0 is invalid for 4x4 block sizes (<24 weight bits in the block) + { 3, 0, 32, 64 }, // 1 + { 4, 0, 21, 43, 64 }, // 2 + { 5, 0, 16, 32, 48, 64 }, // 3 + { 6, 0, 64, 12, 52, 25, 39 }, // 4 + { 8, 0, 9, 18, 27, 37, 46, 55, 64 }, // 5 + { 10, 0, 64, 7, 57, 14, 50, 21, 43, 28, 36 }, // 6 + { 12, 0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36 }, // 7 + { 16, 0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64 }, // 8 + { 20, 0,64,16,48,3,61,19,45,6,58,23,41,9,55,26,38,13,51,29,35}, // 9 + { 24, 0,64,8,56,16,48,24,40,2,62,11,53,19,45,27,37,5,59,13,51,22,42,30,34}, // 10 + { 32, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64}, // 11 +}; + +//-------------------------------------------------------------------------------------------------------------------------- + +const float DEF_R_ERROR_SCALE = 2.0f; +const float DEF_G_ERROR_SCALE = 3.0f; + +void astc_hdr_codec_base_options::init() +{ + m_r_err_scale = DEF_R_ERROR_SCALE; + m_g_err_scale = 
DEF_G_ERROR_SCALE; + m_q_log_bias = Q_LOG_BIAS_4x4; + + m_ultra_quant = false; + + // Disabling by default to avoid transcoding outliers (try kodim26). The quality lost is very low. TODO: Could include the uber result in the output. + m_allow_uber_mode = false; + + m_mode7_full_s_optimization = true; + + m_take_first_non_clamping_mode11_submode = false; + m_take_first_non_clamping_mode7_submode = false; + + m_disable_weight_plane_optimization = true; +} + +//-------------------------------------------------------------------------------------------------------------------------- +// max usable qlog8 value is 247, 248=inf, >=249 is nan +// max usable qlog7 value is 123, 124=inf, >=125 is nan + +//const uint32_t TOTAL_USABLE_QLOG8 = 248; // 0-247 are usable, 0=0, 247=60416.0, 246=55296.0 + +// nearest values given a positive half float value (only) +static uint16_t g_half_to_qlog7[32768], g_half_to_qlog8[32768]; + +const uint32_t HALF_TO_QLOG_TABS_MIN_BITS = 7; +const uint32_t HALF_TO_QLOG_TABS_MAX_BITS = 8; +static uint16_t* g_pHalf_to_qlog_tabs[2] = +{ + g_half_to_qlog7, + g_half_to_qlog8, +}; + +#if 0 +static inline uint32_t half_to_qlog7_8(half_float h, uint32_t bits) +{ + assert((bits >= HALF_TO_QLOG_TABS_MIN_BITS) && (bits <= HALF_TO_QLOG_TABS_MAX_BITS)); + assert(h < 32768); + + return g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS][h]; +} +#endif + +// TODO: Tune this +static inline uint32_t quant_qlog16(uint32_t q16, uint32_t desired_bits) +{ + assert((desired_bits >= 7) && (desired_bits <= 12)); + assert(q16 <= 65535); + + const uint32_t shift = 16 - desired_bits; + uint32_t e = (q16 + (1U << (shift - 1U)) - 1U) >> shift; + + uint32_t max_val = (1U << desired_bits) - 1U; + e = minimum(e, max_val); + + return e; +} + +static void compute_half_to_qlog_table(uint32_t bits, uint16_t* pTable, const basisu::vector& qlog16_to_float) +{ + assert(bits >= 5 && bits <= 12); + const uint32_t max_val = (1 << bits) - 1; + + const uint32_t 
FIRST_INVALID_QLOG16_INDEX = 63488; // first inf, rest are inf/nan's + assert(std::isinf(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX])); + assert(std::isinf(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX + 1])); + assert(!std::isnan(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX - 1])); + assert(!std::isinf(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX - 1])); + + // For all positive half-floats + for (uint32_t h = 0; h < 32768; h++) + { + // Skip invalid values + if (is_half_inf_or_nan((half_float)h)) + continue; + const float desired_val = half_to_float((half_float)h); + + float best_err = BIG_FLOAT_VAL; + uint32_t best_qlog = 0; + + double prev_err = BIG_FLOAT_VAL; + + // For all possible qlog's + for (uint32_t i = 0; i <= max_val; i++) + { + // Skip invalid values + uint32_t idx = i << (16 - bits); + if (idx >= FIRST_INVALID_QLOG16_INDEX) + break; + + float v = qlog16_to_float[idx]; + //assert(!std::isinf(v) && !std::isnan(v)); // too costly in debug + + // Compute error + float err = fabsf(v - desired_val); + + if (err > prev_err) + { + // Every remaining entry will have guaranteed higher error + break; + } + + prev_err = err; + + // Find best + if (err < best_err) + { + best_err = err; + best_qlog = i; + + if (best_err == 0.0f) + break; + } + } + + pTable[h] = (uint16_t)best_qlog; + } +} + +static void init_qlog_tables() +{ + basisu::vector qlog16_to_float(65536); + + // for all possible qlog16, compute the corresponding half float + for (uint32_t i = 0; i <= 65535; i++) + { + half_float h = astc_helpers::qlog16_to_half(i); + + qlog16_to_float[i] = half_to_float(h); + } + +#if BASISU_MULTITHREADED_INIT + job_pool jp(3); + + for (uint32_t bits = HALF_TO_QLOG_TABS_MIN_BITS; bits <= HALF_TO_QLOG_TABS_MAX_BITS; bits++) + { + jp.add_job( [bits, &qlog16_to_float]() { compute_half_to_qlog_table(bits, g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS], qlog16_to_float); }); + } + + jp.wait_for_all(); +#else + // for all possible half floats, find the nearest qlog5-12 float 
+ for (uint32_t bits = HALF_TO_QLOG_TABS_MIN_BITS; bits <= HALF_TO_QLOG_TABS_MAX_BITS; bits++) + { + compute_half_to_qlog_table(bits, g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS], qlog16_to_float); + +#if 0 + std::vector check_tab(32768); + compute_half_to_qlog_table_orig(bits, check_tab.data(), qlog16_to_float); + for (uint32_t i = 0; i < (1 << bits); i++) + { + assert(check_tab[i] == g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS][i]); + } +#endif + } +#endif // BASISU_MULTITHREADED_INIT +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static vec3F calc_mean(uint32_t num_pixels, const vec4F* pPixels) +{ + vec3F mean(0.0f); + + for (uint32_t i = 0; i < num_pixels; i++) + { + const vec4F& p = pPixels[i]; + + mean[0] += p[0]; + mean[1] += p[1]; + mean[2] += p[2]; + } + + return mean / static_cast(num_pixels); +} + +static vec3F calc_rgb_pca(uint32_t num_pixels, const vec4F* pPixels, const vec3F& mean_color) +{ + float cov[6] = { 0, 0, 0, 0, 0, 0 }; + + for (uint32_t i = 0; i < num_pixels; i++) + { + const vec4F& v = pPixels[i]; + + float r = v[0] - mean_color[0]; + float g = v[1] - mean_color[1]; + float b = v[2] - mean_color[2]; + + cov[0] += r * r; + cov[1] += r * g; + cov[2] += r * b; + cov[3] += g * g; + cov[4] += g * b; + cov[5] += b * b; + } + + float xr = .9f, xg = 1.0f, xb = .7f; + for (uint32_t iter = 0; iter < 3; iter++) + { + float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; + float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; + float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; + + float m = maximumf(maximumf(fabsf(r), fabsf(g)), fabsf(b)); + + if (m > 1e-10f) + { + m = 1.0f / m; + + r *= m; + g *= m; + b *= m; + } + + xr = r; + xg = g; + xb = b; + } + + float len = xr * xr + xg * xg + xb * xb; + + vec3F axis(0.5773502691f); + + if (len >= 1e-10f) + { + len = 1.0f / sqrtf(len); + + xr *= len; + xg *= len; + xb *= len; + + axis.set(xr, xg, xb); + } + + 
return axis; +} + +void encode_astc_block_stats::init(uint32_t num_pixels, const vec4F pBlock_pixels_q16[]) +{ + m_num_pixels = num_pixels; + m_mean_q16 = calc_mean(num_pixels, pBlock_pixels_q16); + m_axis_q16 = calc_rgb_pca(num_pixels, pBlock_pixels_q16, m_mean_q16); +} + +static vec3F interp_color(const vec3F& mean, const vec3F& dir, float df, const aabb3F& colorspace_box, const aabb3F& input_box, bool* pInside = nullptr) +{ +#if 0 + assert(mean[0] >= input_box[0][0]); + assert(mean[1] >= input_box[0][1]); + assert(mean[2] >= input_box[0][2]); + assert(mean[0] <= input_box[1][0]); + assert(mean[1] <= input_box[1][1]); + assert(mean[2] <= input_box[1][2]); +#endif + + if (pInside) + *pInside = false; + + vec3F k(mean + dir * df); + if (colorspace_box.contains(k)) + { + if (pInside) + *pInside = true; + + return k; + } + + // starts inside + vec3F s(mean); + + // ends outside + vec3F e(mean + dir * df); + + // a ray guaranteed to go from the outside to inside + ray3F r(e, (s - e).normalize_in_place()); + vec3F c; + float t = 0.0f; + + intersection::result res = intersection::ray_aabb(c, t, r, input_box); + if (res != intersection::cSuccess) + c = k; + + return c; +} + +// all in Q16 space, 0-65535 +static bool compute_least_squares_endpoints_rgb( + uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights, + vec3F* pXl, vec3F* pXh, const vec4F* pColors, const aabb3F& input_box) +{ + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // I did this in matrix form first, expanded out all the ops, then optimized it a bit. 
+ float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; + float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; + float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; + float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t sel = pSelectors[i]; + + z00 += pSelector_weights[sel][0]; + z10 += pSelector_weights[sel][1]; + z11 += pSelector_weights[sel][2]; + + float w = pSelector_weights[sel][3]; + + q00_r += w * pColors[i][0]; + t_r += pColors[i][0]; + + q00_g += w * pColors[i][1]; + t_g += pColors[i][1]; + + q00_b += w * pColors[i][2]; + t_b += pColors[i][2]; + } + + q10_r = t_r - q00_r; + q10_g = t_g - q00_g; + q10_b = t_b - q00_b; + + z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (det == 0.0f) + return false; + + det = 1.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + (*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r); + (*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r); + + (*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g); + (*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g); + + (*pXl)[2] = (float)(iz00 * q00_b + iz01 * q10_b); + (*pXh)[2] = (float)(iz10 * q00_b + iz11 * q10_b); + + for (uint32_t c = 0; c < 3; c++) + { + float l = (*pXl)[c], h = (*pXh)[c]; + + if (input_box.get_dim(c) < .0000125f) + { + l = input_box[0][c]; + h = input_box[1][c]; + } + + (*pXl)[c] = l; + (*pXh)[c] = h; + } + + vec3F mean((*pXl + *pXh) * .5f); + vec3F dir(*pXh - *pXl); + + float ln = dir.length(); + if (ln) + { + dir /= ln; + + float ld = (*pXl - mean).dot(dir); + float hd = (*pXh - mean).dot(dir); + + aabb3F colorspace_box(vec3F(0.0f), vec3F(MAX_QLOG16_VAL)); + + bool was_inside1 = false; + + vec3F l = interp_color(mean, dir, ld, colorspace_box, input_box, &was_inside1); + if (!was_inside1) + *pXl = l; + + bool was_inside2 = false; + vec3F h = interp_color(mean, dir, hd, colorspace_box, input_box, &was_inside2); + if (!was_inside2) + *pXh = h; + } + + 
pXl->clamp(0.0f, MAX_QLOG16_VAL); + pXh->clamp(0.0f, MAX_QLOG16_VAL); + + return true; +} + +static bool compute_least_squares_endpoints_rgb_raw_weights( + uint32_t N, const uint8_t* pRaw_weights, + vec3F* pXl, vec3F* pXh, const vec4F* pColors, const aabb3F& input_box) +{ + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // I did this in matrix form first, expanded out all the ops, then optimized it a bit. + float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; + float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; + float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; + float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const float wt = (float)pRaw_weights[i] * (1.0f / 64.0f); + assert(wt <= 1.0f); + + const float w0 = wt * wt; + const float w1 = (1.0f - wt) * wt; + const float w2 = (1.0f - wt) * (1.0f - wt); + const float w3 = wt; + + z00 += w0; + z10 += w1; + z11 += w2; + + float w = w3; + q00_r += w * pColors[i][0]; + t_r += pColors[i][0]; + + q00_g += w * pColors[i][1]; + t_g += pColors[i][1]; + + q00_b += w * pColors[i][2]; + t_b += pColors[i][2]; + } + + q10_r = t_r - q00_r; + q10_g = t_g - q00_g; + q10_b = t_b - q00_b; + + z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (det == 0.0f) + return false; + + det = 1.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + (*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r); + (*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r); + + (*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g); + (*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g); + + (*pXl)[2] = (float)(iz00 * q00_b + iz01 * q10_b); + (*pXh)[2] = (float)(iz10 * q00_b + iz11 * q10_b); + + for (uint32_t c = 0; c < 3; c++) + { + float l = (*pXl)[c], h = (*pXh)[c]; + + if (input_box.get_dim(c) 
< .0000125f) + { + l = input_box[0][c]; + h = input_box[1][c]; + } + + (*pXl)[c] = l; + (*pXh)[c] = h; + } + + vec3F mean((*pXl + *pXh) * .5f); + vec3F dir(*pXh - *pXl); + + float ln = dir.length(); + if (ln) + { + dir /= ln; + + float ld = (*pXl - mean).dot(dir); + float hd = (*pXh - mean).dot(dir); + + aabb3F colorspace_box(vec3F(0.0f), vec3F(MAX_QLOG16_VAL)); + + bool was_inside1 = false; + + vec3F l = interp_color(mean, dir, ld, colorspace_box, input_box, &was_inside1); + if (!was_inside1) + *pXl = l; + + bool was_inside2 = false; + vec3F h = interp_color(mean, dir, hd, colorspace_box, input_box, &was_inside2); + if (!was_inside2) + *pXh = h; + } + + pXl->clamp(0.0f, MAX_QLOG16_VAL); + pXh->clamp(0.0f, MAX_QLOG16_VAL); + + return true; +} + +static bool compute_least_squares_endpoints_2D( + uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights, + vec2F* pXl, vec2F* pXh, const vec2F* pColors, const aabb2F& input_box) +{ + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // I did this in matrix form first, expanded out all the ops, then optimized it a bit. 
+ float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; + float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; + float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t sel = pSelectors[i]; + z00 += pSelector_weights[sel][0]; + z10 += pSelector_weights[sel][1]; + z11 += pSelector_weights[sel][2]; + + float w = pSelector_weights[sel][3]; + q00_r += w * pColors[i][0]; + t_r += pColors[i][0]; + + q00_g += w * pColors[i][1]; + t_g += pColors[i][1]; + } + + q10_r = t_r - q00_r; + q10_g = t_g - q00_g; + + z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (det == 0.0f) + return false; + + det = 1.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + (*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r); + (*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r); + + (*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g); + (*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g); + + for (uint32_t c = 0; c < 2; c++) + { + float l = (*pXl)[c], h = (*pXh)[c]; + + if (input_box.get_dim(c) < .0000125f) + { + l = input_box[0][c]; + h = input_box[1][c]; + } + + (*pXl)[c] = l; + (*pXh)[c] = h; + } + + pXl->clamp(0.0f, MAX_QLOG16_VAL); + pXh->clamp(0.0f, MAX_QLOG16_VAL); + + return true; +} + +static bool compute_least_squares_endpoints_1D( + uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights, + vec1F* pXl, vec1F* pXh, const vec1F* pColors, const aabb1F& input_box) +{ + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // I did this in matrix form first, expanded out all the ops, then optimized it a bit. 
+ float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; + float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t sel = pSelectors[i]; + z00 += pSelector_weights[sel][0]; + z10 += pSelector_weights[sel][1]; + z11 += pSelector_weights[sel][2]; + + float w = pSelector_weights[sel][3]; + q00_r += w * pColors[i][0]; + t_r += pColors[i][0]; + } + + q10_r = t_r - q00_r; + + z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (det == 0.0f) + return false; + + det = 1.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + (*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r); + (*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r); + + for (uint32_t c = 0; c < 1; c++) + { + float l = (*pXl)[c], h = (*pXh)[c]; + + if (input_box.get_dim(c) < .0000125f) + { + l = input_box[0][c]; + h = input_box[1][c]; + } + + (*pXl)[c] = l; + (*pXh)[c] = h; + } + + pXl->clamp(0.0f, MAX_QLOG16_VAL); + pXh->clamp(0.0f, MAX_QLOG16_VAL); + + return true; +} + +static bool compute_weighted_least_squares_endpoints_rgb( + uint32_t N, + const uint8_t* pSelectors, const vec4F* pSelector_weights, const float* pRaw_weights, /* ti */ + const float* pEmphasis_weights /* wi */, + vec3F* pXl, vec3F* pXh, + const vec4F* pColors, /* pi */ + const aabb3F& input_box) +{ + (void)input_box; + + assert(N); + assert((pSelectors && pSelector_weights) || pRaw_weights); + assert(pEmphasis_weights); + + // Pi = pixel colors + // Ti = project weights, [0,1] + // Wi = emphasis weights + + float total_wi = 0.0f; + for (uint32_t i = 0; i < N; i++) + total_wi += pEmphasis_weights[i]; + + if (total_wi == 0.0f) + return false; + + float weighted_mean_tw = 0.0f; + float weighted_mean_pw[3] = { 0.0f }; + + for (uint32_t i = 0; i < N; i++) + { + const float wi = pEmphasis_weights[i]; + const float ti = pSelectors ? 
pSelector_weights[pSelectors[i]][3] : pRaw_weights[i]; + const float pi_r = pColors[i][0], pi_g = pColors[i][1], pi_b = pColors[i][2]; + + weighted_mean_tw += wi * ti; + + weighted_mean_pw[0] += wi * pi_r; + weighted_mean_pw[1] += wi * pi_g; + weighted_mean_pw[2] += wi * pi_b; + } + + weighted_mean_tw /= total_wi; + + weighted_mean_pw[0] /= total_wi; + weighted_mean_pw[1] /= total_wi; + weighted_mean_pw[2] /= total_wi; + + float spt[3] = { 0.0f }; + float stt = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const float wi = pEmphasis_weights[i]; + const float ti = pSelectors ? pSelector_weights[pSelectors[i]][3] : pRaw_weights[i]; + const float pi_r = pColors[i][0], pi_g = pColors[i][1], pi_b = pColors[i][2]; + + spt[0] += wi * (pi_r - weighted_mean_pw[0]) * (ti - weighted_mean_tw); + spt[1] += wi * (pi_g - weighted_mean_pw[1]) * (ti - weighted_mean_tw); + spt[2] += wi * (pi_b - weighted_mean_pw[2]) * (ti - weighted_mean_tw); + + stt += wi * square(ti - weighted_mean_tw); + } + + if (stt == 0.0f) + return false; + + for (uint32_t i = 0; i < 3; i++) + { + float h = weighted_mean_pw[i] + (spt[i] / stt) * (1.0f - weighted_mean_tw); + float l = weighted_mean_pw[i] - (spt[i] / stt) * weighted_mean_tw; + + (*pXh)[i] = h; + (*pXl)[i] = l; + } + + pXl->clamp(0.0f, MAX_QLOG16_VAL); + pXh->clamp(0.0f, MAX_QLOG16_VAL); + + return true; +} + +static vec4F g_astc_ls_weights_ise[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; + +static uint8_t g_map_astc_to_linear_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][astc_index] -> linear index +static uint8_t g_map_linear_to_astc_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][linear_index] -> astc_index + +static void encode_astc_hdr_init() +{ + // Precomputed weight constants used during least fit determination. 
For each entry: w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w + for (uint32_t range = MIN_SUPPORTED_ISE_WEIGHT_INDEX; range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX; range++) + { + const uint32_t num_levels = g_ise_weight_lerps[range][0]; + assert(num_levels == astc_helpers::get_ise_levels(range)); + assert((num_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_levels <= MAX_SUPPORTED_WEIGHT_LEVELS)); + + for (uint32_t i = 0; i < num_levels; i++) + { + float w = g_ise_weight_lerps[range][1 + i] * (1.0f / 64.0f); + + g_astc_ls_weights_ise[range][i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w); + } + } + + for (uint32_t ise_range = MIN_SUPPORTED_ISE_WEIGHT_INDEX; ise_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX; ise_range++) + { + const uint32_t num_levels = g_ise_weight_lerps[ise_range][0]; + assert((num_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_levels <= MAX_SUPPORTED_WEIGHT_LEVELS)); + + uint32_t s[MAX_SUPPORTED_WEIGHT_LEVELS]; + for (uint32_t i = 0; i < num_levels; i++) + s[i] = (g_ise_weight_lerps[ise_range][1 + i] << 8) + i; + + std::sort(s, s + num_levels); + + for (uint32_t i = 0; i < num_levels; i++) + g_map_linear_to_astc_order[ise_range][i] = (uint8_t)(s[i] & 0xFF); + + for (uint32_t i = 0; i < num_levels; i++) + g_map_astc_to_linear_order[ise_range][g_map_linear_to_astc_order[ise_range][i]] = (uint8_t)i; + } + + //init_quantize_tables(); +} + +bool g_astc_hdr_enc_initialized; + +void astc_hdr_enc_init() +{ + if (g_astc_hdr_enc_initialized) + return; + + astc_hdr_core_init(); + + astc_helpers::init_tables(true); + + init_qlog_tables(); + + encode_astc_hdr_init(); + + g_astc_hdr_enc_initialized = true; +} + +void interpolate_qlog12_colors( + const int e[2][3], + half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range) +{ + assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + + for (uint32_t i = 0; i < 2; i++) + { + for (uint32_t j = 0; j < 3; j++) + { 
+ assert(in_range(e[i][j], 0, 0xFFF)); + } + } + + for (uint32_t i = 0; i < n; i++) + { + const int c = g_ise_weight_lerps[ise_weight_range][1 + i]; + assert(c == (int)astc_helpers::dequant_bise_weight(i, ise_weight_range)); + + half_float rf, gf, bf; + + { + uint32_t r0 = e[0][0] << 4; + uint32_t r1 = e[1][0] << 4; + int ri = (r0 * (64 - c) + r1 * c + 32) / 64; + rf = astc_helpers::qlog16_to_half(ri); + } + + { + uint32_t g0 = e[0][1] << 4; + uint32_t g1 = e[1][1] << 4; + int gi = (g0 * (64 - c) + g1 * c + 32) / 64; + gf = astc_helpers::qlog16_to_half(gi); + } + + { + uint32_t b0 = e[0][2] << 4; + uint32_t b1 = e[1][2] << 4; + int bi = (b0 * (64 - c) + b1 * c + 32) / 64; + bf = astc_helpers::qlog16_to_half(bi); + } + + if (pDecoded_half) + { + pDecoded_half[i * 3 + 0] = rf; + pDecoded_half[i * 3 + 1] = gf; + pDecoded_half[i * 3 + 2] = bf; + } + + if (pDecoded_float) + { + pDecoded_float[i][0] = half_to_float(rf); + pDecoded_float[i][1] = half_to_float(gf); + pDecoded_float[i][2] = half_to_float(bf); + } + } +} + +// decoded in ASTC order, not linear order +// return false if the ISE endpoint quantization leads to non-valid endpoints being decoded +bool get_astc_hdr_mode_11_block_colors( + const uint8_t* pEndpoints, + half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range) +{ + assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + + int e[2][3]; + if (!decode_mode11_to_qlog12(pEndpoints, e, ise_endpoint_range)) + return false; + + interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range); + + return true; +} + +// decoded in ASTC order, not linear order +// return false if the ISE endpoint quantization leads to non-valid endpoints being decoded +bool get_astc_hdr_mode_7_block_colors( + const uint8_t* pEndpoints, + half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range, 
uint32_t ise_endpoint_range) +{ + assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + + int e[2][3]; + if (!decode_mode7_to_qlog12(pEndpoints, e, nullptr, ise_endpoint_range)) + return false; + + interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range); + + return true; +} + +double eval_selectors_f( + uint32_t num_pixels, + uint8_t* pWeights, + const half_float* pBlock_pixels_half, + uint32_t num_weight_levels, + const half_float* pDecoded_half, + const astc_hdr_codec_base_options& coptions, + uint32_t usable_selector_bitmask) +{ + assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS)); + assert(usable_selector_bitmask); + + const float R_WEIGHT = coptions.m_r_err_scale; + const float G_WEIGHT = coptions.m_g_err_scale; + + double total_error = 0; + +#ifdef _DEBUG + for (uint32_t i = 0; i < num_weight_levels; i++) + { + assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0])); + assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1])); + assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2])); + } +#endif + + double decoded_half_q[MAX_SUPPORTED_WEIGHT_LEVELS][3]; + + for (uint32_t i = 0; i < num_weight_levels; i++) + { + const half_float* p = &pDecoded_half[i * 3]; + + decoded_half_q[i][0] = q(p[0], coptions.m_q_log_bias); + decoded_half_q[i][1] = q(p[1], coptions.m_q_log_bias); + decoded_half_q[i][2] = q(p[2], coptions.m_q_log_bias); + } + + for (uint32_t p = 0; p < num_pixels; p++) + { + const half_float* pDesired_half = &pBlock_pixels_half[p * 3]; + + const double desired_half_r_q = q(pDesired_half[0], coptions.m_q_log_bias); + const double desired_half_g_q = q(pDesired_half[1], coptions.m_q_log_bias); + const double desired_half_b_q = q(pDesired_half[2], coptions.m_q_log_bias); + + double lowest_e = BIG_FLOAT_VAL; + + //double dists[MAX_SUPPORTED_WEIGHT_LEVELS]; + + // this is an approximation of MSLE + for (uint32_t i = 0; i < num_weight_levels; i++) + { + 
if (((1 << i) & usable_selector_bitmask) == 0) + continue; + + // compute piecewise linear approximation of log2(a+eps)-log2(b+eps), for each component, then MSLE + double rd = decoded_half_q[i][0] - desired_half_r_q; + double gd = decoded_half_q[i][1] - desired_half_g_q; + double bd = decoded_half_q[i][2] - desired_half_b_q; + + double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd; + + //dists[i] = e; + + if (e < lowest_e) + { + lowest_e = e; + pWeights[p] = (uint8_t)i; + } + } + + total_error += lowest_e; + + } // p + + return total_error; +} + +double eval_selectors( + uint32_t num_pixels, + uint8_t* pWeights, + uint32_t ise_weight_range, + const half_float* pBlock_pixels_half, + uint32_t num_weight_levels, + const half_float* pDecoded_half, + const astc_hdr_codec_base_options& coptions, + uint32_t usable_selector_bitmask) +{ + if ((coptions.m_r_err_scale != 2.0f) || (coptions.m_g_err_scale != 3.0f)) + { + return eval_selectors_f( + num_pixels, + pWeights, + pBlock_pixels_half, + num_weight_levels, + pDecoded_half, + coptions, + usable_selector_bitmask); + } + + assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS)); + assert(usable_selector_bitmask); + + uint64_t total_error = 0; + +#ifdef _DEBUG + for (uint32_t i = 0; i < num_weight_levels; i++) + { + assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0])); + assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1])); + assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2])); + } +#endif + + int64_t decoded_half_q[MAX_SUPPORTED_WEIGHT_LEVELS][3]; + + for (uint32_t i = 0; i < num_weight_levels; i++) + { + const half_float* p = &pDecoded_half[i * 3]; + + decoded_half_q[i][0] = q2(p[0], coptions.m_q_log_bias); + decoded_half_q[i][1] = q2(p[1], coptions.m_q_log_bias); + decoded_half_q[i][2] = q2(p[2], coptions.m_q_log_bias); + } + + if (usable_selector_bitmask != UINT32_MAX) + { + for (uint32_t p = 0; p < num_pixels; p++) + { + const half_float* pDesired_half = &pBlock_pixels_half[p * 3]; + 
+ const int64_t desired_half_r_q = q2(pDesired_half[0], coptions.m_q_log_bias); + const int64_t desired_half_g_q = q2(pDesired_half[1], coptions.m_q_log_bias); + const int64_t desired_half_b_q = q2(pDesired_half[2], coptions.m_q_log_bias); + + int64_t lowest_e = INT64_MAX; + + for (uint32_t i = 0; i < num_weight_levels; i++) + { + if (((1 << i) & usable_selector_bitmask) == 0) + continue; + + int64_t rd = decoded_half_q[i][0] - desired_half_r_q; + int64_t gd = decoded_half_q[i][1] - desired_half_g_q; + int64_t bd = decoded_half_q[i][2] - desired_half_b_q; + + int64_t e = 2 * (rd * rd) + 3 * (gd * gd) + bd * bd; + + if (e < lowest_e) + { + lowest_e = e; + pWeights[p] = (uint8_t)i; + } + } + + total_error += lowest_e; + + } // p + } + else + { + if ((num_weight_levels <= 4) || (coptions.m_disable_weight_plane_optimization)) + { + for (uint32_t p = 0; p < num_pixels; p++) + { + const half_float* pDesired_half = &pBlock_pixels_half[p * 3]; + + const half_float desired_r = pDesired_half[0], desired_g = pDesired_half[1], desired_b = pDesired_half[2]; + + const int64_t desired_half_r_q = q2(desired_r, coptions.m_q_log_bias); + const int64_t desired_half_g_q = q2(desired_g, coptions.m_q_log_bias); + const int64_t desired_half_b_q = q2(desired_b, coptions.m_q_log_bias); + + int64_t lowest_e = INT64_MAX; + + uint32_t i; + for (i = 0; (i + 1) < num_weight_levels; i += 2) + { + int64_t e0, e1; + + { + int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q; // 27 bits maximum with half float inputs + int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q; + int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q; + e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i; // max 62 bits (27*2+3+5) + } + + { + int64_t rd1 = decoded_half_q[i + 1][0] - desired_half_r_q; + int64_t gd1 = decoded_half_q[i + 1][1] - desired_half_g_q; + int64_t bd1 = decoded_half_q[i + 1][2] - desired_half_b_q; + e1 = ((2 * (rd1 * rd1) + 3 * (gd1 * gd1) + bd1 * bd1) << 5) | (i + 1); + } + + lowest_e 
= minimum(lowest_e, e0, e1); + } + + if (i != num_weight_levels) + { + int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q; + int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q; + int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q; + int64_t e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i; + + lowest_e = minimum(lowest_e, e0); + } + + pWeights[p] = (uint8_t)(lowest_e & 31); + + total_error += (lowest_e >> 5); + + } // p + } + else + { + const auto& weight_val_to_ise_tab = astc_helpers::g_dequant_tables.get_weight_tab(ise_weight_range).m_val_to_ise; + const int lo_index = weight_val_to_ise_tab[0], hi_index = weight_val_to_ise_tab[64], mid_index = weight_val_to_ise_tab[32]; + + const vec3F low_color((float)pDecoded_half[lo_index * 3 + 0], (float)pDecoded_half[lo_index * 3 + 1], (float)pDecoded_half[lo_index * 3 + 2]); + const vec3F high_color((float)pDecoded_half[hi_index * 3 + 0], (float)pDecoded_half[hi_index * 3 + 1], (float)pDecoded_half[hi_index * 3 + 2]); + const vec3F mid_color((float)pDecoded_half[mid_index * 3 + 0], (float)pDecoded_half[mid_index * 3 + 1], (float)pDecoded_half[mid_index * 3 + 2]); + + const vec3F block_dir(high_color - low_color); + + for (uint32_t p = 0; p < num_pixels; p++) + { + const half_float* pDesired_half = &pBlock_pixels_half[p * 3]; + + const half_float desired_r = pDesired_half[0], desired_g = pDesired_half[1], desired_b = pDesired_half[2]; + + const int64_t desired_half_r_q = q2(desired_r, coptions.m_q_log_bias); + const int64_t desired_half_g_q = q2(desired_g, coptions.m_q_log_bias); + const int64_t desired_half_b_q = q2(desired_b, coptions.m_q_log_bias); + + // Determine which side of the middle plane the point is for a modest gain + vec3F c((float)desired_r - mid_color[0], (float)desired_g - mid_color[1], (float)desired_b - mid_color[2]); + float d = c.dot(block_dir); + + int i = 0, high_index = (num_weight_levels / 2) + 1; + if (d >= 0.0f) + { + i = num_weight_levels / 2; + high_index = 
num_weight_levels; + } + + int64_t lowest_e = INT64_MAX; + + for (; (i + 1) < high_index; i += 2) + { + int64_t e0, e1; + + { + int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q; // 27 bits maximum with half float inputs + int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q; + int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q; + e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i; // max 62 bits (27*2+3+5) + } + + { + int64_t rd1 = decoded_half_q[i + 1][0] - desired_half_r_q; + int64_t gd1 = decoded_half_q[i + 1][1] - desired_half_g_q; + int64_t bd1 = decoded_half_q[i + 1][2] - desired_half_b_q; + e1 = ((2 * (rd1 * rd1) + 3 * (gd1 * gd1) + bd1 * bd1) << 5) | (i + 1); + } + + lowest_e = minimum(lowest_e, e0, e1); + } + + if (i != high_index) + { + int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q; + int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q; + int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q; + int64_t e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i; + + lowest_e = minimum(lowest_e, e0); + } + + pWeights[p] = (uint8_t)(lowest_e & 31); + + total_error += (lowest_e >> 5); + + } // p + } + } + + return (double)total_error; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +double eval_selectors_dual_plane( + uint32_t channel_index, + uint32_t num_pixels, + uint8_t* pWeights0, uint8_t* pWeights1, + const half_float* pBlock_pixels_half, + uint32_t num_weight_levels, + const half_float* pDecoded_half, + const astc_hdr_codec_base_options& coptions, + uint32_t usable_selector_bitmask) +{ + assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS)); + assert(usable_selector_bitmask); + + const float R_WEIGHT = coptions.m_r_err_scale; + const float G_WEIGHT = coptions.m_g_err_scale; + + double total_error = 0; + +#ifdef _DEBUG + for (uint32_t i = 0; i < num_weight_levels; i++) + { + 
assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0])); + assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1])); + assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2])); + } +#endif + + double decoded_half_q[MAX_SUPPORTED_WEIGHT_LEVELS][3]; + + for (uint32_t i = 0; i < num_weight_levels; i++) + { + const half_float* p = &pDecoded_half[i * 3]; + + decoded_half_q[i][0] = q(p[0], coptions.m_q_log_bias); + decoded_half_q[i][1] = q(p[1], coptions.m_q_log_bias); + decoded_half_q[i][2] = q(p[2], coptions.m_q_log_bias); + } + + const double channel_weights[3] = { R_WEIGHT, G_WEIGHT, 1.0f }; + + const uint32_t first_channel = (channel_index + 1) % 3; + const uint32_t second_channel = (channel_index + 2) % 3; + + // First plane + const double first_channel_weight = channel_weights[first_channel]; + const double second_channel_weight = channel_weights[second_channel]; + + for (uint32_t p = 0; p < num_pixels; p++) + { + const half_float* pDesired_half = &pBlock_pixels_half[p * 3]; + + const double desired_half_x_q = q(pDesired_half[first_channel], coptions.m_q_log_bias); + const double desired_half_y_q = q(pDesired_half[second_channel], coptions.m_q_log_bias); + + double lowest_e = BIG_FLOAT_VAL; + + // this is an approximation of MSLE + for (uint32_t i = 0; i < num_weight_levels; i++) + { + if (((1 << i) & usable_selector_bitmask) == 0) + continue; + + double xd = decoded_half_q[i][first_channel] - desired_half_x_q; + double yd = decoded_half_q[i][second_channel] - desired_half_y_q; + + double e = first_channel_weight * (xd * xd) + second_channel_weight * (yd * yd); + + if (e < lowest_e) + { + lowest_e = e; + pWeights0[p] = (uint8_t)i; + } + } + + total_error += lowest_e; + + } // p + + // Second plane + const double alt_channel_weight = channel_weights[channel_index]; + + for (uint32_t p = 0; p < num_pixels; p++) + { + const half_float* pDesired_half = &pBlock_pixels_half[p * 3]; + + const double desired_half_a_q = q(pDesired_half[channel_index], coptions.m_q_log_bias); + + 
double lowest_e = BIG_FLOAT_VAL; + + // this is an approximation of MSLE + for (uint32_t i = 0; i < num_weight_levels; i++) + { + if (((1 << i) & usable_selector_bitmask) == 0) + continue; + + double ad = decoded_half_q[i][channel_index] - desired_half_a_q; + + double e = alt_channel_weight * (ad * ad); + + if (e < lowest_e) + { + lowest_e = e; + pWeights1[p] = (uint8_t)i; + } + } + + total_error += lowest_e; + + } // p + + return total_error; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +double compute_block_error(uint32_t num_pixels, const half_float* pOrig_block, const half_float* pPacked_block, const astc_hdr_codec_base_options& coptions) +{ + const float R_WEIGHT = coptions.m_r_err_scale; + const float G_WEIGHT = coptions.m_g_err_scale; + + double total_error = 0; + + for (uint32_t p = 0; p < num_pixels; p++) + { + double rd = q(pOrig_block[p * 3 + 0], coptions.m_q_log_bias) - q(pPacked_block[p * 3 + 0], coptions.m_q_log_bias); + double gd = q(pOrig_block[p * 3 + 1], coptions.m_q_log_bias) - q(pPacked_block[p * 3 + 1], coptions.m_q_log_bias); + double bd = q(pOrig_block[p * 3 + 2], coptions.m_q_log_bias) - q(pPacked_block[p * 3 + 2], coptions.m_q_log_bias); + + double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd; + + total_error += e; + } + + return total_error; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +double compute_block_error_from_raw_weights( + uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], + const uint8_t* pRaw_weights, + int endpoints_qlog12[2][3], + const astc_hdr_codec_base_options& coptions) +{ + // qlog12->qlog16 + int trial_e[2][3]; + for (uint32_t i = 0; i < 3; i++) + { + assert(endpoints_qlog12[0][i] <= (int)basist::MAX_QLOG12); + assert(endpoints_qlog12[1][i] <= (int)basist::MAX_QLOG12); + + trial_e[0][i] = endpoints_qlog12[0][i] << 
4; + trial_e[1][i] = endpoints_qlog12[1][i] << 4; + } + + const float R_WEIGHT = coptions.m_r_err_scale, G_WEIGHT = coptions.m_g_err_scale; + + double trial_error = 0; + for (uint32_t p = 0; p < num_pixels; p++) + { + const half_float* pDesired_half = &pBlock_pixels_half[p][0]; + + const double desired_half_r_q = q(pDesired_half[0], coptions.m_q_log_bias), desired_half_g_q = q(pDesired_half[1], coptions.m_q_log_bias), desired_half_b_q = q(pDesired_half[2], coptions.m_q_log_bias); + + const uint32_t c = pRaw_weights[p]; + assert(c <= 64); + + { + half_float rf, gf, bf; + { + uint32_t r0 = trial_e[0][0], r1 = trial_e[1][0]; + int ri = (r0 * (64 - c) + r1 * c + 32) / 64; + rf = astc_helpers::qlog16_to_half(ri); + } + { + uint32_t g0 = trial_e[0][1], g1 = trial_e[1][1]; + int gi = (g0 * (64 - c) + g1 * c + 32) / 64; + gf = astc_helpers::qlog16_to_half(gi); + } + { + uint32_t b0 = trial_e[0][2], b1 = trial_e[1][2]; + int bi = (b0 * (64 - c) + b1 * c + 32) / 64; + bf = astc_helpers::qlog16_to_half(bi); + } + + const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias); + const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - desired_half_b_q; + trial_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd; + } + } + + return trial_error; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static inline int compute_clamped_val(int v, int l, int h, bool& did_clamp, int& max_clamp_mag) +{ + assert(l < h); + + if (v < l) + { + max_clamp_mag = basisu::maximum(max_clamp_mag, l - v); + + v = l; + did_clamp = true; + } + else if (v > h) + { + max_clamp_mag = basisu::maximum(max_clamp_mag, v - h); + + v = h; + did_clamp = true; + } + + return v; +} + 
+//-------------------------------------------------------------------------------------------------------------------------- + +const uint8_t s_b_bits[8] = { 7, 8, 6, 7, 8, 6, 7, 6 }; +const uint8_t s_c_bits[8] = { 6, 6, 7, 7, 6, 7, 7, 7 }; +const uint8_t s_d_bits[8] = { 7, 6, 7, 6, 5, 6, 5, 6 }; + +// val_q[] must be already packed to qlog9-qlog12. +bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, int val_q[2][3], int& max_clamp_mag, bool early_out_if_clamped, int max_clamp_mag_accept_thresh) +{ + assert(submode <= 7); + + const uint32_t a_bits = 9 + (submode >> 1); + const uint32_t b_bits = s_b_bits[submode]; + const uint32_t c_bits = s_c_bits[submode]; + const uint32_t d_bits = s_d_bits[submode]; + + const int max_a_val = (1 << a_bits) - 1; + const int max_b_val = (1 << b_bits) - 1; + const int max_c_val = (1 << c_bits) - 1; + + // The maximum usable value before it turns to NaN/Inf + const int max_a_qlog = get_max_qlog(a_bits); + BASISU_NOTE_UNUSED(max_a_qlog); + + const int min_d_val = -(1 << (d_bits - 1)); + const int max_d_val = -min_d_val - 1; + assert((max_d_val - min_d_val + 1) == (1 << d_bits)); + + int highest_q = -1, highest_val = 0, highest_comp = 0; + + for (uint32_t c = 0; c < 3; c++) + { + assert(val_q[0][c] <= max_a_qlog); + assert(val_q[1][c] <= max_a_qlog); + } + + for (uint32_t v = 0; v < 2; v++) + { + for (uint32_t c = 0; c < 3; c++) + { + assert(val_q[v][c] >= 0 && val_q[v][c] <= max_a_val); + + if (val_q[v][c] > highest_q) + { + highest_q = val_q[v][c]; + highest_val = v; + highest_comp = c; + } + } + } + + const bool had_tie = (val_q[highest_val ^ 1][highest_comp] == highest_q); + + if (highest_val != 1) + { + for (uint32_t c = 0; c < 3; c++) + { + std::swap(val_q[0][c], val_q[1][c]); + } + } + + if (highest_comp) + { + std::swap(val_q[0][0], val_q[0][highest_comp]); + std::swap(val_q[1][0], val_q[1][highest_comp]); + } + + int orig_q[2][3]; + memcpy(orig_q, val_q, sizeof(int) * 6); + + // val[1][0] is now guaranteed to 
be highest + int best_va = 0, best_vb0 = 0, best_vb1 = 0, best_vc = 0, best_vd0 = 0, best_vd1 = 0; + int best_max_clamp_mag = 0; + bool best_did_clamp = false; + int best_q[2][3] = { { 0, 0, 0}, { 0, 0, 0 } }; + BASISU_NOTE_UNUSED(best_q); + uint32_t best_dist = UINT_MAX; + + for (uint32_t pass = 0; pass < 2; pass++) + { + int trial_va = val_q[1][0]; + + assert(trial_va <= max_a_val); + assert(trial_va >= val_q[1][1]); + assert(trial_va >= val_q[1][2]); + + assert(trial_va >= val_q[0][0]); + assert(trial_va >= val_q[0][1]); + assert(trial_va >= val_q[0][2]); + + bool did_clamp = false; + int trial_max_clamp_mag = 0; + + int trial_vb0 = compute_clamped_val(trial_va - val_q[1][1], 0, max_b_val, did_clamp, trial_max_clamp_mag); + int trial_vb1 = compute_clamped_val(trial_va - val_q[1][2], 0, max_b_val, did_clamp, trial_max_clamp_mag); + int trial_vc = compute_clamped_val(trial_va - val_q[0][0], 0, max_c_val, did_clamp, trial_max_clamp_mag); + int trial_vd0 = compute_clamped_val((trial_va - trial_vb0 - trial_vc) - val_q[0][1], min_d_val, max_d_val, did_clamp, trial_max_clamp_mag); + int trial_vd1 = compute_clamped_val((trial_va - trial_vb1 - trial_vc) - val_q[0][2], min_d_val, max_d_val, did_clamp, trial_max_clamp_mag); + + if ((early_out_if_clamped) && (did_clamp) && (trial_max_clamp_mag > max_clamp_mag_accept_thresh)) + { + if ((!had_tie) || (pass == 1)) + { + max_clamp_mag = trial_max_clamp_mag; + return true; + } + } + + if (!did_clamp) + { + // Make sure decoder gets the expected values + assert(trial_va == val_q[1][0]); + assert(trial_va - trial_vb0 == val_q[1][1]); + assert(trial_va - trial_vb1 == val_q[1][2]); + + assert((trial_va - trial_vc) == val_q[0][0]); + assert((trial_va - trial_vb0 - trial_vc - trial_vd0) == val_q[0][1]); + assert((trial_va - trial_vb1 - trial_vc - trial_vd1) == val_q[0][2]); + } + + const int r_e0 = clamp(trial_va, 0, max_a_val); + const int r_e1 = clamp(trial_va - trial_vb0, 0, max_a_val); + const int r_e2 = clamp(trial_va - 
trial_vb1, 0, max_a_val); + + const int r_f0 = clamp(trial_va - trial_vc, 0, max_a_val); + const int r_f1 = clamp(trial_va - trial_vb0 - trial_vc - trial_vd0, 0, max_a_val); + const int r_f2 = clamp(trial_va - trial_vb1 - trial_vc - trial_vd1, 0, max_a_val); + + assert(r_e0 <= max_a_qlog); + assert(r_e1 <= max_a_qlog); + assert(r_e2 <= max_a_qlog); + + assert(r_f0 <= max_a_qlog); + assert(r_f1 <= max_a_qlog); + assert(r_f2 <= max_a_qlog); + + if ((!did_clamp) || (!had_tie)) + { + best_va = trial_va; + best_vb0 = trial_vb0; + best_vb1 = trial_vb1; + best_vc = trial_vc; + best_vd0 = trial_vd0; + best_vd1 = trial_vd1; + best_max_clamp_mag = trial_max_clamp_mag; + best_did_clamp = did_clamp; + + best_q[1][0] = r_e0; + best_q[1][1] = r_e1; + best_q[1][2] = r_e2; + best_q[0][0] = r_f0; + best_q[0][1] = r_f1; + best_q[0][2] = r_f2; + break; + } + + // we had a tie and it did clamp, try swapping L/H for a potential slight gain + + const uint32_t r_dist1 = basisu::square(r_e0 - val_q[1][0]) + basisu::square(r_e1 - val_q[1][1]) + basisu::square(r_e2 - val_q[1][2]); + const uint32_t r_dist0 = basisu::square(r_f0 - val_q[0][0]) + basisu::square(r_f1 - val_q[0][1]) + basisu::square(r_f2 - val_q[0][2]); + + const uint32_t total_dist = r_dist1 + r_dist0; + + if (total_dist < best_dist) + { + best_dist = total_dist; + + best_va = trial_va; + best_vb0 = trial_vb0; + best_vb1 = trial_vb1; + best_vc = trial_vc; + best_vd0 = trial_vd0; + best_vd1 = trial_vd1; + best_did_clamp = did_clamp; + + best_q[1][0] = r_e0; + best_q[1][1] = r_e1; + best_q[1][2] = r_e2; + best_q[0][0] = r_f0; + best_q[0][1] = r_f1; + best_q[0][2] = r_f2; + } + + for (uint32_t c = 0; c < 3; c++) + std::swap(val_q[0][c], val_q[1][c]); + } + + // pack bits now + int v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0; + + int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0; + switch (submode) + { + case 0: + x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = 
get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5); + break; + case 1: + x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5); + break; + case 2: + x0 = get_bit(best_va, 9); x1 = get_bit(best_vc, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5); + break; + case 3: + x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_va, 9); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5); + break; + case 4: + x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10); + break; + case 5: + x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_vc, 7); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5); + break; + case 6: + x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10); + break; + case 7: + x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5); + break; + default: + break; + } + + // write mode + pack_bit(v1, 7, submode, 0); + pack_bit(v2, 7, submode, 1); + pack_bit(v3, 7, submode, 2); + + // highest component + pack_bit(v4, 7, highest_comp, 0); + pack_bit(v5, 7, highest_comp, 1); + + // write bit 8 of va + pack_bit(v1, 6, best_va, 8); + + // extra bits + pack_bit(v2, 6, x0); + pack_bit(v3, 6, x1); + pack_bit(v4, 6, x2); + pack_bit(v5, 6, x3); + pack_bit(v4, 5, x4); + pack_bit(v5, 5, x5); + + v0 = best_va & 0xFF; + v1 |= (best_vc & 63); + v2 |= (best_vb0 & 63); + v3 |= (best_vb1 & 63); + v4 |= (best_vd0 & 31); + v5 |= (best_vd1 & 31); + + assert(in_range(v0, 0, 255) && in_range(v1, 0, 255) && in_range(v2, 0, 255) 
&& in_range(v3, 0, 255) && in_range(v4, 0, 255) && in_range(v5, 0, 255)); + + pEndpoints[0] = (uint8_t)v0; + pEndpoints[1] = (uint8_t)v1; + pEndpoints[2] = (uint8_t)v2; + pEndpoints[3] = (uint8_t)v3; + pEndpoints[4] = (uint8_t)v4; + pEndpoints[5] = (uint8_t)v5; + +#ifdef _DEBUG + // Test for valid pack by unpacking + { + if (highest_comp) + { + std::swap(best_q[0][0], best_q[0][highest_comp]); + std::swap(best_q[1][0], best_q[1][highest_comp]); + + std::swap(orig_q[0][0], orig_q[0][highest_comp]); + std::swap(orig_q[1][0], orig_q[1][highest_comp]); + } + + int test_e[2][3]; + decode_mode11_to_qlog12(pEndpoints, test_e, astc_helpers::BISE_256_LEVELS); + for (uint32_t i = 0; i < 2; i++) + { + for (uint32_t j = 0; j < 3; j++) + { + assert(best_q[i][j] == test_e[i][j] >> (12 - a_bits)); + + if (!best_did_clamp) + { + assert((orig_q[i][j] == test_e[i][j] >> (12 - a_bits)) || + (orig_q[1 - i][j] == test_e[i][j] >> (12 - a_bits))); + } + } + } + } +#endif + + max_clamp_mag = best_max_clamp_mag; + + return best_did_clamp; +} + +bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag, bool early_out_if_clamped, int max_clamp_mag_accept_thresh) +{ + assert(submode <= 7); + + const uint32_t a_bits = 9 + (submode >> 1); + const int max_a_val = (1 << a_bits) - 1; + + // The maximum usable value before it turns to NaN/Inf + const int max_a_qlog = get_max_qlog(a_bits); + + int val_q[2][3]; + + for (uint32_t c = 0; c < 3; c++) + { +#if 0 + // This is very slightly better, but ~8% slower likely due to the table lookups. + const half_float l = astc_helpers::qlog16_to_half((uint32_t)std::round(low_q16[c])); + val_q[0][c] = half_to_qlog7_12(l, a_bits); + + const half_float h = astc_helpers::qlog16_to_half((uint32_t)std::round(high_q16[c])); + val_q[1][c] = half_to_qlog7_12(h, a_bits); +#else + // TODO: Tune quant_qlog16() for higher precision. 
+ val_q[0][c] = quant_qlog16((uint32_t)std::round(low_q16[c]), a_bits); + val_q[1][c] = quant_qlog16((uint32_t)std::round(high_q16[c]), a_bits); +#endif + +#if 1 + if (val_q[0][c] == val_q[1][c]) + { +#if 0 + if (l <= h) +#else + if (low_q16[c] < high_q16[c]) +#endif + { + if (val_q[0][c]) + val_q[0][c]--; + + if (val_q[1][c] != max_a_val) + val_q[1][c]++; + } + else + { + if (val_q[0][c] != max_a_val) + val_q[0][c]++; + + if (val_q[1][c]) + val_q[1][c]--; + } + } +#endif + + val_q[0][c] = minimum(val_q[0][c], max_a_qlog); + val_q[1][c] = minimum(val_q[1][c], max_a_qlog); + } + + return pack_astc_mode11_submode(submode, pEndpoints, val_q, max_clamp_mag, early_out_if_clamped, max_clamp_mag_accept_thresh); +} + +//-------------------------------------------------------------------------------------------------------------------------- + +void pack_astc_mode11_direct(uint8_t* pEndpoints, vec3F l_q16, vec3F h_q16) +{ + float lg = l_q16.dot(vec3F(1.0f)), hg = h_q16.dot(vec3F(1.0f)); + if (lg > hg) + { + // Ensure low endpoint is generally less bright than high in direct mode. + std::swap(l_q16, h_q16); + } + + for (uint32_t i = 0; i < 3; i++) + { + // TODO: This goes from QLOG16->HALF->QLOG8/7 + half_float l_half = astc_helpers::qlog16_to_half(clamp((int)std::round(l_q16[i]), 0, 65535)); + half_float h_half = astc_helpers::qlog16_to_half(clamp((int)std::round(h_q16[i]), 0, 65535)); + + int l_q, h_q; + + if (i == 2) + { + l_q = g_half_to_qlog7[bounds_check((uint32_t)l_half, 0U, 32768U)]; + h_q = g_half_to_qlog7[bounds_check((uint32_t)h_half, 0U, 32768U)]; + + l_q = minimum(l_q, MAX_QLOG7); + h_q = minimum(h_q, MAX_QLOG7); + } + else + { + l_q = g_half_to_qlog8[bounds_check((uint32_t)l_half, 0U, 32768U)]; + h_q = g_half_to_qlog8[bounds_check((uint32_t)h_half, 0U, 32768U)]; + + // this quantizes R and G as 7 bits vs. 8, for grayscale. 
+ //l_q = g_half_to_qlog7[bounds_check((uint32_t)l_half, 0U, 32768U)] << 1; + //h_q = g_half_to_qlog7[bounds_check((uint32_t)h_half, 0U, 32768U)] << 1; + + l_q = minimum(l_q, MAX_QLOG8); + h_q = minimum(h_q, MAX_QLOG8); + } + +#if 1 + if (l_q == h_q) + { + const int m = (i == 2) ? MAX_QLOG7 : MAX_QLOG8; + + if (l_q16[i] <= h_q16[i]) + { + if (l_q) + l_q--; + + if (h_q != m) + h_q++; + } + else + { + if (h_q) + h_q--; + + if (l_q != m) + l_q++; + } + } +#endif + + if (i == 2) + { + assert(l_q <= (int)MAX_QLOG7 && h_q <= (int)MAX_QLOG7); + l_q |= 128; + h_q |= 128; + } + else + { + assert(l_q <= (int)MAX_QLOG8 && h_q <= (int)MAX_QLOG8); + } + + pEndpoints[2 * i + 0] = (uint8_t)l_q; + pEndpoints[2 * i + 1] = (uint8_t)h_q; + } +} + +//-------------------------------------------------------------------------------------------------------------------------- + +bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& rgb_q16, float s_q16, int& max_clamp_mag, uint32_t ise_weight_range, bool early_out_if_clamped, int max_clamp_mag_accept_thresh) +{ + assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + + assert(submode <= 5); + max_clamp_mag = 0; + + static const uint8_t s_r_bits[6] = { 11, 11, 10, 9, 8, 7 }; + static const uint8_t s_g_b_bits[6] = { 5, 6, 5, 6, 7, 7 }; + static const uint8_t s_s_bits[6] = { 7, 5, 8, 7, 6, 7 }; + + // The precision of the components + const uint32_t prec_bits = s_r_bits[submode]; + + int qlog[4], pack_bits[4]; + + for (uint32_t i = 0; i < 4; i++) + { + const float f = (i == 3) ? s_q16 : rgb_q16[i]; + + // The # of bits the component is packed into + if (i == 0) + pack_bits[i] = s_r_bits[submode]; + else if (i == 3) + pack_bits[i] = s_s_bits[submode]; + else + pack_bits[i] = s_g_b_bits[submode]; + +#if 0 + // this is slightly worse + // TODO: going from qlog16 to half loses some precision. Then going from half to qlog 7-12 will have extra error. 
+ half_float h = qlog_to_half(clamp((int)std::round(f), 0, MAX_QLOG16), 16); + qlog[i] = half_to_qlog7_12((half_float)bounds_check((uint32_t)h, 0U, 32768U), prec_bits); +#else + qlog[i] = quant_qlog16(clamp((int)std::round(f), 0, MAX_QLOG16), prec_bits); + + // Only bias if there are enough texel weights, 4=6 weights + if (ise_weight_range >= 4) + { + // Explictly bias the high color, and the scale up, to better exploit the weights. + // The quantized range also then encompases the complete input range. + const uint32_t max_val = (1 << prec_bits) - 1; + const uint32_t K = 3; + if (i == 3) + { + qlog[i] = minimum(qlog[i] + K * 2, max_val); + } + else + { + qlog[i] = minimum(qlog[i] + K, max_val); + } + } +#endif + + if (i != 3) + qlog[i] = minimum(qlog[i], get_max_qlog(prec_bits)); + + // If S=0, we lose freedom for the texel weights to add any value. + if ((i == 3) && (qlog[i] == 0)) + qlog[i] = 1; + } + + uint32_t maj_index = 0; + + bool did_clamp = false; + + if (submode != 5) + { + int largest_qlog = 0; + for (uint32_t i = 0; i < 3; i++) + { + if (qlog[i] > largest_qlog) + { + largest_qlog = qlog[i]; + maj_index = i; + } + } + + if (maj_index) + { + std::swap(qlog[0], qlog[maj_index]); + } + + assert(qlog[0] >= qlog[1]); + assert(qlog[0] >= qlog[2]); + + qlog[1] = qlog[0] - qlog[1]; + qlog[2] = qlog[0] - qlog[2]; + + for (uint32_t i = 1; i < 4; i++) + { + const int max_val = (1 << pack_bits[i]) - 1; + + if (qlog[i] > max_val) + { + max_clamp_mag = maximum(max_clamp_mag, qlog[i] - max_val); + qlog[i] = max_val; + did_clamp = true; + + if ((early_out_if_clamped) && (max_clamp_mag > max_clamp_mag_accept_thresh)) + return true; + } + } + } + + for (uint32_t i = 0; i < 4; i++) + { + const int max_val = (1 << pack_bits[i]) - 1; (void)max_val; + + assert(qlog[i] <= max_val); + } + + int mode = 0; + + int r = qlog[0] & 63; // 6-bits + int g = qlog[1] & 31; // 5-bits + int b = qlog[2] & 31; // 5-bits + int s = qlog[3] & 31; // 5-bits + + int x0 = 0, x1 = 0, x2 = 0, x3 = 
0, x4 = 0, x5 = 0, x6 = 0; + + switch (submode) + { + case 0: + { + mode = (maj_index << 2) | 0; + assert((mode & 0xC) != 0xC); + + x0 = get_bit(qlog[0], 9); // R9 + x1 = get_bit(qlog[0], 8); // R8 + x2 = get_bit(qlog[0], 7); // R7 + x3 = get_bit(qlog[0], 10); // R10 + x4 = get_bit(qlog[0], 6); // R6 + x5 = get_bit(qlog[3], 6); // S6 + x6 = get_bit(qlog[3], 5); // S5 + break; + } + case 1: + { + mode = (maj_index << 2) | 1; + assert((mode & 0xC) != 0xC); + + x0 = get_bit(qlog[0], 8); // R8 + x1 = get_bit(qlog[1], 5); // G5 + x2 = get_bit(qlog[0], 7); // R7 + x3 = get_bit(qlog[2], 5); // B5 + x4 = get_bit(qlog[0], 6); // R6 + x5 = get_bit(qlog[0], 10); // R10 + x6 = get_bit(qlog[0], 9); // R9 + break; + } + case 2: + { + mode = (maj_index << 2) | 2; + assert((mode & 0xC) != 0xC); + + x0 = get_bit(qlog[0], 9); // R9 + x1 = get_bit(qlog[0], 8); // R8 + x2 = get_bit(qlog[0], 7); // R7 + x3 = get_bit(qlog[0], 6); // R6 + x4 = get_bit(qlog[3], 7); // S7 + x5 = get_bit(qlog[3], 6); // S6 + x6 = get_bit(qlog[3], 5); // S5 + break; + } + case 3: + { + mode = (maj_index << 2) | 3; + assert((mode & 0xC) != 0xC); + + x0 = get_bit(qlog[0], 8); // R8 + x1 = get_bit(qlog[1], 5); // G5 + x2 = get_bit(qlog[0], 7); // R7 + x3 = get_bit(qlog[2], 5); // B5 + x4 = get_bit(qlog[0], 6); // R6 + x5 = get_bit(qlog[3], 6); // S6 + x6 = get_bit(qlog[3], 5); // S5 + break; + } + case 4: + { + mode = maj_index | 0xC; // 0b1100 + assert((mode & 0xC) == 0xC); + assert(mode != 0xF); + + x0 = get_bit(qlog[1], 6); // G6 + x1 = get_bit(qlog[1], 5); // G5 + x2 = get_bit(qlog[2], 6); // B6 + x3 = get_bit(qlog[2], 5); // B5 + x4 = get_bit(qlog[0], 6); // R6 + x5 = get_bit(qlog[0], 7); // R7 + x6 = get_bit(qlog[3], 5); // S5 + break; + } + case 5: + { + mode = 0xF; + + x0 = get_bit(qlog[1], 6); // G6 + x1 = get_bit(qlog[1], 5); // G5 + x2 = get_bit(qlog[2], 6); // B6 + x3 = get_bit(qlog[2], 5); // B5 + x4 = get_bit(qlog[0], 6); // R6 + x5 = get_bit(qlog[3], 6); // S6 + x6 = get_bit(qlog[3], 5); // S5 + 
break; + } + default: + { + assert(0); + break; + } + } + + pEndpoints[0] = (uint8_t)((get_bit(mode, 1) << 7) | (get_bit(mode, 0) << 6) | r); + pEndpoints[1] = (uint8_t)((get_bit(mode, 2) << 7) | (x0 << 6) | (x1 << 5) | g); + pEndpoints[2] = (uint8_t)((get_bit(mode, 3) << 7) | (x2 << 6) | (x3 << 5) | b); + pEndpoints[3] = (uint8_t)((x4 << 7) | (x5 << 6) | (x6 << 5) | s); + +#ifdef _DEBUG + // Test for valid pack by unpacking + { + const int inv_shift = 12 - prec_bits; + + int unpacked_e[2][3]; + if (submode != 5) + { + unpacked_e[1][0] = left_shift32(qlog[0], inv_shift); + unpacked_e[1][1] = clamp(left_shift32((qlog[0] - qlog[1]), inv_shift), 0, 0xFFF); + unpacked_e[1][2] = clamp(left_shift32((qlog[0] - qlog[2]), inv_shift), 0, 0xFFF); + + unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF); + unpacked_e[0][1] = clamp(left_shift32(((qlog[0] - qlog[1]) - qlog[3]), inv_shift), 0, 0xFFF); + unpacked_e[0][2] = clamp(left_shift32(((qlog[0] - qlog[2]) - qlog[3]), inv_shift), 0, 0xFFF); + } + else + { + unpacked_e[1][0] = left_shift32(qlog[0], inv_shift); + unpacked_e[1][1] = left_shift32(qlog[1], inv_shift); + unpacked_e[1][2] = left_shift32(qlog[2], inv_shift); + + unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF); + unpacked_e[0][1] = clamp(left_shift32((qlog[1] - qlog[3]), inv_shift), 0, 0xFFF); + unpacked_e[0][2] = clamp(left_shift32((qlog[2] - qlog[3]), inv_shift), 0, 0xFFF); + } + + if (maj_index) + { + std::swap(unpacked_e[0][0], unpacked_e[0][maj_index]); + std::swap(unpacked_e[1][0], unpacked_e[1][maj_index]); + } + + int e[2][3]; + decode_mode7_to_qlog12_ise20(pEndpoints, e, nullptr); + + for (uint32_t i = 0; i < 3; i++) + { + assert(unpacked_e[0][i] == e[0][i]); + assert(unpacked_e[1][i] == e[1][i]); + } + } +#endif + + return did_clamp; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +bool 
pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints) +{ + memset(pEndpoints, 0, NUM_MODE11_ENDPOINTS); + + if (desc.is_direct()) + { + if ((desc.m_a < 0) || (desc.m_c < 0) || (desc.m_b0 < 0)) + return false; + + if (!((desc.m_a <= 255) && (desc.m_c <= 255) && (desc.m_b0 <= 127))) + return false; + + pEndpoints[0] = (uint8_t)desc.m_a; + pEndpoints[2] = (uint8_t)desc.m_c; + pEndpoints[4] = (uint8_t)desc.m_b0 | 128; + + if ((desc.m_b1 < 0) || (desc.m_d0 < 0) || (desc.m_d1 < 0)) + return false; + + if (!((desc.m_b1 <= 255) && (desc.m_d0 <= 255) && (desc.m_d1 <= 127))) + return false; + + pEndpoints[1] = (uint8_t)desc.m_b1; + pEndpoints[3] = (uint8_t)desc.m_d0; + pEndpoints[5] = (uint8_t)desc.m_d1 | 128; + + return true; + } + + if (!((desc.m_a >= 0) && (desc.m_a <= desc.m_max_a_val))) + return false; + if (!(((desc.m_c >= 0) && (desc.m_c <= desc.m_max_c_val)))) + return false; + if (!((desc.m_b0 >= 0) && (desc.m_b0 <= desc.m_max_b_val))) + return false; + if (!((desc.m_b1 >= 0) && (desc.m_b1 <= desc.m_max_b_val))) + return false; + if (!((desc.m_d0 >= desc.m_min_d_val) && (desc.m_d0 <= desc.m_max_d_val))) + return false; + if (!((desc.m_d1 >= desc.m_min_d_val) && (desc.m_d1 <= desc.m_max_d_val))) + return false; + + const int va = desc.m_a, vb0 = desc.m_b0, vb1 = desc.m_b1, vc = desc.m_c, vd0 = desc.m_d0, vd1 = desc.m_d1; + + int v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0; + + int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0; + switch (desc.m_submode) + { + case 0: + x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = get_bit(vd0, 6); x3 = get_bit(vd1, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5); + break; + case 1: + x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = get_bit(vb0, 7); x3 = get_bit(vb1, 7); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5); + break; + case 2: + x0 = get_bit(va, 9); x1 = get_bit(vc, 6); x2 = get_bit(vd0, 6); x3 = get_bit(vd1, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5); + break; + case 3: + x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); 
x2 = get_bit(va, 9); x3 = get_bit(vc, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5); + break; + case 4: + x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = get_bit(vb0, 7); x3 = get_bit(vb1, 7); x4 = get_bit(va, 9); x5 = get_bit(va, 10); + break; + case 5: + x0 = get_bit(va, 9); x1 = get_bit(va, 10); x2 = get_bit(vc, 7); x3 = get_bit(vc, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5); + break; + case 6: + x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = get_bit(va, 11); x3 = get_bit(vc, 6); x4 = get_bit(va, 9); x5 = get_bit(va, 10); + break; + case 7: + x0 = get_bit(va, 9); x1 = get_bit(va, 10); x2 = get_bit(va, 11); x3 = get_bit(vc, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5); + break; + default: + break; + } + + // write mode + pack_bit(v1, 7, desc.m_submode, 0); + pack_bit(v2, 7, desc.m_submode, 1); + pack_bit(v3, 7, desc.m_submode, 2); + + // highest component + pack_bit(v4, 7, desc.m_maj_comp, 0); + pack_bit(v5, 7, desc.m_maj_comp, 1); + + // write bit 8 of va + pack_bit(v1, 6, va, 8); + + // extra bits + pack_bit(v2, 6, x0); + pack_bit(v3, 6, x1); + pack_bit(v4, 6, x2); + pack_bit(v5, 6, x3); + pack_bit(v4, 5, x4); + pack_bit(v5, 5, x5); + + v0 = va & 0xFF; + v1 |= (vc & 63); + v2 |= (vb0 & 63); + v3 |= (vb1 & 63); + v4 |= (vd0 & 31); + v5 |= (vd1 & 31); + + assert(in_range(v0, 0, 255) && in_range(v1, 0, 255) && in_range(v2, 0, 255) && in_range(v3, 0, 255) && in_range(v4, 0, 255) && in_range(v5, 0, 255)); + + pEndpoints[0] = (uint8_t)v0; + pEndpoints[1] = (uint8_t)v1; + pEndpoints[2] = (uint8_t)v2; + pEndpoints[3] = (uint8_t)v3; + pEndpoints[4] = (uint8_t)v4; + pEndpoints[5] = (uint8_t)v5; + + return true; +} + +static inline int astc_hdr_sign_extend(int src, int num_src_bits) +{ + assert(basisu::in_range(num_src_bits, 2, 31)); + + const bool negative = (src & (1 << (num_src_bits - 1))) != 0; + if (negative) + return src | ~((1 << num_src_bits) - 1); + else + return src & ((1 << num_src_bits) - 1); +} + +void unpack_mode11(const uint8_t* pEndpoints, 
mode11_log_desc& desc) +{ + clear_obj(desc); + + pack_bit(desc.m_maj_comp, 0, pEndpoints[4], 7); + pack_bit(desc.m_maj_comp, 1, pEndpoints[5], 7); + + if (desc.m_maj_comp == 3) + { + desc.m_a = pEndpoints[0]; + desc.m_c = pEndpoints[2]; + desc.m_b0 = pEndpoints[4] & 0x7F; + + desc.m_b1 = pEndpoints[1]; + desc.m_d0 = pEndpoints[3]; + desc.m_d1 = pEndpoints[5] & 0x7F; + + return; + } + + pack_bit(desc.m_submode, 0, pEndpoints[1], 7); + pack_bit(desc.m_submode, 1, pEndpoints[2], 7); + pack_bit(desc.m_submode, 2, pEndpoints[3], 7); + + desc.m_a = pEndpoints[0]; // 8 bits + pack_bit(desc.m_a, 8, pEndpoints[1], 6); + + desc.m_c = pEndpoints[1] & 63; // 6 bits + desc.m_b0 = pEndpoints[2] & 63; // 6 bits + desc.m_b1 = pEndpoints[3] & 63; // 6 bits + desc.m_d0 = pEndpoints[4] & 31; // 5 bits + desc.m_d1 = pEndpoints[5] & 31; // 5 bits + + const int x0 = get_bit(pEndpoints[2], 6); + const int x1 = get_bit(pEndpoints[3], 6); + const int x2 = get_bit(pEndpoints[4], 6); + const int x3 = get_bit(pEndpoints[5], 6); + const int x4 = get_bit(pEndpoints[4], 5); + const int x5 = get_bit(pEndpoints[5], 5); + + switch (desc.m_submode) + { + case 0: + pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_d0, 6, x2, 0); pack_bit(desc.m_d1, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0); + break; + case 1: + pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_b0, 7, x2, 0); pack_bit(desc.m_b1, 7, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0); + break; + case 2: + pack_bit(desc.m_a, 9, x0, 0); pack_bit(desc.m_c, 6, x1, 0); pack_bit(desc.m_d0, 6, x2, 0); pack_bit(desc.m_d1, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0); + break; + case 3: + pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_a, 9, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0); + break; + case 4: + pack_bit(desc.m_b0, 6, x0, 
0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_b0, 7, x2, 0); pack_bit(desc.m_b1, 7, x3, 0); pack_bit(desc.m_a, 9, x4, 0); pack_bit(desc.m_a, 10, x5, 0); + break; + case 5: + pack_bit(desc.m_a, 9, x0, 0); pack_bit(desc.m_a, 10, x1, 0); pack_bit(desc.m_c, 7, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0); + break; + case 6: + pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_a, 11, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_a, 9, x4, 0); pack_bit(desc.m_a, 10, x5, 0); + break; + case 7: + default: + pack_bit(desc.m_a, 9, x0, 0); pack_bit(desc.m_a, 10, x1, 0); pack_bit(desc.m_a, 11, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0); + break; + } + + desc.m_a_bits = 9 + (desc.m_submode >> 1); + desc.m_b_bits = s_b_bits[desc.m_submode]; + desc.m_c_bits = s_c_bits[desc.m_submode]; + desc.m_d_bits = s_d_bits[desc.m_submode]; + + desc.m_max_a_val = (1 << desc.m_a_bits) - 1; + desc.m_max_b_val = (1 << desc.m_b_bits) - 1; + desc.m_max_c_val = (1 << desc.m_c_bits) - 1; + + desc.m_min_d_val = -(1 << (desc.m_d_bits - 1)); + desc.m_max_d_val = -desc.m_min_d_val - 1; + + desc.m_d0 = astc_hdr_sign_extend(desc.m_d0, desc.m_d_bits); + desc.m_d1 = astc_hdr_sign_extend(desc.m_d1, desc.m_d_bits); + + assert((desc.m_a >= 0) && (desc.m_a <= desc.m_max_a_val)); + assert((desc.m_c >= 0) && (desc.m_c <= desc.m_max_c_val)); + assert((desc.m_b0 >= 0) && (desc.m_b0 <= desc.m_max_b_val)); + assert((desc.m_b1 >= 0) && (desc.m_b1 <= desc.m_max_b_val)); + assert((desc.m_d0 >= desc.m_min_d_val) && (desc.m_d0 <= desc.m_max_d_val)); + assert((desc.m_d1 >= desc.m_min_d_val) && (desc.m_d1 <= desc.m_max_d_val)); +} + +//-------------------------------------------------------------------------------------------------------------------------- + +void decode_cem_11_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index) +{ + submode_index = 0; + 
maj_index = 0; + + pack_bit(submode_index, 0, pEndpoints[1], 7); + pack_bit(submode_index, 1, pEndpoints[2], 7); + pack_bit(submode_index, 2, pEndpoints[3], 7); + + pack_bit(maj_index, 0, pEndpoints[4], 7); + pack_bit(maj_index, 1, pEndpoints[5], 7); +} + +//-------------------------------------------------------------------------------------------------------------------------- + +void decode_cem_7_config(const uint8_t* pEndpoints, int& submode_index, int &maj_index) +{ + const int v0 = pEndpoints[0], v1 = pEndpoints[1], v2 = pEndpoints[2], v3 = pEndpoints[3]; + (void)v3; + + // Extract mode bits and unpack to major component and mode. + const int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4); + + if ((modeval & 0xC) != 0xC) + { + maj_index = modeval >> 2; + submode_index = modeval & 3; + } + else if (modeval != 0xF) + { + maj_index = modeval & 3; + submode_index = 4; + } + else + { + maj_index = 0; + submode_index = 5; + } +} + +//-------------------------------------------------------------------------------------------------------------------------- +// TODO: Use pack_mode11() as a shared function. 
+ +bool pack_mode11( + const vec3F& low_color_q16, const vec3F& high_color_q16, + uint32_t ise_endpoint_range, uint8_t* pEndpoints, + const astc_hdr_codec_base_options& coptions, + bool direct_only, int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used) +{ + uint8_t orig_trial_endpoints[NUM_MODE11_ENDPOINTS]; + + if (direct_only) + { + first_submode = -1; + last_submode = -1; + } + + assert(first_submode <= last_submode); + assert((first_submode >= -1) && (first_submode <= 7)); + assert((last_submode >= -1) && (last_submode <= 7)); + + memset(pEndpoints, 0, NUM_MODE11_ENDPOINTS); + + double best_trial_dist = BIG_FLOAT_VAL; + int best_submode = 0; + + for (int submode = last_submode; submode >= first_submode; submode--) + { + bool did_clamp = false; + int max_clamp_mag = 0; + if (submode == -1) + { + // If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision. + pack_astc_mode11_direct(orig_trial_endpoints, low_color_q16, high_color_q16); + } + else + { + const int MAX_CLAMP_MAG_ACCEPT_THRESH = 32; + did_clamp = pack_astc_mode11_submode(submode, orig_trial_endpoints, low_color_q16, high_color_q16, max_clamp_mag, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH); + + if (!ignore_clamping) + { + // If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts. + if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH)) + continue; + } + } + + uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS]; + + // This will distort the endpoints if the ISE endpoint range isn't 256 levels (20). + // It could massively distort the endpoints, but still result in a valid encoding. 
+ basist::astc_6x6_hdr::requantize_ise_endpoints(11, astc_helpers::BISE_256_LEVELS, orig_trial_endpoints, ise_endpoint_range, trial_endpoints); + + int e[2][3]; + if (!decode_mode11_to_qlog12(trial_endpoints, e, ise_endpoint_range)) + continue; + + vec3F e0( + (float)(e[0][0] << 4), + (float)(e[0][1] << 4), + (float)(e[0][2] << 4) + ); + + vec3F e1( + (float)(e[1][0] << 4), + (float)(e[1][1] << 4), + (float)(e[1][2] << 4) + ); + + double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16); + double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16); + double dist = helpers::minimum(dist0, dist1); + + if (dist < best_trial_dist) + { + best_trial_dist = dist; + best_submode = submode; + memcpy(pEndpoints, trial_endpoints, NUM_MODE11_ENDPOINTS); + } + + if (coptions.m_take_first_non_clamping_mode11_submode) + { + if (!did_clamp) + break; + } + + } // submode + + if ((coptions.m_ultra_quant) && + (ise_endpoint_range < astc_helpers::BISE_256_LEVELS) && + (best_trial_dist != BIG_FLOAT_VAL)) + { + uint8_t orig_best_trial_endpoints[NUM_MODE11_ENDPOINTS]; + memcpy(orig_best_trial_endpoints, pEndpoints, NUM_MODE11_ENDPOINTS); + + for (uint32_t c = 0; c < NUM_MODE11_ENDPOINTS; c++) + { + for (int dt = 0; dt <= 1; dt++) + { + const int d = dt ? 
1 : -1; + + uint8_t varied_endpoints[NUM_MODE11_ENDPOINTS]; + memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE11_ENDPOINTS); + + int ise = varied_endpoints[c]; + + int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise]; + rank = clamp(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1); + + ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank]; + + varied_endpoints[c] = (uint8_t)ise; + + int e[2][3]; + if (!decode_mode11_to_qlog12(varied_endpoints, e, ise_endpoint_range)) + continue; + + vec3F e0( + (float)(e[0][0] << 4), + (float)(e[0][1] << 4), + (float)(e[0][2] << 4) + ); + + vec3F e1( + (float)(e[1][0] << 4), + (float)(e[1][1] << 4), + (float)(e[1][2] << 4) + ); + + double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16); + double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16); + double dist = helpers::minimum(dist0, dist1); + + if (dist < best_trial_dist) + { + best_trial_dist = dist; + memcpy(pEndpoints, varied_endpoints, NUM_MODE11_ENDPOINTS); + } + } // d + } // c + } // if (coptions.m_ultra_quant) + + submode_used = best_submode + 1; + + return (best_trial_dist != BIG_FLOAT_VAL); +} + +bool try_mode11(uint32_t num_pixels, + uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used, + const vec3F& low_color_q16, const vec3F& high_color_q16, + const basist::half_float block_pixels_half[][3], + uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range, + bool constrain_ise_weight_selectors, + int32_t first_submode, int32_t last_submode, bool ignore_clamping) // -1, 7 +{ + assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + assert((num_weight_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && 
(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS)); + assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS)); + assert(num_weight_levels == astc_helpers::get_ise_levels(ise_weight_range)); + + half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3]; + uint8_t orig_trial_endpoints[NUM_MODE11_ENDPOINTS], trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + + if (direct_only) + { + first_submode = -1; + last_submode = -1; + } + + assert(first_submode <= last_submode); + assert((first_submode >= -1) && (first_submode <= 7)); + assert((last_submode >= -1) && (last_submode <= 7)); + + uint8_t best_trial_endpoints[NUM_MODE11_ENDPOINTS]; + clear_obj(best_trial_endpoints); + double best_trial_dist = BIG_FLOAT_VAL; + int best_submode = 0; + + for (int submode = last_submode; submode >= first_submode; submode--) + { + bool did_clamp = false; + int max_clamp_mag = 0; + if (submode == -1) + { + // If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision. + pack_astc_mode11_direct(orig_trial_endpoints, low_color_q16, high_color_q16); + } + else + { + const int MAX_CLAMP_MAG_ACCEPT_THRESH = 32; + did_clamp = pack_astc_mode11_submode(submode, orig_trial_endpoints, low_color_q16, high_color_q16, max_clamp_mag, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH); + + if (!ignore_clamping) + { + // If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts. + if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH)) + continue; + } + } + + uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS]; + + // This will distort the endpoints if the ISE endpoint range isn't 256 levels (20). + // It could massively distort the endpoints, but still result in a valid encoding. 
+ basist::astc_6x6_hdr::requantize_ise_endpoints(11, astc_helpers::BISE_256_LEVELS, orig_trial_endpoints, ise_endpoint_range, trial_endpoints); + + int e[2][3]; + if (!decode_mode11_to_qlog12(trial_endpoints, e, ise_endpoint_range)) + continue; + + vec3F e0( + (float)(e[0][0] << 4), + (float)(e[0][1] << 4), + (float)(e[0][2] << 4) + ); + + vec3F e1( + (float)(e[1][0] << 4), + (float)(e[1][1] << 4), + (float)(e[1][2] << 4) + ); + + double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16); + double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16); + double dist = helpers::minimum(dist0, dist1); + + if (dist < best_trial_dist) + { + best_trial_dist = dist; + best_submode = submode; + memcpy(best_trial_endpoints, trial_endpoints, sizeof(best_trial_endpoints)); + } + + if (coptions.m_take_first_non_clamping_mode11_submode) + { + if (!did_clamp) + break; + } + + } // submode + + if ((coptions.m_ultra_quant) && + (ise_endpoint_range < astc_helpers::BISE_256_LEVELS) && + (best_trial_dist != BIG_FLOAT_VAL)) + { + uint8_t orig_best_trial_endpoints[NUM_MODE11_ENDPOINTS]; + memcpy(orig_best_trial_endpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS); + + for (uint32_t c = 0; c < NUM_MODE11_ENDPOINTS; c++) + { + for (int dt = 0; dt <= 1; dt++) + { + const int d = dt ? 
1 : -1; + + uint8_t varied_endpoints[NUM_MODE11_ENDPOINTS]; + memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE11_ENDPOINTS); + + int ise = varied_endpoints[c]; + + int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise]; + rank = clamp(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1); + + ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank]; + + varied_endpoints[c] = (uint8_t)ise; + + int e[2][3]; + if (!decode_mode11_to_qlog12(varied_endpoints, e, ise_endpoint_range)) + continue; + + vec3F e0( + (float)(e[0][0] << 4), + (float)(e[0][1] << 4), + (float)(e[0][2] << 4) + ); + + vec3F e1( + (float)(e[1][0] << 4), + (float)(e[1][1] << 4), + (float)(e[1][2] << 4) + ); + + double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16); + double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16); + double dist = helpers::minimum(dist0, dist1); + + if (dist < best_trial_dist) + { + best_trial_dist = dist; + memcpy(best_trial_endpoints, varied_endpoints, NUM_MODE11_ENDPOINTS); + } + } // d + } // c + } // if (coptions.m_ultra_quant) + + bool improved_flag = false; + + if (best_trial_dist != BIG_FLOAT_VAL) + { + if (get_astc_hdr_mode_11_block_colors(best_trial_endpoints, &decoded_half[0][0], nullptr, num_weight_levels, ise_weight_range, ise_endpoint_range)) + { + uint32_t usable_selector_bitmask = UINT32_MAX; + if ((constrain_ise_weight_selectors) && (ise_weight_range == astc_helpers::BISE_16_LEVELS)) + usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 10) | (1 << 11) | (1 << 14) | (1 << 15); + else if ((constrain_ise_weight_selectors) && (ise_weight_range == astc_helpers::BISE_12_LEVELS)) + usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3); + + double trial_blk_error = eval_selectors(num_pixels, trial_weights, ise_weight_range, &block_pixels_half[0][0], 
num_weight_levels, &decoded_half[0][0], coptions, usable_selector_bitmask); + if (trial_blk_error < cur_block_error) + { + cur_block_error = trial_blk_error; + memcpy(pEndpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS); + memcpy(pWeights, trial_weights, num_pixels); + submode_used = best_submode + 1; + improved_flag = true; + } + } + } + + return improved_flag; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +bool try_mode11_dual_plane(uint32_t channel_index, uint32_t num_pixels, + uint8_t* pEndpoints, uint8_t* pWeights0, uint8_t* pWeights1, double& cur_block_error, uint32_t& submode_used, + const vec3F& low_color_q16, const vec3F& high_color_q16, + const basist::half_float block_pixels_half[][3], + uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range, + bool constrain_ise_weight_selectors, + int32_t first_submode, int32_t last_submode, bool ignore_clamping) // -1, 7 +{ + assert(channel_index <= 2); + assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + assert((num_weight_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS)); + assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS)); + assert(num_weight_levels == astc_helpers::get_ise_levels(ise_weight_range)); + + half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3]; + uint8_t orig_trial_endpoints[NUM_MODE11_ENDPOINTS], trial_weights0[MAX_ASTC_HDR_ENC_BLOCK_PIXELS], trial_weights1[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + + if (direct_only) + { + first_submode = -1; + last_submode = -1; + } + + assert(first_submode <= last_submode); + assert((first_submode >= -1) && (first_submode <= 7)); + assert((last_submode >= -1) && (last_submode <= 7)); + + uint8_t best_trial_endpoints[NUM_MODE11_ENDPOINTS]; + 
clear_obj(best_trial_endpoints); + + double best_trial_dist = BIG_FLOAT_VAL; + int best_submode = 0; + + for (int submode = last_submode; submode >= first_submode; submode--) + { + bool did_clamp = false; + int max_clamp_mag = 0; + if (submode == -1) + { + // If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision. + pack_astc_mode11_direct(orig_trial_endpoints, low_color_q16, high_color_q16); + } + else + { + const int MAX_CLAMP_MAG_ACCEPT_THRESH = 32; + did_clamp = pack_astc_mode11_submode(submode, orig_trial_endpoints, low_color_q16, high_color_q16, max_clamp_mag, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH); + + if (!ignore_clamping) + { + // If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts. + if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH)) + continue; + } + } + + uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS]; + + // This will distort the endpoints if the ISE endpoint range isn't 256 levels (20). + // It could massively distort the endpoints, but still result in a valid encoding. 
+ basist::astc_6x6_hdr::requantize_ise_endpoints(11, astc_helpers::BISE_256_LEVELS, orig_trial_endpoints, ise_endpoint_range, trial_endpoints); + + int e[2][3]; + if (!decode_mode11_to_qlog12(trial_endpoints, e, ise_endpoint_range)) + continue; + + vec3F e0( + (float)(e[0][0] << 4), + (float)(e[0][1] << 4), + (float)(e[0][2] << 4) + ); + + vec3F e1( + (float)(e[1][0] << 4), + (float)(e[1][1] << 4), + (float)(e[1][2] << 4) + ); + + double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16); + double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16); + double dist = helpers::minimum(dist0, dist1); + + if (dist < best_trial_dist) + { + best_trial_dist = dist; + best_submode = submode; + memcpy(best_trial_endpoints, trial_endpoints, sizeof(best_trial_endpoints)); + } + + if (coptions.m_take_first_non_clamping_mode11_submode) + { + if (!did_clamp) + break; + } + + } // submode + + if ((coptions.m_ultra_quant) && + (ise_endpoint_range < astc_helpers::BISE_256_LEVELS) && + (best_trial_dist != BIG_FLOAT_VAL)) + { + uint8_t orig_best_trial_endpoints[NUM_MODE11_ENDPOINTS]; + memcpy(orig_best_trial_endpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS); + + for (uint32_t c = 0; c < NUM_MODE11_ENDPOINTS; c++) + { + for (int dt = 0; dt <= 1; dt++) + { + const int d = dt ? 
1 : -1; + + uint8_t varied_endpoints[NUM_MODE11_ENDPOINTS]; + memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE11_ENDPOINTS); + + int ise = varied_endpoints[c]; + + int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise]; + rank = clamp(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1); + + ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank]; + + varied_endpoints[c] = (uint8_t)ise; + + int e[2][3]; + if (!decode_mode11_to_qlog12(varied_endpoints, e, ise_endpoint_range)) + continue; + + vec3F e0( + (float)(e[0][0] << 4), + (float)(e[0][1] << 4), + (float)(e[0][2] << 4) + ); + + vec3F e1( + (float)(e[1][0] << 4), + (float)(e[1][1] << 4), + (float)(e[1][2] << 4) + ); + + double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16); + double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16); + double dist = helpers::minimum(dist0, dist1); + + if (dist < best_trial_dist) + { + best_trial_dist = dist; + memcpy(best_trial_endpoints, varied_endpoints, NUM_MODE11_ENDPOINTS); + } + } // d + } // c + } // if (coptions.m_ultra_quant) + + bool improved_flag = false; + + if (best_trial_dist != BIG_FLOAT_VAL) + { + if (get_astc_hdr_mode_11_block_colors(best_trial_endpoints, &decoded_half[0][0], nullptr, num_weight_levels, ise_weight_range, ise_endpoint_range)) + { + uint32_t usable_selector_bitmask = UINT32_MAX; + if ((constrain_ise_weight_selectors) && (ise_weight_range == astc_helpers::BISE_16_LEVELS)) + usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 10) | (1 << 11) | (1 << 14) | (1 << 15); + else if ((constrain_ise_weight_selectors) && (ise_weight_range == astc_helpers::BISE_12_LEVELS)) + usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3); + + double trial_blk_error = eval_selectors_dual_plane(channel_index, num_pixels, trial_weights0, trial_weights1, 
&block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions, usable_selector_bitmask); + if (trial_blk_error < cur_block_error) + { + cur_block_error = trial_blk_error; + memcpy(pEndpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS); + memcpy(pWeights0, trial_weights0, num_pixels); + memcpy(pWeights1, trial_weights1, num_pixels); + submode_used = best_submode + 1; + improved_flag = true; + } + } + } + + return improved_flag; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +bool pack_mode7( + const vec3F& high_color_q16, const float s_q16, + uint32_t ise_endpoint_range, uint8_t* pEndpoints, + uint32_t ise_weight_range, // only used for determining biasing during packing + const astc_hdr_codec_base_options& coptions, + int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used) +{ + assert(first_submode <= last_submode); + assert((first_submode >= 0) && (first_submode <= (int)MAX_MODE7_SUBMODE_INDEX)); + assert(last_submode <= (int)MAX_MODE7_SUBMODE_INDEX); + + uint8_t unquant_trial_endpoints[NUM_MODE7_ENDPOINTS]; + + memset(pEndpoints, 0, NUM_MODE7_ENDPOINTS); + + double best_trial_dist = BIG_FLOAT_VAL; + int best_trial_submode = 0; + + for (int submode = first_submode; submode <= last_submode; submode++) + { + const int MAX_CLAMP_MAG_ACCEPT_THRESH = 16; + + int max_clamp_mag = 0; + const bool did_clamp = pack_astc_mode7_submode(submode, unquant_trial_endpoints, high_color_q16, s_q16, max_clamp_mag, ise_weight_range, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH); + + if (submode < 5) + { + if (!ignore_clamping) + { + if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH)) + continue; + } + } + + uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS]; + + // This will distort the endpoints if the ISE endpoint range isn't 256 levels (20). + // It could massively distort the endpoints, but still result in a valid encoding. 
+ basist::astc_6x6_hdr::requantize_ise_endpoints(7, astc_helpers::BISE_256_LEVELS, unquant_trial_endpoints, ise_endpoint_range, trial_endpoints); + + int e[2][3]; + int decoded_s = 0; + if (!decode_mode7_to_qlog12(trial_endpoints, e, &decoded_s, ise_endpoint_range)) + continue; + + // e1 is always the high color + vec3F e1( + (float)(e[1][0] << 4), + (float)(e[1][1] << 4), + (float)(e[1][2] << 4) + ); + + decoded_s <<= 4; + + double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3; + + if (dist < best_trial_dist) + { + best_trial_dist = dist; + best_trial_submode = submode; + memcpy(pEndpoints, trial_endpoints, NUM_MODE7_ENDPOINTS); + } + + if (coptions.m_take_first_non_clamping_mode7_submode) + { + if (!did_clamp) + break; + } + + } // submode + + if ((coptions.m_ultra_quant) && + (ise_endpoint_range < astc_helpers::BISE_256_LEVELS) && + (best_trial_dist != BIG_FLOAT_VAL)) + { + uint8_t orig_best_trial_endpoints[NUM_MODE7_ENDPOINTS]; + memcpy(orig_best_trial_endpoints, pEndpoints, NUM_MODE7_ENDPOINTS); + + vec3F low_color_q16(high_color_q16 - vec3F(s_q16)); + low_color_q16.clamp(0.0f, 65535.0f); + + for (uint32_t c = 0; c < NUM_MODE7_ENDPOINTS; c++) + { + for (int dt = 0; dt <= 1; dt++) + { + const int d = dt ? 
1 : -1; + + uint8_t varied_endpoints[NUM_MODE7_ENDPOINTS]; + memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE7_ENDPOINTS); + + int ise = varied_endpoints[c]; + + int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise]; + rank = clamp(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1); + + ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank]; + + varied_endpoints[c] = (uint8_t)ise; + + int e[2][3]; + int decoded_s = 0; + if (!decode_mode7_to_qlog12(varied_endpoints, e, &decoded_s, ise_endpoint_range)) + continue; + + // e1 is always the high color + vec3F e1( + (float)(e[1][0] << 4), + (float)(e[1][1] << 4), + (float)(e[1][2] << 4) + ); + + decoded_s <<= 4; + + double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3; + + if (dist < best_trial_dist) + { + best_trial_dist = dist; + memcpy(pEndpoints, varied_endpoints, NUM_MODE7_ENDPOINTS); + } + + } // d + } // c + } + + submode_used = best_trial_submode; + + return (best_trial_dist != BIG_FLOAT_VAL); +} + +//-------------------------------------------------------------------------------------------------------------------------- + +bool try_mode7( + uint32_t num_pixels, + uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used, + const vec3F& high_color_q16, const float s_q16, + const half_float block_pixels_half[][3], + uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, + uint32_t ise_endpoint_range, + int32_t first_submode, int32_t last_submode) +{ + assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS)); + + assert(first_submode <= last_submode); + assert((first_submode >= 0) && (first_submode <= (int)MAX_MODE7_SUBMODE_INDEX)); + assert(last_submode <= 
(int)MAX_MODE7_SUBMODE_INDEX); + assert(num_weight_levels == astc_helpers::get_ise_levels(ise_weight_range)); + + uint8_t unquant_trial_endpoints[NUM_MODE7_ENDPOINTS]; + + uint8_t best_trial_endpoints[NUM_MODE7_ENDPOINTS]; + clear_obj(best_trial_endpoints); + double best_trial_dist = BIG_FLOAT_VAL; + int best_trial_submode = 0; + + for (int submode = first_submode; submode <= last_submode; submode++) + { + const int MAX_CLAMP_MAG_ACCEPT_THRESH = 16; + + int max_clamp_mag = 0; + const bool did_clamp = pack_astc_mode7_submode(submode, unquant_trial_endpoints, high_color_q16, s_q16, max_clamp_mag, ise_weight_range, true, MAX_CLAMP_MAG_ACCEPT_THRESH); + + if (submode < 5) + { + if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH)) + continue; + } + + uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS]; + + // This will distort the endpoints if the ISE endpoint range isn't 256 levels (20). + // It could massively distort the endpoints, but still result in a valid encoding. + basist::astc_6x6_hdr::requantize_ise_endpoints(7, astc_helpers::BISE_256_LEVELS, unquant_trial_endpoints, ise_endpoint_range, trial_endpoints); + + int e[2][3]; + int decoded_s = 0; + if (!decode_mode7_to_qlog12(trial_endpoints, e, &decoded_s, ise_endpoint_range)) + continue; + + // e1 is always the high color + vec3F e1( + (float)(e[1][0] << 4), + (float)(e[1][1] << 4), + (float)(e[1][2] << 4) + ); + + decoded_s <<= 4; + + double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3; + + if (dist < best_trial_dist) + { + best_trial_dist = dist; + best_trial_submode = submode; + memcpy(best_trial_endpoints, trial_endpoints, sizeof(best_trial_endpoints)); + } + + if (coptions.m_take_first_non_clamping_mode7_submode) + { + if (!did_clamp) + break; + } + + } // submode + + if ((coptions.m_ultra_quant) && + (ise_endpoint_range < astc_helpers::BISE_256_LEVELS) && + (best_trial_dist != BIG_FLOAT_VAL)) + { + uint8_t orig_best_trial_endpoints[NUM_MODE7_ENDPOINTS]; + 
memcpy(orig_best_trial_endpoints, best_trial_endpoints, NUM_MODE7_ENDPOINTS); + + vec3F low_color_q16(high_color_q16 - vec3F(s_q16)); + low_color_q16.clamp(0.0f, 65535.0f); + + for (uint32_t c = 0; c < NUM_MODE7_ENDPOINTS; c++) + { + for (int dt = 0; dt <= 1; dt++) + { + const int d = dt ? 1 : -1; + + uint8_t varied_endpoints[NUM_MODE7_ENDPOINTS]; + memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE7_ENDPOINTS); + + int ise = varied_endpoints[c]; + + int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise]; + rank = clamp(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1); + + ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank]; + + varied_endpoints[c] = (uint8_t)ise; + + int e[2][3]; + int decoded_s = 0; + if (!decode_mode7_to_qlog12(varied_endpoints, e, &decoded_s, ise_endpoint_range)) + continue; + + // e1 is always the high color + vec3F e1( + (float)(e[1][0] << 4), + (float)(e[1][1] << 4), + (float)(e[1][2] << 4) + ); + + decoded_s <<= 4; + + double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3; + + if (dist < best_trial_dist) + { + best_trial_dist = dist; + memcpy(best_trial_endpoints, varied_endpoints, NUM_MODE7_ENDPOINTS); + } + + } // d + } // c + } + + bool improved_flag = false; + + if (best_trial_dist != BIG_FLOAT_VAL) + { + half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3]; + uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + + if (get_astc_hdr_mode_7_block_colors(best_trial_endpoints, &decoded_half[0][0], nullptr, num_weight_levels, ise_weight_range, ise_endpoint_range)) + { + double trial_blk_error = eval_selectors(num_pixels, trial_weights, ise_weight_range, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions); + if (trial_blk_error < cur_block_error) + { + cur_block_error = trial_blk_error; + memcpy(pEndpoints, best_trial_endpoints, NUM_MODE7_ENDPOINTS); + 
memcpy(pWeights, trial_weights, num_pixels); + submode_used = best_trial_submode; + improved_flag = true; + } + } + } + + return improved_flag; +} + +//-------------------------------------------------------------------------------------------------------------------------- +const float LOW_EMPHASIS_WEIGHT = 1.0f, MIDDLE_EMPHASIS_WEIGHT = 1.25f, HIGH_EMPHASIS_WEIGHT = 1.0f; +const float LOW_EMPHASIS_WEIGHT_HEAVY = 1.0f, MIDDLE_EMPHASIS_WEIGHT_HEAVY = 4.0f, HIGH_EMPHASIS_WEIGHT_HEAVY = 1.0f; + +double encode_astc_hdr_block_mode_11( + uint32_t num_pixels, + const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + uint32_t ise_weight_range, + uint32_t& best_submode, + double cur_block_error, + uint8_t* blk_endpoints, uint8_t* blk_weights, + const astc_hdr_codec_base_options& coptions, + bool direct_only, + uint32_t ise_endpoint_range, + bool uber_mode, + bool constrain_ise_weight_selectors, + int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode, + const encode_astc_block_stats* pBlock_stats) +{ + assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS)); + + assert((first_submode >= FIRST_MODE11_SUBMODE_INDEX) && (first_submode <= last_submode)); + assert(last_submode <= MAX_MODE11_SUBMODE_INDEX); + + best_submode = 0; + + const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range); + assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS); + + vec3F block_mean_color_q16, block_axis_q16; + if (!pBlock_stats) + { + block_mean_color_q16 = calc_mean(num_pixels, pBlock_pixels_q16); + block_axis_q16 = calc_rgb_pca(num_pixels, pBlock_pixels_q16, block_mean_color_q16); + } + else + { + assert(num_pixels == 
pBlock_stats->m_num_pixels); + block_mean_color_q16 = pBlock_stats->m_mean_q16; + block_axis_q16 = pBlock_stats->m_axis_q16; + } + + aabb3F color_box_q16(cInitExpand); + + float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL; + vec3F low_color_q16, high_color_q16; + + for (uint32_t i = 0; i < num_pixels; i++) + { + color_box_q16.expand(pBlock_pixels_q16[i]); + + vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16); + float kd = k.dot(block_axis_q16); + + if (kd < l) + { + l = kd; + low_color_q16 = pBlock_pixels_q16[i]; + } + + if (kd > h) + { + h = kd; + high_color_q16 = pBlock_pixels_q16[i]; + } + } + + vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16); + + for (uint32_t i = 0; i < 3; i++) + { + low_color_q16[i] = lerp(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f); + high_color_q16[i] = lerp(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f); + } + + uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS]; + uint8_t trial_blk_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + uint32_t trial_best_submode = 0; + + clear_obj(trial_blk_endpoints); + clear_obj(trial_blk_weights); + + double trial_blk_error = BIG_FLOAT_VAL; + + bool did_improve = try_mode11(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode, + low_color_q16, high_color_q16, + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors, + first_submode, last_submode, ignore_clamping); + + // If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do. + if (!did_improve) + return cur_block_error; + + // Did the solution improve? 
+ if (trial_blk_error < cur_block_error) + { + cur_block_error = trial_blk_error; + memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS); + memcpy(blk_weights, trial_blk_weights, num_pixels); + best_submode = trial_best_submode; + } + + if (opt_mode == cNoOpt) + return cur_block_error; + + // least squares on the most promising trial weight indices found + const uint32_t NUM_LS_PASSES = 3; + + float emphasis_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + + if (opt_mode == cWeightedAverage) + { + const uint32_t NUM_OPT_PASSES = 3; + for (uint32_t pass = 0; pass < NUM_OPT_PASSES; pass++) + { + vec3F low_p(0.0f); + float total_low = 0.0f; + + vec3F high_p(0.0f); + float total_high = 0.0f; + + for (uint32_t i = 0; i < num_pixels; i++) + { + vec3F p(pBlock_pixels_q16[i]); + float lerp = g_ise_weight_lerps[ise_weight_range][trial_blk_weights[i] + 1] * (1.0f / 64.0f); + + low_p += p * (1.0f - lerp); + total_low += (1.0f - lerp); + + high_p += p * lerp; + total_high += lerp; + } + + if (total_low != 0.0f) + low_p *= (1.0f / total_low); + + if (total_high != 0.0f) + high_p *= (1.0f / total_high); + + vec3F low, high; + + bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + low_p, high_p, + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors, + first_submode, last_submode, ignore_clamping); + + if (!was_improved) + break; + + memcpy(trial_blk_weights, blk_weights, num_pixels); + } + } + else if (opt_mode == cOrdinaryLeastSquares) + { + for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++) + { + vec3F l_q16, h_q16; + + if (!compute_least_squares_endpoints_rgb(num_pixels, trial_blk_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16)) + break; + + bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + l_q16, h_q16, + pBlock_pixels_half, 
num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors, + first_submode, last_submode, ignore_clamping); + + if (!was_improved) + break; + + // It's improved, so let's take the new weight indices. + memcpy(trial_blk_weights, blk_weights, num_pixels); + + } // pass + } + else + { + if (h == l) + { + for (uint32_t i = 0; i < num_pixels; i++) + emphasis_weights[i] = 1.0f; + } + else + { + float mid = (0.0f - l) / (h - l); + mid = clamp(mid, .01f, .99f); + + float lw = LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT; + if (opt_mode == cWeightedLeastSquaresHeavy) + lw = LOW_EMPHASIS_WEIGHT_HEAVY, mw = MIDDLE_EMPHASIS_WEIGHT_HEAVY, hw = HIGH_EMPHASIS_WEIGHT_HEAVY; + + for (uint32_t i = 0; i < num_pixels; i++) + { + vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16); + float kd = k.dot(block_axis_q16); + + assert((kd >= l) && (kd <= h)); + + float v = (kd - l) / (h - l); + + if (v < mid) + v = lerp(lw, mw, v / mid); + else + v = lerp(mw, hw, (v - mid) * (1.0f - mid)); + + emphasis_weights[i] = v; + } + +#if 0 + if (num_pixels == 6 * 6) + { + const float EDGE_WEIGHT = .1f; + for (uint32_t i = 0; i < 6; i++) + { + emphasis_weights[i] += EDGE_WEIGHT; + emphasis_weights[i + 5 * 6] += EDGE_WEIGHT; + emphasis_weights[i * 6] += EDGE_WEIGHT; + emphasis_weights[5 + i * 6] += EDGE_WEIGHT; + } + } +#endif + } + + for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++) + { + vec3F l_q16, h_q16; + + if (!compute_weighted_least_squares_endpoints_rgb( + num_pixels, + trial_blk_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr, + emphasis_weights, + &l_q16, &h_q16, + pBlock_pixels_q16, + color_box_q16)) + break; + + bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + l_q16, h_q16, + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors, + first_submode, 
last_submode, ignore_clamping); + + if (!was_improved) + break; + + // It's improved, so let's take the new weight indices. + memcpy(trial_blk_weights, blk_weights, num_pixels); + + } // pass + } + + if ( (uber_mode) && (ise_weight_range >= astc_helpers::BISE_3_LEVELS) && + ((opt_mode == cOrdinaryLeastSquares) || (opt_mode == cWeightedLeastSquares) || (opt_mode == cWeightedLeastSquaresHeavy)) ) + { + // Try varying the current best weight indices. This can be expanded/improved, but at potentially great cost. + + uint8_t temp_astc_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + memcpy(temp_astc_weights, trial_blk_weights, num_pixels); + + uint32_t min_lin_sel = 256, max_lin_sel = 0; + for (uint32_t i = 0; i < num_pixels; i++) + { + const uint32_t astc_sel = temp_astc_weights[i]; + + const uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel]; + assert(lin_sel < num_weight_levels); + + min_lin_sel = minimumu(min_lin_sel, lin_sel); + max_lin_sel = maximumu(max_lin_sel, lin_sel); + } + + bool was_improved = false; + (void)was_improved; + + { + bool weights_changed = false; + uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + for (uint32_t i = 0; i < num_pixels; i++) + { + uint32_t astc_sel = temp_astc_weights[i]; + uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel]; + + if ((lin_sel == min_lin_sel) && (lin_sel < (num_weight_levels - 1))) + { + lin_sel++; + weights_changed = true; + } + + trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel]; + } + + if (weights_changed) + { + vec3F l_q16, h_q16; + + bool succeeded; + if (opt_mode == cOrdinaryLeastSquares) + succeeded = compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16); + else + succeeded = compute_weighted_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr, emphasis_weights, &l_q16, &h_q16, 
pBlock_pixels_q16, color_box_q16); + + if (succeeded) + { + if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + l_q16, h_q16, + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors, + first_submode, last_submode, ignore_clamping)) + { + was_improved = true; + } + } + } + } + + { + bool weights_changed = false; + uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + + for (uint32_t i = 0; i < num_pixels; i++) + { + uint32_t astc_sel = temp_astc_weights[i]; + uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel]; + + if ((lin_sel == max_lin_sel) && (lin_sel > 0)) + { + lin_sel--; + weights_changed = true; + } + + trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel]; + } + + if (weights_changed) + { + vec3F l_q16, h_q16; + + bool succeeded; + if (opt_mode == cOrdinaryLeastSquares) + succeeded = compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16); + else + succeeded = compute_weighted_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16); + + if (succeeded) + { + if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + l_q16, h_q16, + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors, + first_submode, last_submode, ignore_clamping)) + { + was_improved = true; + } + } + } + } + + { + bool weights_changed = false; + uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + for (uint32_t i = 0; i < num_pixels; i++) + { + uint32_t astc_sel = temp_astc_weights[i]; + uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel]; + + if ((lin_sel == max_lin_sel) && (lin_sel > 
0)) + { + lin_sel--; + weights_changed = true; + } + else if ((lin_sel == min_lin_sel) && (lin_sel < (num_weight_levels - 1))) + { + lin_sel++; + weights_changed = true; + } + + trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel]; + } + + if (weights_changed) + { + vec3F l_q16, h_q16; + bool succeeded; + if (opt_mode == cOrdinaryLeastSquares) + succeeded = compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16); + else + succeeded = compute_weighted_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16); + + if (succeeded) + { + if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + l_q16, h_q16, + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors, + first_submode, last_submode, ignore_clamping)) + { + was_improved = true; + } + } + } + } + + } // uber_mode + + return cur_block_error; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +double encode_astc_hdr_block_downsampled_mode_11( + uint32_t block_x, uint32_t block_y, uint32_t grid_x, uint32_t grid_y, + uint32_t ise_weight_range, uint32_t ise_endpoint_range, + uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + double cur_block_error, + int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode, + uint8_t* pBlk_endpoints, uint8_t* pBlk_weights, uint32_t& best_submode, + const astc_hdr_codec_base_options& coptions, + const encode_astc_block_stats* pBlock_stats) +{ + assert((block_x >= 4) && (block_y >= 4) && (block_x <= MAX_ASTC_HDR_BLOCK_W) && (block_y <= MAX_ASTC_HDR_BLOCK_H)); + assert((grid_x >= 
2) && (grid_y >= 2) && (grid_x <= block_x) && (grid_y <= block_y)); + + assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS)); + + assert((first_submode >= FIRST_MODE11_SUBMODE_INDEX) && (first_submode <= last_submode)); + assert(last_submode <= MAX_MODE11_SUBMODE_INDEX); + + best_submode = 0; + + assert(astc_helpers::get_ise_levels(ise_weight_range) <= MAX_SUPPORTED_WEIGHT_LEVELS); + + const uint32_t num_weights = grid_x * grid_y; + + vec3F block_mean_color_q16, block_axis_q16; + if (!pBlock_stats) + { + block_mean_color_q16 = calc_mean(num_pixels, pBlock_pixels_q16); + block_axis_q16 = calc_rgb_pca(num_pixels, pBlock_pixels_q16, block_mean_color_q16); + } + else + { + assert(num_pixels == pBlock_stats->m_num_pixels); + block_mean_color_q16 = pBlock_stats->m_mean_q16; + block_axis_q16 = pBlock_stats->m_axis_q16; + } + + aabb3F color_box_q16(cInitExpand); + + float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL; + vec3F low_color_q16, high_color_q16; + + for (uint32_t i = 0; i < num_pixels; i++) + { + color_box_q16.expand(pBlock_pixels_q16[i]); + + vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16); + float kd = k.dot(block_axis_q16); + + if (kd < l) + { + l = kd; + low_color_q16 = pBlock_pixels_q16[i]; + } + + if (kd > h) + { + h = kd; + high_color_q16 = pBlock_pixels_q16[i]; + } + } + + vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16); + + for (uint32_t i = 0; i < 3; i++) + { + low_color_q16[i] = lerp(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f); + high_color_q16[i] = lerp(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f); + } + + const uint32_t NUM_PASSES = 3; + for (uint32_t pass = 0; pass < NUM_PASSES; pass++) + { + uint8_t 
trial_blk_endpoints[NUM_MODE11_ENDPOINTS]; + uint8_t trial_blk_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // at block resolution, not grid res + uint32_t trial_best_submode = 0; + + clear_obj(trial_blk_endpoints); + clear_obj(trial_blk_weights); + + double trial_blk_error = BIG_FLOAT_VAL; + + bool could_pack = try_mode11(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode, + low_color_q16, high_color_q16, + pBlock_pixels_half, 32, astc_helpers::BISE_32_LEVELS, coptions, false, ise_endpoint_range, false, + first_submode, last_submode, ignore_clamping); + + if (!could_pack) + break; + + uint8_t trial_downsampled_ise_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + + downsample_ise_weights( + astc_helpers::BISE_32_LEVELS, ise_weight_range, + block_x, block_y, grid_x, grid_y, + trial_blk_weights, trial_downsampled_ise_weights); + + uint8_t trial_downsampled_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + dequantize_astc_weights(num_weights, trial_downsampled_ise_weights, ise_weight_range, trial_downsampled_raw_weights); + + uint8_t trial_upsampled_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE + astc_helpers::upsample_weight_grid(block_x, block_y, grid_x, grid_y, trial_downsampled_raw_weights, trial_upsampled_raw_weights); + + //------ + + int trial_e[2][3]; + if (!decode_mode11_to_qlog12(trial_blk_endpoints, trial_e, ise_endpoint_range)) + return cur_block_error; + + double trial_error = compute_block_error_from_raw_weights(num_pixels, pBlock_pixels_half, trial_upsampled_raw_weights, trial_e, coptions); + + if (trial_error < cur_block_error) + { + cur_block_error = trial_error; + memcpy(pBlk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS); + memcpy(pBlk_weights, trial_downsampled_ise_weights, num_weights); + best_submode = trial_best_submode; + } + else if (pass) + break; + + if ((opt_mode == cWeightedLeastSquares) || (opt_mode == cWeightedLeastSquaresHeavy)) + { + float 
emphasis_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + if (h == l) + { + for (uint32_t i = 0; i < num_pixels; i++) + emphasis_weights[i] = 1.0f; + } + else + { + float mid = (0.0f - l) / (h - l); + mid = clamp(mid, .01f, .99f); + + float lw = LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT; + if (opt_mode == cWeightedLeastSquaresHeavy) + lw = LOW_EMPHASIS_WEIGHT_HEAVY, mw = MIDDLE_EMPHASIS_WEIGHT_HEAVY, hw = HIGH_EMPHASIS_WEIGHT_HEAVY; + + for (uint32_t i = 0; i < num_pixels; i++) + { + vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16); + float kd = k.dot(block_axis_q16); + + assert((kd >= l) && (kd <= h)); + + float v = (kd - l) / (h - l); + + if (v < mid) + v = lerp(lw, mw, v / mid); + else + v = lerp(mw, hw, (v - mid) * (1.0f - mid)); + + emphasis_weights[i] = v; + } + } + + float trial_upsampled_raw_weightsf[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + for (uint32_t i = 0; i < num_pixels; i++) + trial_upsampled_raw_weightsf[i] = (float)trial_upsampled_raw_weights[i] * (1.0f / 64.0f); + + if (!compute_weighted_least_squares_endpoints_rgb(num_pixels, nullptr, nullptr, trial_upsampled_raw_weightsf, emphasis_weights, &low_color_q16, &high_color_q16, pBlock_pixels_q16, color_box_q16)) + return false; + } + else + { + if (!compute_least_squares_endpoints_rgb_raw_weights(num_pixels, trial_upsampled_raw_weights, &low_color_q16, &high_color_q16, pBlock_pixels_q16, color_box_q16)) + break; + } + + bool pack_succeeded = pack_mode11(low_color_q16, high_color_q16, ise_endpoint_range, trial_blk_endpoints, coptions, false, first_submode, last_submode, false, trial_best_submode); + if (!pack_succeeded) + break; + + if (!decode_mode11_to_qlog12(trial_blk_endpoints, trial_e, ise_endpoint_range)) + break; + + trial_error = compute_block_error_from_raw_weights(num_pixels, pBlock_pixels_half, trial_upsampled_raw_weights, trial_e, coptions); + + if (trial_error < cur_block_error) + { + cur_block_error = trial_error; + memcpy(pBlk_endpoints, 
trial_blk_endpoints, NUM_MODE11_ENDPOINTS); + memcpy(pBlk_weights, trial_downsampled_ise_weights, num_weights); + best_submode = trial_best_submode; + } + else + { + break; + } + + } // pass + + return cur_block_error; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +double encode_astc_hdr_block_mode_11_dual_plane( + uint32_t num_pixels, + const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + uint32_t channel_index, // 0-2 + uint32_t ise_weight_range, + uint32_t& best_submode, + double cur_block_error, + uint8_t* blk_endpoints, uint8_t* blk_weights0, uint8_t* blk_weights1, + const astc_hdr_codec_base_options& coptions, + bool direct_only, + uint32_t ise_endpoint_range, + bool uber_mode, + bool constrain_ise_weight_selectors, + int32_t first_submode, int32_t last_submode, bool ignore_clamping) +{ + (void)uber_mode; + + assert(channel_index <= 2); + assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS)); + + assert((first_submode >= FIRST_MODE11_SUBMODE_INDEX) && (first_submode <= last_submode)); + assert(last_submode <= MAX_MODE11_SUBMODE_INDEX); + + assert(num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS); + + best_submode = 0; + + const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range); + assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS); + + vec4F temp_block_pixels_q16[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + for (uint32_t i = 0; i < num_pixels; i++) + { + temp_block_pixels_q16[i] = pBlock_pixels_q16[i]; + temp_block_pixels_q16[i][channel_index] = 0.0f; + } + + vec3F block_mean_color_q16(calc_mean(num_pixels, temp_block_pixels_q16)); + vec3F 
block_axis_q16(calc_rgb_pca(num_pixels, temp_block_pixels_q16, block_mean_color_q16)); + + float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL; + vec3F low_color_q16, high_color_q16; + + aabb3F color_box_q16(cInitExpand); + + for (uint32_t i = 0; i < num_pixels; i++) + { + color_box_q16.expand(pBlock_pixels_q16[i]); + + vec3F k(vec3F(temp_block_pixels_q16[i]) - block_mean_color_q16); + float kd = k.dot(block_axis_q16); + + if (kd < l) + { + l = kd; + low_color_q16 = pBlock_pixels_q16[i]; + } + + if (kd > h) + { + h = kd; + high_color_q16 = pBlock_pixels_q16[i]; + } + } + + low_color_q16[channel_index] = 0.0f; + high_color_q16[channel_index] = 0.0f; + + float a = low_color_q16.dot(vec3F(1.0f)), b = high_color_q16.dot(vec3F(1.0f)); + if (a <= b) + { + low_color_q16[channel_index] = color_box_q16.get_low()[channel_index]; + high_color_q16[channel_index] = color_box_q16.get_high()[channel_index]; + } + else + { + high_color_q16[channel_index] = color_box_q16.get_low()[channel_index]; + low_color_q16[channel_index] = color_box_q16.get_high()[channel_index]; + } + + vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16); + for (uint32_t i = 0; i < 3; i++) + { + low_color_q16[i] = lerp(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f); + high_color_q16[i] = lerp(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f); + } + + uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS]; + uint8_t trial_blk_weights0[MAX_ASTC_HDR_ENC_BLOCK_PIXELS], trial_blk_weights1[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + uint32_t trial_best_submode = 0; + + clear_obj(trial_blk_endpoints); + clear_obj(trial_blk_weights0); + clear_obj(trial_blk_weights1); + + double trial_blk_error = BIG_FLOAT_VAL; + + bool did_improve = try_mode11_dual_plane(channel_index, num_pixels, trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_best_submode, + low_color_q16, high_color_q16, + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, 
ise_endpoint_range, constrain_ise_weight_selectors, + first_submode, last_submode, ignore_clamping); + + // If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do. + if (!did_improve) + return cur_block_error; + + // Did the solution improve? + if (trial_blk_error < cur_block_error) + { + cur_block_error = trial_blk_error; + memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS); + memcpy(blk_weights0, trial_blk_weights0, num_pixels); + memcpy(blk_weights1, trial_blk_weights1, num_pixels); + best_submode = trial_best_submode; + } + + const uint32_t chan0 = (channel_index + 1) % 3, chan1 = (channel_index + 2) % 3; + + vec2F plane0_q16[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + aabb2F plane0_bounds; + plane0_bounds[0].set(color_box_q16.get_low()[chan0], color_box_q16.get_low()[chan1]); + plane0_bounds[1].set(color_box_q16.get_high()[chan0], color_box_q16.get_high()[chan1]); + + vec1F plane1_q16[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + aabb1F plane1_bounds; + plane1_bounds[0].set(color_box_q16.get_low()[channel_index]); + plane1_bounds[1].set(color_box_q16.get_high()[channel_index]); + + for (uint32_t i = 0; i < num_pixels; i++) + { + plane0_q16[i][0] = pBlock_pixels_q16[i][chan0]; + plane0_q16[i][1] = pBlock_pixels_q16[i][chan1]; + + plane1_q16[i][0] = pBlock_pixels_q16[i][channel_index]; + } + + const uint32_t NUM_LS_PASSES = 3; + + for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++) + { + vec2F l0_q16, h0_q16; + if (!compute_least_squares_endpoints_2D(num_pixels, trial_blk_weights0, &g_astc_ls_weights_ise[ise_weight_range][0], &l0_q16, &h0_q16, plane0_q16, plane0_bounds)) + break; + + vec1F l1_q16, h1_q16; + if (!compute_least_squares_endpoints_1D(num_pixels, trial_blk_weights1, &g_astc_ls_weights_ise[ise_weight_range][0], &l1_q16, &h1_q16, plane1_q16, plane1_bounds)) + break; + + vec3F l_q16, h_q16; + + l_q16[channel_index] = l1_q16[0]; + h_q16[channel_index] = h1_q16[0]; + + l_q16[chan0] = l0_q16[0]; + 
h_q16[chan0] = h0_q16[0]; + + l_q16[chan1] = l0_q16[1]; + h_q16[chan1] = h0_q16[1]; + + bool was_improved = try_mode11_dual_plane(channel_index, num_pixels, blk_endpoints, blk_weights0, blk_weights1, cur_block_error, best_submode, + l_q16, h_q16, + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors, + first_submode, last_submode, ignore_clamping); + + if (!was_improved) + break; + + // It's improved, so let's take the new weight indices. + memcpy(trial_blk_weights0, blk_weights0, num_pixels); + memcpy(trial_blk_weights1, blk_weights1, num_pixels); + + } // pass + + return cur_block_error; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +double encode_astc_hdr_block_mode_7( + uint32_t num_pixels, + const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + uint32_t ise_weight_range, + uint32_t& best_submode, + double cur_block_error, + uint8_t* blk_endpoints, //[4] + uint8_t* blk_weights, // [num_pixels] + const astc_hdr_codec_base_options& coptions, + uint32_t ise_endpoint_range, + int first_submode, int last_submode, + const encode_astc_block_stats* pBlock_stats) +{ + assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS)); + assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX)); + assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + + const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range); + assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS); + + best_submode = 0; + + vec3F block_mean_color_q16; + if (!pBlock_stats) + block_mean_color_q16 = calc_mean(num_pixels, pBlock_pixels_q16); + else + { + assert(num_pixels == pBlock_stats->m_num_pixels); + block_mean_color_q16 
= pBlock_stats->m_mean_q16; + } + + vec3F block_axis_q16(0.577350259f); + + aabb3F color_box_q16(cInitExpand); + + float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL; + for (uint32_t i = 0; i < num_pixels; i++) + { + color_box_q16.expand(pBlock_pixels_q16[i]); + + vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16); + float kd = k.dot(block_axis_q16); + + l = basisu::minimum(l, kd); + h = basisu::maximum(h, kd); + } + + vec3F low_color_q16(interp_color(block_mean_color_q16, block_axis_q16, l, color_box_q16, color_box_q16)); + vec3F high_color_q16(interp_color(block_mean_color_q16, block_axis_q16, h, color_box_q16, color_box_q16)); + + low_color_q16.clamp(0.0f, MAX_QLOG16_VAL); + high_color_q16.clamp(0.0f, MAX_QLOG16_VAL); + + vec3F diff(high_color_q16 - low_color_q16); + + // The mul here (* block_axis_q16[0]) is because the "S" or scale value is subtracted from the high color with a scale of 1.0, + // i.e. it's equivalent to a vector of (1,1,1) multiplied by scale before the sub. We want to actually move along the grayscale axis, or (0.577350259, 0.577350259, 0.577350259). + float s_q16 = diff.dot(block_axis_q16) * block_axis_q16[0]; + + uint8_t trial_blk_endpoints[NUM_MODE7_ENDPOINTS]; + uint8_t trial_blk_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + uint32_t trial_best_submode = 0; + + clear_obj(trial_blk_endpoints); + clear_obj(trial_blk_weights); + + double trial_blk_error = BIG_FLOAT_VAL; + + bool did_improve = try_mode7(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode, + high_color_q16, ceilf(s_q16), + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode); + + // If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do. + if (!did_improve) + { + return cur_block_error; + } + + // Did the solution improve? 
+ if (trial_blk_error < cur_block_error) + { + cur_block_error = trial_blk_error; + memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE7_ENDPOINTS); + memcpy(blk_weights, trial_blk_weights, num_pixels); + best_submode = trial_best_submode; + } + +#if 1 + { + //const float TL = 8830.0f;// (float)half_to_qlog16(float_to_half(0.00061f)); + //const float TH = 41600.0f;// (float)half_to_qlog16(float_to_half(40.0f)); + //float zl = minimum(color_box_q16[0][0], color_box_q16[0][1], color_box_q16[0][2]); + //float zh = minimum(color_box_q16[1][0], color_box_q16[1][1], color_box_q16[1][2]); + + //if ((zl <= TL) && (zh >= TH)) + { + // Try a simpler technique for artifact reduction + l = BIG_FLOAT_VAL; + h = -BIG_FLOAT_VAL; + + vec3F alt_low_color_q16(0.0f), alt_high_color_q16(0.0f); + for (uint32_t i = 0; i < num_pixels; i++) + { + color_box_q16.expand(pBlock_pixels_q16[i]); + + vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16); + float kd = k.dot(block_axis_q16); + + if (kd < l) + { + alt_low_color_q16 = pBlock_pixels_q16[i]; + l = kd; + } + + if (kd > h) + { + alt_high_color_q16 = pBlock_pixels_q16[i]; + h = kd; + } + } + + vec3F old_alt_low_color_q16(alt_low_color_q16); + + for (uint32_t i = 0; i < 3; i++) + alt_low_color_q16[i] = lerp(old_alt_low_color_q16[i], alt_high_color_q16[i], 1.0f / 64.0f); + + vec3F alt_diff(alt_high_color_q16 - alt_low_color_q16); + + // The mul here (* block_axis_q16[0]) is because the "S" or scale value is subtracted from the high color with a scale of 1.0, + // i.e. it's equivalent to a vector of (1,1,1) multiplied by scale before the sub. We want to actually move along the grayscale axis, or (0.577350259, 0.577350259, 0.577350259). 
+ float alt_s_q16 = alt_diff.dot(block_axis_q16) * block_axis_q16[0]; + + try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + alt_high_color_q16, ceilf(alt_s_q16), + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode); + } + } +#endif + + const float one_over_num_pixels = 1.0f / (float)num_pixels; + + const uint32_t NUM_TRIALS = 2; + for (uint32_t trial = 0; trial < NUM_TRIALS; trial++) + { + // Given a set of selectors and S, try to compute a better high color + vec3F new_high_color_q16(block_mean_color_q16); + + int e[2][3]; + int cur_s = 0; + if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, &cur_s, ise_endpoint_range)) + break; + + cur_s <<= 4; + + for (uint32_t i = 0; i < num_pixels; i++) + { + uint32_t astc_sel = trial_blk_weights[i]; + float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f); + + float k = (float)cur_s * (1.0f - lerp) * one_over_num_pixels; + new_high_color_q16[0] += k; + new_high_color_q16[1] += k; + new_high_color_q16[2] += k; + } + + bool improved = try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + new_high_color_q16, (float)cur_s, + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode); + + if (improved) + { + memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS); + memcpy(trial_blk_weights, blk_weights, num_pixels); + } + + // Given a set of selectors and a high color, try to compute a better S. 
+ float t = 0.0f; + + for (uint32_t i = 0; i < num_pixels; i++) + { + uint32_t astc_sel = trial_blk_weights[i]; + float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f); + + t += (1.0f) - lerp; + } + + t *= one_over_num_pixels; + + //int e[2][3]; + if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, nullptr, ise_endpoint_range)) + break; + + vec3F cur_h_q16((float)(e[1][0] << 4), (float)(e[1][1] << 4), (float)(e[1][2] << 4)); + + if (fabs(t) > .0000125f) + { + float s_r = (cur_h_q16[0] - block_mean_color_q16[0]) / t; + float s_g = (cur_h_q16[1] - block_mean_color_q16[1]) / t; + float s_b = (cur_h_q16[2] - block_mean_color_q16[2]) / t; + + // TODO: gather statistics on these + if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + cur_h_q16, ceilf(s_r), + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode)) + { + improved = true; + } + + if (coptions.m_mode7_full_s_optimization) + { + if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + cur_h_q16, ceilf(s_g), + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode)) + { + improved = true; + } + + if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + cur_h_q16, ceilf(s_b), + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode)) + { + improved = true; + } + + if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + cur_h_q16, ceilf((s_r + s_g + s_b) / 3.0f), + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode)) + { + improved = true; + } + + // Added this - quite strong. 
+ if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, + cur_h_q16, minimum(maximum(s_r, s_g, s_b) * 1.1f, 65535.0f), + pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode)) + { + improved = true; + } + } // if (coptions.m_mode7_full_s_optimization) + + } // if (fabs(t) > .0000125f) + + if (!improved) + break; + + memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS); + memcpy(trial_blk_weights, blk_weights, num_pixels); + + } // trial + + return cur_block_error; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +void dequantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_raw_weights) +{ + const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(from_ise_range).m_ISE_to_val; + + for (uint32_t i = 0; i < n; i++) + pDst_raw_weights[i] = dequant_tab[pSrc_ise_vals[i]]; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +// For each output (2x2) sample, the weight of each input (6x6) sample. 
+static const float g_weight_downsample_6x6_to_2x2[4][36] = { +{0.165438f, 0.132609f, 0.092681f, 0.028953f, 0.000000f, 0.000000f, 0.133716f, 0.111240f, 0.065133f, 0.022236f, 0.000000f, 0.000000f, 0.092623f, 0.063898f, 0.039120f, 0.000000f, 0.000000f, 0.000000f, 0.028168f, 0.024184f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.027262f, 0.091051f, 0.132446f, 0.164791f, 0.000000f, 0.000000f, 0.026038f, 0.066511f, 0.111644f, 0.133197f, 0.000000f, 0.000000f, 0.000000f, 0.040053f, 0.064757f, 0.091196f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024265f, 0.026789f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028282f, 0.024804f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092871f, 0.066580f, 0.042024f, 0.000000f, 0.000000f, 0.000000f, 0.132115f, 0.107586f, 0.061943f, 0.025551f, 0.000000f, 0.000000f, 0.166111f, 0.132946f, 0.089043f, 0.030145f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024535f, 0.028835f, 0.000000f, 0.000000f, 0.000000f, 0.044465f, 0.063652f, 0.093251f, 0.000000f, 0.000000f, 0.025961f, 0.063339f, 0.107329f, 0.132240f, 0.000000f, 0.000000f, 0.029844f, 0.089249f, 0.132200f, 0.165099f}, +}; + +// For each output (3x2) sample, the weight of each input (6x6) sample. 
+static const float g_weight_downsample_6x6_to_3x2[6][36] = { +{0.257933f, 0.144768f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.213754f, 0.109376f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.140969f, 0.064128f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041270f, 0.027803f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.046066f, 0.153691f, 0.153395f, 0.042845f, 0.000000f, 0.000000f, 0.038497f, 0.131674f, 0.126804f, 0.041513f, 0.000000f, 0.000000f, 0.028434f, 0.081152f, 0.075499f, 0.025372f, 0.000000f, 0.000000f, 0.000000f, 0.030067f, 0.024989f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.147088f, 0.258980f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105549f, 0.211746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066714f, 0.144015f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027755f, 0.038152f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044268f, 0.030990f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.141642f, 0.069930f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.207393f, 0.105354f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.255911f, 0.144511f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026658f, 0.032535f, 0.000000f, 0.000000f, 0.000000f, 0.024618f, 0.079487f, 0.080415f, 0.026311f, 0.000000f, 0.000000f, 0.038382f, 0.133569f, 0.133162f, 0.033451f, 0.000000f, 
0.000000f, 0.043697f, 0.152483f, 0.154345f, 0.040885f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026401f, 0.040228f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066688f, 0.142350f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.108504f, 0.210286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.149666f, 0.255876f}, +}; + +// For each output (4x2) sample, the weight of each input (6x6) sample. +static const float g_weight_downsample_6x6_to_4x2[8][36] = { +{0.318857f, 0.081413f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.262816f, 0.064811f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.175211f, 0.046152f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050740f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.163830f, 0.223661f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128904f, 0.194332f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.080369f, 0.121162f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041941f, 0.045801f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.230801f, 0.166220f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193495f, 0.136548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113816f, 0.085890f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043771f, 0.029459f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.087528f, 0.318213f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059739f, 0.262039f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046515f, 
0.175973f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049993f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.054078f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.173243f, 0.055145f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.254561f, 0.059695f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.319463f, 0.083816f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.038171f, 0.037447f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.076263f, 0.117360f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.134218f, 0.202503f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.163759f, 0.230278f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044607f, 0.035170f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.114466f, 0.088407f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201026f, 0.127983f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.224148f, 0.164194f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052817f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043531f, 0.174390f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.060164f, 0.262636f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.089340f, 0.317122f}, +}; + +// For each output (5x2) sample, the weight of each input (6x6) sample. 
+static const float g_weight_downsample_6x6_to_5x2[10][36] = { +{0.393855f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.327491f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.216089f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062565f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.303101f, 0.078223f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.261199f, 0.068761f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.160056f, 0.054634f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.074026f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.202529f, 0.207447f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.151013f, 0.157673f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.100074f, 0.095239f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043623f, 0.042402f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.083336f, 0.309647f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061432f, 0.269582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046328f, 0.166035f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063640f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397684f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326178f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.217856f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.058282f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065541f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215996f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321124f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397338f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.069030f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.159434f, 0.051902f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.266327f, 0.065732f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.305627f, 0.081948f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.038550f, 0.046259f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092606f, 0.100038f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.162523f, 0.163345f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199767f, 0.196912f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066709f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050841f, 0.169003f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061591f, 0.265094f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.081426f, 0.305335f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063517f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.210896f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316133f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027674f, 0.381781f}, +}; + +// For each output (6x2) sample, the weight of each input (6x6) sample. +static const float g_weight_downsample_6x6_to_6x2[12][36] = { +{0.395563f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.328397f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214936f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061104f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.395041f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.323513f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073360f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.393200f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.317339f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.218679f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.070782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.399071f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321356f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214689f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064883f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.399159f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326009f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212426f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062406f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398973f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326510f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.217446f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.057071f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065386f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215039f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321113f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398462f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072234f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.211515f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.319185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397066f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.053184f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.213286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.332634f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400895f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063501f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.207210f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.334096f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395193f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.074315f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.216723f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320827f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388135f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063571f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215814f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.325843f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394772f}, +}; + +// For each output (2x3) sample, the weight of each input (6x6) sample. 
+static const float g_weight_downsample_6x6_to_2x3[6][36] = { +{0.253933f, 0.211745f, 0.142964f, 0.043509f, 0.000000f, 0.000000f, 0.146094f, 0.108119f, 0.068727f, 0.024908f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.043336f, 0.140540f, 0.208745f, 0.253069f, 0.000000f, 0.000000f, 0.031333f, 0.069242f, 0.108596f, 0.145138f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044780f, 0.036916f, 0.026808f, 0.000000f, 0.000000f, 0.000000f, 0.151455f, 0.129189f, 0.076266f, 0.030885f, 0.000000f, 0.000000f, 0.151915f, 0.131628f, 0.081598f, 0.031903f, 0.000000f, 0.000000f, 0.043838f, 0.032645f, 0.030173f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028998f, 0.038454f, 0.046460f, 0.000000f, 0.000000f, 0.033717f, 0.076274f, 0.130140f, 0.153377f, 0.000000f, 0.000000f, 0.025762f, 0.077843f, 0.130195f, 0.150217f, 0.000000f, 0.000000f, 0.000000f, 0.029422f, 0.034493f, 0.044648f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.145243f, 0.107655f, 0.062280f, 0.033041f, 0.000000f, 0.000000f, 
0.257369f, 0.210260f, 0.139667f, 0.044485f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037604f, 0.064104f, 0.105759f, 0.144848f, 0.000000f, 0.000000f, 0.042699f, 0.141511f, 0.207704f, 0.255772f}, +}; + +// For each output (3x3) sample, the weight of each input (6x6) sample. +static const float g_weight_downsample_6x6_to_3x3[9][36] = { +{0.412913f, 0.237773f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.237370f, 0.111944f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.066531f, 0.251421f, 0.245639f, 0.065785f, 0.000000f, 0.000000f, 0.047059f, 0.143642f, 0.128760f, 0.051164f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.234587f, 0.419421f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.110765f, 0.235227f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.067391f, 0.044131f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.248992f, 0.133218f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.247568f, 0.139987f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072238f, 0.046475f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.040674f, 0.048555f, 0.000000f, 0.000000f, 0.000000f, 0.049640f, 0.158199f, 0.158521f, 0.046044f, 0.000000f, 0.000000f, 0.043591f, 0.153956f, 0.155258f, 0.049378f, 0.000000f, 0.000000f, 0.000000f, 0.046674f, 0.049509f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049528f, 0.063611f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.137662f, 0.252612f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.134924f, 0.246668f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.042655f, 0.072341f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.237403f, 0.114850f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.418506f, 0.229241f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049009f, 0.142093f, 0.136891f, 0.036294f, 0.000000f, 0.000000f, 0.074433f, 0.244437f, 0.251631f, 0.065212f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.121166f, 0.231108f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.236230f, 0.411495f}, +}; + +// For each output (4x3) sample, the weight of each input (6x6) sample. +static const float g_weight_downsample_6x6_to_4x3[12][36] = { +{0.508292f, 0.132529f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.285382f, 0.073798f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.266624f, 0.378457f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.144380f, 0.210539f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.380292f, 0.270590f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200825f, 0.148293f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.130560f, 0.507542f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.071578f, 0.290320f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.094051f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322294f, 0.082665f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316365f, 0.092271f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092353f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046081f, 0.061377f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.158151f, 0.235006f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.152896f, 0.232594f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052844f, 0.061053f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061619f, 0.046867f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.227763f, 0.158202f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.222620f, 0.155545f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073398f, 0.053986f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.082287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.084098f, 0.330283f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085224f, 0.323658f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094451f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.286413f, 0.077046f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.512915f, 0.123625f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.140389f, 0.213324f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.267125f, 0.379163f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208464f, 0.139969f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.382876f, 0.268691f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.080416f, 0.285653f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.131803f, 0.502128f}, +}; + +// For each output (5x3) sample, the weight of each input (6x6) sample. 
+static const float g_weight_downsample_6x6_to_5x3[15][36] = { +{0.618662f, 0.032137f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.349200f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.497060f, 0.129255f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.281642f, 0.092043f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.333166f, 0.338337f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164333f, 0.164165f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.129409f, 0.504176f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085525f, 0.280890f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.636943f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.363057f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113467f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394204f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.386741f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105588f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086925f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.317750f, 0.095763f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321008f, 0.086368f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.057696f, 0.061462f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.184995f, 0.197656f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.186342f, 0.186715f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059712f, 0.065422f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.091939f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079906f, 0.328876f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085955f, 0.320229f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.093096f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.099585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398489f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113144f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360655f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.639345f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.285578f, 0.088663f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.495946f, 0.129812f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.177513f, 0.166195f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.329950f, 0.326342f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.082692f, 0.279744f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.134353f, 0.503211f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361178f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.638822f}, +}; + +// For each output (6x3) sample, the weight of each input (6x6) sample. +static const float g_weight_downsample_6x6_to_6x3[18][36] = { +{0.640623f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359377f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.638697f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361303f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.640672f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359328f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.637721f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.362279f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.647342f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.352658f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.638418f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111041f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395972f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.387932f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105054f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.101949f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.401263f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.101060f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.098132f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388180f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402030f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111659f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.096173f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393865f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.386312f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.123650f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.104357f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398062f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393265f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.104316f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097666f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400772f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.390396f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111166f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359466f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640534f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360569f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.639431f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.355750f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.644250f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.353865f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.646135f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.642273f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359539f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640461f}, +}; + +// For each output (2x4) sample, the weight of each input (6x6) sample. 
+static const float g_weight_downsample_6x6_to_2x4[8][36] = { +{0.312206f, 0.261492f, 0.177496f, 0.055798f, 0.000000f, 0.000000f, 0.081944f, 0.062361f, 0.048703f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.054679f, 0.172805f, 0.260561f, 0.314742f, 0.000000f, 0.000000f, 0.000000f, 0.049040f, 0.065652f, 0.082520f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164115f, 0.129589f, 0.083879f, 0.029309f, 0.000000f, 0.000000f, 0.231202f, 0.198851f, 0.118719f, 0.044334f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035855f, 0.083276f, 0.127764f, 0.166965f, 0.000000f, 0.000000f, 0.045347f, 0.116503f, 0.193645f, 0.230645f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.223790f, 0.194804f, 0.115855f, 0.047371f, 0.000000f, 0.000000f, 0.164616f, 0.125798f, 0.087268f, 0.040497f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044738f, 0.118365f, 0.198854f, 0.230745f, 0.000000f, 0.000000f, 0.029646f, 0.078141f, 0.131405f, 0.168106f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.080206f, 0.060505f, 0.041197f, 0.000000f, 0.000000f, 0.000000f, 0.320486f, 0.265233f, 0.174992f, 0.057380f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.051057f, 0.058139f, 0.082120f, 0.000000f, 0.000000f, 0.056168f, 0.174118f, 0.260525f, 0.317873f}, +}; + +// For each output (3x4) sample, the weight of each input (6x6) sample. 
+static const float g_weight_downsample_6x6_to_3x4[12][36] = { +{0.503381f, 0.288537f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.130806f, 0.077275f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.088808f, 0.319226f, 0.312498f, 0.086797f, 0.000000f, 0.000000f, 0.000000f, 0.092065f, 0.079421f, 0.021185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.286250f, 0.514036f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072999f, 0.126714f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.261935f, 0.133191f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.376226f, 0.207118f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021529f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059585f, 0.153016f, 0.152552f, 0.043373f, 0.000000f, 0.000000f, 0.063990f, 0.231504f, 0.235283f, 0.060696f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.146403f, 0.262394f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208547f, 0.382656f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.374676f, 0.209306f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.270440f, 0.145577f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059636f, 0.233975f, 0.235944f, 0.069029f, 0.000000f, 0.000000f, 0.048950f, 0.150198f, 0.154340f, 0.047929f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200921f, 0.380881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.146928f, 0.271271f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128883f, 0.075468f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.509859f, 0.285791f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.095842f, 0.086878f, 0.000000f, 0.000000f, 0.000000f, 0.092942f, 0.314169f, 0.319263f, 0.090906f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079652f, 0.124852f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.289868f, 0.505628f}, +}; + +// For each output (4x4) sample, the weight of each input (6x6) sample. +static const float g_weight_downsample_6x6_to_4x4[16][36] = { +{0.665277f, 0.167914f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166809f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.325854f, 0.449938f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094690f, 0.129518f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.455174f, 0.326025f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.109174f, 0.109627f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166733f, 0.664155f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.169112f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320619f, 0.090788f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.462066f, 0.126527f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.165890f, 0.235855f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.233931f, 0.364324f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.239319f, 0.151533f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.363629f, 0.245519f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.106763f, 0.311932f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.119451f, 
0.461853f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.451893f, 0.124086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326160f, 0.097861f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.239712f, 0.365585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164178f, 0.230525f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360274f, 0.237862f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.246139f, 0.155726f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.121863f, 0.457051f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097828f, 0.323258f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.163634f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.667648f, 0.168718f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094870f, 0.132660f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316878f, 0.455591f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.116917f, 0.098433f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.458816f, 0.325834f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.168403f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.172019f, 0.659578f}, +}; + +// For each output (5x4) sample, the weight of each input (6x6) sample. 
+static const float g_weight_downsample_6x6_to_5x4[20][36] = { +{0.773702f, 0.033711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192588f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.633422f, 0.166577f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.170080f, 0.029921f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.388335f, 0.403694f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.100996f, 0.106975f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.161122f, 0.655288f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.183590f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.801705f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198295f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400989f, 0.025097f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.573915f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309345f, 0.085396f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.478694f, 0.126565f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.194664f, 0.187267f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.292735f, 0.308960f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.016375f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.098049f, 0.295983f, 0.000000f, 0.000000f, 0.017892f, 0.000000f, 0.111938f, 0.476138f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043545f, 0.386448f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.570007f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.566407f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402307f, 0.031286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.463145f, 0.120696f, 0.000000f, 0.019497f, 0.000000f, 0.000000f, 0.311721f, 0.084942f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.296730f, 0.300781f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.204639f, 0.197849f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.122117f, 0.469302f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.102545f, 0.306036f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.562064f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041534f, 0.396403f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.190134f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.773971f, 0.035896f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.169927f, 0.035812f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.630284f, 0.163977f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.112667f, 0.106813f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393502f, 0.387018f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.177024f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.170482f, 0.652494f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192274f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033039f, 0.774687f}, +}; + +// For each output (6x4) sample, the weight of each input (6x6) sample. +static const float g_weight_downsample_6x6_to_6x4[24][36] = { +{0.804254f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.804177f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195823f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.799585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200415f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.803604f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196396f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.807256f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192744f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.805135f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.194865f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.410532f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.589468f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.408690f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.591310f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.416225f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.583775f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.414279f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.585721f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.406723f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.593277f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402510f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.597490f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.584784f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.415216f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.590427f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.409573f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.590073f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.409927f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.580348f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.419652f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.588321f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411679f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.587022f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.412978f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193281f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.806719f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.189163f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.810837f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195108f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.804892f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.188290f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.811710f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192914f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.807086f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195292f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.804708f}, +}; + +// For each output (2x5) sample, the weight of each input (6x6) sample. 
+static const float g_weight_downsample_6x6_to_2x5[10][36] = { +{0.387593f, 0.325123f, 0.221104f, 0.066180f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.065940f, 0.214659f, 0.326737f, 0.392664f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309603f, 0.265953f, 0.168780f, 0.060600f, 0.000000f, 0.000000f, 0.084707f, 0.063017f, 0.047341f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062836f, 0.170767f, 0.261053f, 0.307978f, 0.000000f, 0.000000f, 0.000000f, 0.049286f, 0.064361f, 0.083719f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195787f, 0.153943f, 0.095706f, 0.042417f, 0.000000f, 0.000000f, 0.190695f, 0.154435f, 0.097288f, 0.040258f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.029471f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017536f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.039307f, 0.094677f, 0.158696f, 0.199136f, 0.000000f, 0.000000f, 0.040959f, 0.093353f, 0.155294f, 0.201042f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079432f, 0.065739f, 0.044876f, 0.000000f, 0.000000f, 0.000000f, 0.309205f, 0.264700f, 0.167247f, 0.068801f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052112f, 0.064829f, 0.081363f, 0.000000f, 0.000000f, 0.064024f, 0.161136f, 0.263743f, 0.312793f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393277f, 0.324792f, 0.213188f, 0.068743f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.066964f, 0.215440f, 0.323005f, 0.394591f}, +}; + +// For each output (3x5) sample, the weight of each input (6x6) sample. +static const float g_weight_downsample_6x6_to_3x5[15][36] = { +{0.620557f, 0.350797f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028646f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.110170f, 0.397489f, 0.386326f, 0.106015f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357348f, 0.642652f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.503934f, 0.275289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128280f, 0.092497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.102294f, 0.316223f, 0.313576f, 0.092518f, 0.000000f, 0.000000f, 0.000000f, 0.081158f, 0.094231f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.279079f, 0.502163f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086083f, 0.132675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.325483f, 0.157739f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322567f, 0.172225f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021986f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063342f, 0.192228f, 0.186950f, 0.057021f, 0.000000f, 0.000000f, 0.054779f, 0.186114f, 0.185666f, 0.073901f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.172195f, 0.331802f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.148212f, 0.322038f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025751f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.123726f, 0.081188f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.507339f, 0.287746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.093924f, 0.094021f, 0.000000f, 0.000000f, 0.000000f, 0.097070f, 0.315697f, 0.314560f, 0.084728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.082560f, 0.129771f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.277014f, 0.486817f, 0.023837f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.644191f, 0.355809f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.107771f, 0.387615f, 0.393454f, 0.111159f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360886f, 0.639114f}, +}; + +// For each output (4x5) sample, the weight of each input (6x6) sample. +static const float g_weight_downsample_6x6_to_4x5[20][36] = { +{0.778254f, 0.190730f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031016f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.401147f, 0.570243f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028610f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.563768f, 0.394241f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041992f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196238f, 0.767548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036214f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.637514f, 0.166734f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.167634f, 0.028118f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322778f, 0.473312f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085399f, 0.118511f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.471429f, 0.308185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.118025f, 0.102361f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.176592f, 0.643933f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.179475f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.391609f, 0.100882f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.390531f, 0.116978f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017259f, 0.000000f, 0.201618f, 0.301555f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.197600f, 0.281968f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.016735f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.293309f, 0.192842f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.268674f, 0.208109f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020330f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.118514f, 0.380746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097621f, 0.381305f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021814f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.157977f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.657533f, 0.184490f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097522f, 0.128585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309864f, 0.464029f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128900f, 0.090864f, 0.000000f, 0.025393f, 0.000000f, 0.000000f, 0.464029f, 0.290814f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024593f, 0.172268f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.173412f, 0.629727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.778816f, 0.191602f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036297f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394454f, 0.569249f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.039685f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.561207f, 0.399108f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034683f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193744f, 0.771574f}, +}; + +// For each output (5x5) sample, the weight of each input (6x6) sample. +static const float g_weight_downsample_6x6_to_5x5[25][36] = { +{1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.794727f, 0.205273f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.465125f, 0.484079f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028881f, 0.000000f, 0.000000f, 0.021914f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.192446f, 0.772941f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034613f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033123f, 0.930510f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036367f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800234f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199766f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.629079f, 0.165939f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166390f, 0.019675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018918f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.378734f, 0.373861f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111597f, 0.135808f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.177492f, 0.641195f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.181313f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028722f, 0.761781f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.209497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.475763f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.471882f, 0.029551f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022804f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.382714f, 0.116167f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.383377f, 0.117742f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.254151f, 0.249987f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.241972f, 0.253891f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.017950f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.122722f, 0.376847f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.095099f, 0.369986f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017396f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029442f, 0.472507f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.471751f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026300f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.190299f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.776924f, 0.032778f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.171498f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.666385f, 0.162117f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.125713f, 0.117624f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.387084f, 0.369579f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028493f, 0.169318f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.173770f, 0.628419f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198951f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035634f, 0.765415f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.963102f, 0.036898f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030322f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.771054f, 0.198624f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021816f, 0.020944f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.481761f, 0.475479f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.032816f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198418f, 0.768766f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033338f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966662f}, +}; + +// For each output (6x5) sample, the weight of each input (6x6) sample. +static const float g_weight_downsample_6x6_to_6x5[30][36] = { +{0.966284f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033716f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.966287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033713f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.966287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033713f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.966290f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033710f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966125f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033875f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966273f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800857f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199143f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.773463f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201165f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025372f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.805735f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.194265f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.788791f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.211209f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.785975f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214025f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.787286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212714f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.490845f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.487242f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021913f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.490663f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.486878f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022459f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.505452f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.494548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.495383f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.482180f, 0.000000f, 0.022437f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.022727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.496545f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.480728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.486261f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.486387f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027352f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196272f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.803728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.210059f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.789941f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212947f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.787053f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215261f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.784739f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.209116f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.790884f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.205881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.794119f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033710f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966290f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033713f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966287f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033719f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966281f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033712f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966288f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033712f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966288f}, +}; + +// For each output (2x6) sample, the weight of each input (6x6) sample. +static const float g_weight_downsample_6x6_to_2x6[12][36] = { +{0.388815f, 0.325435f, 0.220189f, 0.065562f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.064515f, 0.214042f, 0.327700f, 0.393742f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398821f, 0.326200f, 0.217851f, 0.057128f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062546f, 0.216408f, 0.322269f, 0.398777f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.396575f, 0.330631f, 0.212857f, 0.059936f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.070253f, 0.215326f, 0.317576f, 0.396845f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398130f, 0.324745f, 0.213572f, 0.063553f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062009f, 0.216253f, 0.324683f, 0.397055f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397646f, 0.321346f, 0.212334f, 0.068675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.067073f, 0.210768f, 0.318165f, 0.403993f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395756f, 0.325048f, 0.211862f, 0.067334f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065475f, 0.214113f, 0.324009f, 0.396403f}, +}; + +// For each output (3x6) sample, the weight of each input (6x6) sample. 
+static const float g_weight_downsample_6x6_to_3x6[18][36] = { +{0.640136f, 0.359864f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.108112f, 0.399968f, 0.388087f, 0.103833f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.356122f, 0.643878f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.646308f, 0.353692f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.122937f, 0.390166f, 0.380558f, 0.106339f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.355015f, 0.644985f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.642874f, 0.357126f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111570f, 0.398638f, 0.387639f, 0.102153f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359134f, 0.640866f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640159f, 0.359841f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.098908f, 0.393303f, 0.400421f, 0.107369f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357119f, 0.642881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640541f, 0.359459f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.116318f, 0.397635f, 0.395084f, 0.090964f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361948f, 0.638052f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.645448f, 0.354552f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.106981f, 0.389214f, 0.395056f, 0.108749f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359592f, 0.640408f}, +}; + +// For each output (4x6) sample, the weight of each input (6x6) sample. 
+static const float g_weight_downsample_6x6_to_4x6[24][36] = { +{0.806928f, 0.193072f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.412216f, 0.587784f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.590075f, 0.409925f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200682f, 0.799318f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.809822f, 0.190178f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.423474f, 0.576526f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.580816f, 0.419184f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.190240f, 0.809760f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800320f, 0.199680f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.408625f, 0.591375f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.583392f, 0.416608f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200372f, 0.799628f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.798914f, 0.201086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411243f, 0.588757f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.586520f, 0.413480f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.203588f, 0.796412f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.802040f, 0.197960f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411175f, 0.588825f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.599873f, 0.400127f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193060f, 0.806940f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.806073f, 0.193927f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.408705f, 0.591295f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.585711f, 0.414289f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.197672f, 0.802328f}, +}; + +// For each output (5x6) sample, the weight of each input (6x6) sample. 
+static const float g_weight_downsample_6x6_to_5x6[30][36] = { +{0.966289f, 0.033711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.794848f, 0.205152f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.473272f, 0.496525f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030202f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.196955f, 0.803045f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033711f, 0.966289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966284f, 0.033716f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.795787f, 0.204213f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.500928f, 0.499072f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198603f, 0.801397f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033716f, 0.966284f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966283f, 0.033717f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.788424f, 0.211576f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029276f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.484227f, 0.486497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201499f, 0.798501f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033724f, 0.966276f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966283f, 0.033717f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.791336f, 0.208664f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.490188f, 0.509812f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.204835f, 0.795165f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033703f, 0.966297f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966276f, 0.033724f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.799276f, 0.200724f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.022501f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.494443f, 0.483055f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.205967f, 0.794033f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033726f, 0.966274f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.965971f, 0.034029f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.798640f, 0.201360f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.502577f, 0.497423f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.203927f, 0.796073f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033706f, 0.966294f}, +}; + +// For each output (6x6) sample, the weight of each input (6x6) sample. +static const float g_weight_downsample_6x6_to_6x6[36][36] = { +{1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f}, +}; + +//-------------------------------------------------------------------------------------------------------------------------- + +const struct downsample_matrix_6x6 +{ + uint32_t m_grid_width, m_grid_height; + const float* m_p; +} g_downsample_matrices_6x6[] = { + { 2, 2, (const float*)g_weight_downsample_6x6_to_2x2 }, + { 3, 2, (const float*)g_weight_downsample_6x6_to_3x2 }, + { 4, 2, (const float*)g_weight_downsample_6x6_to_4x2 }, + { 5, 2, (const float*)g_weight_downsample_6x6_to_5x2 }, + { 6, 2, (const float*)g_weight_downsample_6x6_to_6x2 }, + { 2, 3, (const float*)g_weight_downsample_6x6_to_2x3 }, + { 3, 3, (const float*)g_weight_downsample_6x6_to_3x3 }, + { 4, 3, (const float*)g_weight_downsample_6x6_to_4x3 }, + { 5, 3, (const float*)g_weight_downsample_6x6_to_5x3 }, + { 6, 3, (const float*)g_weight_downsample_6x6_to_6x3 }, + { 2, 4, (const float*)g_weight_downsample_6x6_to_2x4 }, + { 3, 4, (const float*)g_weight_downsample_6x6_to_3x4 }, + { 4, 4, (const float*)g_weight_downsample_6x6_to_4x4 }, + { 5, 4, (const float*)g_weight_downsample_6x6_to_5x4 }, + { 6, 4, (const float*)g_weight_downsample_6x6_to_6x4 }, + { 2, 5, (const float*)g_weight_downsample_6x6_to_2x5 }, + { 3, 5, (const float*)g_weight_downsample_6x6_to_3x5 }, + { 4, 5, (const float*)g_weight_downsample_6x6_to_4x5 }, + { 5, 5, (const float*)g_weight_downsample_6x6_to_5x5 }, + { 6, 5, (const 
float*)g_weight_downsample_6x6_to_6x5 }, + { 2, 6, (const float*)g_weight_downsample_6x6_to_2x6 }, + { 3, 6, (const float*)g_weight_downsample_6x6_to_3x6 }, + { 4, 6, (const float*)g_weight_downsample_6x6_to_4x6 }, + { 5, 6, (const float*)g_weight_downsample_6x6_to_5x6 }, + { 6, 6, (const float*)g_weight_downsample_6x6_to_6x6 } +}; +//const uint32_t NUM_DOWNSAMPLE_MATRICES_6x6 = sizeof(g_downsample_matrices_6x6) / sizeof(g_downsample_matrices_6x6[0]); + +//-------------------------------------------------------------------------------------------------------------------------- + +const float* get_6x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height) +{ + // TODO: Use hash or map lookup. + for (const auto& m : g_downsample_matrices_6x6) + if ((m.m_grid_width == grid_width) && (m.m_grid_height == grid_height)) + return m.m_p; + + assert(0); + return nullptr; +} + +void downsample_weight_grid( + const float* pMatrix_weights, + uint32_t bx, uint32_t by, // source/from dimension (block size) + uint32_t wx, uint32_t wy, // dest/to dimension (grid size) + const uint8_t* pSrc_weights, // these are dequantized weights, NOT ISE symbols, [by][bx] + uint8_t* pDst_weights) // [wy][wx] +{ + const uint32_t total_block_samples = bx * by; + + for (uint32_t y = 0; y < wy; y++) + { + for (uint32_t x = 0; x < wx; x++) + { + float total = 0.5f; + + for (uint32_t i = 0; i < total_block_samples; i++) + if (pMatrix_weights[i]) + total += pMatrix_weights[i] * (float)pSrc_weights[i]; + + pDst_weights[x + y * wx] = (uint8_t)clamp((int)total, 0, 64); + + pMatrix_weights += total_block_samples; + } + } +} + +//-------------------------------------------------------------------------------------------------------------------------- + +void downsample_ise_weights( + uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range, + uint32_t block_w, uint32_t block_h, + uint32_t grid_w, uint32_t grid_h, + const uint8_t* pSrc_weights, uint8_t* pDst_weights) +{ + assert((block_w <= 
MAX_ASTC_HDR_BLOCK_W) && (block_h <= MAX_ASTC_HDR_BLOCK_H)); + assert((grid_w >= 2) && (grid_w <= MAX_ASTC_HDR_BLOCK_W)); + assert((grid_h >= 2) && (grid_h <= MAX_ASTC_HDR_BLOCK_H)); + + assert(dequant_weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE); + assert(dequant_weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE); + + assert(quant_weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE); + assert(quant_weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE); + + if ((block_w == grid_w) && (block_h == grid_h)) + { + if (dequant_weight_ise_range != quant_weight_ise_range) + { + basist::astc_6x6_hdr::requantize_astc_weights(block_w * block_h, pSrc_weights, dequant_weight_ise_range, pDst_weights, quant_weight_ise_range); + } + else + { + if (pDst_weights != pSrc_weights) + memcpy(pDst_weights, pSrc_weights, block_w * block_h); + } + + return; + } + + uint8_t desired_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + + const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(dequant_weight_ise_range).m_ISE_to_val; + + for (uint32_t by = 0; by < block_h; by++) + for (uint32_t bx = 0; bx < block_w; bx++) + desired_weights[bx + by * block_w] = dequant_tab[pSrc_weights[bx + by * block_w]]; + + uint8_t downsampled_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + + const float* pDownsample_matrix = get_6x6_downsample_matrix(grid_w, grid_h); + assert(pDownsample_matrix); + + downsample_weight_grid( + pDownsample_matrix, + block_w, block_h, // source/from dimension (block size) + grid_w, grid_h, // dest/to dimension (grid size) + desired_weights, // these are dequantized weights, NOT ISE symbols, [by][bx] + downsampled_weights); // [wy][wx] + + const auto& weight_quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(quant_weight_ise_range).m_val_to_ise; + + for (uint32_t gy = 0; gy < grid_h; gy++) + for (uint32_t gx = 0; gx < grid_w; gx++) + pDst_weights[gx + gy * grid_w] = weight_quant_tab[downsampled_weights[gx + gy * grid_w]]; 
+} + +void downsample_ise_weights_dual_plane( + uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range, + uint32_t block_w, uint32_t block_h, + uint32_t grid_w, uint32_t grid_h, + const uint8_t* pSrc_weights0, const uint8_t* pSrc_weights1, + uint8_t* pDst_weights) +{ + uint8_t downsampled_weights0[MAX_ASTC_HDR_BLOCK_W * MAX_ASTC_HDR_BLOCK_H], downsampled_weights1[MAX_ASTC_HDR_BLOCK_W * MAX_ASTC_HDR_BLOCK_H]; + + downsample_ise_weights( + dequant_weight_ise_range, quant_weight_ise_range, + block_w, block_h, + grid_w, grid_h, + pSrc_weights0, downsampled_weights0); + + downsample_ise_weights( + dequant_weight_ise_range, quant_weight_ise_range, + block_w, block_h, + grid_w, grid_h, + pSrc_weights1, downsampled_weights1); + + const uint32_t num_grid_samples = grid_w * grid_h; + for (uint32_t i = 0; i < num_grid_samples; i++) + { + pDst_weights[i * 2 + 0] = downsampled_weights0[i]; + pDst_weights[i * 2 + 1] = downsampled_weights1[i]; + } +} + +static bool refine_endpoints_mode11( + uint32_t endpoint_ise_range, + uint8_t* pEndpoint_vals, // the endpoints to optimize + uint32_t block_w, uint32_t block_h, // block dimensions + uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid + uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets + astc_hdr_codec_base_options& coptions, + bool direct_only, int first_submode, int last_submode, + opt_mode_t opt_mode) +{ + if (opt_mode == cNoOpt) + return false; + + const uint32_t num_block_pixels = block_w * block_h; + + uint8_t def_pixel_block_ofs[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + if (!pPixel_block_ofs) + { + for (uint32_t i = 0; i < num_block_pixels; i++) + def_pixel_block_ofs[i] = (uint8_t)i; + + pPixel_block_ofs = def_pixel_block_ofs; + } + + const uint32_t num_weights = grid_w * grid_h; + + uint8_t 
dequantized_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + for (uint32_t i = 0; i < num_weights; i++) + dequantized_raw_weights[i] = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_val[pWeights[i]]; + + uint8_t upsampled_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE + astc_helpers::upsample_weight_grid(block_w, block_h, grid_w, grid_h, dequantized_raw_weights, upsampled_weights); + + aabb3F color_box_q16(cInitExpand); + + uint8_t trial_blk_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE + float trial_blk_raw_weightsf[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + for (uint32_t i = 0; i < num_pixels; i++) + { + color_box_q16.expand(pBlock_pixels_q16[i]); + + assert(pPixel_block_ofs[i] < num_block_pixels); + + trial_blk_raw_weights[i] = upsampled_weights[pPixel_block_ofs[i]]; + trial_blk_raw_weightsf[i] = (float)trial_blk_raw_weights[i] * (1.0f / 64.0f); + } + + vec3F l_q16, h_q16; + if (opt_mode == cOrdinaryLeastSquares) + { + if (!compute_least_squares_endpoints_rgb_raw_weights(num_pixels, trial_blk_raw_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16)) + return false; + } + else if ((opt_mode == cWeightedLeastSquares) || (opt_mode == cWeightedLeastSquaresHeavy)) + { + vec3F block_mean_color_q16(calc_mean(num_pixels, pBlock_pixels_q16)); + vec3F block_axis_q16(calc_rgb_pca(num_pixels, pBlock_pixels_q16, block_mean_color_q16)); + float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL; + for (uint32_t i = 0; i < num_pixels; i++) + { + vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16); + float kd = k.dot(block_axis_q16); + if (kd < l) + l = kd; + if (kd > h) + h = kd; + } + float emphasis_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + if (h == l) + { + for (uint32_t i = 0; i < num_pixels; i++) + emphasis_weights[i] = 1.0f; + } + else + { + float mid = (0.0f - l) / (h - l); + mid = clamp(mid, .01f, .99f); + + float lw = LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT; + if (opt_mode == 
cWeightedLeastSquaresHeavy) + lw = LOW_EMPHASIS_WEIGHT_HEAVY, mw = MIDDLE_EMPHASIS_WEIGHT_HEAVY, hw = HIGH_EMPHASIS_WEIGHT_HEAVY; + + for (uint32_t i = 0; i < num_pixels; i++) + { + vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16); + float kd = k.dot(block_axis_q16); + + assert((kd >= l) && (kd <= h)); + + float v = (kd - l) / (h - l); + + if (v < mid) + v = lerp(lw, mw, v / mid); + else + v = lerp(mw, hw, (v - mid) * (1.0f - mid)); + + emphasis_weights[i] = v; + } + } + + if (!compute_weighted_least_squares_endpoints_rgb(num_pixels, nullptr, nullptr, trial_blk_raw_weightsf, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16)) + return false; + } + else + { + assert(opt_mode == cWeightedAverage); + + l_q16.set(0.0f); + float total_low = 0.0f; + + h_q16.set(0.0f); + float total_high = 0.0f; + + for (uint32_t i = 0; i < num_pixels; i++) + { + vec3F p(pBlock_pixels_q16[i]); + float lerp = (float)trial_blk_raw_weights[i] * (1.0f / 64.0f); + + l_q16 += p * (1.0f - lerp); + total_low += (1.0f - lerp); + + h_q16 += p * lerp; + total_high += lerp; + } + + if (total_low != 0.0f) + l_q16 *= (1.0f / total_low); + else + return false; + + if (total_high != 0.0f) + h_q16 *= (1.0f / total_high); + else + return false; + } + + uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS]; + + uint32_t submode_used; + + bool pack_succeeded = pack_mode11(l_q16, h_q16, endpoint_ise_range, trial_endpoints, coptions, direct_only, first_submode, last_submode, false, submode_used); + if (!pack_succeeded) + return false; + + int cur_e[2][3]; + if (!decode_mode11_to_qlog12(pEndpoint_vals, cur_e, endpoint_ise_range)) + return false; + + int trial_e[2][3]; + if (!decode_mode11_to_qlog12(trial_endpoints, trial_e, endpoint_ise_range)) + return false; + + for (uint32_t i = 0; i < 3; i++) + { + cur_e[0][i] <<= 4; + cur_e[1][i] <<= 4; + + trial_e[0][i] <<= 4; + trial_e[1][i] <<= 4; + } + + const float R_WEIGHT = coptions.m_r_err_scale, G_WEIGHT = coptions.m_g_err_scale; + + double 
cur_error = 0, trial_error = 0; + + for (uint32_t p = 0; p < num_pixels; p++) + { + const half_float* pDesired_half = &pBlock_pixels_half[p][0]; + + const double desired_half_r_q = q(pDesired_half[0], coptions.m_q_log_bias), desired_half_g_q = q(pDesired_half[1], coptions.m_q_log_bias), desired_half_b_q = q(pDesired_half[2], coptions.m_q_log_bias); + + const uint32_t c = trial_blk_raw_weights[p]; + assert(c <= 64); + + { + half_float rf, gf, bf; + + { + uint32_t r0 = cur_e[0][0], r1 = cur_e[1][0]; + int ri = (r0 * (64 - c) + r1 * c + 32) / 64; + rf = astc_helpers::qlog16_to_half(ri); + } + + { + uint32_t g0 = cur_e[0][1], g1 = cur_e[1][1]; + int gi = (g0 * (64 - c) + g1 * c + 32) / 64; + gf = astc_helpers::qlog16_to_half(gi); + } + + { + uint32_t b0 = cur_e[0][2], b1 = cur_e[1][2]; + int bi = (b0 * (64 - c) + b1 * c + 32) / 64; + bf = astc_helpers::qlog16_to_half(bi); + } + + const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias); + + const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - desired_half_b_q; + + cur_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd; + } + + { + half_float rf, gf, bf; + + { + uint32_t r0 = trial_e[0][0], r1 = trial_e[1][0]; + int ri = (r0 * (64 - c) + r1 * c + 32) / 64; + rf = astc_helpers::qlog16_to_half(ri); + } + + { + uint32_t g0 = trial_e[0][1], g1 = trial_e[1][1]; + int gi = (g0 * (64 - c) + g1 * c + 32) / 64; + gf = astc_helpers::qlog16_to_half(gi); + } + + { + uint32_t b0 = trial_e[0][2], b1 = trial_e[1][2]; + int bi = (b0 * (64 - c) + b1 * c + 32) / 64; + bf = astc_helpers::qlog16_to_half(bi); + } + + const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias); + + const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, 
bd = decoded_half_q2 - desired_half_b_q; + + trial_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd; + } + + } // p + + if (trial_error < cur_error) + { + memcpy(pEndpoint_vals, trial_endpoints, NUM_MODE11_ENDPOINTS); + return true; + } + + return false; +} + +static bool refine_endpoints_mode7( + uint32_t endpoint_ise_range, + uint8_t* pEndpoint_vals, // the endpoints to optimize + uint32_t block_w, uint32_t block_h, // block dimensions + uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid + uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets + astc_hdr_codec_base_options& coptions, + int first_submode, int last_submode) +{ + const uint32_t num_block_pixels = block_w * block_h; + + uint8_t def_pixel_block_ofs[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + if (!pPixel_block_ofs) + { + for (uint32_t i = 0; i < num_block_pixels; i++) + def_pixel_block_ofs[i] = (uint8_t)i; + + pPixel_block_ofs = def_pixel_block_ofs; + } + + const uint32_t num_weights = grid_w * grid_h; + + uint8_t dequantized_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; + for (uint32_t i = 0; i < num_weights; i++) + dequantized_raw_weights[i] = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_val[pWeights[i]]; + + uint8_t upsampled_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE + astc_helpers::upsample_weight_grid(block_w, block_h, grid_w, grid_h, dequantized_raw_weights, upsampled_weights); + + uint8_t trial_blk_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE + for (uint32_t i = 0; i < num_pixels; i++) + { + assert(pPixel_block_ofs[i] < num_block_pixels); + + trial_blk_raw_weights[i] = upsampled_weights[pPixel_block_ofs[i]]; + } + + //-- + + int cur_e[2][3]; + int cur_s = 0; + if (!decode_mode7_to_qlog12(pEndpoint_vals, cur_e, &cur_s, endpoint_ise_range)) + 
return false; + + cur_s <<= 4; + + vec3F block_mean_color_q16(calc_mean(num_pixels, pBlock_pixels_q16)); + + vec3F new_high_color_q16(block_mean_color_q16); + + const float one_over_num_pixels = 1.0f / (float)num_pixels; + + for (uint32_t i = 0; i < num_pixels; i++) + { + float lerp = trial_blk_raw_weights[i] * (1.0f / 64.0f); + + float k = (float)cur_s * (1.0f - lerp) * one_over_num_pixels; + new_high_color_q16[0] += k; + new_high_color_q16[1] += k; + new_high_color_q16[2] += k; + } + + // Given a set of selectors and a high color, try to compute a better S. + float t = 0.0f; + + for (uint32_t i = 0; i < num_pixels; i++) + { + float lerp = trial_blk_raw_weights[i] * (1.0f / 64.0f); + + t += (1.0f) - lerp; + } + + t *= one_over_num_pixels; + + if (fabs(t) < .0000125f) + return false; + + uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS]; + + uint32_t submode_used; + if (!pack_mode7(new_high_color_q16, (float)cur_s, endpoint_ise_range, trial_endpoints, weight_ise_range, coptions, first_submode, last_submode, false, submode_used)) + return false; + + int trial_e[2][3]; + if (!decode_mode7_to_qlog12(trial_endpoints, trial_e, nullptr, endpoint_ise_range)) + return false; + + vec3F cur_h_q16((float)(trial_e[1][0] << 4), (float)(trial_e[1][1] << 4), (float)(trial_e[1][2] << 4)); + + float s_r = (cur_h_q16[0] - block_mean_color_q16[0]) / t; + //float s_g = (cur_h_q16[1] - block_mean_color_q16[1]) / t; + //float s_b = (cur_h_q16[2] - block_mean_color_q16[2]) / t; + float new_s_q16 = ceilf(s_r); + + if (!pack_mode7(new_high_color_q16, new_s_q16, endpoint_ise_range, trial_endpoints, weight_ise_range, coptions, first_submode, last_submode, false, submode_used)) + return false; + + if (!decode_mode7_to_qlog12(trial_endpoints, trial_e, nullptr, endpoint_ise_range)) + return false; + + // -- + + for (uint32_t i = 0; i < 3; i++) + { + cur_e[0][i] <<= 4; + cur_e[1][i] <<= 4; + + trial_e[0][i] <<= 4; + trial_e[1][i] <<= 4; + } + + const float R_WEIGHT = coptions.m_r_err_scale, G_WEIGHT 
= coptions.m_g_err_scale; + + double cur_error = 0, trial_error = 0; + + for (uint32_t p = 0; p < num_pixels; p++) + { + const half_float* pDesired_half = &pBlock_pixels_half[p][0]; + + const double desired_half_r_q = q(pDesired_half[0], coptions.m_q_log_bias), desired_half_g_q = q(pDesired_half[1], coptions.m_q_log_bias), desired_half_b_q = q(pDesired_half[2], coptions.m_q_log_bias); + + const uint32_t c = trial_blk_raw_weights[p]; + assert(c <= 64); + + { + half_float rf, gf, bf; + + { + uint32_t r0 = cur_e[0][0], r1 = cur_e[1][0]; + int ri = (r0 * (64 - c) + r1 * c + 32) / 64; + rf = astc_helpers::qlog16_to_half(ri); + } + + { + uint32_t g0 = cur_e[0][1], g1 = cur_e[1][1]; + int gi = (g0 * (64 - c) + g1 * c + 32) / 64; + gf = astc_helpers::qlog16_to_half(gi); + } + + { + uint32_t b0 = cur_e[0][2], b1 = cur_e[1][2]; + int bi = (b0 * (64 - c) + b1 * c + 32) / 64; + bf = astc_helpers::qlog16_to_half(bi); + } + + const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias); + + const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - desired_half_b_q; + + cur_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd; + } + + { + half_float rf, gf, bf; + + { + uint32_t r0 = trial_e[0][0], r1 = trial_e[1][0]; + int ri = (r0 * (64 - c) + r1 * c + 32) / 64; + rf = astc_helpers::qlog16_to_half(ri); + } + + { + uint32_t g0 = trial_e[0][1], g1 = trial_e[1][1]; + int gi = (g0 * (64 - c) + g1 * c + 32) / 64; + gf = astc_helpers::qlog16_to_half(gi); + } + + { + uint32_t b0 = trial_e[0][2], b1 = trial_e[1][2]; + int bi = (b0 * (64 - c) + b1 * c + 32) / 64; + bf = astc_helpers::qlog16_to_half(bi); + } + + const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias); + + const double rd = decoded_half_q0 - desired_half_r_q, gd 
= decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - desired_half_b_q; + + trial_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd; + } + + } // p + + if (trial_error < cur_error) + { + memcpy(pEndpoint_vals, trial_endpoints, NUM_MODE7_ENDPOINTS); + return true; + } + + return false; +} + +bool refine_endpoints( + uint32_t cem, + uint32_t endpoint_ise_range, + uint8_t* pEndpoint_vals, // the endpoints to optimize + uint32_t block_w, uint32_t block_h, // block dimensions + uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid + uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets + astc_hdr_codec_base_options& coptions, opt_mode_t opt_mode) +{ + if (cem == 7) + { + return refine_endpoints_mode7( + endpoint_ise_range, + pEndpoint_vals, + block_w, block_h, + grid_w, grid_h, pWeights, weight_ise_range, + num_pixels, pBlock_pixels_half, pBlock_pixels_q16, + pPixel_block_ofs, + coptions, + FIRST_MODE7_SUBMODE_INDEX, MAX_MODE7_SUBMODE_INDEX); + } + else if (cem == 11) + { + return refine_endpoints_mode11( + endpoint_ise_range, + pEndpoint_vals, + block_w, block_h, + grid_w, grid_h, pWeights, weight_ise_range, + num_pixels, pBlock_pixels_half, pBlock_pixels_q16, + pPixel_block_ofs, + coptions, + false, FIRST_MODE11_SUBMODE_INDEX, MAX_MODE11_SUBMODE_INDEX, opt_mode); + } + + return false; +} + +} // namespace basisu + diff --git a/thirdparty/basisu/encoder/basisu_astc_hdr_common.h b/thirdparty/basisu/encoder/basisu_astc_hdr_common.h new file mode 100644 index 000000000..55be403fa --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_astc_hdr_common.h @@ -0,0 +1,423 @@ +// File: basisu_astc_hdr_common.h +#pragma once +#include "basisu_enc.h" +#include "basisu_gpu_texture.h" +#include "../transcoder/basisu_astc_helpers.h" +#include "../transcoder/basisu_astc_hdr_core.h" + 
+namespace basisu +{ + const uint32_t MAX_ASTC_HDR_BLOCK_W = 6, MAX_ASTC_HDR_BLOCK_H = 6; + const uint32_t MAX_ASTC_HDR_ENC_BLOCK_PIXELS = 6 * 6; + + const uint32_t MODE11_TOTAL_SUBMODES = 8; // plus an extra hidden submode, directly encoded, for direct, so really 9 (see tables 99/100 of the ASTC spec) + const uint32_t MODE7_TOTAL_SUBMODES = 6; + + // [ise_range][0] = # levels + // [ise_range][1...] = lerp value [0,64] + // in ASTC order + // Supported ISE weight ranges: 0 to 11, 12 total + const uint32_t MIN_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_2_LEVELS; // ISE 0=2 levels + const uint32_t MAX_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_32_LEVELS; // ISE 11=16 levels + const uint32_t MIN_SUPPORTED_WEIGHT_LEVELS = 2; + const uint32_t MAX_SUPPORTED_WEIGHT_LEVELS = 32; + + extern const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][33]; + + const float Q_LOG_BIAS_4x4 = .125f; // the original UASTC HDR 4x4 log bias + const float Q_LOG_BIAS_6x6 = 1.0f; // the log bias both encoders use now + + const float LDR_TO_HDR_NITS = 100.0f; + + struct astc_hdr_codec_base_options + { + float m_r_err_scale, m_g_err_scale; + float m_q_log_bias; + + bool m_ultra_quant; + + // If true, the ASTC HDR compressor is allowed to more aggressively vary weight indices for slightly higher compression in non-fastest mode. This will hurt BC6H quality, however. 
+ bool m_allow_uber_mode; + + bool m_mode7_full_s_optimization; + + bool m_take_first_non_clamping_mode11_submode; + bool m_take_first_non_clamping_mode7_submode; + + bool m_disable_weight_plane_optimization; + + astc_hdr_codec_base_options() { init(); } + + void init(); + }; + + inline int get_bit( + int src_val, int src_bit) + { + assert(src_bit >= 0 && src_bit <= 31); + int bit = (src_val >> src_bit) & 1; + return bit; + } + + inline void pack_bit( + int& dst, int dst_bit, + int src_val, int src_bit = 0) + { + assert(dst_bit >= 0 && dst_bit <= 31); + int bit = get_bit(src_val, src_bit); + dst |= (bit << dst_bit); + } + + inline uint32_t get_max_qlog(uint32_t bits) + { + switch (bits) + { + case 7: return basist::MAX_QLOG7; + case 8: return basist::MAX_QLOG8; + case 9: return basist::MAX_QLOG9; + case 10: return basist::MAX_QLOG10; + case 11: return basist::MAX_QLOG11; + case 12: return basist::MAX_QLOG12; + case 16: return basist::MAX_QLOG16; + default: assert(0); break; + } + return 0; + } + +#if 0 + inline float get_max_qlog_val(uint32_t bits) + { + switch (bits) + { + case 7: return MAX_QLOG7_VAL; + case 8: return MAX_QLOG8_VAL; + case 9: return MAX_QLOG9_VAL; + case 10: return MAX_QLOG10_VAL; + case 11: return MAX_QLOG11_VAL; + case 12: return MAX_QLOG12_VAL; + case 16: return MAX_QLOG16_VAL; + default: assert(0); break; + } + return 0; + } +#endif + +#if 0 + // Input is the low 11 bits of the qlog + // Returns the 10-bit mantissa of the half float value + int qlog11_to_half_float_mantissa(int M) + { + assert(M <= 0x7FF); + int Mt; + if (M < 512) + Mt = 3 * M; + else if (M >= 1536) + Mt = 5 * M - 2048; + else + Mt = 4 * M - 512; + return (Mt >> 3); + } +#endif + + // Input is the 10-bit mantissa of the half float value + // Output is the 11-bit qlog value + // Inverse of qlog11_to_half_float_mantissa() + inline int half_float_mantissa_to_qlog11(int hf) + { + int q0 = (hf * 8 + 2) / 3; + int q1 = (hf * 8 + 2048 + 4) / 5; + + if (q0 < 512) + return q0; + else 
if (q1 >= 1536) + return q1; + + int q2 = (hf * 8 + 512 + 2) / 4; + return q2; + } + + inline int half_to_qlog16(int hf) + { + assert(!basist::half_is_signed((basist::half_float)hf) && !basist::is_half_inf_or_nan((basist::half_float)hf)); + + // extract 5 bits exponent, which is carried through to qlog16 unchanged + const int exp = (hf >> 10) & 0x1F; + + // extract and invert the 10 bit mantissa to nearest qlog11 (should be lossless) + const int mantissa = half_float_mantissa_to_qlog11(hf & 0x3FF); + assert(mantissa <= 0x7FF); + + // Now combine to qlog16, which is what ASTC HDR interpolates using the [0-64] weights. + uint32_t qlog16 = (exp << 11) | mantissa; + + // should be a lossless operation + assert(astc_helpers::qlog16_to_half(qlog16) == hf); + + return qlog16; + } + + void interpolate_qlog12_colors( + const int e[2][3], + basist::half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range); + + bool get_astc_hdr_mode_11_block_colors( + const uint8_t* pEndpoints, + basist::half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range); + + bool get_astc_hdr_mode_7_block_colors( + const uint8_t* pEndpoints, + basist::half_float* pDecoded_half, + vec3F* pDecoded_float, + uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range); + + // Fast high precision piecewise linear approximation of log2(bias+x). + // Half may be zero, positive or denormal. No NaN/Inf/negative. 
+ BASISU_FORCE_INLINE double q(basist::half_float x, float log_bias) + { + union { float f; int32_t i; uint32_t u; } fi; + + fi.f = fast_half_to_float_pos_not_inf_or_nan(x); + + assert(fi.f >= 0.0f); + + fi.f += log_bias; + + return (double)fi.u; // approx log2f(fi.f), need to return double for the precision + } + + BASISU_FORCE_INLINE uint32_t q2(basist::half_float x, float log_bias) + { + union { float f; int32_t i; uint32_t u; } fi; + + fi.f = fast_half_to_float_pos_not_inf_or_nan(x); + + assert(fi.f >= 0.0f); + + fi.f += log_bias; + + return fi.u; + } + + double eval_selectors( + uint32_t num_pixels, + uint8_t* pWeights, + uint32_t ise_weight_range, + const basist::half_float* pBlock_pixels_half, + uint32_t num_weight_levels, + const basist::half_float* pDecoded_half, + const astc_hdr_codec_base_options& coptions, + uint32_t usable_selector_bitmask = UINT32_MAX); + + double eval_selectors_dual_plane( + uint32_t channel_index, + uint32_t num_pixels, + uint8_t* pWeights0, uint8_t* pWeights1, + const basist::half_float* pBlock_pixels_half, + uint32_t num_weight_levels, + const basist::half_float* pDecoded_half, + const astc_hdr_codec_base_options& coptions, + uint32_t usable_selector_bitmask = UINT32_MAX); + + double compute_block_error(uint32_t num_pixels, const basist::half_float* pOrig_block, const basist::half_float* pPacked_block, const astc_hdr_codec_base_options& coptions); + + const uint32_t FIRST_MODE7_SUBMODE_INDEX = 0; + const uint32_t MAX_MODE7_SUBMODE_INDEX = 5; + + bool pack_mode7( + const vec3F& high_color_q16, const float s_q16, + uint32_t ise_endpoint_range, uint8_t* pEndpoints, + uint32_t ise_weight_range, // only used for determining biasing during CEM 7 packing + const astc_hdr_codec_base_options& coptions, + int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used); + + bool try_mode7( + uint32_t num_pixels, + uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used, + const 
vec3F& high_color_q16, const float s_q16, + const basist::half_float block_pixels_half[][3], + uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, + uint32_t ise_endpoint_range, + int32_t first_submode = 0, int32_t last_submode = MAX_MODE7_SUBMODE_INDEX); + + bool pack_mode11( + const vec3F& low_color_q16, const vec3F& high_color_q16, + uint32_t ise_endpoint_range, uint8_t* pEndpoints, + const astc_hdr_codec_base_options& coptions, + bool direct_only, int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used); + + bool try_mode11(uint32_t num_pixels, + uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used, + const vec3F& low_color_q16, const vec3F& high_color_q16, + const basist::half_float block_pixels_half[][3], + uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range, + bool constrain_ise_weight_selectors, + int32_t first_submode, int32_t last_submode, bool ignore_clamping); + + bool try_mode11_dual_plane(uint32_t channel_index, uint32_t num_pixels, + uint8_t* pEndpoints, uint8_t* pWeights0, uint8_t* pWeights1, double& cur_block_error, uint32_t& submode_used, + const vec3F& low_color_q16, const vec3F& high_color_q16, + const basist::half_float block_pixels_half[][3], + uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range, + bool constrain_ise_weight_selectors, + int32_t first_submode, int32_t last_submode, bool ignore_clamping); + + const int FIRST_MODE11_SUBMODE_INDEX = -1; + const int MAX_MODE11_SUBMODE_INDEX = 7; + + enum opt_mode_t + { + cNoOpt, + cOrdinaryLeastSquares, + cWeightedLeastSquares, + cWeightedLeastSquaresHeavy, + cWeightedAverage + }; + + struct encode_astc_block_stats + { + uint32_t m_num_pixels; + vec3F m_mean_q16; + vec3F m_axis_q16; + + void 
init(uint32_t num_pixels, const vec4F pBlock_pixels_q16[]); + }; + + double encode_astc_hdr_block_mode_11( + uint32_t num_pixels, + const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + uint32_t ise_weight_range, + uint32_t& best_submode, + double cur_block_error, + uint8_t* blk_endpoints, uint8_t* blk_weights, + const astc_hdr_codec_base_options& coptions, + bool direct_only, + uint32_t ise_endpoint_range, + bool uber_mode, + bool constrain_ise_weight_selectors, + int32_t first_submode, int32_t last_submode, bool ignore_clamping, + opt_mode_t opt_mode, + const encode_astc_block_stats *pBlock_stats = nullptr); + + double encode_astc_hdr_block_downsampled_mode_11( + uint32_t block_x, uint32_t block_y, uint32_t grid_x, uint32_t grid_y, + uint32_t ise_weight_range, uint32_t ise_endpoint_range, + uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + double cur_block_error, + int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode, + uint8_t* pBlk_endpoints, uint8_t* pBlk_weights, uint32_t& best_submode, + const astc_hdr_codec_base_options& coptions, + const encode_astc_block_stats* pBlock_stats = nullptr); + + double encode_astc_hdr_block_mode_11_dual_plane( + uint32_t num_pixels, + const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + uint32_t channel_index, // 0-2 + uint32_t ise_weight_range, + uint32_t& best_submode, + double cur_block_error, + uint8_t* blk_endpoints, uint8_t* blk_weights0, uint8_t* blk_weights1, + const astc_hdr_codec_base_options& coptions, + bool direct_only, + uint32_t ise_endpoint_range, + bool uber_mode, + bool constrain_ise_weight_selectors, + int32_t first_submode, int32_t last_submode, + bool ignore_clamping); + + double encode_astc_hdr_block_mode_7( + uint32_t num_pixels, + const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + uint32_t ise_weight_range, + uint32_t& 
best_submode, + double cur_block_error, + uint8_t* blk_endpoints, //[4] + uint8_t* blk_weights, // [num_pixels] + const astc_hdr_codec_base_options& coptions, + uint32_t ise_endpoint_range, + int first_submode = 0, int last_submode = MAX_MODE7_SUBMODE_INDEX, + const encode_astc_block_stats *pBlock_stats = nullptr); + + //-------------------------------------------------------------------------------------------------------------------------- + + struct mode11_log_desc + { + int32_t m_submode; + int32_t m_maj_comp; + + // Or R0, G0, B0 if maj_comp==3 (direct) + int32_t m_a; // positive + int32_t m_c; // positive + int32_t m_b0; // positive + + // Or R1, G1, B1 if maj_comp==3 (direct) + int32_t m_b1; // positive + int32_t m_d0; // if not direct, is signed + int32_t m_d1; // if not direct, is signed + + // limits if not direct + int32_t m_a_bits, m_c_bits, m_b_bits, m_d_bits; + int32_t m_max_a_val, m_max_c_val, m_max_b_val, m_min_d_val, m_max_d_val; + + void clear() { clear_obj(*this); } + + bool is_direct() const { return m_maj_comp == 3; } + }; + + //-------------------------------------------------------------------------------------------------------------------------- + bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& rgb_q16, float s_q16, int& max_clamp_mag, uint32_t ise_weight_range, bool early_out_if_clamped, int max_clamp_mag_accept_thresh); + + bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, int val_q[2][3], int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0); + bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0); + void pack_astc_mode11_direct(uint8_t* pEndpoints, vec3F l_q16, vec3F h_q16); + + bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints); + void unpack_mode11(const uint8_t* pEndpoints, 
mode11_log_desc& desc); + + void decode_cem_11_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index); + void decode_cem_7_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index); + + void dequantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_raw_weights); + + const float* get_6x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height); + + void downsample_weight_grid( + const float* pMatrix_weights, + uint32_t bx, uint32_t by, // source/from dimension (block size) + uint32_t wx, uint32_t wy, // dest/to dimension (grid size) + const uint8_t* pSrc_weights, // these are dequantized weights, NOT ISE symbols, [by][bx] + uint8_t* pDst_weights); // [wy][wx] + + void downsample_ise_weights( + uint32_t weight_ise_range, uint32_t quant_weight_ise_range, + uint32_t block_w, uint32_t block_h, + uint32_t grid_w, uint32_t grid_h, + const uint8_t* pSrc_weights, uint8_t* pDst_weights); + + void downsample_ise_weights_dual_plane( + uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range, + uint32_t block_w, uint32_t block_h, + uint32_t grid_w, uint32_t grid_h, + const uint8_t* pSrc_weights0, const uint8_t* pSrc_weights1, + uint8_t* pDst_weights); + + bool refine_endpoints( + uint32_t cem, + uint32_t endpoint_ise_range, + uint8_t* pEndpoint_vals, // the endpoints to optimize + uint32_t block_w, uint32_t block_h, // block dimensions + uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid + uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], + const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets + astc_hdr_codec_base_options& coptions, opt_mode_t opt_mode); + + extern bool g_astc_hdr_enc_initialized; + + // This MUST be called before encoding any blocks. 
+ void astc_hdr_enc_init(); + +} // namespace basisu + diff --git a/thirdparty/basisu/encoder/basisu_backend.cpp b/thirdparty/basisu/encoder/basisu_backend.cpp new file mode 100644 index 000000000..3fa3d8892 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_backend.cpp @@ -0,0 +1,1778 @@ +// basisu_backend.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// TODO: This code originally supported full ETC1 and ETC1S, so there's some legacy stuff in here. 
+// +#include "basisu_backend.h" + +#if BASISU_SUPPORT_SSE +#define CPPSPMD_NAME(a) a##_sse41 +#include "basisu_kernels_declares.h" +#endif + +#define BASISU_FASTER_SELECTOR_REORDERING 0 +#define BASISU_BACKEND_VERIFY(c) verify(c, __LINE__); + +namespace basisu +{ + // TODO + static inline void verify(bool condition, int line) + { + if (!condition) + { + fprintf(stderr, "ERROR: basisu_backend: verify() failed at line %i!\n", line); + abort(); + } + } + + basisu_backend::basisu_backend() + { + clear(); + } + + void basisu_backend::clear() + { + m_pFront_end = NULL; + m_params.clear(); + m_output.clear(); + } + + void basisu_backend::init(basisu_frontend* pFront_end, basisu_backend_params& params, const basisu_backend_slice_desc_vec& slice_descs) + { + m_pFront_end = pFront_end; + m_params = params; + m_slices = slice_descs; + + debug_printf("basisu_backend::Init: Slices: %u, ETC1S: %u, EndpointRDOQualityThresh: %f, SelectorRDOQualityThresh: %f\n", + m_slices.size(), + params.m_etc1s, + params.m_endpoint_rdo_quality_thresh, + params.m_selector_rdo_quality_thresh); + + debug_printf("Frontend endpoints: %u selectors: %u\n", m_pFront_end->get_total_endpoint_clusters(), m_pFront_end->get_total_selector_clusters()); + + for (uint32_t i = 0; i < m_slices.size(); i++) + { + debug_printf("Slice: %u, OrigWidth: %u, OrigHeight: %u, Width: %u, Height: %u, NumBlocksX: %u, NumBlocksY: %u, FirstBlockIndex: %u\n", + i, + m_slices[i].m_orig_width, m_slices[i].m_orig_height, + m_slices[i].m_width, m_slices[i].m_height, + m_slices[i].m_num_blocks_x, m_slices[i].m_num_blocks_y, + m_slices[i].m_first_block_index); + } + } + + void basisu_backend::create_endpoint_palette() + { + const basisu_frontend& r = *m_pFront_end; + + m_output.m_num_endpoints = r.get_total_endpoint_clusters(); + + m_endpoint_palette.resize(r.get_total_endpoint_clusters()); + for (uint32_t i = 0; i < r.get_total_endpoint_clusters(); i++) + { + etc1_endpoint_palette_entry& e = m_endpoint_palette[i]; + + 
e.m_color5_valid = r.get_endpoint_cluster_color_is_used(i, false); + e.m_color5 = r.get_endpoint_cluster_unscaled_color(i, false); + e.m_inten5 = r.get_endpoint_cluster_inten_table(i, false); + + BASISU_BACKEND_VERIFY(e.m_color5_valid); + } + } + + void basisu_backend::create_selector_palette() + { + const basisu_frontend& r = *m_pFront_end; + + m_output.m_num_selectors = r.get_total_selector_clusters(); + + m_selector_palette.resize(r.get_total_selector_clusters()); + + for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++) + { + etc1_selector_palette_entry& s = m_selector_palette[i]; + + const etc_block& selector_bits = r.get_selector_cluster_selector_bits(i); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + s[y * 4 + x] = static_cast(selector_bits.get_selector(x, y)); + } + } + } + } + + static const struct + { + int8_t m_dx, m_dy; + } g_endpoint_preds[] = + { + { -1, 0 }, + { 0, -1 }, + { -1, -1 } + }; + + void basisu_backend::reoptimize_and_sort_endpoints_codebook(uint32_t total_block_endpoints_remapped, uint_vec& all_endpoint_indices) + { + basisu_frontend& r = *m_pFront_end; + //const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames; + + if (m_params.m_used_global_codebooks) + { + m_endpoint_remap_table_old_to_new.clear(); + m_endpoint_remap_table_old_to_new.resize(r.get_total_endpoint_clusters()); + for (uint32_t i = 0; i < r.get_total_endpoint_clusters(); i++) + m_endpoint_remap_table_old_to_new[i] = i; + } + else + { + //if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 0)) + if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 1)) + { + // We've changed the block endpoint indices, so we need to go and adjust the endpoint codebook (remove unused entries, optimize existing entries that have changed) + uint_vec new_block_endpoints(get_total_blocks()); + + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const 
uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + new_block_endpoints[first_block_index + block_x + block_y * num_blocks_x] = m_slice_encoder_blocks[slice_index](block_x, block_y).m_endpoint_index; + } + + int_vec old_to_new_endpoint_indices; + r.reoptimize_remapped_endpoints(new_block_endpoints, old_to_new_endpoint_indices, true); + + create_endpoint_palette(); + + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + //const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + //const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; + + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + + m.m_endpoint_index = old_to_new_endpoint_indices[m.m_endpoint_index]; + } // block_x + } // block_y + } // slice_index + + for (uint32_t i = 0; i < all_endpoint_indices.size(); i++) + all_endpoint_indices[i] = old_to_new_endpoint_indices[all_endpoint_indices[i]]; + + } //if (total_block_endpoints_remapped) + + // Sort endpoint codebook + palette_index_reorderer reorderer; + reorderer.init((uint32_t)all_endpoint_indices.size(), &all_endpoint_indices[0], r.get_total_endpoint_clusters(), nullptr, nullptr, 0); + m_endpoint_remap_table_old_to_new = reorderer.get_remap_table(); + } + + // For endpoints, 
old_to_new[] may not be bijective! + // Some "old" entries may be unused and don't get remapped into the "new" array. + + m_old_endpoint_was_used.clear(); + m_old_endpoint_was_used.resize(r.get_total_endpoint_clusters()); + uint32_t first_old_entry_index = UINT32_MAX; + + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x, num_blocks_y = m_slices[slice_index].m_num_blocks_y; + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + const uint32_t old_endpoint_index = m.m_endpoint_index; + + m_old_endpoint_was_used[old_endpoint_index] = true; + first_old_entry_index = basisu::minimum(first_old_entry_index, old_endpoint_index); + } // block_x + } // block_y + } // slice_index + + debug_printf("basisu_backend::reoptimize_and_sort_endpoints_codebook: First old entry index: %u\n", first_old_entry_index); + + m_new_endpoint_was_used.clear(); + m_new_endpoint_was_used.resize(r.get_total_endpoint_clusters()); + + m_endpoint_remap_table_new_to_old.clear(); + m_endpoint_remap_table_new_to_old.resize(r.get_total_endpoint_clusters()); + + // Set unused entries in the new array to point to the first used entry in the old array. 
+ m_endpoint_remap_table_new_to_old.set_all(first_old_entry_index); + + for (uint32_t old_index = 0; old_index < m_endpoint_remap_table_old_to_new.size(); old_index++) + { + if (m_old_endpoint_was_used[old_index]) + { + const uint32_t new_index = m_endpoint_remap_table_old_to_new[old_index]; + + m_new_endpoint_was_used[new_index] = true; + + m_endpoint_remap_table_new_to_old[new_index] = old_index; + } + } + } + + void basisu_backend::sort_selector_codebook() + { + basisu_frontend& r = *m_pFront_end; + + m_selector_remap_table_new_to_old.resize(r.get_total_selector_clusters()); + + if ((m_params.m_compression_level == 0) || (m_params.m_used_global_codebooks)) + { + for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++) + m_selector_remap_table_new_to_old[i] = i; + } + else + { + m_selector_remap_table_new_to_old[0] = 0; + uint32_t prev_selector_index = 0; + + int_vec remaining_selectors; + remaining_selectors.reserve(r.get_total_selector_clusters() - 1); + for (uint32_t i = 1; i < r.get_total_selector_clusters(); i++) + remaining_selectors.push_back(i); + + uint_vec selector_palette_bytes(m_selector_palette.size()); + for (uint32_t i = 0; i < m_selector_palette.size(); i++) + selector_palette_bytes[i] = m_selector_palette[i].get_byte(0) | (m_selector_palette[i].get_byte(1) << 8) | (m_selector_palette[i].get_byte(2) << 16) | (m_selector_palette[i].get_byte(3) << 24); + + // This is the traveling salesman problem. + for (uint32_t i = 1; i < r.get_total_selector_clusters(); i++) + { + uint32_t best_hamming_dist = 100; + uint32_t best_index = 0; + +#if BASISU_FASTER_SELECTOR_REORDERING + const uint32_t step = (remaining_selectors.size() > 16) ? 
16 : 1; + for (uint32_t j = 0; j < remaining_selectors.size(); j += step) +#else + for (uint32_t j = 0; j < remaining_selectors.size(); j++) +#endif + { + int selector_index = remaining_selectors[j]; + + uint32_t k = selector_palette_bytes[prev_selector_index] ^ selector_palette_bytes[selector_index]; + uint32_t hamming_dist = g_hamming_dist[k & 0xFF] + g_hamming_dist[(k >> 8) & 0xFF] + g_hamming_dist[(k >> 16) & 0xFF] + g_hamming_dist[k >> 24]; + + if (hamming_dist < best_hamming_dist) + { + best_hamming_dist = hamming_dist; + best_index = j; + if (best_hamming_dist <= 1) + break; + } + } + + prev_selector_index = remaining_selectors[best_index]; + m_selector_remap_table_new_to_old[i] = prev_selector_index; + + remaining_selectors[best_index] = remaining_selectors.back(); + remaining_selectors.resize(remaining_selectors.size() - 1); + } + } + + m_selector_remap_table_old_to_new.resize(r.get_total_selector_clusters()); + for (uint32_t i = 0; i < m_selector_remap_table_new_to_old.size(); i++) + m_selector_remap_table_old_to_new[m_selector_remap_table_new_to_old[i]] = i; + } + int basisu_backend::find_video_frame(int slice_index, int delta) + { + for (uint32_t s = 0; s < m_slices.size(); s++) + { + if ((int)m_slices[s].m_source_file_index != ((int)m_slices[slice_index].m_source_file_index + delta)) + continue; + if (m_slices[s].m_mip_index != m_slices[slice_index].m_mip_index) + continue; + + // Being super paranoid here. 
+ if (m_slices[s].m_num_blocks_x != (m_slices[slice_index].m_num_blocks_x)) + continue; + if (m_slices[s].m_num_blocks_y != (m_slices[slice_index].m_num_blocks_y)) + continue; + if (m_slices[s].m_alpha != (m_slices[slice_index].m_alpha)) + continue; + return s; + } + + return -1; + } + + void basisu_backend::check_for_valid_cr_blocks() + { + basisu_frontend& r = *m_pFront_end; + const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames; + + if (!is_video) + return; + + debug_printf("basisu_backend::check_for_valid_cr_blocks\n"); + + uint32_t total_crs = 0; + uint32_t total_invalid_crs = 0; + + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const bool is_iframe = m_slices[slice_index].m_iframe; + //const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + const int prev_frame_slice_index = find_video_frame(slice_index, -1); + + // If we don't have a previous frame, and we're not an i-frame, something is wrong. + if ((prev_frame_slice_index < 0) && (!is_iframe)) + { + BASISU_BACKEND_VERIFY(0); + } + + if ((is_iframe) || (prev_frame_slice_index < 0)) + { + // Ensure no blocks use CR's + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + BASISU_BACKEND_VERIFY(m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX); + } + } + } + else + { + // For blocks that use CR's, make sure the endpoints/selectors haven't really changed. 
+ for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + + if (m.m_endpoint_predictor == basist::CR_ENDPOINT_PRED_INDEX) + { + total_crs++; + + encoder_block& prev_m = m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y); + + if ((m.m_endpoint_index != prev_m.m_endpoint_index) || (m.m_selector_index != prev_m.m_selector_index)) + { + total_invalid_crs++; + } + } + } // block_x + } // block_y + + } // !slice_index + + } // slice_index + + debug_printf("Total CR's: %u, Total invalid CR's: %u\n", total_crs, total_invalid_crs); + + BASISU_BACKEND_VERIFY(total_invalid_crs == 0); + } + + void basisu_backend::create_encoder_blocks() + { + debug_printf("basisu_backend::create_encoder_blocks\n"); + + interval_timer tm; + tm.start(); + + basisu_frontend& r = *m_pFront_end; + const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames; + + m_slice_encoder_blocks.resize(m_slices.size()); + + uint32_t total_endpoint_pred_missed = 0, total_endpoint_pred_hits = 0, total_block_endpoints_remapped = 0; + + uint_vec all_endpoint_indices; + all_endpoint_indices.reserve(get_total_blocks()); + + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const int prev_frame_slice_index = is_video ? 
find_video_frame(slice_index, -1) : -1; + const bool is_iframe = m_slices[slice_index].m_iframe; + const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + + m_slice_encoder_blocks[slice_index].resize(num_blocks_x, num_blocks_y); + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; + + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + + m.m_endpoint_index = r.get_subblock_endpoint_cluster_index(block_index, 0); + BASISU_BACKEND_VERIFY(r.get_subblock_endpoint_cluster_index(block_index, 0) == r.get_subblock_endpoint_cluster_index(block_index, 1)); + + m.m_selector_index = r.get_block_selector_cluster_index(block_index); + + m.m_endpoint_predictor = basist::NO_ENDPOINT_PRED_INDEX; + + const uint32_t block_endpoint = m.m_endpoint_index; + + uint32_t best_endpoint_pred = UINT32_MAX; + + for (uint32_t endpoint_pred = 0; endpoint_pred < basist::NUM_ENDPOINT_PREDS; endpoint_pred++) + { + if ((is_video) && (endpoint_pred == basist::CR_ENDPOINT_PRED_INDEX)) + { + if ((prev_frame_slice_index != -1) && (!is_iframe)) + { + const uint32_t cur_endpoint = m_slice_encoder_blocks[slice_index](block_x, block_y).m_endpoint_index; + const uint32_t cur_selector = m_slice_encoder_blocks[slice_index](block_x, block_y).m_selector_index; + const uint32_t prev_endpoint = m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y).m_endpoint_index; + const uint32_t prev_selector = m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y).m_selector_index; + if ((cur_endpoint == prev_endpoint) && (cur_selector 
== prev_selector)) + { + best_endpoint_pred = basist::CR_ENDPOINT_PRED_INDEX; + m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y).m_is_cr_target = true; + } + } + } + else + { + int pred_block_x = block_x + g_endpoint_preds[endpoint_pred].m_dx; + if ((pred_block_x < 0) || (pred_block_x >= (int)num_blocks_x)) + continue; + + int pred_block_y = block_y + g_endpoint_preds[endpoint_pred].m_dy; + if ((pred_block_y < 0) || (pred_block_y >= (int)num_blocks_y)) + continue; + + uint32_t pred_endpoint = m_slice_encoder_blocks[slice_index](pred_block_x, pred_block_y).m_endpoint_index; + + if (pred_endpoint == block_endpoint) + { + if (endpoint_pred < best_endpoint_pred) + { + best_endpoint_pred = endpoint_pred; + } + } + } + + } // endpoint_pred + + if (best_endpoint_pred != UINT32_MAX) + { + m.m_endpoint_predictor = best_endpoint_pred; + + total_endpoint_pred_hits++; + } + else if (m_params.m_endpoint_rdo_quality_thresh > 0.0f) + { + const pixel_block& src_pixels = r.get_source_pixel_block(block_index); + + etc_block etc_blk(r.get_output_block(block_index)); + + uint64_t cur_err = etc_blk.evaluate_etc1_error(src_pixels.get_ptr(), r.get_params().m_perceptual); + + if (cur_err) + { + const uint64_t thresh_err = (uint64_t)(cur_err * maximum(1.0f, m_params.m_endpoint_rdo_quality_thresh)); + + etc_block trial_etc_block(etc_blk); + + uint64_t best_err = UINT64_MAX; + uint32_t best_endpoint_index = 0; + + best_endpoint_pred = UINT32_MAX; + + for (uint32_t endpoint_pred = 0; endpoint_pred < basist::NUM_ENDPOINT_PREDS; endpoint_pred++) + { + if ((is_video) && (endpoint_pred == basist::CR_ENDPOINT_PRED_INDEX)) + continue; + + int pred_block_x = block_x + g_endpoint_preds[endpoint_pred].m_dx; + if ((pred_block_x < 0) || (pred_block_x >= (int)num_blocks_x)) + continue; + + int pred_block_y = block_y + g_endpoint_preds[endpoint_pred].m_dy; + if ((pred_block_y < 0) || (pred_block_y >= (int)num_blocks_y)) + continue; + + uint32_t pred_endpoint_index = 
m_slice_encoder_blocks[slice_index](pred_block_x, pred_block_y).m_endpoint_index; + + uint32_t pred_inten = r.get_endpoint_cluster_inten_table(pred_endpoint_index, false); + color_rgba pred_color = r.get_endpoint_cluster_unscaled_color(pred_endpoint_index, false); + + trial_etc_block.set_block_color5(pred_color, pred_color); + trial_etc_block.set_inten_table(0, pred_inten); + trial_etc_block.set_inten_table(1, pred_inten); + + color_rgba trial_colors[16]; + unpack_etc1(trial_etc_block, trial_colors); + + uint64_t trial_err = 0; + if (r.get_params().m_perceptual) + { + for (uint32_t p = 0; p < 16; p++) + { + trial_err += color_distance(true, src_pixels.get_ptr()[p], trial_colors[p], false); + if (trial_err > thresh_err) + break; + } + } + else + { + for (uint32_t p = 0; p < 16; p++) + { + trial_err += color_distance(false, src_pixels.get_ptr()[p], trial_colors[p], false); + if (trial_err > thresh_err) + break; + } + } + + if (trial_err <= thresh_err) + { + if ((trial_err < best_err) || ((trial_err == best_err) && (endpoint_pred < best_endpoint_pred))) + { + best_endpoint_pred = endpoint_pred; + best_err = trial_err; + best_endpoint_index = pred_endpoint_index; + } + } + } // endpoint_pred + + if (best_endpoint_pred != UINT32_MAX) + { + m.m_endpoint_index = best_endpoint_index; + m.m_endpoint_predictor = best_endpoint_pred; + + total_endpoint_pred_hits++; + total_block_endpoints_remapped++; + } + else + { + total_endpoint_pred_missed++; + } + } + } + else + { + total_endpoint_pred_missed++; + } + + if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX) + { + all_endpoint_indices.push_back(m.m_endpoint_index); + } + + } // block_x + + } // block_y + + } // slice + + debug_printf("total_endpoint_pred_missed: %u (%3.2f%%) total_endpoint_pred_hit: %u (%3.2f%%), total_block_endpoints_remapped: %u (%3.2f%%)\n", + total_endpoint_pred_missed, total_endpoint_pred_missed * 100.0f / get_total_blocks(), + total_endpoint_pred_hits, total_endpoint_pred_hits * 100.0f / 
get_total_blocks(), + total_block_endpoints_remapped, total_block_endpoints_remapped * 100.0f / get_total_blocks()); + + reoptimize_and_sort_endpoints_codebook(total_block_endpoints_remapped, all_endpoint_indices); + + sort_selector_codebook(); + check_for_valid_cr_blocks(); + + debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); + } + + void basisu_backend::compute_slice_crcs() + { + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + //const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + const uint32_t width = m_slices[slice_index].m_width; + const uint32_t height = m_slices[slice_index].m_height; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + + gpu_image gi; + gi.init(texture_format::cETC1, width, height); + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + //const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; + + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + + { + etc_block& output_block = *(etc_block*)gi.get_block_ptr(block_x, block_y); + + output_block.set_diff_bit(true); + // Setting the flip bit to false to be compatible with the Khronos KDFS. 
+ //output_block.set_flip_bit(true); + output_block.set_flip_bit(false); + + const uint32_t endpoint_index = m.m_endpoint_index; + + output_block.set_block_color5_etc1s(m_endpoint_palette[endpoint_index].m_color5); + output_block.set_inten_tables_etc1s(m_endpoint_palette[endpoint_index].m_inten5); + + const uint32_t selector_idx = m.m_selector_index; + + const etc1_selector_palette_entry& selectors = m_selector_palette[selector_idx]; + for (uint32_t sy = 0; sy < 4; sy++) + for (uint32_t sx = 0; sx < 4; sx++) + output_block.set_selector(sx, sy, selectors(sx, sy)); + } + + } // block_x + } // block_y + + m_output.m_slice_image_crcs[slice_index] = basist::crc16(gi.get_ptr(), gi.get_size_in_bytes(), 0); + + if (m_params.m_debug_images) + { + image gi_unpacked; + gi.unpack(gi_unpacked); + + char buf[256]; +#ifdef _WIN32 + sprintf_s(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index); +#else + snprintf(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index); +#endif + save_png(buf, gi_unpacked); + } + + } // slice_index + } + + //uint32_t g_color_delta_hist[255 * 3 + 1]; + //uint32_t g_color_delta_bad_hist[255 * 3 + 1]; + + // TODO: Split this into multiple methods. 
+ bool basisu_backend::encode_image() + { + basisu_frontend& r = *m_pFront_end; + const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames; + + uint32_t total_used_selector_history_buf = 0; + uint32_t total_selector_indices_remapped = 0; + + basist::approx_move_to_front selector_history_buf(basist::MAX_SELECTOR_HISTORY_BUF_SIZE); + histogram selector_history_buf_histogram(basist::MAX_SELECTOR_HISTORY_BUF_SIZE); + histogram selector_histogram(r.get_total_selector_clusters() + basist::MAX_SELECTOR_HISTORY_BUF_SIZE + 1); + histogram selector_history_buf_rle_histogram(1 << basist::SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); + + basisu::vector selector_syms(m_slices.size()); + + const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = r.get_total_selector_clusters(); + const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + basist::MAX_SELECTOR_HISTORY_BUF_SIZE; + + m_output.m_slice_image_crcs.resize(m_slices.size()); + + histogram delta_endpoint_histogram(r.get_total_endpoint_clusters()); + + histogram endpoint_pred_histogram(basist::ENDPOINT_PRED_TOTAL_SYMBOLS); + basisu::vector endpoint_pred_syms(m_slices.size()); + + uint32_t total_endpoint_indices_remapped = 0; + + uint_vec block_endpoint_indices, block_selector_indices; + + interval_timer tm; + tm.start(); + + const int COLOR_DELTA_THRESH = 8; + const int SEL_DIFF_THRESHOLD = 11; + + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + //const int prev_frame_slice_index = is_video ? find_video_frame(slice_index, -1) : -1; + //const int next_frame_slice_index = is_video ? 
find_video_frame(slice_index, 1) : -1; + const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + + selector_history_buf.reset(); + + int selector_history_buf_rle_count = 0; + + int prev_endpoint_pred_sym_bits = -1, endpoint_pred_repeat_count = 0; + + uint32_t prev_endpoint_index = 0; + + vector2D block_endpoints_are_referenced(num_blocks_x, num_blocks_y); + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + //const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; + + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + + if (m.m_endpoint_predictor == 0) + block_endpoints_are_referenced(block_x - 1, block_y) = true; + else if (m.m_endpoint_predictor == 1) + block_endpoints_are_referenced(block_x, block_y - 1) = true; + else if (m.m_endpoint_predictor == 2) + { + if (!is_video) + block_endpoints_are_referenced(block_x - 1, block_y - 1) = true; + } + if (is_video) + { + if (m.m_is_cr_target) + block_endpoints_are_referenced(block_x, block_y) = true; + } + + } // block_x + } // block_y + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; + + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + + if (((block_x & 1) == 0) && ((block_y & 1) == 0)) + { + uint32_t endpoint_pred_cur_sym_bits = 0; + + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const uint32_t bx = block_x + x; + const uint32_t by = block_y + y; + + uint32_t pred = 
basist::NO_ENDPOINT_PRED_INDEX; + if ((bx < num_blocks_x) && (by < num_blocks_y)) + pred = m_slice_encoder_blocks[slice_index](bx, by).m_endpoint_predictor; + + endpoint_pred_cur_sym_bits |= (pred << (x * 2 + y * 4)); + } + } + + if ((int)endpoint_pred_cur_sym_bits == prev_endpoint_pred_sym_bits) + { + endpoint_pred_repeat_count++; + } + else + { + if (endpoint_pred_repeat_count > 0) + { + if (endpoint_pred_repeat_count > (int)basist::ENDPOINT_PRED_MIN_REPEAT_COUNT) + { + endpoint_pred_histogram.inc(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL); + endpoint_pred_syms[slice_index].push_back(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL); + + endpoint_pred_syms[slice_index].push_back(endpoint_pred_repeat_count); + } + else + { + for (int j = 0; j < endpoint_pred_repeat_count; j++) + { + endpoint_pred_histogram.inc(prev_endpoint_pred_sym_bits); + endpoint_pred_syms[slice_index].push_back(prev_endpoint_pred_sym_bits); + } + } + + endpoint_pred_repeat_count = 0; + } + + endpoint_pred_histogram.inc(endpoint_pred_cur_sym_bits); + endpoint_pred_syms[slice_index].push_back(endpoint_pred_cur_sym_bits); + + prev_endpoint_pred_sym_bits = endpoint_pred_cur_sym_bits; + } + } + + int new_endpoint_index = m_endpoint_remap_table_old_to_new[m.m_endpoint_index]; + + if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX) + { + int endpoint_delta = new_endpoint_index - prev_endpoint_index; + + if ((m_params.m_endpoint_rdo_quality_thresh > 1.0f) && (iabs(endpoint_delta) > 1) && (!block_endpoints_are_referenced(block_x, block_y))) + { + const pixel_block& src_pixels = r.get_source_pixel_block(block_index); + + etc_block etc_blk(r.get_output_block(block_index)); + + const uint64_t cur_err = etc_blk.evaluate_etc1_error(src_pixels.get_ptr(), r.get_params().m_perceptual); + const uint32_t cur_inten5 = etc_blk.get_inten_table(0); + + const etc1_endpoint_palette_entry& cur_endpoints = m_endpoint_palette[m.m_endpoint_index]; + + if (cur_err) + { + const float endpoint_remap_thresh = maximum(1.0f, 
m_params.m_endpoint_rdo_quality_thresh); + const uint64_t thresh_err = (uint64_t)(cur_err * endpoint_remap_thresh); + + //const int MAX_ENDPOINT_SEARCH_DIST = (m_params.m_compression_level >= 2) ? 64 : 32; + const int MAX_ENDPOINT_SEARCH_DIST = (m_params.m_compression_level >= 2) ? 64 : 16; + + if (!g_cpu_supports_sse41) + { + const uint64_t initial_best_trial_err = UINT64_MAX; + uint64_t best_trial_err = initial_best_trial_err; + int best_trial_idx = 0; + + etc_block trial_etc_blk(etc_blk); + + const int search_dist = minimum(iabs(endpoint_delta) - 1, MAX_ENDPOINT_SEARCH_DIST); + for (int d = -search_dist; d < search_dist; d++) + { + int trial_idx = prev_endpoint_index + d; + if (trial_idx < 0) + trial_idx += (int)r.get_total_endpoint_clusters(); + else if (trial_idx >= (int)r.get_total_endpoint_clusters()) + trial_idx -= (int)r.get_total_endpoint_clusters(); + + if (trial_idx == new_endpoint_index) + continue; + + // Skip it if this new endpoint palette entry is actually never used. + if (!m_new_endpoint_was_used[trial_idx]) + continue; + + const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]]; + + if (m_params.m_compression_level <= 1) + { + if (p.m_inten5 > cur_inten5) + continue; + + int delta_r = iabs(cur_endpoints.m_color5.r - p.m_color5.r); + int delta_g = iabs(cur_endpoints.m_color5.g - p.m_color5.g); + int delta_b = iabs(cur_endpoints.m_color5.b - p.m_color5.b); + int color_delta = delta_r + delta_g + delta_b; + + if (color_delta > COLOR_DELTA_THRESH) + continue; + } + + trial_etc_blk.set_block_color5_etc1s(p.m_color5); + trial_etc_blk.set_inten_tables_etc1s(p.m_inten5); + + uint64_t trial_err = trial_etc_blk.evaluate_etc1_error(src_pixels.get_ptr(), r.get_params().m_perceptual); + + if ((trial_err < best_trial_err) && (trial_err <= thresh_err)) + { + best_trial_err = trial_err; + best_trial_idx = trial_idx; + } + } + + if (best_trial_err != initial_best_trial_err) + { + m.m_endpoint_index = 
m_endpoint_remap_table_new_to_old[best_trial_idx]; + + new_endpoint_index = best_trial_idx; + + endpoint_delta = new_endpoint_index - prev_endpoint_index; + + total_endpoint_indices_remapped++; + } + } + else + { +#if BASISU_SUPPORT_SSE + uint8_t block_selectors[16]; + for (uint32_t i = 0; i < 16; i++) + block_selectors[i] = (uint8_t)etc_blk.get_selector(i & 3, i >> 2); + + const int64_t initial_best_trial_err = INT64_MAX; + int64_t best_trial_err = initial_best_trial_err; + int best_trial_idx = 0; + + const int search_dist = minimum(iabs(endpoint_delta) - 1, MAX_ENDPOINT_SEARCH_DIST); + for (int d = -search_dist; d < search_dist; d++) + { + int trial_idx = prev_endpoint_index + d; + if (trial_idx < 0) + trial_idx += (int)r.get_total_endpoint_clusters(); + else if (trial_idx >= (int)r.get_total_endpoint_clusters()) + trial_idx -= (int)r.get_total_endpoint_clusters(); + + if (trial_idx == new_endpoint_index) + continue; + + // Skip it if this new endpoint palette entry is actually never used. 
+ if (!m_new_endpoint_was_used[trial_idx]) + continue; + + const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]]; + + if (m_params.m_compression_level <= 1) + { + if (p.m_inten5 > cur_inten5) + continue; + + int delta_r = iabs(cur_endpoints.m_color5.r - p.m_color5.r); + int delta_g = iabs(cur_endpoints.m_color5.g - p.m_color5.g); + int delta_b = iabs(cur_endpoints.m_color5.b - p.m_color5.b); + int color_delta = delta_r + delta_g + delta_b; + + if (color_delta > COLOR_DELTA_THRESH) + continue; + } + + color_rgba block_colors[4]; + etc_block::get_block_colors_etc1s(block_colors, p.m_color5, p.m_inten5); + + int64_t trial_err; + if (r.get_params().m_perceptual) + { + perceptual_distance_rgb_4_N_sse41(&trial_err, block_selectors, block_colors, src_pixels.get_ptr(), 16, best_trial_err); + } + else + { + linear_distance_rgb_4_N_sse41(&trial_err, block_selectors, block_colors, src_pixels.get_ptr(), 16, best_trial_err); + } + + //if (trial_err > thresh_err) + // g_color_delta_bad_hist[color_delta]++; + + if ((trial_err < best_trial_err) && (trial_err <= (int64_t)thresh_err)) + { + best_trial_err = trial_err; + best_trial_idx = trial_idx; + } + } + + if (best_trial_err != initial_best_trial_err) + { + m.m_endpoint_index = m_endpoint_remap_table_new_to_old[best_trial_idx]; + + new_endpoint_index = best_trial_idx; + + endpoint_delta = new_endpoint_index - prev_endpoint_index; + + total_endpoint_indices_remapped++; + } +#endif // BASISU_SUPPORT_SSE + } // if (!g_cpu_supports_sse41) + + } // if (cur_err) + + } // if ((m_params.m_endpoint_rdo_quality_thresh > 1.0f) && (iabs(endpoint_delta) > 1) && (!block_endpoints_are_referenced(block_x, block_y))) + + if (endpoint_delta < 0) + endpoint_delta += (int)r.get_total_endpoint_clusters(); + + delta_endpoint_histogram.inc(endpoint_delta); + + } // if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX) + + 
block_endpoint_indices.push_back(m_endpoint_remap_table_new_to_old[new_endpoint_index]); + + prev_endpoint_index = new_endpoint_index; + + if ((!is_video) || (m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX)) + { + int new_selector_index = m_selector_remap_table_old_to_new[m.m_selector_index]; + + const float selector_remap_thresh = maximum(1.0f, m_params.m_selector_rdo_quality_thresh); //2.5f; + + int selector_history_buf_index = -1; + + // At low comp levels this hurts compression a tiny amount, but is significantly faster so it's a good tradeoff. + if ((m.m_is_cr_target) || (m_params.m_compression_level <= 1)) + { + for (uint32_t j = 0; j < selector_history_buf.size(); j++) + { + const int trial_idx = selector_history_buf[j]; + if (trial_idx == new_selector_index) + { + total_used_selector_history_buf++; + selector_history_buf_index = j; + selector_history_buf_histogram.inc(j); + break; + } + } + } + + // If the block is a CR target we can't override its selectors. + if ((!m.m_is_cr_target) && (selector_history_buf_index == -1)) + { + const pixel_block& src_pixels = r.get_source_pixel_block(block_index); + + etc_block etc_blk = r.get_output_block(block_index); + + // This is new code - the initial release just used the endpoints from the frontend, which isn't correct/accurate. 
+ const etc1_endpoint_palette_entry& q = m_endpoint_palette[m_endpoint_remap_table_new_to_old[new_endpoint_index]]; + etc_blk.set_block_color5_etc1s(q.m_color5); + etc_blk.set_inten_tables_etc1s(q.m_inten5); + + color_rgba block_colors[4]; + etc_blk.get_block_colors(block_colors, 0); + + const uint8_t* pCur_selectors = &m_selector_palette[m.m_selector_index][0]; + + uint64_t cur_err = 0; + if (r.get_params().m_perceptual) + { + for (uint32_t p = 0; p < 16; p++) + cur_err += color_distance(true, src_pixels.get_ptr()[p], block_colors[pCur_selectors[p]], false); + } + else + { + for (uint32_t p = 0; p < 16; p++) + cur_err += color_distance(false, src_pixels.get_ptr()[p], block_colors[pCur_selectors[p]], false); + } + + const uint64_t limit_err = (uint64_t)ceilf(cur_err * selector_remap_thresh); + + // Even if cur_err==limit_err, we still want to scan the history buffer because there may be equivalent entries that are cheaper to code. + + uint64_t best_trial_err = UINT64_MAX; + int best_trial_idx = 0; + uint32_t best_trial_history_buf_idx = 0; + + for (uint32_t j = 0; j < selector_history_buf.size(); j++) + { + const int trial_idx = selector_history_buf[j]; + + const uint8_t* pSelectors = &m_selector_palette[m_selector_remap_table_new_to_old[trial_idx]][0]; + + if (m_params.m_compression_level <= 1) + { + // Predict if evaluating the full color error would cause an early out, by summing the abs err of the selector indices. 
+ int sel_diff = 0; + for (uint32_t p = 0; p < 16; p += 4) + { + sel_diff += iabs(pCur_selectors[p + 0] - pSelectors[p + 0]); + sel_diff += iabs(pCur_selectors[p + 1] - pSelectors[p + 1]); + sel_diff += iabs(pCur_selectors[p + 2] - pSelectors[p + 2]); + sel_diff += iabs(pCur_selectors[p + 3] - pSelectors[p + 3]); + if (sel_diff >= SEL_DIFF_THRESHOLD) + break; + } + if (sel_diff >= SEL_DIFF_THRESHOLD) + continue; + } + + const uint64_t thresh_err = minimum(limit_err, best_trial_err); + uint64_t trial_err = 0; + + // This tends to early out quickly, so SSE has a hard time competing. + if (r.get_params().m_perceptual) + { + for (uint32_t p = 0; p < 16; p++) + { + uint32_t sel = pSelectors[p]; + trial_err += color_distance(true, src_pixels.get_ptr()[p], block_colors[sel], false); + if (trial_err > thresh_err) + break; + } + } + else + { + for (uint32_t p = 0; p < 16; p++) + { + uint32_t sel = pSelectors[p]; + trial_err += color_distance(false, src_pixels.get_ptr()[p], block_colors[sel], false); + if (trial_err > thresh_err) + break; + } + } + + if ((trial_err < best_trial_err) && (trial_err <= thresh_err)) + { + assert(trial_err <= limit_err); + + best_trial_err = trial_err; + best_trial_idx = trial_idx; + best_trial_history_buf_idx = j; + } + } + + if (best_trial_err != UINT64_MAX) + { + if (new_selector_index != best_trial_idx) + total_selector_indices_remapped++; + + new_selector_index = best_trial_idx; + + total_used_selector_history_buf++; + + selector_history_buf_index = best_trial_history_buf_idx; + + selector_history_buf_histogram.inc(best_trial_history_buf_idx); + } + + } // if (m_params.m_selector_rdo_quality_thresh > 0.0f) + + m.m_selector_index = m_selector_remap_table_new_to_old[new_selector_index]; + + + if ((selector_history_buf_rle_count) && (selector_history_buf_index != 0)) + { + if (selector_history_buf_rle_count >= (int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH) + { + selector_syms[slice_index].push_back(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX); + 
selector_syms[slice_index].push_back(selector_history_buf_rle_count); + + int run_sym = selector_history_buf_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH; + if (run_sym >= ((int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1)) + selector_history_buf_rle_histogram.inc(basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1); + else + selector_history_buf_rle_histogram.inc(run_sym); + + selector_histogram.inc(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX); + } + else + { + for (int k = 0; k < selector_history_buf_rle_count; k++) + { + uint32_t sym_index = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + 0; + + selector_syms[slice_index].push_back(sym_index); + + selector_histogram.inc(sym_index); + } + } + + selector_history_buf_rle_count = 0; + } + + if (selector_history_buf_index >= 0) + { + if (selector_history_buf_index == 0) + selector_history_buf_rle_count++; + else + { + uint32_t history_buf_sym = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + selector_history_buf_index; + + selector_syms[slice_index].push_back(history_buf_sym); + + selector_histogram.inc(history_buf_sym); + } + } + else + { + selector_syms[slice_index].push_back(new_selector_index); + + selector_histogram.inc(new_selector_index); + } + + m.m_selector_history_buf_index = selector_history_buf_index; + + if (selector_history_buf_index < 0) + selector_history_buf.add(new_selector_index); + else if (selector_history_buf.size()) + selector_history_buf.use(selector_history_buf_index); + } + block_selector_indices.push_back(m.m_selector_index); + + } // block_x + + } // block_y + + if (endpoint_pred_repeat_count > 0) + { + if (endpoint_pred_repeat_count > (int)basist::ENDPOINT_PRED_MIN_REPEAT_COUNT) + { + endpoint_pred_histogram.inc(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL); + endpoint_pred_syms[slice_index].push_back(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL); + + endpoint_pred_syms[slice_index].push_back(endpoint_pred_repeat_count); + } + else + { + for (int j = 0; j < endpoint_pred_repeat_count; j++) + { + 
endpoint_pred_histogram.inc(prev_endpoint_pred_sym_bits); + endpoint_pred_syms[slice_index].push_back(prev_endpoint_pred_sym_bits); + } + } + + endpoint_pred_repeat_count = 0; + } + + if (selector_history_buf_rle_count) + { + if (selector_history_buf_rle_count >= (int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH) + { + selector_syms[slice_index].push_back(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX); + selector_syms[slice_index].push_back(selector_history_buf_rle_count); + + int run_sym = selector_history_buf_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH; + if (run_sym >= ((int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1)) + selector_history_buf_rle_histogram.inc(basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1); + else + selector_history_buf_rle_histogram.inc(run_sym); + + selector_histogram.inc(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX); + } + else + { + for (int i = 0; i < selector_history_buf_rle_count; i++) + { + uint32_t sym_index = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + 0; + + selector_syms[slice_index].push_back(sym_index); + + selector_histogram.inc(sym_index); + } + } + + selector_history_buf_rle_count = 0; + } + + } // slice_index + + //for (int i = 0; i <= 255 * 3; i++) + //{ + // printf("%u, %u, %f\n", g_color_delta_bad_hist[i], g_color_delta_hist[i], g_color_delta_hist[i] ? 
g_color_delta_bad_hist[i] / (float)g_color_delta_hist[i] : 0); + //} + + double total_prep_time = tm.get_elapsed_secs(); + debug_printf("basisu_backend::encode_image: Total prep time: %3.2f\n", total_prep_time); + + debug_printf("Endpoint pred RDO total endpoint indices remapped: %u %3.2f%%\n", + total_endpoint_indices_remapped, total_endpoint_indices_remapped * 100.0f / get_total_blocks()); + + debug_printf("Selector history RDO total selector indices remapped: %u %3.2f%%, Used history buf: %u %3.2f%%\n", + total_selector_indices_remapped, total_selector_indices_remapped * 100.0f / get_total_blocks(), + total_used_selector_history_buf, total_used_selector_history_buf * 100.0f / get_total_blocks()); + + //if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 0)) + if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 1) && (!m_params.m_used_global_codebooks)) + { + int_vec unused; + r.reoptimize_remapped_endpoints(block_endpoint_indices, unused, false, &block_selector_indices); + + create_endpoint_palette(); + } + + check_for_valid_cr_blocks(); + compute_slice_crcs(); + + double endpoint_pred_entropy = endpoint_pred_histogram.get_entropy() / endpoint_pred_histogram.get_total(); + double delta_endpoint_entropy = delta_endpoint_histogram.get_entropy() / delta_endpoint_histogram.get_total(); + double selector_entropy = selector_histogram.get_entropy() / selector_histogram.get_total(); + + debug_printf("Histogram entropy: EndpointPred: %3.3f DeltaEndpoint: %3.3f DeltaSelector: %3.3f\n", endpoint_pred_entropy, delta_endpoint_entropy, selector_entropy); + + if (!endpoint_pred_histogram.get_total()) + endpoint_pred_histogram.inc(0); + huffman_encoding_table endpoint_pred_model; + if (!endpoint_pred_model.init(endpoint_pred_histogram, 16)) + { + error_printf("endpoint_pred_model.init() failed!"); + return false; + } + + if (!delta_endpoint_histogram.get_total()) + delta_endpoint_histogram.inc(0); + huffman_encoding_table 
delta_endpoint_model; + if (!delta_endpoint_model.init(delta_endpoint_histogram, 16)) + { + error_printf("delta_endpoint_model.init() failed!"); + return false; + } + if (!selector_histogram.get_total()) + selector_histogram.inc(0); + + huffman_encoding_table selector_model; + if (!selector_model.init(selector_histogram, 16)) + { + error_printf("selector_model.init() failed!"); + return false; + } + + if (!selector_history_buf_rle_histogram.get_total()) + selector_history_buf_rle_histogram.inc(0); + + huffman_encoding_table selector_history_buf_rle_model; + if (!selector_history_buf_rle_model.init(selector_history_buf_rle_histogram, 16)) + { + error_printf("selector_history_buf_rle_model.init() failed!"); + return false; + } + + bitwise_coder coder; + coder.init(1024 * 1024 * 4); + + uint32_t endpoint_pred_model_bits = coder.emit_huffman_table(endpoint_pred_model); + uint32_t delta_endpoint_bits = coder.emit_huffman_table(delta_endpoint_model); + uint32_t selector_model_bits = coder.emit_huffman_table(selector_model); + uint32_t selector_history_buf_run_sym_bits = coder.emit_huffman_table(selector_history_buf_rle_model); + + coder.put_bits(basist::MAX_SELECTOR_HISTORY_BUF_SIZE, 13); + + debug_printf("Model sizes: EndpointPred: %u bits %u bytes (%3.3f bpp) DeltaEndpoint: %u bits %u bytes (%3.3f bpp) Selector: %u bits %u bytes (%3.3f bpp) SelectorHistBufRLE: %u bits %u bytes (%3.3f bpp)\n", + endpoint_pred_model_bits, (endpoint_pred_model_bits + 7) / 8, endpoint_pred_model_bits / float(get_total_input_texels()), + delta_endpoint_bits, (delta_endpoint_bits + 7) / 8, delta_endpoint_bits / float(get_total_input_texels()), + selector_model_bits, (selector_model_bits + 7) / 8, selector_model_bits / float(get_total_input_texels()), + selector_history_buf_run_sym_bits, (selector_history_buf_run_sym_bits + 7) / 8, selector_history_buf_run_sym_bits / float(get_total_input_texels())); + + coder.flush(); + + m_output.m_slice_image_tables = coder.get_bytes(); + + uint32_t 
total_endpoint_pred_bits = 0, total_delta_endpoint_bits = 0, total_selector_bits = 0; + + uint32_t total_image_bytes = 0; + + m_output.m_slice_image_data.resize(m_slices.size()); + + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + + coder.init(1024 * 1024 * 4); + + uint32_t cur_selector_sym_ofs = 0; + uint32_t selector_rle_count = 0; + + int endpoint_pred_repeat_count = 0; + uint32_t cur_endpoint_pred_sym_ofs = 0; +// uint32_t prev_endpoint_pred_sym = 0; + uint32_t prev_endpoint_index = 0; + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + const encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + + if (((block_x & 1) == 0) && ((block_y & 1) == 0)) + { + if (endpoint_pred_repeat_count > 0) + { + endpoint_pred_repeat_count--; + } + else + { + uint32_t sym = endpoint_pred_syms[slice_index][cur_endpoint_pred_sym_ofs++]; + + if (sym == basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL) + { + total_endpoint_pred_bits += coder.put_code(sym, endpoint_pred_model); + + endpoint_pred_repeat_count = endpoint_pred_syms[slice_index][cur_endpoint_pred_sym_ofs++]; + assert(endpoint_pred_repeat_count >= (int)basist::ENDPOINT_PRED_MIN_REPEAT_COUNT); + + total_endpoint_pred_bits += coder.put_vlc(endpoint_pred_repeat_count - basist::ENDPOINT_PRED_MIN_REPEAT_COUNT, basist::ENDPOINT_PRED_COUNT_VLC_BITS); + + endpoint_pred_repeat_count--; + } + else + { + total_endpoint_pred_bits += coder.put_code(sym, endpoint_pred_model); + + //prev_endpoint_pred_sym = sym; + } + } + } + + const int new_endpoint_index = m_endpoint_remap_table_old_to_new[m.m_endpoint_index]; + + if (m.m_endpoint_predictor == 
basist::NO_ENDPOINT_PRED_INDEX) + { + int endpoint_delta = new_endpoint_index - prev_endpoint_index; + if (endpoint_delta < 0) + endpoint_delta += (int)r.get_total_endpoint_clusters(); + + total_delta_endpoint_bits += coder.put_code(endpoint_delta, delta_endpoint_model); + } + + prev_endpoint_index = new_endpoint_index; + + if ((!is_video) || (m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX)) + { + if (!selector_rle_count) + { + uint32_t selector_sym_index = selector_syms[slice_index][cur_selector_sym_ofs++]; + + if (selector_sym_index == SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX) + selector_rle_count = selector_syms[slice_index][cur_selector_sym_ofs++]; + + total_selector_bits += coder.put_code(selector_sym_index, selector_model); + + if (selector_sym_index == SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX) + { + int run_sym = selector_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH; + if (run_sym >= ((int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1)) + { + total_selector_bits += coder.put_code(basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1, selector_history_buf_rle_model); + + uint32_t n = selector_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH; + total_selector_bits += coder.put_vlc(n, 7); + } + else + total_selector_bits += coder.put_code(run_sym, selector_history_buf_rle_model); + } + } + + if (selector_rle_count) + selector_rle_count--; + } + + } // block_x + + } // block_y + + BASISU_BACKEND_VERIFY(cur_endpoint_pred_sym_ofs == endpoint_pred_syms[slice_index].size()); + BASISU_BACKEND_VERIFY(cur_selector_sym_ofs == selector_syms[slice_index].size()); + + coder.flush(); + + m_output.m_slice_image_data[slice_index] = coder.get_bytes(); + + total_image_bytes += (uint32_t)coder.get_bytes().size(); + + debug_printf("Slice %u compressed size: %u bytes, %3.3f bits per slice texel\n", slice_index, m_output.m_slice_image_data[slice_index].size(), m_output.m_slice_image_data[slice_index].size() * 8.0f / (m_slices[slice_index].m_orig_width * 
m_slices[slice_index].m_orig_height)); + + } // slice_index + + const double total_texels = static_cast(get_total_input_texels()); + const double total_blocks = static_cast(get_total_blocks()); + + debug_printf("Total endpoint pred bits: %u bytes: %u bits/texel: %3.3f bits/block: %3.3f\n", total_endpoint_pred_bits, total_endpoint_pred_bits / 8, total_endpoint_pred_bits / total_texels, total_endpoint_pred_bits / total_blocks); + debug_printf("Total delta endpoint bits: %u bytes: %u bits/texel: %3.3f bits/block: %3.3f\n", total_delta_endpoint_bits, total_delta_endpoint_bits / 8, total_delta_endpoint_bits / total_texels, total_delta_endpoint_bits / total_blocks); + debug_printf("Total selector bits: %u bytes: %u bits/texel: %3.3f bits/block: %3.3f\n", total_selector_bits, total_selector_bits / 8, total_selector_bits / total_texels, total_selector_bits / total_blocks); + + debug_printf("Total table bytes: %u, %3.3f bits/texel\n", m_output.m_slice_image_tables.size(), m_output.m_slice_image_tables.size() * 8.0f / total_texels); + debug_printf("Total image bytes: %u, %3.3f bits/texel\n", total_image_bytes, total_image_bytes * 8.0f / total_texels); + + return true; + } + + bool basisu_backend::encode_endpoint_palette() + { + const basisu_frontend& r = *m_pFront_end; + + // The endpoint indices may have been changed by the backend's RDO step, so go and figure out which ones are actually used again. 
+ bool_vec old_endpoint_was_used(r.get_total_endpoint_clusters()); + uint32_t first_old_entry_index = UINT32_MAX; + + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x, num_blocks_y = m_slices[slice_index].m_num_blocks_y; + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + const uint32_t old_endpoint_index = m.m_endpoint_index; + + old_endpoint_was_used[old_endpoint_index] = true; + first_old_entry_index = basisu::minimum(first_old_entry_index, old_endpoint_index); + } // block_x + } // block_y + } // slice_index + + debug_printf("basisu_backend::encode_endpoint_palette: first_old_entry_index: %u\n", first_old_entry_index); + + // Maps NEW to OLD endpoints + uint_vec endpoint_remap_table_new_to_old(r.get_total_endpoint_clusters()); + endpoint_remap_table_new_to_old.set_all(first_old_entry_index); + + bool_vec new_endpoint_was_used(r.get_total_endpoint_clusters()); + + for (uint32_t old_endpoint_index = 0; old_endpoint_index < m_endpoint_remap_table_old_to_new.size(); old_endpoint_index++) + { + if (old_endpoint_was_used[old_endpoint_index]) + { + const uint32_t new_endpoint_index = m_endpoint_remap_table_old_to_new[old_endpoint_index]; + + new_endpoint_was_used[new_endpoint_index] = true; + + endpoint_remap_table_new_to_old[new_endpoint_index] = old_endpoint_index; + } + } + + // TODO: Some new endpoint palette entries may actually be unused and aren't worth coding. Fix that. 
+ + uint32_t total_unused_new_entries = 0; + for (uint32_t i = 0; i < new_endpoint_was_used.size(); i++) + if (!new_endpoint_was_used[i]) + total_unused_new_entries++; + debug_printf("basisu_backend::encode_endpoint_palette: total_unused_new_entries: %u out of %u\n", total_unused_new_entries, new_endpoint_was_used.size()); + + bool is_grayscale = true; + for (uint32_t old_endpoint_index = 0; old_endpoint_index < (uint32_t)m_endpoint_palette.size(); old_endpoint_index++) + { + int r5 = m_endpoint_palette[old_endpoint_index].m_color5[0]; + int g5 = m_endpoint_palette[old_endpoint_index].m_color5[1]; + int b5 = m_endpoint_palette[old_endpoint_index].m_color5[2]; + if ((r5 != g5) || (r5 != b5)) + { + is_grayscale = false; + break; + } + } + + histogram color5_delta_hist0(32); // prev 0-9, delta is -9 to 31 + histogram color5_delta_hist1(32); // prev 10-21, delta is -21 to 21 + histogram color5_delta_hist2(32); // prev 22-31, delta is -31 to 9 + histogram inten_delta_hist(8); + + color_rgba prev_color5(16, 16, 16, 0); + uint32_t prev_inten = 0; + + for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++) + { + const uint32_t old_endpoint_index = endpoint_remap_table_new_to_old[new_endpoint_index]; + + int delta_inten = m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten; + inten_delta_hist.inc(delta_inten & 7); + prev_inten = m_endpoint_palette[old_endpoint_index].m_inten5; + + for (uint32_t i = 0; i < (is_grayscale ? 
1U : 3U); i++)
+			{
+				const int delta = (m_endpoint_palette[old_endpoint_index].m_color5[i] - prev_color5[i]) & 31;
+
+				if (prev_color5[i] <= basist::COLOR5_PAL0_PREV_HI)
+					color5_delta_hist0.inc(delta);
+				else if (prev_color5[i] <= basist::COLOR5_PAL1_PREV_HI)
+					color5_delta_hist1.inc(delta);
+				else
+					color5_delta_hist2.inc(delta);
+
+				prev_color5[i] = m_endpoint_palette[old_endpoint_index].m_color5[i];
+			}
+		}
+
+		if (!color5_delta_hist0.get_total()) color5_delta_hist0.inc(0);
+		if (!color5_delta_hist1.get_total()) color5_delta_hist1.inc(0);
+		if (!color5_delta_hist2.get_total()) color5_delta_hist2.inc(0);
+
+		huffman_encoding_table color5_delta_model0, color5_delta_model1, color5_delta_model2, inten_delta_model;
+		if (!color5_delta_model0.init(color5_delta_hist0, 16))
+		{
+			error_printf("color5_delta_model0.init() failed!");
+			return false;
+		}
+
+		if (!color5_delta_model1.init(color5_delta_hist1, 16))
+		{
+			error_printf("color5_delta_model1.init() failed!");
+			return false;
+		}
+
+		if (!color5_delta_model2.init(color5_delta_hist2, 16))
+		{
+			error_printf("color5_delta_model2.init() failed!");
+			return false;
+		}
+
+		if (!inten_delta_model.init(inten_delta_hist, 16))
+		{
+			error_printf("inten_delta_model.init() failed!");
+			return false;
+		}
+
+		bitwise_coder coder;
+
+		coder.init(8192);
+
+		coder.emit_huffman_table(color5_delta_model0);
+		coder.emit_huffman_table(color5_delta_model1);
+		coder.emit_huffman_table(color5_delta_model2);
+		coder.emit_huffman_table(inten_delta_model);
+
+		coder.put_bits(is_grayscale, 1);
+
+		prev_color5.set(16, 16, 16, 0);
+		prev_inten = 0;
+
+		for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++)
+		{
+			const uint32_t old_endpoint_index = endpoint_remap_table_new_to_old[new_endpoint_index];
+
+			int delta_inten = (m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten) & 7;
+			coder.put_code(delta_inten, inten_delta_model);
+			prev_inten = 
m_endpoint_palette[old_endpoint_index].m_inten5; + + for (uint32_t i = 0; i < (is_grayscale ? 1U : 3U); i++) + { + const int delta = (m_endpoint_palette[old_endpoint_index].m_color5[i] - prev_color5[i]) & 31; + + if (prev_color5[i] <= basist::COLOR5_PAL0_PREV_HI) + coder.put_code(delta, color5_delta_model0); + else if (prev_color5[i] <= basist::COLOR5_PAL1_PREV_HI) + coder.put_code(delta, color5_delta_model1); + else + coder.put_code(delta, color5_delta_model2); + + prev_color5[i] = m_endpoint_palette[old_endpoint_index].m_color5[i]; + } + + } // q + + coder.flush(); + + m_output.m_endpoint_palette = coder.get_bytes(); + + debug_printf("Endpoint codebook size: %u bits %u bytes, Bits per entry: %3.1f, Avg bits/texel: %3.3f\n", + 8 * (int)m_output.m_endpoint_palette.size(), (int)m_output.m_endpoint_palette.size(), m_output.m_endpoint_palette.size() * 8.0f / r.get_total_endpoint_clusters(), m_output.m_endpoint_palette.size() * 8.0f / get_total_input_texels()); + + return true; + } + + bool basisu_backend::encode_selector_palette() + { + const basisu_frontend& r = *m_pFront_end; + + histogram delta_selector_pal_histogram(256); + + for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++) + { + if (!q) + continue; + + const etc1_selector_palette_entry& cur = m_selector_palette[m_selector_remap_table_new_to_old[q]]; + const etc1_selector_palette_entry predictor(m_selector_palette[m_selector_remap_table_new_to_old[q - 1]]); + + for (uint32_t j = 0; j < 4; j++) + delta_selector_pal_histogram.inc(cur.get_byte(j) ^ predictor.get_byte(j)); + } + + if (!delta_selector_pal_histogram.get_total()) + delta_selector_pal_histogram.inc(0); + + huffman_encoding_table delta_selector_pal_model; + if (!delta_selector_pal_model.init(delta_selector_pal_histogram, 16)) + { + error_printf("delta_selector_pal_model.init() failed!"); + return false; + } + + bitwise_coder coder; + coder.init(1024 * 1024); + + coder.put_bits(0, 1); // use global codebook + coder.put_bits(0, 1); // uses 
hybrid codebooks + + coder.put_bits(0, 1); // raw bytes + + coder.emit_huffman_table(delta_selector_pal_model); + + for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++) + { + if (!q) + { + for (uint32_t j = 0; j < 4; j++) + coder.put_bits(m_selector_palette[m_selector_remap_table_new_to_old[q]].get_byte(j), 8); + continue; + } + + const etc1_selector_palette_entry& cur = m_selector_palette[m_selector_remap_table_new_to_old[q]]; + const etc1_selector_palette_entry predictor(m_selector_palette[m_selector_remap_table_new_to_old[q - 1]]); + + for (uint32_t j = 0; j < 4; j++) + coder.put_code(cur.get_byte(j) ^ predictor.get_byte(j), delta_selector_pal_model); + } + + coder.flush(); + + m_output.m_selector_palette = coder.get_bytes(); + + if (m_output.m_selector_palette.size() >= r.get_total_selector_clusters() * 4) + { + coder.init(1024 * 1024); + + coder.put_bits(0, 1); // use global codebook + coder.put_bits(0, 1); // uses hybrid codebooks + + coder.put_bits(1, 1); // raw bytes + + for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++) + { + const uint32_t i = m_selector_remap_table_new_to_old[q]; + + for (uint32_t j = 0; j < 4; j++) + coder.put_bits(m_selector_palette[i].get_byte(j), 8); + } + + coder.flush(); + + m_output.m_selector_palette = coder.get_bytes(); + } + + debug_printf("Selector codebook bits: %u bytes: %u, Bits per entry: %3.1f, Avg bits/texel: %3.3f\n", + (int)m_output.m_selector_palette.size() * 8, (int)m_output.m_selector_palette.size(), + m_output.m_selector_palette.size() * 8.0f / r.get_total_selector_clusters(), m_output.m_selector_palette.size() * 8.0f / get_total_input_texels()); + + return true; + } + + uint32_t basisu_backend::encode() + { + //const bool is_video = m_pFront_end->get_params().m_tex_type == basist::cBASISTexTypeVideoFrames; + m_output.m_slice_desc = m_slices; + m_output.m_etc1s = m_params.m_etc1s; + m_output.m_uses_global_codebooks = m_params.m_used_global_codebooks; + m_output.m_srgb = 
m_pFront_end->get_params().m_perceptual; + + create_endpoint_palette(); + create_selector_palette(); + + create_encoder_blocks(); + + if (!encode_image()) + return 0; + + if (!encode_endpoint_palette()) + return 0; + + if (!encode_selector_palette()) + return 0; + + uint32_t total_compressed_bytes = (uint32_t)(m_output.m_slice_image_tables.size() + m_output.m_endpoint_palette.size() + m_output.m_selector_palette.size()); + for (uint32_t i = 0; i < m_output.m_slice_image_data.size(); i++) + total_compressed_bytes += (uint32_t)m_output.m_slice_image_data[i].size(); + + debug_printf("Wrote %u bytes, %3.3f bits/texel\n", total_compressed_bytes, total_compressed_bytes * 8.0f / get_total_input_texels()); + + return total_compressed_bytes; + } + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_backend.h b/thirdparty/basisu/encoder/basisu_backend.h new file mode 100644 index 000000000..58a9a8aa0 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_backend.h @@ -0,0 +1,409 @@ +// basisu_backend.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "../transcoder/basisu.h" +#include "basisu_enc.h" +#include "../transcoder/basisu_transcoder_internal.h" +#include "basisu_frontend.h" + +namespace basisu +{ + struct etc1_selector_palette_entry + { + etc1_selector_palette_entry() + { + clear(); + } + + void clear() + { + basisu::clear_obj(*this); + } + + uint8_t operator[] (uint32_t i) const { assert(i < 16); return m_selectors[i]; } + uint8_t& operator[] (uint32_t i) { assert(i < 16); return m_selectors[i]; } + + void set_uint32(uint32_t v) + { + for (uint32_t byte_index = 0; byte_index < 4; byte_index++) + { + uint32_t b = (v >> (byte_index * 8)) & 0xFF; + + m_selectors[byte_index * 4 + 0] = b & 3; + m_selectors[byte_index * 4 + 1] = (b >> 2) & 3; + m_selectors[byte_index * 4 + 2] = (b >> 4) & 3; + m_selectors[byte_index * 4 + 3] = (b >> 6) & 3; + } + } + + uint32_t get_uint32() const + { + return get_byte(0) | (get_byte(1) << 8) | (get_byte(2) << 16) | (get_byte(3) << 24); + } + + uint32_t get_byte(uint32_t byte_index) const + { + assert(byte_index < 4); + + return m_selectors[byte_index * 4 + 0] | + (m_selectors[byte_index * 4 + 1] << 2) | + (m_selectors[byte_index * 4 + 2] << 4) | + (m_selectors[byte_index * 4 + 3] << 6); + } + + uint8_t operator()(uint32_t x, uint32_t y) const { assert((x < 4) && (y < 4)); return m_selectors[x + y * 4]; } + uint8_t& operator()(uint32_t x, uint32_t y) { assert((x < 4) && (y < 4)); return m_selectors[x + y * 4]; } + + bool operator< (const etc1_selector_palette_entry& other) const + { + for (uint32_t i = 0; i < 16; i++) + { + if (m_selectors[i] < other.m_selectors[i]) + return true; + else if (m_selectors[i] != other.m_selectors[i]) + return false; + } + + return false; + } + + bool operator== (const etc1_selector_palette_entry& other) const + { + for (uint32_t i = 0; i < 16; i++) + { + if (m_selectors[i] != other.m_selectors[i]) + return false; + } + + return true; + } + + private: + uint8_t m_selectors[16]; + }; + + typedef basisu::vector 
etc1_selector_palette_entry_vec; + + struct encoder_block + { + encoder_block() + { + clear(); + } + + uint32_t m_endpoint_predictor; + + int m_endpoint_index; + int m_selector_index; + + int m_selector_history_buf_index; + + bool m_is_cr_target; + void clear() + { + m_endpoint_predictor = 0; + + m_endpoint_index = 0; + m_selector_index = 0; + + m_selector_history_buf_index = 0; + m_is_cr_target = false; + } + }; + + typedef basisu::vector encoder_block_vec; + typedef vector2D encoder_block_vec2D; + + struct etc1_endpoint_palette_entry + { + etc1_endpoint_palette_entry() + { + clear(); + } + + color_rgba m_color5; + uint32_t m_inten5; + bool m_color5_valid; + + void clear() + { + clear_obj(*this); + } + }; + + typedef basisu::vector etc1_endpoint_palette_entry_vec; + + struct basisu_backend_params + { + bool m_etc1s; + bool m_debug, m_debug_images; + float m_endpoint_rdo_quality_thresh; + float m_selector_rdo_quality_thresh; + uint32_t m_compression_level; + + bool m_used_global_codebooks; + + bool m_validate; + + basisu_backend_params() + { + clear(); + } + + void clear() + { + m_etc1s = false; + m_debug = false; + m_debug_images = false; + m_endpoint_rdo_quality_thresh = 0.0f; + m_selector_rdo_quality_thresh = 0.0f; + m_compression_level = 0; + m_used_global_codebooks = false; + m_validate = true; + } + }; + + struct basisu_backend_slice_desc + { + basisu_backend_slice_desc() + { + clear(); + } + + void clear() + { + clear_obj(*this); + } + + uint32_t m_first_block_index; + + uint32_t m_orig_width; + uint32_t m_orig_height; + + uint32_t m_width; + uint32_t m_height; + + uint32_t m_num_blocks_x; + uint32_t m_num_blocks_y; + + uint32_t m_num_macroblocks_x; + uint32_t m_num_macroblocks_y; + + uint32_t m_source_file_index; // also the basis image index + uint32_t m_mip_index; + bool m_alpha; + bool m_iframe; + }; + + typedef basisu::vector basisu_backend_slice_desc_vec; + + struct basisu_backend_output + { + basist::basis_tex_format m_tex_format; + + bool m_etc1s; + 
bool m_uses_global_codebooks; + bool m_srgb; + + uint32_t m_num_endpoints; + uint32_t m_num_selectors; + + uint8_vec m_endpoint_palette; + uint8_vec m_selector_palette; + + basisu_backend_slice_desc_vec m_slice_desc; + + uint8_vec m_slice_image_tables; + basisu::vector m_slice_image_data; + uint16_vec m_slice_image_crcs; + + basisu_backend_output() + { + clear(); + } + + void clear() + { + m_tex_format = basist::basis_tex_format::cETC1S; + m_etc1s = false; + m_uses_global_codebooks = false; + m_srgb = true; + + m_num_endpoints = 0; + m_num_selectors = 0; + + m_endpoint_palette.clear(); + m_selector_palette.clear(); + m_slice_desc.clear(); + m_slice_image_tables.clear(); + m_slice_image_data.clear(); + m_slice_image_crcs.clear(); + } + + uint32_t get_output_size_estimate() const + { + uint32_t total_compressed_bytes = (uint32_t)(m_slice_image_tables.size() + m_endpoint_palette.size() + m_selector_palette.size()); + for (uint32_t i = 0; i < m_slice_image_data.size(); i++) + total_compressed_bytes += (uint32_t)m_slice_image_data[i].size(); + + return total_compressed_bytes; + } + }; + + class basisu_backend + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_backend); + + public: + + basisu_backend(); + + void clear(); + + void init(basisu_frontend *pFront_end, basisu_backend_params ¶ms, const basisu_backend_slice_desc_vec &slice_desc); + + uint32_t encode(); + + const basisu_backend_output &get_output() const { return m_output; } + const basisu_backend_params& get_params() const { return m_params; } + + private: + basisu_frontend *m_pFront_end; + basisu_backend_params m_params; + basisu_backend_slice_desc_vec m_slices; + basisu_backend_output m_output; + + etc1_endpoint_palette_entry_vec m_endpoint_palette; + etc1_selector_palette_entry_vec m_selector_palette; + + struct etc1_global_selector_cb_entry_desc + { + uint32_t m_pal_index; + uint32_t m_mod_index; + bool m_was_used; + }; + + typedef basisu::vector etc1_global_selector_cb_entry_desc_vec; + + 
etc1_global_selector_cb_entry_desc_vec m_global_selector_palette_desc; + + basisu::vector m_slice_encoder_blocks; + + // Maps OLD to NEW endpoint/selector indices + uint_vec m_endpoint_remap_table_old_to_new; + uint_vec m_endpoint_remap_table_new_to_old; + bool_vec m_old_endpoint_was_used; + bool_vec m_new_endpoint_was_used; + + uint_vec m_selector_remap_table_old_to_new; + + // Maps NEW to OLD endpoint/selector indices + uint_vec m_selector_remap_table_new_to_old; + + uint32_t get_total_slices() const + { + return (uint32_t)m_slices.size(); + } + + uint32_t get_total_slice_blocks() const + { + return m_pFront_end->get_total_output_blocks(); + } + + uint32_t get_block_index(uint32_t slice_index, uint32_t block_x, uint32_t block_y) const + { + const basisu_backend_slice_desc &slice = m_slices[slice_index]; + + assert((block_x < slice.m_num_blocks_x) && (block_y < slice.m_num_blocks_y)); + + return slice.m_first_block_index + block_y * slice.m_num_blocks_x + block_x; + } + + uint32_t get_total_blocks(uint32_t slice_index) const + { + return m_slices[slice_index].m_num_blocks_x * m_slices[slice_index].m_num_blocks_y; + } + + uint32_t get_total_blocks() const + { + uint32_t total_blocks = 0; + for (uint32_t i = 0; i < m_slices.size(); i++) + total_blocks += get_total_blocks(i); + return total_blocks; + } + + // Returns the total number of input texels, not counting padding up to blocks/macroblocks. 
+ uint32_t get_total_input_texels(uint32_t slice_index) const + { + return m_slices[slice_index].m_orig_width * m_slices[slice_index].m_orig_height; + } + + uint32_t get_total_input_texels() const + { + uint32_t total_texels = 0; + for (uint32_t i = 0; i < m_slices.size(); i++) + total_texels += get_total_input_texels(i); + return total_texels; + } + + int find_slice(uint32_t block_index, uint32_t *pBlock_x, uint32_t *pBlock_y) const + { + for (uint32_t i = 0; i < m_slices.size(); i++) + { + if ((block_index >= m_slices[i].m_first_block_index) && (block_index < (m_slices[i].m_first_block_index + m_slices[i].m_num_blocks_x * m_slices[i].m_num_blocks_y))) + { + const uint32_t ofs = block_index - m_slices[i].m_first_block_index; + const uint32_t x = ofs % m_slices[i].m_num_blocks_x; + const uint32_t y = ofs / m_slices[i].m_num_blocks_x; + + if (pBlock_x) *pBlock_x = x; + if (pBlock_y) *pBlock_y = y; + + return i; + } + } + return -1; + } + + void create_endpoint_palette(); + + void create_selector_palette(); + + // endpoint palette + // 5:5:5 and predicted 4:4:4 colors, 1 or 2 3-bit intensity table indices + // selector palette + // 4x4 2-bit selectors + + // per-macroblock: + // 4 diff bits + // 4 flip bits + // Endpoint template index, 1-8 endpoint indices + // Alternately, if no template applies, we can send 4 ETC1S bits followed by 4-8 endpoint indices + // 4 selector indices + + void reoptimize_and_sort_endpoints_codebook(uint32_t total_block_endpoints_remapped, uint_vec &all_endpoint_indices); + void sort_selector_codebook(); + void create_encoder_blocks(); + void compute_slice_crcs(); + bool encode_image(); + bool encode_endpoint_palette(); + bool encode_selector_palette(); + int find_video_frame(int slice_index, int delta); + void check_for_valid_cr_blocks(); + }; + +} // namespace basisu + diff --git a/thirdparty/basisu/encoder/basisu_basis_file.cpp b/thirdparty/basisu/encoder/basisu_basis_file.cpp new file mode 100644 index 000000000..77f467f67 --- /dev/null 
+++ b/thirdparty/basisu/encoder/basisu_basis_file.cpp @@ -0,0 +1,269 @@ +// basisu_basis_file.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_basis_file.h" +#include "../transcoder/basisu_transcoder.h" + +// The output file version. Keep in sync with BASISD_SUPPORTED_BASIS_VERSION. +#define BASIS_FILE_VERSION (0x13) + +namespace basisu +{ + void basisu_file::create_header(const basisu_backend_output &encoder_output, basist::basis_texture_type tex_type, uint32_t userdata0, uint32_t userdata1, bool y_flipped, uint32_t us_per_frame) + { + m_header.m_header_size = sizeof(basist::basis_file_header); + + m_header.m_data_size = m_total_file_size - sizeof(basist::basis_file_header); + + m_header.m_total_slices = (uint32_t)encoder_output.m_slice_desc.size(); + + m_header.m_total_images = 0; + for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++) + m_header.m_total_images = maximum(m_header.m_total_images, encoder_output.m_slice_desc[i].m_source_file_index + 1); + + m_header.m_tex_format = (int)encoder_output.m_tex_format; + m_header.m_flags = 0; + + if (encoder_output.m_etc1s) + { + assert(encoder_output.m_tex_format == basist::basis_tex_format::cETC1S); + m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagETC1S; + } + else + { + assert(encoder_output.m_tex_format != basist::basis_tex_format::cETC1S); + } + + if (y_flipped) + m_header.m_flags = 
m_header.m_flags | basist::cBASISHeaderFlagYFlipped; + if (encoder_output.m_uses_global_codebooks) + m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagUsesGlobalCodebook; + if (encoder_output.m_srgb) + m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagSRGB; + + for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++) + { + if (encoder_output.m_slice_desc[i].m_alpha) + { + m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagHasAlphaSlices; + break; + } + } + + m_header.m_tex_type = static_cast(tex_type); + m_header.m_us_per_frame = clamp(us_per_frame, 0, basist::cBASISMaxUSPerFrame); + + m_header.m_userdata0 = userdata0; + m_header.m_userdata1 = userdata1; + + m_header.m_total_endpoints = encoder_output.m_num_endpoints; + if (!encoder_output.m_uses_global_codebooks) + { + m_header.m_endpoint_cb_file_ofs = m_endpoint_cb_file_ofs; + m_header.m_endpoint_cb_file_size = (uint32_t)encoder_output.m_endpoint_palette.size(); + } + else + { + assert(!m_endpoint_cb_file_ofs); + } + + m_header.m_total_selectors = encoder_output.m_num_selectors; + if (!encoder_output.m_uses_global_codebooks) + { + m_header.m_selector_cb_file_ofs = m_selector_cb_file_ofs; + m_header.m_selector_cb_file_size = (uint32_t)encoder_output.m_selector_palette.size(); + } + else + { + assert(!m_selector_cb_file_ofs); + } + + m_header.m_tables_file_ofs = m_tables_file_ofs; + m_header.m_tables_file_size = (uint32_t)encoder_output.m_slice_image_tables.size(); + + m_header.m_slice_desc_file_ofs = m_slice_descs_file_ofs; + } + + bool basisu_file::create_image_descs(const basisu_backend_output &encoder_output) + { + const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc; + + m_images_descs.resize(slice_descs.size()); + + uint64_t cur_slice_file_ofs = m_first_image_file_ofs; + for (uint32_t i = 0; i < slice_descs.size(); i++) + { + clear_obj(m_images_descs[i]); + + m_images_descs[i].m_image_index = slice_descs[i].m_source_file_index; + 
m_images_descs[i].m_level_index = slice_descs[i].m_mip_index; + + if (slice_descs[i].m_alpha) + m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsHasAlpha; + if (slice_descs[i].m_iframe) + m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsFrameIsIFrame; + + m_images_descs[i].m_orig_width = slice_descs[i].m_orig_width; + m_images_descs[i].m_orig_height = slice_descs[i].m_orig_height; + m_images_descs[i].m_num_blocks_x = slice_descs[i].m_num_blocks_x; + m_images_descs[i].m_num_blocks_y = slice_descs[i].m_num_blocks_y; + m_images_descs[i].m_slice_data_crc16 = encoder_output.m_slice_image_crcs[i]; + + if (encoder_output.m_slice_image_data[i].size() > UINT32_MAX) + { + error_printf("basisu_file::create_image_descs: Basis file too large\n"); + return false; + } + + const uint32_t image_size = (uint32_t)encoder_output.m_slice_image_data[i].size(); + + m_images_descs[i].m_file_ofs = (uint32_t)cur_slice_file_ofs; + m_images_descs[i].m_file_size = image_size; + + cur_slice_file_ofs += image_size; + if (cur_slice_file_ofs > UINT32_MAX) + { + error_printf("basisu_file::create_image_descs: Basis file too large\n"); + return false; + } + } + + assert(cur_slice_file_ofs == m_total_file_size); + return true; + } + + void basisu_file::create_comp_data(const basisu_backend_output &encoder_output) + { + const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc; + + append_vector(m_comp_data, reinterpret_cast(&m_header), sizeof(m_header)); + + assert(m_comp_data.size() == m_slice_descs_file_ofs); + append_vector(m_comp_data, reinterpret_cast(&m_images_descs[0]), m_images_descs.size() * sizeof(m_images_descs[0])); + + if (!encoder_output.m_uses_global_codebooks) + { + if (encoder_output.m_endpoint_palette.size()) + { + assert(m_comp_data.size() == m_endpoint_cb_file_ofs); + append_vector(m_comp_data, reinterpret_cast(&encoder_output.m_endpoint_palette[0]), encoder_output.m_endpoint_palette.size()); + } + + 
if (encoder_output.m_selector_palette.size()) + { + assert(m_comp_data.size() == m_selector_cb_file_ofs); + append_vector(m_comp_data, reinterpret_cast(&encoder_output.m_selector_palette[0]), encoder_output.m_selector_palette.size()); + } + } + + if (encoder_output.m_slice_image_tables.size()) + { + assert(m_comp_data.size() == m_tables_file_ofs); + append_vector(m_comp_data, reinterpret_cast(&encoder_output.m_slice_image_tables[0]), encoder_output.m_slice_image_tables.size()); + } + + assert(m_comp_data.size() == m_first_image_file_ofs); + for (uint32_t i = 0; i < slice_descs.size(); i++) + append_vector(m_comp_data, &encoder_output.m_slice_image_data[i][0], encoder_output.m_slice_image_data[i].size()); + + assert(m_comp_data.size() == m_total_file_size); + } + + void basisu_file::fixup_crcs() + { + basist::basis_file_header *pHeader = reinterpret_cast(&m_comp_data[0]); + + pHeader->m_data_size = m_total_file_size - sizeof(basist::basis_file_header); + pHeader->m_data_crc16 = basist::crc16(&m_comp_data[0] + sizeof(basist::basis_file_header), m_total_file_size - sizeof(basist::basis_file_header), 0); + + pHeader->m_header_crc16 = basist::crc16(&pHeader->m_data_size, sizeof(basist::basis_file_header) - BASISU_OFFSETOF(basist::basis_file_header, m_data_size), 0); + + pHeader->m_sig = basist::basis_file_header::cBASISSigValue; + pHeader->m_ver = BASIS_FILE_VERSION;// basist::basis_file_header::cBASISFirstVersion; + } + + bool basisu_file::init(const basisu_backend_output &encoder_output, basist::basis_texture_type tex_type, uint32_t userdata0, uint32_t userdata1, bool y_flipped, uint32_t us_per_frame) + { + clear(); + + const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc; + + // The Basis file uses 32-bit fields for lots of stuff, so make sure it's not too large. 
+ uint64_t check_size = 0; + if (!encoder_output.m_uses_global_codebooks) + { + check_size = (uint64_t)sizeof(basist::basis_file_header) + (uint64_t)sizeof(basist::basis_slice_desc) * slice_descs.size() + + (uint64_t)encoder_output.m_endpoint_palette.size() + (uint64_t)encoder_output.m_selector_palette.size() + (uint64_t)encoder_output.m_slice_image_tables.size(); + } + else + { + check_size = (uint64_t)sizeof(basist::basis_file_header) + (uint64_t)sizeof(basist::basis_slice_desc) * slice_descs.size() + + (uint64_t)encoder_output.m_slice_image_tables.size(); + } + if (check_size >= 0xFFFF0000ULL) + { + error_printf("basisu_file::init: File is too large!\n"); + return false; + } + + m_header_file_ofs = 0; + m_slice_descs_file_ofs = sizeof(basist::basis_file_header); + if (encoder_output.m_tex_format == basist::basis_tex_format::cETC1S) + { + if (encoder_output.m_uses_global_codebooks) + { + m_endpoint_cb_file_ofs = 0; + m_selector_cb_file_ofs = 0; + m_tables_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); + } + else + { + m_endpoint_cb_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); + m_selector_cb_file_ofs = m_endpoint_cb_file_ofs + (uint32_t)encoder_output.m_endpoint_palette.size(); + m_tables_file_ofs = m_selector_cb_file_ofs + (uint32_t)encoder_output.m_selector_palette.size(); + } + m_first_image_file_ofs = m_tables_file_ofs + (uint32_t)encoder_output.m_slice_image_tables.size(); + } + else + { + m_endpoint_cb_file_ofs = 0; + m_selector_cb_file_ofs = 0; + m_tables_file_ofs = 0; + m_first_image_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); + } + + uint64_t total_file_size = m_first_image_file_ofs; + for (uint32_t i = 0; i < encoder_output.m_slice_image_data.size(); i++) + total_file_size += encoder_output.m_slice_image_data[i].size(); + if (total_file_size >= 0xFFFF0000ULL) + { + 
error_printf("basisu_file::init: File is too large!\n"); + return false; + } + + m_total_file_size = (uint32_t)total_file_size; + + create_header(encoder_output, tex_type, userdata0, userdata1, y_flipped, us_per_frame); + + if (!create_image_descs(encoder_output)) + return false; + + create_comp_data(encoder_output); + + fixup_crcs(); + + return true; + } + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_basis_file.h b/thirdparty/basisu/encoder/basisu_basis_file.h new file mode 100644 index 000000000..57448bccb --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_basis_file.h @@ -0,0 +1,70 @@ +// basisu_basis_file.h +// Copyright (C) 2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "../transcoder/basisu_file_headers.h" +#include "basisu_backend.h" + +namespace basisu +{ + class basisu_file + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_file); + + public: + basisu_file() + { + } + + void clear() + { + m_comp_data.clear(); + + clear_obj(m_header); + m_images_descs.clear(); + + m_header_file_ofs = 0; + m_slice_descs_file_ofs = 0; + m_endpoint_cb_file_ofs = 0; + m_selector_cb_file_ofs = 0; + m_tables_file_ofs = 0; + m_first_image_file_ofs = 0; + m_total_file_size = 0; + } + + bool init(const basisu_backend_output& encoder_output, basist::basis_texture_type tex_type, uint32_t userdata0, uint32_t userdata1, bool y_flipped, uint32_t us_per_frame); + + const uint8_vec &get_compressed_data() const { return m_comp_data; } + + private: + basist::basis_file_header m_header; + basisu::vector m_images_descs; + + uint8_vec m_comp_data; + + uint32_t m_header_file_ofs; + uint32_t m_slice_descs_file_ofs; + uint32_t m_endpoint_cb_file_ofs; + uint32_t m_selector_cb_file_ofs; + uint32_t m_tables_file_ofs; + uint32_t m_first_image_file_ofs; + uint32_t m_total_file_size; + + void create_header(const basisu_backend_output& encoder_output, basist::basis_texture_type tex_type, uint32_t userdata0, uint32_t userdata1, bool y_flipped, uint32_t us_per_frame); + bool create_image_descs(const basisu_backend_output& encoder_output); + void create_comp_data(const basisu_backend_output& encoder_output); + void fixup_crcs(); + }; + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_bc7enc.cpp b/thirdparty/basisu/encoder/basisu_bc7enc.cpp new file mode 100644 index 000000000..914e7fbbb --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_bc7enc.cpp @@ -0,0 +1,1986 @@ +// File: basisu_bc7enc.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_bc7enc.h" + +#ifdef _DEBUG +#define BC7ENC_CHECK_OVERALL_ERROR 1 +#else +#define BC7ENC_CHECK_OVERALL_ERROR 0 +#endif + +using namespace basist; + +namespace basisu +{ + +// Helpers +static inline color_quad_u8 *color_quad_u8_set_clamped(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { pRes->m_c[0] = (uint8_t)clampi(r, 0, 255); pRes->m_c[1] = (uint8_t)clampi(g, 0, 255); pRes->m_c[2] = (uint8_t)clampi(b, 0, 255); pRes->m_c[3] = (uint8_t)clampi(a, 0, 255); return pRes; } +static inline color_quad_u8 *color_quad_u8_set(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { assert((uint32_t)(r | g | b | a) <= 255); pRes->m_c[0] = (uint8_t)r; pRes->m_c[1] = (uint8_t)g; pRes->m_c[2] = (uint8_t)b; pRes->m_c[3] = (uint8_t)a; return pRes; } +static inline bc7enc_bool color_quad_u8_notequals(const color_quad_u8 *pLHS, const color_quad_u8 *pRHS) { return (pLHS->m_c[0] != pRHS->m_c[0]) || (pLHS->m_c[1] != pRHS->m_c[1]) || (pLHS->m_c[2] != pRHS->m_c[2]) || (pLHS->m_c[3] != pRHS->m_c[3]); } +static inline bc7enc_vec4F*vec4F_set_scalar(bc7enc_vec4F*pV, float x) { pV->m_c[0] = x; pV->m_c[1] = x; pV->m_c[2] = x; pV->m_c[3] = x; return pV; } +static inline bc7enc_vec4F*vec4F_set(bc7enc_vec4F*pV, float x, float y, float z, float w) { pV->m_c[0] = x; pV->m_c[1] = y; pV->m_c[2] = z; pV->m_c[3] = w; return pV; } +static inline bc7enc_vec4F*vec4F_saturate_in_place(bc7enc_vec4F*pV) { pV->m_c[0] = saturate(pV->m_c[0]); pV->m_c[1] = saturate(pV->m_c[1]); pV->m_c[2] = saturate(pV->m_c[2]); pV->m_c[3] = saturate(pV->m_c[3]); 
return pV; } +static inline bc7enc_vec4F vec4F_saturate(const bc7enc_vec4F*pV) { bc7enc_vec4F res; res.m_c[0] = saturate(pV->m_c[0]); res.m_c[1] = saturate(pV->m_c[1]); res.m_c[2] = saturate(pV->m_c[2]); res.m_c[3] = saturate(pV->m_c[3]); return res; } +static inline bc7enc_vec4F vec4F_from_color(const color_quad_u8 *pC) { bc7enc_vec4F res; vec4F_set(&res, pC->m_c[0], pC->m_c[1], pC->m_c[2], pC->m_c[3]); return res; } +static inline bc7enc_vec4F vec4F_add(const bc7enc_vec4F*pLHS, const bc7enc_vec4F*pRHS) { bc7enc_vec4F res; vec4F_set(&res, pLHS->m_c[0] + pRHS->m_c[0], pLHS->m_c[1] + pRHS->m_c[1], pLHS->m_c[2] + pRHS->m_c[2], pLHS->m_c[3] + pRHS->m_c[3]); return res; } +static inline bc7enc_vec4F vec4F_sub(const bc7enc_vec4F*pLHS, const bc7enc_vec4F*pRHS) { bc7enc_vec4F res; vec4F_set(&res, pLHS->m_c[0] - pRHS->m_c[0], pLHS->m_c[1] - pRHS->m_c[1], pLHS->m_c[2] - pRHS->m_c[2], pLHS->m_c[3] - pRHS->m_c[3]); return res; } +static inline float vec4F_dot(const bc7enc_vec4F*pLHS, const bc7enc_vec4F*pRHS) { return pLHS->m_c[0] * pRHS->m_c[0] + pLHS->m_c[1] * pRHS->m_c[1] + pLHS->m_c[2] * pRHS->m_c[2] + pLHS->m_c[3] * pRHS->m_c[3]; } +static inline bc7enc_vec4F vec4F_mul(const bc7enc_vec4F*pLHS, float s) { bc7enc_vec4F res; vec4F_set(&res, pLHS->m_c[0] * s, pLHS->m_c[1] * s, pLHS->m_c[2] * s, pLHS->m_c[3] * s); return res; } +static inline bc7enc_vec4F* vec4F_normalize_in_place(bc7enc_vec4F*pV) { float s = pV->m_c[0] * pV->m_c[0] + pV->m_c[1] * pV->m_c[1] + pV->m_c[2] * pV->m_c[2] + pV->m_c[3] * pV->m_c[3]; if (s != 0.0f) { s = 1.0f / sqrtf(s); pV->m_c[0] *= s; pV->m_c[1] *= s; pV->m_c[2] *= s; pV->m_c[3] *= s; } return pV; } + +// Precomputed weight constants used during least fit determination. 
For each entry in g_bc7_weights[]: w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w +const float g_bc7_weights1x[2 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_bc7_weights2x[4 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.107666f, 0.220459f, 0.451416f, 0.328125f, 0.451416f, 0.220459f, 0.107666f, 0.671875f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_bc7_weights3x[8 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.019775f, 0.120850f, 0.738525f, 0.140625f, 0.079102f, 0.202148f, 0.516602f, 0.281250f, 0.177979f, 0.243896f, 0.334229f, 0.421875f, 0.334229f, 0.243896f, 0.177979f, 0.578125f, 0.516602f, 0.202148f, + 0.079102f, 0.718750f, 0.738525f, 0.120850f, 0.019775f, 0.859375f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_bc7_weights4x[16 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.003906f, 0.058594f, 0.878906f, 0.062500f, 0.019775f, 0.120850f, 0.738525f, 0.140625f, 0.041260f, 0.161865f, 0.635010f, 0.203125f, 0.070557f, 0.195068f, 0.539307f, 0.265625f, 0.107666f, 0.220459f, + 0.451416f, 0.328125f, 0.165039f, 0.241211f, 0.352539f, 0.406250f, 0.219727f, 0.249023f, 0.282227f, 0.468750f, 0.282227f, 0.249023f, 0.219727f, 0.531250f, 0.352539f, 0.241211f, 0.165039f, 0.593750f, 0.451416f, 0.220459f, 0.107666f, 0.671875f, 0.539307f, 0.195068f, 0.070557f, 0.734375f, + 0.635010f, 0.161865f, 0.041260f, 0.796875f, 0.738525f, 0.120850f, 0.019775f, 0.859375f, 0.878906f, 0.058594f, 0.003906f, 0.937500f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_astc_weights4x[16 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.003906f, 0.058594f, 0.878906f, 0.062500f, 0.015625f, 0.109375f, 0.765625f, 0.125000f, 0.035156f, 0.152344f, 0.660156f, 0.187500f, 0.070557f, 0.195068f, 0.539307f, 0.265625f, 0.107666f, 0.220459f, + 0.451416f, 0.328125f, 0.152588f, 0.238037f, 0.371338f, 0.390625f, 0.205322f, 0.247803f, 0.299072f, 0.453125f, 0.299072f, 
0.247803f, 0.205322f, 0.546875f, 0.371338f, 0.238037f, 0.152588f, 0.609375f, 0.451416f, 0.220459f, 0.107666f, 0.671875f, 0.539307f, 0.195068f, 0.070557f, 0.734375f, + 0.660156f, 0.152344f, 0.035156f, 0.812500f, 0.765625f, 0.109375f, 0.015625f, 0.875000f, 0.878906f, 0.058594f, 0.003906f, 0.937500f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_astc_weights5x[32 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000977f, 0.030273f, 0.938477f, 0.031250f, 0.003906f, 0.058594f, 0.878906f, 0.062500f, 0.008789f, 0.084961f, 0.821289f, + 0.093750f, 0.015625f, 0.109375f, 0.765625f, 0.125000f, 0.024414f, 0.131836f, 0.711914f, 0.156250f, 0.035156f, 0.152344f, 0.660156f, 0.187500f, 0.047852f, 0.170898f, 0.610352f, 0.218750f, 0.062500f, 0.187500f, + 0.562500f, 0.250000f, 0.079102f, 0.202148f, 0.516602f, 0.281250f, 0.097656f, 0.214844f, 0.472656f, 0.312500f, 0.118164f, 0.225586f, 0.430664f, 0.343750f, 0.140625f, 0.234375f, 0.390625f, 0.375000f, 0.165039f, + 0.241211f, 0.352539f, 0.406250f, 0.191406f, 0.246094f, 0.316406f, 0.437500f, 0.219727f, 0.249023f, 0.282227f, 0.468750f, 0.282227f, 0.249023f, 0.219727f, 0.531250f, 0.316406f, 0.246094f, 0.191406f, 0.562500f, + 0.352539f, 0.241211f, 0.165039f, 0.593750f, 0.390625f, 0.234375f, 0.140625f, 0.625000f, 0.430664f, 0.225586f, 0.118164f, 0.656250f, 0.472656f, 0.214844f, 0.097656f, 0.687500f, 0.516602f, 0.202148f, 0.079102f, + 0.718750f, 0.562500f, 0.187500f, 0.062500f, 0.750000f, 0.610352f, 0.170898f, 0.047852f, 0.781250f, 0.660156f, 0.152344f, 0.035156f, 0.812500f, 0.711914f, 0.131836f, 0.024414f, 0.843750f, 0.765625f, 0.109375f, + 0.015625f, 0.875000f, 0.821289f, 0.084961f, 0.008789f, 0.906250f, 0.878906f, 0.058594f, 0.003906f, 0.937500f, 0.938477f, 0.030273f, 0.000977f, 0.968750f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_astc_weights_3levelsx[3 * 4] = { + 0.000000f, 0.000000f, 1.000000f, 0.000000f, + .5f * .5f, (1.0f - .5f) * .5f, (1.0f - .5f) * (1.0f - .5f), .5f, + 1.000000f, 0.000000f, 
0.000000f, 1.000000f }; + +static endpoint_err g_bc7_mode_1_optimal_endpoints[256][2]; // [c][pbit] +static const uint32_t BC7ENC_MODE_1_OPTIMAL_INDEX = 2; + +static endpoint_err g_astc_4bit_3bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX = 2; + +static endpoint_err g_astc_4bit_2bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX = 1; + +static endpoint_err g_astc_range7_2bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX = 1; + +static endpoint_err g_astc_range13_4bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX = 2; + +static endpoint_err g_astc_range13_2bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX = 1; + +static endpoint_err g_astc_range11_5bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX = 13; // not 1, which is optimal, because 26 losslessly maps to BC7 4-bit weights + +astc_quant_bin g_astc_sorted_order_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [sorted unquantized order] + +static uint8_t g_astc_nearest_sorted_index[BC7ENC_TOTAL_ASTC_RANGES][256]; + +static void astc_init() +{ + for (uint32_t range = 0; range < BC7ENC_TOTAL_ASTC_RANGES; range++) + { + if (!astc_is_valid_endpoint_range(range)) + continue; + + const uint32_t levels = astc_get_levels(range); + + uint32_t vals[256]; + // TODO + for (uint32_t i = 0; i < levels; i++) + vals[i] = (unquant_astc_endpoint_val(i, range) << 8) | i; + + std::sort(vals, vals + levels); + + for (uint32_t i = 0; i < levels; i++) + { + uint32_t order = vals[i] & 0xFF; + uint32_t unq = vals[i] >> 8; + + g_astc_sorted_order_unquant[range][i].m_unquant = (uint8_t)unq; + g_astc_sorted_order_unquant[range][i].m_index = (uint8_t)order; + + } // i + +#if 0 + if (g_astc_bise_range_table[range][1] || g_astc_bise_range_table[range][2]) + { + printf("// 
Range: %u, Levels: %u, Bits: %u, Trits: %u, Quints: %u\n", range, levels, g_astc_bise_range_table[range][0], g_astc_bise_range_table[range][1], g_astc_bise_range_table[range][2]); + + printf("{"); + for (uint32_t i = 0; i < levels; i++) + { + printf("{%u,%u}", g_astc_sorted_order_unquant[range][i].m_index, g_astc_sorted_order_unquant[range][i].m_unquant); + if (i != (levels - 1)) + printf(","); + } + printf("}\n"); + } +#endif + +#if 0 + if (g_astc_bise_range_table[range][1] || g_astc_bise_range_table[range][2]) + { + printf("// Range: %u, Levels: %u, Bits: %u, Trits: %u, Quints: %u\n", range, levels, g_astc_bise_range_table[range][0], g_astc_bise_range_table[range][1], g_astc_bise_range_table[range][2]); + + printf("{"); + for (uint32_t i = 0; i < levels; i++) + { + printf("{%u,%u}", g_astc_unquant[range][i].m_index, g_astc_unquant[range][i].m_unquant); + if (i != (levels - 1)) + printf(","); + } + printf("}\n"); + } +#endif + + for (uint32_t i = 0; i < 256; i++) + { + uint32_t best_index = 0; + int best_err = INT32_MAX; + + for (uint32_t j = 0; j < levels; j++) + { + int err = g_astc_sorted_order_unquant[range][j].m_unquant - i; + if (err < 0) + err = -err; + if (err < best_err) + { + best_err = err; + best_index = j; + } + } + + g_astc_nearest_sorted_index[range][i] = (uint8_t)best_index; + } // i + } // range +} + +static inline uint32_t astc_interpolate_linear(uint32_t l, uint32_t h, uint32_t w) +{ + l = (l << 8) | l; + h = (h << 8) | h; + uint32_t k = (l * (64 - w) + h * w + 32) >> 6; + return k >> 8; +} + +// Initialize the lookup table used for optimal single color compression in mode 1. Must be called before encoding. 
+void bc7enc_compress_block_init() +{ + astc_init(); + + // BC7 666.1 + for (int c = 0; c < 256; c++) + { + for (uint32_t lp = 0; lp < 2; lp++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 64; l++) + { + uint32_t low = ((l << 1) | lp) << 1; + low |= (low >> 7); + for (uint32_t h = 0; h < 64; h++) + { + uint32_t high = ((h << 1) | lp) << 1; + high |= (high >> 7); + const int k = (low * (64 - g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX]) + high * g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX] + 32) >> 6; + const int err = (k - c) * (k - c); + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + g_bc7_mode_1_optimal_endpoints[c][lp] = best; + } // lp + } // c + + // ASTC [0,15] 3-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 16; l++) + { + uint32_t low = (l << 4) | l; + + for (uint32_t h = 0; h < 16; h++) + { + uint32_t high = (h << 4) | h; + + const int k = astc_interpolate_linear(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_4bit_3bit_optimal_endpoints[c] = best; + + } // c + + // ASTC [0,15] 2-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 16; l++) + { + uint32_t low = (l << 4) | l; + + for (uint32_t h = 0; h < 16; h++) + { + uint32_t high = (h << 4) | h; + + const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_4bit_2bit_optimal_endpoints[c] = best; + 
+ } // c + + // ASTC range 7 [0,11] 2-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 12; l++) + { + uint32_t low = g_astc_sorted_order_unquant[7][l].m_unquant; + + for (uint32_t h = 0; h < 12; h++) + { + uint32_t high = g_astc_sorted_order_unquant[7][h].m_unquant; + + const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_range7_2bit_optimal_endpoints[c] = best; + + } // c + + // ASTC range 13 [0,47] 4-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 48; l++) + { + uint32_t low = g_astc_sorted_order_unquant[13][l].m_unquant; + + for (uint32_t h = 0; h < 48; h++) + { + uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant; + + const int k = astc_interpolate_linear(low, high, g_astc_weights4[BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_range13_4bit_optimal_endpoints[c] = best; + + } // c + + // ASTC range 13 [0,47] 2-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 48; l++) + { + uint32_t low = g_astc_sorted_order_unquant[13][l].m_unquant; + + for (uint32_t h = 0; h < 48; h++) + { + uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant; + + const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } 
// l + + g_astc_range13_2bit_optimal_endpoints[c] = best; + + } // c + + // ASTC range 11 [0,31] 5-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 32; l++) + { + uint32_t low = g_astc_sorted_order_unquant[11][l].m_unquant; + + for (uint32_t h = 0; h < 32; h++) + { + uint32_t high = g_astc_sorted_order_unquant[11][h].m_unquant; + + const int k = astc_interpolate_linear(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_range11_5bit_optimal_endpoints[c] = best; + + } // c +} + +static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSelectors, const bc7enc_vec4F* pSelector_weights, bc7enc_vec4F* pXl, bc7enc_vec4F* pXh, const color_quad_u8 *pColors) +{ + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // I did this in matrix form first, expanded out all the ops, then optimized it a bit. 
+ double z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; + double q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; + double q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; + double q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f; + double q00_a = 0.0f, q10_a = 0.0f, t_a = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t sel = pSelectors[i]; + z00 += pSelector_weights[sel].m_c[0]; + z10 += pSelector_weights[sel].m_c[1]; + z11 += pSelector_weights[sel].m_c[2]; + float w = pSelector_weights[sel].m_c[3]; + q00_r += w * pColors[i].m_c[0]; t_r += pColors[i].m_c[0]; + q00_g += w * pColors[i].m_c[1]; t_g += pColors[i].m_c[1]; + q00_b += w * pColors[i].m_c[2]; t_b += pColors[i].m_c[2]; + q00_a += w * pColors[i].m_c[3]; t_a += pColors[i].m_c[3]; + } + + q10_r = t_r - q00_r; + q10_g = t_g - q00_g; + q10_b = t_b - q00_b; + q10_a = t_a - q00_a; + + z01 = z10; + + double det = z00 * z11 - z01 * z10; + if (det != 0.0f) + det = 1.0f / det; + + double iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + pXl->m_c[0] = (float)(iz00 * q00_r + iz01 * q10_r); pXh->m_c[0] = (float)(iz10 * q00_r + iz11 * q10_r); + pXl->m_c[1] = (float)(iz00 * q00_g + iz01 * q10_g); pXh->m_c[1] = (float)(iz10 * q00_g + iz11 * q10_g); + pXl->m_c[2] = (float)(iz00 * q00_b + iz01 * q10_b); pXh->m_c[2] = (float)(iz10 * q00_b + iz11 * q10_b); + pXl->m_c[3] = (float)(iz00 * q00_a + iz01 * q10_a); pXh->m_c[3] = (float)(iz10 * q00_a + iz11 * q10_a); + + for (uint32_t c = 0; c < 4; c++) + { + if ((pXl->m_c[c] < 0.0f) || (pXh->m_c[c] > 255.0f)) + { + uint32_t lo_v = UINT32_MAX, hi_v = 0; + for (uint32_t i = 0; i < N; i++) + { + lo_v = minimumu(lo_v, pColors[i].m_c[c]); + hi_v = maximumu(hi_v, pColors[i].m_c[c]); + } + + if (lo_v == hi_v) + { + pXl->m_c[c] = (float)lo_v; + pXh->m_c[c] = (float)hi_v; + } + } + } +} + +static void compute_least_squares_endpoints_rgb(uint32_t N, const uint8_t *pSelectors, const bc7enc_vec4F*pSelector_weights, bc7enc_vec4F*pXl, 
bc7enc_vec4F*pXh, const color_quad_u8 *pColors) +{ + double z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; + double q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; + double q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; + double q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t sel = pSelectors[i]; + z00 += pSelector_weights[sel].m_c[0]; + z10 += pSelector_weights[sel].m_c[1]; + z11 += pSelector_weights[sel].m_c[2]; + float w = pSelector_weights[sel].m_c[3]; + q00_r += w * pColors[i].m_c[0]; t_r += pColors[i].m_c[0]; + q00_g += w * pColors[i].m_c[1]; t_g += pColors[i].m_c[1]; + q00_b += w * pColors[i].m_c[2]; t_b += pColors[i].m_c[2]; + } + + q10_r = t_r - q00_r; + q10_g = t_g - q00_g; + q10_b = t_b - q00_b; + + z01 = z10; + + double det = z00 * z11 - z01 * z10; + if (det != 0.0f) + det = 1.0f / det; + + double iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + pXl->m_c[0] = (float)(iz00 * q00_r + iz01 * q10_r); pXh->m_c[0] = (float)(iz10 * q00_r + iz11 * q10_r); + pXl->m_c[1] = (float)(iz00 * q00_g + iz01 * q10_g); pXh->m_c[1] = (float)(iz10 * q00_g + iz11 * q10_g); + pXl->m_c[2] = (float)(iz00 * q00_b + iz01 * q10_b); pXh->m_c[2] = (float)(iz10 * q00_b + iz11 * q10_b); + pXl->m_c[3] = 255.0f; pXh->m_c[3] = 255.0f; + + for (uint32_t c = 0; c < 3; c++) + { + if ((pXl->m_c[c] < 0.0f) || (pXh->m_c[c] > 255.0f)) + { + uint32_t lo_v = UINT32_MAX, hi_v = 0; + for (uint32_t i = 0; i < N; i++) + { + lo_v = minimumu(lo_v, pColors[i].m_c[c]); + hi_v = maximumu(hi_v, pColors[i].m_c[c]); + } + + if (lo_v == hi_v) + { + pXl->m_c[c] = (float)lo_v; + pXh->m_c[c] = (float)hi_v; + } + } + } +} + +static inline color_quad_u8 scale_color(const color_quad_u8* pC, const color_cell_compressor_params* pParams) +{ + color_quad_u8 results; + + if (pParams->m_astc_endpoint_range) + { + for (uint32_t i = 0; i < 4; i++) + { + results.m_c[i] = 
g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pC->m_c[i]].m_unquant; + } + } + else + { + const uint32_t n = pParams->m_comp_bits + (pParams->m_has_pbits ? 1 : 0); + assert((n >= 4) && (n <= 8)); + + for (uint32_t i = 0; i < 4; i++) + { + uint32_t v = pC->m_c[i] << (8 - n); + v |= (v >> n); + assert(v <= 255); + results.m_c[i] = (uint8_t)(v); + } + } + + return results; +} + +static inline uint64_t compute_color_distance_rgb(const color_quad_u8 *pE1, const color_quad_u8 *pE2, bc7enc_bool perceptual, const uint32_t weights[4]) +{ + int dr, dg, db; + + if (perceptual) + { + const int l1 = pE1->m_c[0] * 109 + pE1->m_c[1] * 366 + pE1->m_c[2] * 37; + const int cr1 = ((int)pE1->m_c[0] << 9) - l1; + const int cb1 = ((int)pE1->m_c[2] << 9) - l1; + const int l2 = pE2->m_c[0] * 109 + pE2->m_c[1] * 366 + pE2->m_c[2] * 37; + const int cr2 = ((int)pE2->m_c[0] << 9) - l2; + const int cb2 = ((int)pE2->m_c[2] << 9) - l2; + dr = (l1 - l2) >> 8; + dg = (cr1 - cr2) >> 8; + db = (cb1 - cb2) >> 8; + } + else + { + dr = (int)pE1->m_c[0] - (int)pE2->m_c[0]; + dg = (int)pE1->m_c[1] - (int)pE2->m_c[1]; + db = (int)pE1->m_c[2] - (int)pE2->m_c[2]; + } + + return weights[0] * (uint32_t)(dr * dr) + weights[1] * (uint32_t)(dg * dg) + weights[2] * (uint32_t)(db * db); +} + +static inline uint64_t compute_color_distance_rgba(const color_quad_u8 *pE1, const color_quad_u8 *pE2, bc7enc_bool perceptual, const uint32_t weights[4]) +{ + int da = (int)pE1->m_c[3] - (int)pE2->m_c[3]; + return compute_color_distance_rgb(pE1, pE2, perceptual, weights) + (weights[3] * (uint32_t)(da * da)); +} + +static uint64_t pack_mode1_to_one_color(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t *pSelectors) +{ + uint32_t best_err = UINT_MAX; + uint32_t best_p = 0; + + for (uint32_t p = 0; p < 2; p++) + { + uint32_t err = g_bc7_mode_1_optimal_endpoints[r][p].m_error + g_bc7_mode_1_optimal_endpoints[g][p].m_error + 
g_bc7_mode_1_optimal_endpoints[b][p].m_error; + if (err < best_err) + { + best_err = err; + best_p = p; + } + } + + const endpoint_err *pEr = &g_bc7_mode_1_optimal_endpoints[r][best_p]; + const endpoint_err *pEg = &g_bc7_mode_1_optimal_endpoints[g][best_p]; + const endpoint_err *pEb = &g_bc7_mode_1_optimal_endpoints[b][best_p]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 0); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 0); + pResults->m_pbits[0] = best_p; + pResults->m_pbits[1] = 0; + + memset(pSelectors, BC7ENC_MODE_1_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 3; i++) + { + uint32_t low = ((pResults->m_low_endpoint.m_c[i] << 1) | pResults->m_pbits[0]) << 1; + low |= (low >> 7); + + uint32_t high = ((pResults->m_high_endpoint.m_c[i] << 1) | pResults->m_pbits[0]) << 1; + high |= (high >> 7); + + p.m_c[i] = (uint8_t)((low * (64 - g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX]) + high * g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX] + 32) >> 6); + } + p.m_c[3] = 255; + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_4bit_3bit_to_one_color(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t *pSelectors) +{ + const endpoint_err *pEr = &g_astc_4bit_3bit_optimal_endpoints[r]; + const endpoint_err *pEg = &g_astc_4bit_3bit_optimal_endpoints[g]; + const endpoint_err *pEb = &g_astc_4bit_3bit_optimal_endpoints[b]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 0); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 0); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for 
(uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 3; i++) + { + uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i]; + uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i]; + + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); + } + p.m_c[3] = 255; + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_4bit_2bit_to_one_color_rgba(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint32_t a, uint8_t *pSelectors) +{ + const endpoint_err *pEr = &g_astc_4bit_2bit_optimal_endpoints[r]; + const endpoint_err *pEg = &g_astc_4bit_2bit_optimal_endpoints[g]; + const endpoint_err *pEb = &g_astc_4bit_2bit_optimal_endpoints[b]; + const endpoint_err *pEa = &g_astc_4bit_2bit_optimal_endpoints[a]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, pEa->m_lo); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, pEa->m_hi); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + 
pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 4; i++) + { + uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i]; + uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i]; + + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); + } + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgba(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_range7_2bit_to_one_color(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t *pSelectors) +{ + assert(pParams->m_astc_endpoint_range == 7 && pParams->m_num_selector_weights == 4); + + const endpoint_err *pEr = &g_astc_range7_2bit_optimal_endpoints[r]; + const endpoint_err *pEg = &g_astc_range7_2bit_optimal_endpoints[g]; + const endpoint_err *pEb = &g_astc_range7_2bit_optimal_endpoints[b]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 0); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 0); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX, 
pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 3; i++) + { + uint32_t low = g_astc_sorted_order_unquant[7][pResults->m_low_endpoint.m_c[i]].m_unquant; + uint32_t high = g_astc_sorted_order_unquant[7][pResults->m_high_endpoint.m_c[i]].m_unquant; + + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); + } + p.m_c[3] = 255; + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_range13_2bit_to_one_color(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t *pSelectors) +{ + assert(pParams->m_astc_endpoint_range == 13 && pParams->m_num_selector_weights == 4 && !pParams->m_has_alpha); + + const endpoint_err *pEr = &g_astc_range13_2bit_optimal_endpoints[r]; + const endpoint_err *pEg = &g_astc_range13_2bit_optimal_endpoints[g]; + const endpoint_err *pEb = &g_astc_range13_2bit_optimal_endpoints[b]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 47); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 47); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 4; i++) + { + uint32_t low = 
g_astc_sorted_order_unquant[13][pResults->m_low_endpoint.m_c[i]].m_unquant; + uint32_t high = g_astc_sorted_order_unquant[13][pResults->m_high_endpoint.m_c[i]].m_unquant; + + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); + } + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_range11_5bit_to_one_color(const color_cell_compressor_params* pParams, color_cell_compressor_results* pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t* pSelectors) +{ + assert(pParams->m_astc_endpoint_range == 11 && pParams->m_num_selector_weights == 32 && !pParams->m_has_alpha); + + const endpoint_err* pEr = &g_astc_range11_5bit_optimal_endpoints[r]; + const endpoint_err* pEg = &g_astc_range11_5bit_optimal_endpoints[g]; + const endpoint_err* pEb = &g_astc_range11_5bit_optimal_endpoints[b]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 31); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 31); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 4; i++) + { + uint32_t low = g_astc_sorted_order_unquant[11][pResults->m_low_endpoint.m_c[i]].m_unquant; + uint32_t high = g_astc_sorted_order_unquant[11][pResults->m_high_endpoint.m_c[i]].m_unquant; + + p.m_c[i] 
= (uint8_t)astc_interpolate_linear(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]); + } + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 *pHigh, const uint32_t pbits[2], const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults) +{ + color_quad_u8 quantMinColor = *pLow; + color_quad_u8 quantMaxColor = *pHigh; + + if (pParams->m_has_pbits) + { + uint32_t minPBit, maxPBit; + + if (pParams->m_endpoints_share_pbit) + maxPBit = minPBit = pbits[0]; + else + { + minPBit = pbits[0]; + maxPBit = pbits[1]; + } + + quantMinColor.m_c[0] = (uint8_t)((pLow->m_c[0] << 1) | minPBit); + quantMinColor.m_c[1] = (uint8_t)((pLow->m_c[1] << 1) | minPBit); + quantMinColor.m_c[2] = (uint8_t)((pLow->m_c[2] << 1) | minPBit); + quantMinColor.m_c[3] = (uint8_t)((pLow->m_c[3] << 1) | minPBit); + + quantMaxColor.m_c[0] = (uint8_t)((pHigh->m_c[0] << 1) | maxPBit); + quantMaxColor.m_c[1] = (uint8_t)((pHigh->m_c[1] << 1) | maxPBit); + quantMaxColor.m_c[2] = (uint8_t)((pHigh->m_c[2] << 1) | maxPBit); + quantMaxColor.m_c[3] = (uint8_t)((pHigh->m_c[3] << 1) | maxPBit); + } + + color_quad_u8 actualMinColor = scale_color(&quantMinColor, pParams); + color_quad_u8 actualMaxColor = scale_color(&quantMaxColor, pParams); + + const uint32_t N = pParams->m_num_selector_weights; + assert(N >= 1 && N <= 32); + + color_quad_u8 weightedColors[32]; + weightedColors[0] = actualMinColor; + weightedColors[N - 1] = actualMaxColor; + + const uint32_t nc = pParams->m_has_alpha ? 
4 : 3; + if (pParams->m_astc_endpoint_range) + { + for (uint32_t i = 1; i < (N - 1); i++) + { + for (uint32_t j = 0; j < nc; j++) + weightedColors[i].m_c[j] = (uint8_t)(astc_interpolate_linear(actualMinColor.m_c[j], actualMaxColor.m_c[j], pParams->m_pSelector_weights[i])); + } + } + else + { + for (uint32_t i = 1; i < (N - 1); i++) + for (uint32_t j = 0; j < nc; j++) + weightedColors[i].m_c[j] = (uint8_t)((actualMinColor.m_c[j] * (64 - pParams->m_pSelector_weights[i]) + actualMaxColor.m_c[j] * pParams->m_pSelector_weights[i] + 32) >> 6); + } + + const int lr = actualMinColor.m_c[0]; + const int lg = actualMinColor.m_c[1]; + const int lb = actualMinColor.m_c[2]; + const int dr = actualMaxColor.m_c[0] - lr; + const int dg = actualMaxColor.m_c[1] - lg; + const int db = actualMaxColor.m_c[2] - lb; + + uint64_t total_err = 0; + + if (pParams->m_pForce_selectors) + { + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const color_quad_u8* pC = &pParams->m_pPixels[i]; + + const uint8_t sel = pParams->m_pForce_selectors[i]; + assert(sel < N); + + total_err += (pParams->m_has_alpha ? 
compute_color_distance_rgba : compute_color_distance_rgb)(&weightedColors[sel], pC, pParams->m_perceptual, pParams->m_weights); + + pResults->m_pSelectors_temp[i] = sel; + } + } + else if (!pParams->m_perceptual) + { + if (pParams->m_has_alpha) + { + const int la = actualMinColor.m_c[3]; + const int da = actualMaxColor.m_c[3] - la; + + const float f = N / (float)(squarei(dr) + squarei(dg) + squarei(db) + squarei(da) + .00000125f); + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const color_quad_u8 *pC = &pParams->m_pPixels[i]; + int r = pC->m_c[0]; + int g = pC->m_c[1]; + int b = pC->m_c[2]; + int a = pC->m_c[3]; + + int best_sel = (int)((float)((r - lr) * dr + (g - lg) * dg + (b - lb) * db + (a - la) * da) * f + .5f); + best_sel = clampi(best_sel, 1, N - 1); + + uint64_t err0 = compute_color_distance_rgba(&weightedColors[best_sel - 1], pC, BC7ENC_FALSE, pParams->m_weights); + uint64_t err1 = compute_color_distance_rgba(&weightedColors[best_sel], pC, BC7ENC_FALSE, pParams->m_weights); + + if (err0 == err1) + { + // Prefer non-interpolation + if ((best_sel - 1) == 0) + best_sel = 0; + } + else if (err1 > err0) + { + err1 = err0; + --best_sel; + } + total_err += err1; + + pResults->m_pSelectors_temp[i] = (uint8_t)best_sel; + } + } + else + { + const float f = N / (float)(squarei(dr) + squarei(dg) + squarei(db) + .00000125f); + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const color_quad_u8 *pC = &pParams->m_pPixels[i]; + int r = pC->m_c[0]; + int g = pC->m_c[1]; + int b = pC->m_c[2]; + + int sel = (int)((float)((r - lr) * dr + (g - lg) * dg + (b - lb) * db) * f + .5f); + sel = clampi(sel, 1, N - 1); + + uint64_t err0 = compute_color_distance_rgb(&weightedColors[sel - 1], pC, BC7ENC_FALSE, pParams->m_weights); + uint64_t err1 = compute_color_distance_rgb(&weightedColors[sel], pC, BC7ENC_FALSE, pParams->m_weights); + + int best_sel = sel; + uint64_t best_err = err1; + if (err0 == err1) + { + // Prefer non-interpolation + if ((best_sel - 
1) == 0) + best_sel = 0; + } + else if (err0 < best_err) + { + best_err = err0; + best_sel = sel - 1; + } + + total_err += best_err; + + pResults->m_pSelectors_temp[i] = (uint8_t)best_sel; + } + } + } + else + { + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint64_t best_err = UINT64_MAX; + uint32_t best_sel = 0; + + if (pParams->m_has_alpha) + { + for (uint32_t j = 0; j < N; j++) + { + uint64_t err = compute_color_distance_rgba(&weightedColors[j], &pParams->m_pPixels[i], BC7ENC_TRUE, pParams->m_weights); + if (err < best_err) + { + best_err = err; + best_sel = j; + } + // Prefer non-interpolation + else if ((err == best_err) && (j == (N - 1))) + best_sel = j; + } + } + else + { + for (uint32_t j = 0; j < N; j++) + { + uint64_t err = compute_color_distance_rgb(&weightedColors[j], &pParams->m_pPixels[i], BC7ENC_TRUE, pParams->m_weights); + if (err < best_err) + { + best_err = err; + best_sel = j; + } + // Prefer non-interpolation + else if ((err == best_err) && (j == (N - 1))) + best_sel = j; + } + } + + total_err += best_err; + + pResults->m_pSelectors_temp[i] = (uint8_t)best_sel; + } + } + + if (total_err < pResults->m_best_overall_err) + { + pResults->m_best_overall_err = total_err; + + pResults->m_low_endpoint = *pLow; + pResults->m_high_endpoint = *pHigh; + + pResults->m_pbits[0] = pbits[0]; + pResults->m_pbits[1] = pbits[1]; + + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + } + + return total_err; +} + +static bool areDegenerateEndpoints(color_quad_u8* pTrialMinColor, color_quad_u8* pTrialMaxColor, const bc7enc_vec4F* pXl, const bc7enc_vec4F* pXh) +{ + for (uint32_t i = 0; i < 3; i++) + { + if (pTrialMinColor->m_c[i] == pTrialMaxColor->m_c[i]) + { + if (fabs(pXl->m_c[i] - pXh->m_c[i]) > 0.0f) + return true; + } + } + + return false; +} + +static void fixDegenerateEndpoints(uint32_t mode, color_quad_u8 *pTrialMinColor, color_quad_u8 *pTrialMaxColor, const 
bc7enc_vec4F*pXl, const bc7enc_vec4F*pXh, uint32_t iscale, int flags) +{ + if (mode == 255) + { + for (uint32_t i = 0; i < 3; i++) + { + if (pTrialMinColor->m_c[i] == pTrialMaxColor->m_c[i]) + { + if (fabs(pXl->m_c[i] - pXh->m_c[i]) > 0.000125f) + { + if (flags & 1) + { + if (pTrialMinColor->m_c[i] > 0) + pTrialMinColor->m_c[i]--; + } + if (flags & 2) + { + if (pTrialMaxColor->m_c[i] < iscale) + pTrialMaxColor->m_c[i]++; + } + } + } + } + } + else if (mode == 1) + { + // fix degenerate case where the input collapses to a single colorspace voxel, and we loose all freedom (test with grayscale ramps) + for (uint32_t i = 0; i < 3; i++) + { + if (pTrialMinColor->m_c[i] == pTrialMaxColor->m_c[i]) + { + if (fabs(pXl->m_c[i] - pXh->m_c[i]) > 0.000125f) + { + if (pTrialMinColor->m_c[i] > (iscale >> 1)) + { + if (pTrialMinColor->m_c[i] > 0) + pTrialMinColor->m_c[i]--; + else + if (pTrialMaxColor->m_c[i] < iscale) + pTrialMaxColor->m_c[i]++; + } + else + { + if (pTrialMaxColor->m_c[i] < iscale) + pTrialMaxColor->m_c[i]++; + else if (pTrialMinColor->m_c[i] > 0) + pTrialMinColor->m_c[i]--; + } + } + } + } + } +} + +static uint64_t find_optimal_solution(uint32_t mode, bc7enc_vec4F xl, bc7enc_vec4F xh, const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults) +{ + vec4F_saturate_in_place(&xl); vec4F_saturate_in_place(&xh); + + if (pParams->m_astc_endpoint_range) + { + const uint32_t levels = astc_get_levels(pParams->m_astc_endpoint_range); + + const float scale = 255.0f; + + color_quad_u8 trialMinColor8Bit, trialMaxColor8Bit; + color_quad_u8_set_clamped(&trialMinColor8Bit, (int)(xl.m_c[0] * scale + .5f), (int)(xl.m_c[1] * scale + .5f), (int)(xl.m_c[2] * scale + .5f), (int)(xl.m_c[3] * scale + .5f)); + color_quad_u8_set_clamped(&trialMaxColor8Bit, (int)(xh.m_c[0] * scale + .5f), (int)(xh.m_c[1] * scale + .5f), (int)(xh.m_c[2] * scale + .5f), (int)(xh.m_c[3] * scale + .5f)); + + color_quad_u8 trialMinColor, trialMaxColor; + for (uint32_t i = 0; i < 4; 
i++) + { + trialMinColor.m_c[i] = g_astc_nearest_sorted_index[pParams->m_astc_endpoint_range][trialMinColor8Bit.m_c[i]]; + trialMaxColor.m_c[i] = g_astc_nearest_sorted_index[pParams->m_astc_endpoint_range][trialMaxColor8Bit.m_c[i]]; + } + + if (areDegenerateEndpoints(&trialMinColor, &trialMaxColor, &xl, &xh)) + { + color_quad_u8 trialMinColorOrig(trialMinColor), trialMaxColorOrig(trialMaxColor); + + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, levels - 1, 1); + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + + trialMinColor = trialMinColorOrig; + trialMaxColor = trialMaxColorOrig; + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, levels - 1, 0); + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + + trialMinColor = trialMinColorOrig; + trialMaxColor = trialMaxColorOrig; + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, levels - 1, 2); + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + + trialMinColor = trialMinColorOrig; + trialMaxColor = trialMaxColorOrig; + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, levels - 1, 3); + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || 
color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + } + else + { + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + { + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + } + } + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + } + else if (pParams->m_has_pbits) + { + const int iscalep = (1 << (pParams->m_comp_bits + 1)) - 1; + const float scalep = (float)iscalep; + + const int32_t totalComps = pParams->m_has_alpha ? 4 : 3; + + uint32_t best_pbits[2]; + color_quad_u8 bestMinColor, bestMaxColor; + + if (!pParams->m_endpoints_share_pbit) + { + float best_err0 = 1e+9; + float best_err1 = 1e+9; + + for (int p = 0; p < 2; p++) + { + color_quad_u8 xMinColor, xMaxColor; + + // Notes: The pbit controls which quantization intervals are selected. + // total_levels=2^(comp_bits+1), where comp_bits=4 for mode 0, etc. 
+ // pbit 0: v=(b*2)/(total_levels-1), pbit 1: v=(b*2+1)/(total_levels-1) where b is the component bin from [0,total_levels/2-1] and v is the [0,1] component value + // rearranging you get for pbit 0: b=floor(v*(total_levels-1)/2+.5) + // rearranging you get for pbit 1: b=floor((v*(total_levels-1)-1)/2+.5) + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_quad_u8 scaledLow = scale_color(&xMinColor, pParams); + color_quad_u8 scaledHigh = scale_color(&xMaxColor, pParams); + + float err0 = 0, err1 = 0; + for (int i = 0; i < totalComps; i++) + { + err0 += squaref(scaledLow.m_c[i] - xl.m_c[i] * 255.0f); + err1 += squaref(scaledHigh.m_c[i] - xh.m_c[i] * 255.0f); + } + + if (err0 < best_err0) + { + best_err0 = err0; + best_pbits[0] = p; + + bestMinColor.m_c[0] = xMinColor.m_c[0] >> 1; + bestMinColor.m_c[1] = xMinColor.m_c[1] >> 1; + bestMinColor.m_c[2] = xMinColor.m_c[2] >> 1; + bestMinColor.m_c[3] = xMinColor.m_c[3] >> 1; + } + + if (err1 < best_err1) + { + best_err1 = err1; + best_pbits[1] = p; + + bestMaxColor.m_c[0] = xMaxColor.m_c[0] >> 1; + bestMaxColor.m_c[1] = xMaxColor.m_c[1] >> 1; + bestMaxColor.m_c[2] = xMaxColor.m_c[2] >> 1; + bestMaxColor.m_c[3] = xMaxColor.m_c[3] >> 1; + } + } + } + else + { + // Endpoints share pbits + float best_err = 1e+9; + + for (int p = 0; p < 2; p++) + { + color_quad_u8 xMinColor, xMaxColor; + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_quad_u8 scaledLow = scale_color(&xMinColor, pParams); + color_quad_u8 scaledHigh = scale_color(&xMaxColor, pParams); + + float err = 0; 
+ for (int i = 0; i < totalComps; i++) + err += squaref((scaledLow.m_c[i] / 255.0f) - xl.m_c[i]) + squaref((scaledHigh.m_c[i] / 255.0f) - xh.m_c[i]); + + if (err < best_err) + { + best_err = err; + best_pbits[0] = p; + best_pbits[1] = p; + for (uint32_t j = 0; j < 4; j++) + { + bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1; + bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1; + } + } + } + } + + fixDegenerateEndpoints(mode, &bestMinColor, &bestMaxColor, &xl, &xh, iscalep >> 1, 0); + + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&bestMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&bestMaxColor, &pResults->m_high_endpoint) || (best_pbits[0] != pResults->m_pbits[0]) || (best_pbits[1] != pResults->m_pbits[1])) + evaluate_solution(&bestMinColor, &bestMaxColor, best_pbits, pParams, pResults); + } + else + { + const int iscale = (1 << pParams->m_comp_bits) - 1; + const float scale = (float)iscale; + + color_quad_u8 trialMinColor, trialMaxColor; + color_quad_u8_set_clamped(&trialMinColor, (int)(xl.m_c[0] * scale + .5f), (int)(xl.m_c[1] * scale + .5f), (int)(xl.m_c[2] * scale + .5f), (int)(xl.m_c[3] * scale + .5f)); + color_quad_u8_set_clamped(&trialMaxColor, (int)(xh.m_c[0] * scale + .5f), (int)(xh.m_c[1] * scale + .5f), (int)(xh.m_c[2] * scale + .5f), (int)(xh.m_c[3] * scale + .5f)); + + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, iscale, 0); + + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + } + + return pResults->m_best_overall_err; +} + +void check_best_overall_error(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults) +{ + const uint32_t n = pParams->m_num_selector_weights; + + assert(n <= 32); + + color_quad_u8 colors[32]; + for 
(uint32_t c = 0; c < 4; c++) + { + colors[0].m_c[c] = g_astc_unquant[pParams->m_astc_endpoint_range][pResults->m_astc_low_endpoint.m_c[c]].m_unquant; + assert(colors[0].m_c[c] == g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[c]].m_unquant); + + colors[n-1].m_c[c] = g_astc_unquant[pParams->m_astc_endpoint_range][pResults->m_astc_high_endpoint.m_c[c]].m_unquant; + assert(colors[n-1].m_c[c] == g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[c]].m_unquant); + } + + for (uint32_t i = 1; i < pParams->m_num_selector_weights - 1; i++) + for (uint32_t c = 0; c < 4; c++) + colors[i].m_c[c] = (uint8_t)astc_interpolate_linear(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]); + +#ifdef _DEBUG + uint64_t total_err = 0; + for (uint32_t p = 0; p < pParams->m_num_pixels; p++) + { + const color_quad_u8 &orig = pParams->m_pPixels[p]; + const color_quad_u8 &packed = colors[pResults->m_pSelectors[p]]; + + if (pParams->m_has_alpha) + total_err += compute_color_distance_rgba(&orig, &packed, pParams->m_perceptual, pParams->m_weights); + else + total_err += compute_color_distance_rgb(&orig, &packed, pParams->m_perceptual, pParams->m_weights); + } + assert(total_err == pResults->m_best_overall_err); +#endif + + // HACK HACK + //if (total_err != pResults->m_best_overall_err) + // printf("X"); +} + +static bool is_solid_rgb(const color_cell_compressor_params *pParams, uint32_t &r, uint32_t &g, uint32_t &b) +{ + r = pParams->m_pPixels[0].m_c[0]; + g = pParams->m_pPixels[0].m_c[1]; + b = pParams->m_pPixels[0].m_c[2]; + + bool allSame = true; + for (uint32_t i = 1; i < pParams->m_num_pixels; i++) + { + if ((r != pParams->m_pPixels[i].m_c[0]) || (g != pParams->m_pPixels[i].m_c[1]) || (b != pParams->m_pPixels[i].m_c[2])) + { + allSame = false; + break; + } + } + + return allSame; +} + +static bool is_solid_rgba(const color_cell_compressor_params *pParams, uint32_t &r, uint32_t &g, uint32_t 
&b, uint32_t &a) +{ + r = pParams->m_pPixels[0].m_c[0]; + g = pParams->m_pPixels[0].m_c[1]; + b = pParams->m_pPixels[0].m_c[2]; + a = pParams->m_pPixels[0].m_c[3]; + + bool allSame = true; + for (uint32_t i = 1; i < pParams->m_num_pixels; i++) + { + if ((r != pParams->m_pPixels[i].m_c[0]) || (g != pParams->m_pPixels[i].m_c[1]) || (b != pParams->m_pPixels[i].m_c[2]) || (a != pParams->m_pPixels[i].m_c[3])) + { + allSame = false; + break; + } + } + + return allSame; +} + +uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, const bc7enc_compress_block_params *pComp_params) +{ + if (!pParams->m_astc_endpoint_range) + { + assert((mode == 6) || (!pParams->m_has_alpha)); + } + assert(pParams->m_num_selector_weights >= 1 && pParams->m_num_selector_weights <= 32); + assert(pParams->m_pSelector_weights[0] == 0); + assert(pParams->m_pSelector_weights[pParams->m_num_selector_weights - 1] == 64); + + pResults->m_best_overall_err = UINT64_MAX; + + uint32_t cr, cg, cb, ca; + + // If the partition's colors are all the same, then just pack them as a single color. 
+ if (!pParams->m_pForce_selectors) + { + if (mode == 1) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_mode1_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 8) && (pParams->m_num_selector_weights == 8) && (!pParams->m_has_alpha)) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_astc_4bit_3bit_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 7) && (pParams->m_num_selector_weights == 4) && (!pParams->m_has_alpha)) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_astc_range7_2bit_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 8) && (pParams->m_num_selector_weights == 4) && (pParams->m_has_alpha)) + { + if (is_solid_rgba(pParams, cr, cg, cb, ca)) + return pack_astc_4bit_2bit_to_one_color_rgba(pParams, pResults, cr, cg, cb, ca, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 13) && (pParams->m_num_selector_weights == 4) && (!pParams->m_has_alpha)) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_astc_range13_2bit_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 11) && (pParams->m_num_selector_weights == 32) && (!pParams->m_has_alpha)) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_astc_range11_5bit_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + } + + // Compute partition's mean color and principle axis. 
+ bc7enc_vec4F meanColor, axis; + vec4F_set_scalar(&meanColor, 0.0f); + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + bc7enc_vec4F color = vec4F_from_color(&pParams->m_pPixels[i]); + meanColor = vec4F_add(&meanColor, &color); + } + + bc7enc_vec4F meanColorScaled = vec4F_mul(&meanColor, 1.0f / (float)(pParams->m_num_pixels)); + + meanColor = vec4F_mul(&meanColor, 1.0f / (float)(pParams->m_num_pixels * 255.0f)); + vec4F_saturate_in_place(&meanColor); + + if (pParams->m_has_alpha) + { + // Use incremental PCA for RGBA PCA, because it's simple. + vec4F_set_scalar(&axis, 0.0f); + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + bc7enc_vec4F color = vec4F_from_color(&pParams->m_pPixels[i]); + color = vec4F_sub(&color, &meanColorScaled); + bc7enc_vec4F a = vec4F_mul(&color, color.m_c[0]); + bc7enc_vec4F b = vec4F_mul(&color, color.m_c[1]); + bc7enc_vec4F c = vec4F_mul(&color, color.m_c[2]); + bc7enc_vec4F d = vec4F_mul(&color, color.m_c[3]); + bc7enc_vec4F n = i ? axis : color; + vec4F_normalize_in_place(&n); + axis.m_c[0] += vec4F_dot(&a, &n); + axis.m_c[1] += vec4F_dot(&b, &n); + axis.m_c[2] += vec4F_dot(&c, &n); + axis.m_c[3] += vec4F_dot(&d, &n); + } + vec4F_normalize_in_place(&axis); + } + else + { + // Use covar technique for RGB PCA, because it doesn't require per-pixel normalization. 
+ float cov[6] = { 0, 0, 0, 0, 0, 0 }; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const color_quad_u8 *pV = &pParams->m_pPixels[i]; + float r = pV->m_c[0] - meanColorScaled.m_c[0]; + float g = pV->m_c[1] - meanColorScaled.m_c[1]; + float b = pV->m_c[2] - meanColorScaled.m_c[2]; + cov[0] += r*r; cov[1] += r*g; cov[2] += r*b; cov[3] += g*g; cov[4] += g*b; cov[5] += b*b; + } + + float xr = .9f, xg = 1.0f, xb = .7f; + for (uint32_t iter = 0; iter < 3; iter++) + { + float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; + float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; + float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; + + float m = maximumf(maximumf(fabsf(r), fabsf(g)), fabsf(b)); + if (m > 1e-10f) + { + m = 1.0f / m; + r *= m; g *= m; b *= m; + } + + xr = r; xg = g; xb = b; + } + + float len = xr * xr + xg * xg + xb * xb; + if (len < 1e-10f) + vec4F_set_scalar(&axis, 0.0f); + else + { + len = 1.0f / sqrtf(len); + xr *= len; xg *= len; xb *= len; + vec4F_set(&axis, xr, xg, xb, 0); + } + } + + if (vec4F_dot(&axis, &axis) < .5f) + { + if (pParams->m_perceptual) + vec4F_set(&axis, .213f, .715f, .072f, pParams->m_has_alpha ? .715f : 0); + else + vec4F_set(&axis, 1.0f, 1.0f, 1.0f, pParams->m_has_alpha ? 
1.0f : 0); + vec4F_normalize_in_place(&axis); + } + + bc7enc_vec4F minColor, maxColor; + + float l = 1e+9f, h = -1e+9f; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + bc7enc_vec4F color = vec4F_from_color(&pParams->m_pPixels[i]); + + bc7enc_vec4F q = vec4F_sub(&color, &meanColorScaled); + float d = vec4F_dot(&q, &axis); + + l = minimumf(l, d); + h = maximumf(h, d); + } + + l *= (1.0f / 255.0f); + h *= (1.0f / 255.0f); + + bc7enc_vec4F b0 = vec4F_mul(&axis, l); + bc7enc_vec4F b1 = vec4F_mul(&axis, h); + bc7enc_vec4F c0 = vec4F_add(&meanColor, &b0); + bc7enc_vec4F c1 = vec4F_add(&meanColor, &b1); + minColor = vec4F_saturate(&c0); + maxColor = vec4F_saturate(&c1); + + bc7enc_vec4F whiteVec; + vec4F_set_scalar(&whiteVec, 1.0f); + if (vec4F_dot(&minColor, &whiteVec) > vec4F_dot(&maxColor, &whiteVec)) + { +#if 1 + std::swap(minColor.m_c[0], maxColor.m_c[0]); + std::swap(minColor.m_c[1], maxColor.m_c[1]); + std::swap(minColor.m_c[2], maxColor.m_c[2]); + std::swap(minColor.m_c[3], maxColor.m_c[3]); +#elif 0 + // Fails to compile correctly with MSVC 2019 (code generation bug) + std::swap(minColor, maxColor); +#else + // Fails with MSVC 2019 + bc7enc_vec4F temp = minColor; + minColor = maxColor; + maxColor = temp; +#endif + } + + // First find a solution using the block's PCA. + if (!find_optimal_solution(mode, minColor, maxColor, pParams, pResults)) + return 0; + + for (uint32_t i = 0; i < pComp_params->m_least_squares_passes; i++) + { + // Now try to refine the solution using least squares by computing the optimal endpoints from the current selectors. 
+ bc7enc_vec4F xl, xh; + vec4F_set_scalar(&xl, 0.0f); + vec4F_set_scalar(&xh, 0.0f); + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, pResults->m_pSelectors, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, pResults->m_pSelectors, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + } + + if ((!pParams->m_pForce_selectors) && (pComp_params->m_uber_level > 0)) + { + // In uber level 1, try varying the selectors a little, somewhat like cluster fit would. First try incrementing the minimum selectors, + // then try decrementing the selectrors, then try both. + uint8_t selectors_temp[16], selectors_temp1[16]; + memcpy(selectors_temp, pResults->m_pSelectors, pParams->m_num_pixels); + + const int max_selector = pParams->m_num_selector_weights - 1; + + uint32_t min_sel = 256; + uint32_t max_sel = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint32_t sel = selectors_temp[i]; + min_sel = minimumu(min_sel, sel); + max_sel = maximumu(max_sel, sel); + } + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint32_t sel = selectors_temp[i]; + if ((sel == min_sel) && (sel < (pParams->m_num_selector_weights - 1))) + sel++; + selectors_temp1[i] = (uint8_t)sel; + } + + bc7enc_vec4F xl, xh; + vec4F_set_scalar(&xl, 0.0f); + vec4F_set_scalar(&xh, 0.0f); + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if 
(!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint32_t sel = selectors_temp[i]; + if ((sel == max_sel) && (sel > 0)) + sel--; + selectors_temp1[i] = (uint8_t)sel; + } + + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint32_t sel = selectors_temp[i]; + if ((sel == min_sel) && (sel < (pParams->m_num_selector_weights - 1))) + sel++; + else if ((sel == max_sel) && (sel > 0)) + sel--; + selectors_temp1[i] = (uint8_t)sel; + } + + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + + // In uber levels 2+, try taking more advantage of endpoint extrapolation by scaling the selectors in one direction or another. + const uint32_t uber_err_thresh = (pParams->m_num_pixels * 56) >> 4; + if ((pComp_params->m_uber_level >= 2) && (pResults->m_best_overall_err > uber_err_thresh)) + { + const int Q = (pComp_params->m_uber_level >= 4) ? 
(pComp_params->m_uber_level - 2) : 1; + for (int ly = -Q; ly <= 1; ly++) + { + for (int hy = max_selector - 1; hy <= (max_selector + Q); hy++) + { + if ((ly == 0) && (hy == max_selector)) + continue; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + selectors_temp1[i] = (uint8_t)clampf(floorf((float)max_selector * ((float)selectors_temp[i] - (float)ly) / ((float)hy - (float)ly) + .5f), 0, (float)max_selector); + + //bc7enc_vec4F xl, xh; + vec4F_set_scalar(&xl, 0.0f); + vec4F_set_scalar(&xh, 0.0f); + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + } + } + } + } + + if (!pParams->m_pForce_selectors) + { + // Try encoding the partition as a single color by using the optimal single colors tables to encode the block to its mean. 
+ if (mode == 1) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_mode1_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if ((pParams->m_astc_endpoint_range == 8) && (pParams->m_num_selector_weights == 8) && (!pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_astc_4bit_3bit_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if ((pParams->m_astc_endpoint_range == 7) && (pParams->m_num_selector_weights == 4) && (!pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_astc_range7_2bit_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if 
((pParams->m_astc_endpoint_range == 8) && (pParams->m_num_selector_weights == 4) && (pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f), a = (int)(.5f + meanColor.m_c[3] * 255.0f); + uint64_t avg_err = pack_astc_4bit_2bit_to_one_color_rgba(pParams, &avg_results, r, g, b, a, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if ((pParams->m_astc_endpoint_range == 13) && (pParams->m_num_selector_weights == 4) && (!pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_astc_range13_2bit_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if ((pParams->m_astc_endpoint_range == 11) && (pParams->m_num_selector_weights == 32) && (!pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_astc_range11_5bit_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, 
pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + } + +#if BC7ENC_CHECK_OVERALL_ERROR + check_best_overall_error(pParams, pResults); +#endif + + return pResults->m_best_overall_err; +} + +uint64_t color_cell_compression_est_astc( + uint32_t num_weights, uint32_t num_comps, const uint32_t *pWeight_table, + uint32_t num_pixels, const color_quad_u8* pPixels, + uint64_t best_err_so_far, const uint32_t weights[4]) +{ + assert(num_comps == 3 || num_comps == 4); + assert(num_weights >= 1 && num_weights <= 32); + assert(pWeight_table[0] == 0 && pWeight_table[num_weights - 1] == 64); + + // Find RGB bounds as an approximation of the block's principle axis + uint32_t lr = 255, lg = 255, lb = 255, la = 255; + uint32_t hr = 0, hg = 0, hb = 0, ha = 0; + if (num_comps == 4) + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + if (pC->m_c[0] < lr) lr = pC->m_c[0]; + if (pC->m_c[1] < lg) lg = pC->m_c[1]; + if (pC->m_c[2] < lb) lb = pC->m_c[2]; + if (pC->m_c[3] < la) la = pC->m_c[3]; + + if (pC->m_c[0] > hr) hr = pC->m_c[0]; + if (pC->m_c[1] > hg) hg = pC->m_c[1]; + if (pC->m_c[2] > hb) hb = pC->m_c[2]; + if (pC->m_c[3] > ha) ha = pC->m_c[3]; + } + } + else + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + if (pC->m_c[0] < lr) lr = pC->m_c[0]; + if (pC->m_c[1] < lg) lg = pC->m_c[1]; + if (pC->m_c[2] < lb) lb = pC->m_c[2]; + + if (pC->m_c[0] > hr) hr = pC->m_c[0]; + if (pC->m_c[1] > hg) hg = pC->m_c[1]; + if (pC->m_c[2] > hb) hb = pC->m_c[2]; + } + la = 255; + ha = 255; + } + + color_quad_u8 lowColor, highColor; + color_quad_u8_set(&lowColor, lr, lg, lb, la); + color_quad_u8_set(&highColor, hr, hg, hb, ha); + + // Place endpoints at bbox diagonals and compute interpolated colors + color_quad_u8 weightedColors[32]; + + weightedColors[0] = lowColor; + weightedColors[num_weights - 1] = highColor; + for 
(uint32_t i = 1; i < (num_weights - 1); i++) + { + weightedColors[i].m_c[0] = (uint8_t)astc_interpolate_linear(lowColor.m_c[0], highColor.m_c[0], pWeight_table[i]); + weightedColors[i].m_c[1] = (uint8_t)astc_interpolate_linear(lowColor.m_c[1], highColor.m_c[1], pWeight_table[i]); + weightedColors[i].m_c[2] = (uint8_t)astc_interpolate_linear(lowColor.m_c[2], highColor.m_c[2], pWeight_table[i]); + weightedColors[i].m_c[3] = (num_comps == 4) ? (uint8_t)astc_interpolate_linear(lowColor.m_c[3], highColor.m_c[3], pWeight_table[i]) : 255; + } + + // Compute dots and thresholds + const int ar = highColor.m_c[0] - lowColor.m_c[0]; + const int ag = highColor.m_c[1] - lowColor.m_c[1]; + const int ab = highColor.m_c[2] - lowColor.m_c[2]; + const int aa = highColor.m_c[3] - lowColor.m_c[3]; + + int dots[32]; + if (num_comps == 4) + { + for (uint32_t i = 0; i < num_weights; i++) + dots[i] = weightedColors[i].m_c[0] * ar + weightedColors[i].m_c[1] * ag + weightedColors[i].m_c[2] * ab + weightedColors[i].m_c[3] * aa; + } + else + { + assert(aa == 0); + for (uint32_t i = 0; i < num_weights; i++) + dots[i] = weightedColors[i].m_c[0] * ar + weightedColors[i].m_c[1] * ag + weightedColors[i].m_c[2] * ab; + } + + int thresh[32 - 1]; + for (uint32_t i = 0; i < (num_weights - 1); i++) + thresh[i] = (dots[i] + dots[i + 1] + 1) >> 1; + + uint64_t total_err = 0; + if ((weights[0] | weights[1] | weights[2] | weights[3]) == 1) + { + if (num_comps == 4) + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2] + aa * pC->m_c[3]; + + // Find approximate selector + uint32_t s = 0; + for (int j = num_weights - 2; j >= 0; j--) + { + if (d >= thresh[j]) + { + s = j + 1; + break; + } + } + + // Compute error + const color_quad_u8* pE1 = &weightedColors[s]; + + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + int 
da = (int)pE1->m_c[3] - (int)pC->m_c[3]; + + total_err += (dr * dr) + (dg * dg) + (db * db) + (da * da); + if (total_err > best_err_so_far) + break; + } + } + else + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2]; + + // Find approximate selector + uint32_t s = 0; + for (int j = num_weights - 2; j >= 0; j--) + { + if (d >= thresh[j]) + { + s = j + 1; + break; + } + } + + // Compute error + const color_quad_u8* pE1 = &weightedColors[s]; + + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + + total_err += (dr * dr) + (dg * dg) + (db * db); + if (total_err > best_err_so_far) + break; + } + } + } + else + { + if (num_comps == 4) + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2] + aa * pC->m_c[3]; + + // Find approximate selector + uint32_t s = 0; + for (int j = num_weights - 2; j >= 0; j--) + { + if (d >= thresh[j]) + { + s = j + 1; + break; + } + } + + // Compute error + const color_quad_u8* pE1 = &weightedColors[s]; + + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + int da = (int)pE1->m_c[3] - (int)pC->m_c[3]; + + total_err += weights[0] * (dr * dr) + weights[1] * (dg * dg) + weights[2] * (db * db) + weights[3] * (da * da); + if (total_err > best_err_so_far) + break; + } + } + else + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2]; + + // Find approximate selector + uint32_t s = 0; + for (int j = num_weights - 2; j >= 0; j--) + { + if (d >= thresh[j]) + { + s = j + 1; + break; + } + } + + // Compute error + const color_quad_u8* pE1 = &weightedColors[s]; + + int dr = 
(int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + + total_err += weights[0] * (dr * dr) + weights[1] * (dg * dg) + weights[2] * (db * db); + if (total_err > best_err_so_far) + break; + } + } + } + + return total_err; +} + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_bc7enc.h b/thirdparty/basisu/encoder/basisu_bc7enc.h new file mode 100644 index 000000000..925d6b2e8 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_bc7enc.h @@ -0,0 +1,132 @@ +// File: basisu_bc7enc.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "basisu_enc.h" +#include "../transcoder/basisu_transcoder_uastc.h" + +namespace basisu +{ + +#define BC7ENC_MAX_PARTITIONS1 (64) +#define BC7ENC_MAX_UBER_LEVEL (4) + + typedef uint8_t bc7enc_bool; + +#define BC7ENC_TRUE (1) +#define BC7ENC_FALSE (0) + + typedef struct { float m_c[4]; } bc7enc_vec4F; + + extern const float g_bc7_weights1x[2 * 4]; + extern const float g_bc7_weights2x[4 * 4]; + extern const float g_bc7_weights3x[8 * 4]; + extern const float g_bc7_weights4x[16 * 4]; + extern const float g_astc_weights4x[16 * 4]; + extern const float g_astc_weights5x[32 * 4]; + extern const float g_astc_weights_3levelsx[3 * 4]; + + extern basist::astc_quant_bin g_astc_sorted_order_unquant[basist::BC7ENC_TOTAL_ASTC_RANGES][256]; // [sorted unquantized order] + + struct color_cell_compressor_params + { + uint32_t m_num_pixels; + const basist::color_quad_u8* m_pPixels; + + uint32_t m_num_selector_weights; + const uint32_t* m_pSelector_weights; + + const bc7enc_vec4F* m_pSelector_weightsx; + uint32_t m_comp_bits; + + const uint8_t *m_pForce_selectors; + + // Non-zero m_astc_endpoint_range enables ASTC mode. m_comp_bits and m_has_pbits are always false. We only support 2, 3, or 4 bit weight encodings. + uint32_t m_astc_endpoint_range; + + uint32_t m_weights[4]; + bc7enc_bool m_has_alpha; + bc7enc_bool m_has_pbits; + bc7enc_bool m_endpoints_share_pbit; + bc7enc_bool m_perceptual; + }; + + struct color_cell_compressor_results + { + uint64_t m_best_overall_err; + basist::color_quad_u8 m_low_endpoint; + basist::color_quad_u8 m_high_endpoint; + uint32_t m_pbits[2]; + uint8_t* m_pSelectors; + uint8_t* m_pSelectors_temp; + + // Encoded ASTC indices, if ASTC mode is enabled + basist::color_quad_u8 m_astc_low_endpoint; + basist::color_quad_u8 m_astc_high_endpoint; + }; + + struct bc7enc_compress_block_params + { + // m_max_partitions_mode1 may range from 0 (disables mode 1) to BC7ENC_MAX_PARTITIONS1. 
The higher this value, the slower the compressor, but the higher the quality. + uint32_t m_max_partitions_mode1; + + // Relative RGBA or YCbCrA weights. + uint32_t m_weights[4]; + + // m_uber_level may range from 0 to BC7ENC_MAX_UBER_LEVEL. The higher this value, the slower the compressor, but the higher the quality. + uint32_t m_uber_level; + + // If m_perceptual is true, colorspace error is computed in YCbCr space, otherwise RGB. + bc7enc_bool m_perceptual; + + uint32_t m_least_squares_passes; + }; + + uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_params* pParams, color_cell_compressor_results* pResults, const bc7enc_compress_block_params* pComp_params); + + uint64_t color_cell_compression_est_astc( + uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeight_table, + uint32_t num_pixels, const basist::color_quad_u8* pPixels, + uint64_t best_err_so_far, const uint32_t weights[4]); + + inline void bc7enc_compress_block_params_init_linear_weights(bc7enc_compress_block_params* p) + { + p->m_perceptual = BC7ENC_FALSE; + p->m_weights[0] = 1; + p->m_weights[1] = 1; + p->m_weights[2] = 1; + p->m_weights[3] = 1; + } + + inline void bc7enc_compress_block_params_init_perceptual_weights(bc7enc_compress_block_params* p) + { + p->m_perceptual = BC7ENC_TRUE; + p->m_weights[0] = 128; + p->m_weights[1] = 64; + p->m_weights[2] = 16; + p->m_weights[3] = 32; + } + + inline void bc7enc_compress_block_params_init(bc7enc_compress_block_params* p) + { + p->m_max_partitions_mode1 = BC7ENC_MAX_PARTITIONS1; + p->m_least_squares_passes = 1; + p->m_uber_level = 0; + bc7enc_compress_block_params_init_perceptual_weights(p); + } + + // bc7enc_compress_block_init() MUST be called before calling bc7enc_compress_block() (or you'll get artifacts). 
+ void bc7enc_compress_block_init(); + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_comp.cpp b/thirdparty/basisu/encoder/basisu_comp.cpp new file mode 100644 index 000000000..95b187159 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_comp.cpp @@ -0,0 +1,4334 @@ +// basisu_comp.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_comp.h" +#include "basisu_enc.h" +#include +#include +#include + +//#define UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS + +// basisu_transcoder.cpp is where basisu_miniz lives now, we just need the declarations here. +#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES +#include "basisu_miniz.h" + +#include "basisu_opencl.h" + +#include "../transcoder/basisu_astc_hdr_core.h" + +#if !BASISD_SUPPORT_KTX2 +#error BASISD_SUPPORT_KTX2 must be enabled (set to 1). +#endif + +#if BASISD_SUPPORT_KTX2_ZSTD +#include "../zstd/zstd.h" +#endif + +// Set to 1 to disable the mipPadding alignment workaround (which only seems to be needed when no key-values are written at all) +#define BASISU_DISABLE_KTX2_ALIGNMENT_WORKAROUND (0) + +// Set to 1 to disable writing all KTX2 key values, triggering an early validator bug. 
+#define BASISU_DISABLE_KTX2_KEY_VALUES (0) + +using namespace buminiz; + +#define BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN 0 +#define DEBUG_CROP_TEXTURE_TO_64x64 (0) +#define DEBUG_RESIZE_TEXTURE (0) + +namespace basisu +{ + basis_compressor::basis_compressor() : + m_pOpenCL_context(nullptr), + m_fmt_mode(basist::basis_tex_format::cETC1S), + m_basis_file_size(0), + m_basis_bits_per_texel(0.0f), + m_total_blocks(0), + m_hdr_image_scale(1.0f), + m_ldr_to_hdr_upconversion_nit_multiplier(1.0f), + m_upconverted_any_ldr_images(false), + m_any_source_image_has_alpha(false), + m_opencl_failed(false) + { + debug_printf("basis_compressor::basis_compressor\n"); + + assert(g_library_initialized); + } + + basis_compressor::~basis_compressor() + { + if (m_pOpenCL_context) + { + opencl_destroy_context(m_pOpenCL_context); + m_pOpenCL_context = nullptr; + } + } + + void basis_compressor::check_for_hdr_inputs() + { + if ((!m_params.m_source_filenames.size()) && (!m_params.m_source_images.size())) + { + if (m_params.m_source_images_hdr.size()) + { + // Assume they want UASTC HDR if they've specified any HDR source images. + m_params.m_hdr = true; + } + } + + if (!m_params.m_hdr) + { + // See if any files are .EXR or .HDR, if so switch the compressor to UASTC HDR mode. 
+ for (uint32_t i = 0; i < m_params.m_source_filenames.size(); i++) + { + std::string filename; + string_get_filename(m_params.m_source_filenames[i].c_str(), filename); + + std::string ext(string_get_extension(filename)); + string_tolower(ext); + + if ((ext == "exr") || (ext == "hdr")) + { + m_params.m_hdr = true; + break; + } + } + } + + if (m_params.m_hdr) + { + if (m_params.m_source_alpha_filenames.size()) + { + debug_printf("Warning: Alpha channel image filenames are not yet supported in UASTC HDR/ASTC HDR modes.\n"); + m_params.m_source_alpha_filenames.clear(); + } + } + + if (m_params.m_hdr) + m_params.m_uastc = true; + } + + bool basis_compressor::sanity_check_input_params() + { + // Check for no source filenames specified. + if ((m_params.m_read_source_images) && (!m_params.m_source_filenames.size())) + { + assert(0); + return false; + } + + // See if they've specified any source filenames, but didn't tell us to read them. + if ((!m_params.m_read_source_images) && (m_params.m_source_filenames.size())) + { + assert(0); + return false; + } + + // Sanity check the input image parameters. + if (m_params.m_read_source_images) + { + // Caller can't specify their own images if they want us to read source images from files. + if (m_params.m_source_images.size() || m_params.m_source_images_hdr.size()) + { + assert(0); + return false; + } + + if (m_params.m_source_mipmap_images.size() || m_params.m_source_mipmap_images_hdr.size()) + { + assert(0); + return false; + } + } + else + { + // They didn't tell us to read any source files, so check for no LDR/HDR source images. + if (!m_params.m_source_images.size() && !m_params.m_source_images_hdr.size()) + { + assert(0); + return false; + } + + // Now we know we've been supplied LDR and/or HDR source images, check for LDR vs. HDR conflicts. + + if (m_params.m_source_images.size()) + { + // They've supplied LDR images, so make sure they also haven't specified HDR input images. 
+ if (m_params.m_source_images_hdr.size() || m_params.m_source_mipmap_images_hdr.size()) + { + assert(0); + return false; + } + } + else + { + // No LDR images, so make sure they haven't specified any LDR mipmaps. + if (m_params.m_source_mipmap_images.size()) + { + assert(0); + return false; + } + + // No LDR images, so ensure they've supplied some HDR images to process. + if (!m_params.m_source_images_hdr.size()) + { + assert(0); + return false; + } + } + } + + return true; + } + + bool basis_compressor::init(const basis_compressor_params ¶ms) + { + debug_printf("basis_compressor::init\n"); + + if (!g_library_initialized) + { + error_printf("basis_compressor::init: basisu_encoder_init() MUST be called before using any encoder functionality!\n"); + return false; + } + + if (!params.m_pJob_pool) + { + error_printf("basis_compressor::init: A non-null job_pool pointer must be specified\n"); + return false; + } + + m_params = params; + + if ((m_params.m_compute_stats) && (!m_params.m_validate_output_data)) + m_params.m_validate_output_data = true; + + m_hdr_image_scale = 1.0f; + m_ldr_to_hdr_upconversion_nit_multiplier = 1.0f; + m_upconverted_any_ldr_images = false; + + check_for_hdr_inputs(); + + if (m_params.m_debug) + { + debug_printf("basis_compressor::init:\n"); + +#define PRINT_BOOL_VALUE(v) fmt_debug_printf("{}: {} {}\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); +#define PRINT_INT_VALUE(v) fmt_debug_printf("{}: {} {}\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); +#define PRINT_UINT_VALUE(v) fmt_debug_printf("{}: {} {}\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); +#define PRINT_FLOAT_VALUE(v) fmt_debug_printf("{}: {} {}\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); + + fmt_debug_printf("Source LDR images: {}, HDR images: {}, filenames: {}, alpha filenames: {}, LDR mipmap images: {}, HDR mipmap images: {}\n", + 
(uint64_t)m_params.m_source_images.size(), (uint64_t)m_params.m_source_images_hdr.size(), + (uint64_t)m_params.m_source_filenames.size(), (uint64_t)m_params.m_source_alpha_filenames.size(), + (uint64_t)m_params.m_source_mipmap_images.size(), (uint64_t)m_params.m_source_mipmap_images_hdr.size()); + + if (m_params.m_source_mipmap_images.size()) + { + debug_printf("m_source_mipmap_images array sizes:\n"); + for (uint32_t i = 0; i < m_params.m_source_mipmap_images.size(); i++) + debug_printf("%u ", m_params.m_source_mipmap_images[i].size()); + debug_printf("\n"); + } + + if (m_params.m_source_mipmap_images_hdr.size()) + { + debug_printf("m_source_mipmap_images_hdr array sizes:\n"); + for (uint32_t i = 0; i < m_params.m_source_mipmap_images_hdr.size(); i++) + debug_printf("%u ", m_params.m_source_mipmap_images_hdr[i].size()); + debug_printf("\n"); + } + + PRINT_BOOL_VALUE(m_hdr); + + switch (m_params.m_hdr_mode) + { + case hdr_modes::cUASTC_HDR_4X4: + { + fmt_debug_printf("m_hdr_mode: cUASTC_HDR_4X4\n"); + break; + } + case hdr_modes::cASTC_HDR_6X6: + { + fmt_debug_printf("m_hdr_mode: cASTC_HDR_6X6\n"); + break; + } + case hdr_modes::cASTC_HDR_6X6_INTERMEDIATE: + { + fmt_debug_printf("m_hdr_mode: cASTC_HDR_6X6_INTERMEDIATE\n"); + break; + } + default: + assert(false); + return false; + } + + PRINT_BOOL_VALUE(m_uastc); + PRINT_BOOL_VALUE(m_use_opencl); + PRINT_BOOL_VALUE(m_y_flip); + PRINT_BOOL_VALUE(m_debug); + PRINT_BOOL_VALUE(m_validate_etc1s); + PRINT_BOOL_VALUE(m_debug_images); + PRINT_INT_VALUE(m_compression_level); + PRINT_BOOL_VALUE(m_perceptual); + PRINT_BOOL_VALUE(m_no_endpoint_rdo); + PRINT_BOOL_VALUE(m_no_selector_rdo); + PRINT_BOOL_VALUE(m_read_source_images); + PRINT_BOOL_VALUE(m_write_output_basis_or_ktx2_files); + PRINT_BOOL_VALUE(m_compute_stats); + PRINT_BOOL_VALUE(m_check_for_alpha); + PRINT_BOOL_VALUE(m_force_alpha); + debug_printf("swizzle: %d,%d,%d,%d\n", + m_params.m_swizzle[0], + m_params.m_swizzle[1], + m_params.m_swizzle[2], + 
m_params.m_swizzle[3]); + PRINT_BOOL_VALUE(m_renormalize); + PRINT_BOOL_VALUE(m_multithreading); + PRINT_BOOL_VALUE(m_disable_hierarchical_endpoint_codebooks); + + PRINT_FLOAT_VALUE(m_endpoint_rdo_thresh); + PRINT_FLOAT_VALUE(m_selector_rdo_thresh); + + PRINT_BOOL_VALUE(m_mip_gen); + PRINT_BOOL_VALUE(m_mip_renormalize); + PRINT_BOOL_VALUE(m_mip_wrapping); + PRINT_BOOL_VALUE(m_mip_fast); + PRINT_BOOL_VALUE(m_mip_srgb); + PRINT_FLOAT_VALUE(m_mip_premultiplied); + PRINT_FLOAT_VALUE(m_mip_scale); + PRINT_INT_VALUE(m_mip_smallest_dimension); + debug_printf("m_mip_filter: %s\n", m_params.m_mip_filter.c_str()); + + debug_printf("m_max_endpoint_clusters: %u\n", m_params.m_etc1s_max_endpoint_clusters); + debug_printf("m_max_selector_clusters: %u\n", m_params.m_etc1s_max_selector_clusters); + debug_printf("m_etc1s_quality_level: %i\n", m_params.m_etc1s_quality_level); + debug_printf("UASTC HDR 4x4 quality level: %u\n", m_params.m_uastc_hdr_4x4_options.m_level); + + debug_printf("m_tex_type: %u\n", m_params.m_tex_type); + debug_printf("m_userdata0: 0x%X, m_userdata1: 0x%X\n", m_params.m_userdata0, m_params.m_userdata1); + debug_printf("m_us_per_frame: %i (%f fps)\n", m_params.m_us_per_frame, m_params.m_us_per_frame ? 
1.0f / (m_params.m_us_per_frame / 1000000.0f) : 0); + debug_printf("m_pack_uastc_ldr_4x4_flags: 0x%X\n", m_params.m_pack_uastc_ldr_4x4_flags); + + PRINT_BOOL_VALUE(m_rdo_uastc_ldr_4x4); + PRINT_FLOAT_VALUE(m_rdo_uastc_ldr_4x4_quality_scalar); + PRINT_INT_VALUE(m_rdo_uastc_ldr_4x4_dict_size); + PRINT_FLOAT_VALUE(m_rdo_uastc_ldr_4x4_max_allowed_rms_increase_ratio); + PRINT_FLOAT_VALUE(m_rdo_uastc_ldr_4x4_skip_block_rms_thresh); + PRINT_FLOAT_VALUE(m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale); + PRINT_FLOAT_VALUE(m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev); + PRINT_BOOL_VALUE(m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode) + PRINT_BOOL_VALUE(m_rdo_uastc_ldr_4x4_multithreading); + + PRINT_INT_VALUE(m_resample_width); + PRINT_INT_VALUE(m_resample_height); + PRINT_FLOAT_VALUE(m_resample_factor); + + debug_printf("Has global codebooks: %u\n", m_params.m_pGlobal_codebooks ? 1 : 0); + if (m_params.m_pGlobal_codebooks) + { + debug_printf("Global codebook endpoints: %u selectors: %u\n", m_params.m_pGlobal_codebooks->get_endpoints().size(), m_params.m_pGlobal_codebooks->get_selectors().size()); + } + + PRINT_BOOL_VALUE(m_create_ktx2_file); + + debug_printf("KTX2 UASTC supercompression: %u\n", m_params.m_ktx2_uastc_supercompression); + debug_printf("KTX2 Zstd supercompression level: %i\n", (int)m_params.m_ktx2_zstd_supercompression_level); + debug_printf("KTX2 sRGB transfer func: %u\n", (int)m_params.m_ktx2_srgb_transfer_func); + debug_printf("Total KTX2 key values: %u\n", m_params.m_ktx2_key_values.size()); + for (uint32_t i = 0; i < m_params.m_ktx2_key_values.size(); i++) + { + debug_printf("Key: \"%s\"\n", m_params.m_ktx2_key_values[i].m_key.data()); + debug_printf("Value size: %u\n", m_params.m_ktx2_key_values[i].m_value.size()); + } + + PRINT_BOOL_VALUE(m_validate_output_data); + PRINT_BOOL_VALUE(m_ldr_hdr_upconversion_srgb_to_linear); + PRINT_FLOAT_VALUE(m_ldr_hdr_upconversion_nit_multiplier); + debug_printf("Allow UASTC HDR 4x4 uber mode: %u\n", 
m_params.m_uastc_hdr_4x4_options.m_allow_uber_mode); + debug_printf("UASTC HDR 4x4 ultra quant: %u\n", m_params.m_uastc_hdr_4x4_options.m_ultra_quant); + PRINT_BOOL_VALUE(m_hdr_favor_astc); + +#undef PRINT_BOOL_VALUE +#undef PRINT_INT_VALUE +#undef PRINT_UINT_VALUE +#undef PRINT_FLOAT_VALUE + } + + if (!sanity_check_input_params()) + return false; + + if ((m_params.m_use_opencl) && opencl_is_available() && !m_pOpenCL_context && !m_opencl_failed) + { + m_pOpenCL_context = opencl_create_context(); + if (!m_pOpenCL_context) + m_opencl_failed = true; + } + + return true; + } + + void basis_compressor::pick_format_mode() + { + // Unfortunately due to the legacy of this code and backwards compat this is more complex than I would like. + m_fmt_mode = basist::basis_tex_format::cETC1S; + + if (m_params.m_hdr) + { + assert(m_params.m_uastc); + + switch (m_params.m_hdr_mode) + { + case hdr_modes::cUASTC_HDR_4X4: + m_fmt_mode = basist::basis_tex_format::cUASTC_HDR_4x4; + break; + case hdr_modes::cASTC_HDR_6X6: + m_fmt_mode = basist::basis_tex_format::cASTC_HDR_6x6; + break; + case hdr_modes::cASTC_HDR_6X6_INTERMEDIATE: + m_fmt_mode = basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE; + break; + default: + assert(0); + break; + } + } + else if (m_params.m_uastc) + { + m_fmt_mode = basist::basis_tex_format::cUASTC4x4; + } + + if (m_params.m_debug) + { + switch (m_fmt_mode) + { + case basist::basis_tex_format::cETC1S: + fmt_debug_printf("Format Mode: cETC1S\n"); + break; + case basist::basis_tex_format::cUASTC4x4: + fmt_debug_printf("Format Mode: cUASTC4x4\n"); + break; + case basist::basis_tex_format::cUASTC_HDR_4x4: + fmt_debug_printf("Format Mode: cUASTC_HDR_4x4\n"); + break; + case basist::basis_tex_format::cASTC_HDR_6x6: + fmt_debug_printf("Format Mode: cASTC_HDR_6x6\n"); + break; + case basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: + fmt_debug_printf("Format Mode: cASTC_HDR_6x6_INTERMEDIATE\n"); + break; + default: + assert(0); + break; + } + } + } + + 
basis_compressor::error_code basis_compressor::process() + { + debug_printf("basis_compressor::process\n"); + + if (!read_dds_source_images()) + return cECFailedReadingSourceImages; + + // Note: After here m_params.m_hdr, m_params.m_uastc and m_fmt_mode cannot be changed. + pick_format_mode(); + + if (!read_source_images()) + return cECFailedReadingSourceImages; + + if (!validate_texture_type_constraints()) + return cECFailedValidating; + + if (m_params.m_create_ktx2_file) + { + if (!validate_ktx2_constraints()) + { + error_printf("Inputs do not satisfy .KTX2 texture constraints: all source images must be the same resolution and have the same number of mipmap levels.\n"); + return cECFailedValidating; + } + } + + if (!extract_source_blocks()) + return cECFailedFrontEnd; + + if (m_params.m_hdr) + { + if (m_params.m_hdr_mode == hdr_modes::cUASTC_HDR_4X4) + { + // UASTC 4x4 HDR + if (m_params.m_status_output) + printf("Mode: UASTC 4x4 HDR Level %u\n", m_params.m_uastc_hdr_4x4_options.m_level); + + error_code ec = encode_slices_to_uastc_4x4_hdr(); + if (ec != cECSuccess) + return ec; + } + else + { + assert((m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) || (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE)); + + // ASTC 6x6 HDR + if (m_params.m_status_output) + { + fmt_printf("Mode: ASTC 6x6 HDR {}, Base Level: {}, Highest Level: {}, Lambda: {}, REC 2020: {}\n", + (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE) ? 
"Intermediate" : "", + m_params.m_astc_hdr_6x6_options.m_master_comp_level, m_params.m_astc_hdr_6x6_options.m_highest_comp_level, + m_params.m_astc_hdr_6x6_options.m_lambda, m_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut); + } + + error_code ec = encode_slices_to_astc_6x6_hdr(); + if (ec != cECSuccess) + return ec; + } + } + else if (m_params.m_uastc) + { + // UASTC 4x4 LDR + if (m_params.m_status_output) + printf("Mode: UASTC LDR 4x4 Level %u\n", m_params.m_pack_uastc_ldr_4x4_flags & cPackUASTCLevelMask); + + error_code ec = encode_slices_to_uastc_4x4_ldr(); + if (ec != cECSuccess) + return ec; + } + else + { + // ETC1S + if (m_params.m_status_output) + printf("Mode: ETC1S Quality %i, Level %i\n", m_params.m_etc1s_quality_level, (int)m_params.m_compression_level); + + if (!process_frontend()) + return cECFailedFrontEnd; + + if (!extract_frontend_texture_data()) + return cECFailedFontendExtract; + + if (!process_backend()) + return cECFailedBackend; + } + + if (!create_basis_file_and_transcode()) + return cECFailedCreateBasisFile; + + if (m_params.m_create_ktx2_file) + { + if (!create_ktx2_file()) + return cECFailedCreateKTX2File; + } + + if (!write_output_files_and_compute_stats()) + return cECFailedWritingOutput; + + return cECSuccess; + } + + basis_compressor::error_code basis_compressor::encode_slices_to_astc_6x6_hdr() + { + debug_printf("basis_compressor::encode_slices_to_astc_6x6_hdr\n"); + + interval_timer tm; + tm.start(); + + m_uastc_slice_textures.resize(m_slice_descs.size()); + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + m_uastc_slice_textures[slice_index].init(texture_format::cASTC_HDR_6x6, m_slice_descs[slice_index].m_orig_width, m_slice_descs[slice_index].m_orig_height); + + if (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) + m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cASTC_HDR_6x6; + else if (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE) + 
m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE; + else + { + assert(0); + return cECFailedEncodeUASTC; + } + + m_uastc_backend_output.m_etc1s = false; + m_uastc_backend_output.m_srgb = false; + m_uastc_backend_output.m_slice_desc = m_slice_descs; + m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size()); + m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size()); + + astc_6x6_hdr::astc_hdr_6x6_global_config global_cfg(m_params.m_astc_hdr_6x6_options); + + global_cfg.m_image_stats = m_params.m_compute_stats; + global_cfg.m_debug_images = m_params.m_debug_images; + global_cfg.m_output_images = m_params.m_debug_images; + global_cfg.m_debug_output = m_params.m_debug; + global_cfg.m_status_output = m_params.m_status_output || m_params.m_debug; + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + gpu_image& dst_tex = m_uastc_slice_textures[slice_index]; + uint8_vec &dst_buf = m_uastc_backend_output.m_slice_image_data[slice_index]; + + basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + (void)slice_desc; + + const imagef& source_image = m_slice_images_hdr[slice_index]; + assert(source_image.get_width() && source_image.get_height()); + + uint8_vec intermediate_tex_data, astc_tex_data; + + global_cfg.m_debug_image_prefix = m_params.m_astc_hdr_6x6_options.m_debug_image_prefix; + global_cfg.m_debug_image_prefix += fmt_string("slice_{}_", slice_index); + + global_cfg.m_output_image_prefix = m_params.m_astc_hdr_6x6_options.m_output_image_prefix; + global_cfg.m_output_image_prefix += fmt_string("slice_{}_", slice_index); + + if (m_params.m_debug) + fmt_debug_printf("----------------------------------------------------------------------------\n"); + + astc_6x6_hdr::result_metrics metrics; + bool status = astc_6x6_hdr::compress_photo(source_image, global_cfg, m_params.m_pJob_pool, intermediate_tex_data, astc_tex_data, metrics); + if (!status) + return 
cECFailedEncodeUASTC; + + if (m_params.m_debug) + fmt_debug_printf("----------------------------------------------------------------------------\n"); + + // Currently it always gives us both intermediate and RDO + assert(intermediate_tex_data.size()); + assert(astc_tex_data.size()); + assert((astc_tex_data.size() & 15) == 0); + assert(dst_tex.get_size_in_bytes() == astc_tex_data.size_in_bytes()); + + memcpy(dst_tex.get_ptr(), astc_tex_data.data(), astc_tex_data.size_in_bytes()); + + if (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) + { + dst_buf.resize(dst_tex.get_size_in_bytes()); + memcpy(&dst_buf[0], dst_tex.get_ptr(), dst_tex.get_size_in_bytes()); + } + else + { + assert(m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE); + + dst_buf.resize(intermediate_tex_data.size_in_bytes()); + memcpy(&dst_buf[0], intermediate_tex_data.get_ptr(), intermediate_tex_data.size_in_bytes()); + } + + m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(dst_buf.get_ptr(), dst_buf.size_in_bytes(), 0); + } + + return cECSuccess; + } + + basis_compressor::error_code basis_compressor::encode_slices_to_uastc_4x4_hdr() + { + debug_printf("basis_compressor::encode_slices_to_uastc_4x4_hdr\n"); + + interval_timer tm; + tm.start(); + + m_uastc_slice_textures.resize(m_slice_descs.size()); + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + m_uastc_slice_textures[slice_index].init(texture_format::cUASTC_HDR_4x4, m_slice_descs[slice_index].m_orig_width, m_slice_descs[slice_index].m_orig_height); + + m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cUASTC_HDR_4x4; + m_uastc_backend_output.m_etc1s = false; + m_uastc_backend_output.m_srgb = false; + m_uastc_backend_output.m_slice_desc = m_slice_descs; + m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size()); + m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size()); + + if (!m_params.m_perceptual) + { + 
m_params.m_uastc_hdr_4x4_options.m_r_err_scale = 1.0f; + m_params.m_uastc_hdr_4x4_options.m_g_err_scale = 1.0f; + } + + const float DEFAULT_BC6H_ERROR_WEIGHT = .65f;// .85f; + const float LOWEST_BC6H_ERROR_WEIGHT = .1f; + m_params.m_uastc_hdr_4x4_options.m_bc6h_err_weight = m_params.m_hdr_favor_astc ? LOWEST_BC6H_ERROR_WEIGHT : DEFAULT_BC6H_ERROR_WEIGHT; + + std::atomic any_failures; + any_failures.store(false); + + astc_hdr_4x4_block_stats enc_stats; + + struct uastc_blk_desc + { + uint32_t m_solid_flag; + uint32_t m_num_partitions; + uint32_t m_cem_index; + uint32_t m_weight_ise_range; + uint32_t m_endpoint_ise_range; + + bool operator< (const uastc_blk_desc& desc) const + { + if (this == &desc) + return false; + +#define COMP(XX) if (XX < desc.XX) return true; else if (XX != desc.XX) return false; + COMP(m_solid_flag) + COMP(m_num_partitions) + COMP(m_cem_index) + COMP(m_weight_ise_range) + COMP(m_endpoint_ise_range) +#undef COMP + + return false; + } + + bool operator== (const uastc_blk_desc& desc) const + { + if (this == &desc) + return true; + if ((*this < desc) || (desc < *this)) + return false; + return true; + } + + bool operator!= (const uastc_blk_desc& desc) const + { + return !(*this == desc); + } + }; + + struct uastc_blk_desc_stats + { + uastc_blk_desc_stats() : m_count(0) { } + uint32_t m_count; +#ifdef UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS + basisu::vector m_blks; +#endif + }; + + std::map unique_block_descs; + std::mutex unique_block_desc_mutex; + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + gpu_image& tex = m_uastc_slice_textures[slice_index]; + basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + (void)slice_desc; + + const uint32_t num_blocks_x = tex.get_blocks_x(); + const uint32_t num_blocks_y = tex.get_blocks_y(); + const uint32_t total_blocks = tex.get_total_blocks(); + const imagef& source_image = m_slice_images_hdr[slice_index]; + + std::atomic total_blocks_processed; + 
total_blocks_processed.store(0); + + const uint32_t N = 256; + for (uint32_t block_index_iter = 0; block_index_iter < total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(total_blocks, block_index_iter + N); + + m_params.m_pJob_pool->add_job([this, first_index, last_index, num_blocks_x, num_blocks_y, total_blocks, &source_image, + &tex, &total_blocks_processed, &any_failures, &enc_stats, &unique_block_descs, &unique_block_desc_mutex] + { + BASISU_NOTE_UNUSED(num_blocks_y); + + basisu::vector all_results; + all_results.reserve(256); + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const uint32_t block_x = block_index % num_blocks_x; + const uint32_t block_y = block_index / num_blocks_x; + + //if ((block_x == 176) && (block_y == 128)) + // printf("!"); + + vec4F block_pixels[16]; + + source_image.extract_block_clamped(&block_pixels[0], block_x * 4, block_y * 4, 4, 4); + + basist::astc_blk& dest_block = *(basist::astc_blk*)tex.get_block_ptr(block_x, block_y); + + float rgb_pixels[16 * 3]; + basist::half_float rgb_pixels_half[16 * 3]; + for (uint32_t i = 0; i < 16; i++) + { + rgb_pixels[i * 3 + 0] = block_pixels[i][0]; + rgb_pixels_half[i * 3 + 0] = float_to_half_non_neg_no_nan_inf(block_pixels[i][0]); + + rgb_pixels[i * 3 + 1] = block_pixels[i][1]; + rgb_pixels_half[i * 3 + 1] = float_to_half_non_neg_no_nan_inf(block_pixels[i][1]); + + rgb_pixels[i * 3 + 2] = block_pixels[i][2]; + rgb_pixels_half[i * 3 + 2] = float_to_half_non_neg_no_nan_inf(block_pixels[i][2]); + } + + bool status = astc_hdr_4x4_enc_block(&rgb_pixels[0], rgb_pixels_half, m_params.m_uastc_hdr_4x4_options, all_results); + if (!status) + { + any_failures.store(true); + continue; + } + + double best_err = 1e+30f; + int best_result_index = -1; + + const double bc6h_err_weight = m_params.m_uastc_hdr_4x4_options.m_bc6h_err_weight; + const double astc_err_weight = (1.0f - bc6h_err_weight); + 
+ for (uint32_t i = 0; i < all_results.size(); i++) + { + basist::half_float unpacked_bc6h_block[4 * 4 * 3]; + unpack_bc6h(&all_results[i].m_bc6h_block, unpacked_bc6h_block, false); + + all_results[i].m_bc6h_block_error = compute_block_error(16, rgb_pixels_half, unpacked_bc6h_block, m_params.m_uastc_hdr_4x4_options); + + double overall_err = (all_results[i].m_bc6h_block_error * bc6h_err_weight) + (all_results[i].m_best_block_error * astc_err_weight); + + if ((!i) || (overall_err < best_err)) + { + best_err = overall_err; + best_result_index = i; + } + } + + const astc_hdr_4x4_pack_results& best_results = all_results[best_result_index]; + + astc_hdr_4x4_pack_results_to_block(dest_block, best_results); + + // Verify that this block is valid UASTC HDR and we can successfully transcode it to BC6H. + // (Well, except in fastest mode.) + if (m_params.m_uastc_hdr_4x4_options.m_level > 0) + { + basist::bc6h_block transcoded_bc6h_blk; + bool transcode_results = astc_hdr_transcode_to_bc6h(dest_block, transcoded_bc6h_blk); + assert(transcode_results); + if ((!transcode_results) && (!any_failures)) + { + error_printf("basis_compressor::encode_slices_to_uastc_4x4_hdr: UASTC HDR block transcode check failed!\n"); + + any_failures.store(true); + continue; + } + } + + if (m_params.m_debug) + { + // enc_stats has its own mutex + enc_stats.update(best_results); + + uastc_blk_desc blk_desc; + clear_obj(blk_desc); + + blk_desc.m_solid_flag = best_results.m_is_solid; + if (!blk_desc.m_solid_flag) + { + blk_desc.m_num_partitions = best_results.m_best_blk.m_num_partitions; + blk_desc.m_cem_index = best_results.m_best_blk.m_color_endpoint_modes[0]; + blk_desc.m_weight_ise_range = best_results.m_best_blk.m_weight_ise_range; + blk_desc.m_endpoint_ise_range = best_results.m_best_blk.m_endpoint_ise_range; + } + + { + std::lock_guard lck(unique_block_desc_mutex); + + auto res = unique_block_descs.insert(std::make_pair(blk_desc, uastc_blk_desc_stats())); + + (res.first)->second.m_count++; 
+#ifdef UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS + (res.first)->second.m_blks.push_back(dest_block); +#endif + } + } + + total_blocks_processed++; + + uint32_t val = total_blocks_processed; + if (((val & 1023) == 1023) && m_params.m_status_output) + { + debug_printf("basis_compressor::encode_slices_to_uastc_4x4_hdr: %3.1f%% done\n", static_cast(val) * 100.0f / total_blocks); + } + } + + }); + + } // block_index_iter + + m_params.m_pJob_pool->wait_for_all(); + + if (any_failures) + return cECFailedEncodeUASTC; + + m_uastc_backend_output.m_slice_image_data[slice_index].resize(tex.get_size_in_bytes()); + memcpy(&m_uastc_backend_output.m_slice_image_data[slice_index][0], tex.get_ptr(), tex.get_size_in_bytes()); + + m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(tex.get_ptr(), tex.get_size_in_bytes(), 0); + + } // slice_index + + debug_printf("basis_compressor::encode_slices_to_uastc_4x4_hdr: Total time: %3.3f secs\n", tm.get_elapsed_secs()); + + if (m_params.m_debug) + { + debug_printf("\n----- Total unique UASTC block descs: %u\n", (uint32_t)unique_block_descs.size()); + + uint32_t c = 0; + for (auto it = unique_block_descs.begin(); it != unique_block_descs.end(); ++it) + { + debug_printf("%u. 
Total uses: %u %3.2f%%, solid color: %u\n", c, it->second.m_count, + ((float)it->second.m_count * 100.0f) / enc_stats.m_total_blocks, it->first.m_solid_flag); + + if (!it->first.m_solid_flag) + { + debug_printf(" Num partitions: %u\n", it->first.m_num_partitions); + debug_printf(" CEM index: %u\n", it->first.m_cem_index); + debug_printf(" Weight ISE range: %u (%u levels)\n", it->first.m_weight_ise_range, astc_helpers::get_ise_levels(it->first.m_weight_ise_range)); + debug_printf(" Endpoint ISE range: %u (%u levels)\n", it->first.m_endpoint_ise_range, astc_helpers::get_ise_levels(it->first.m_endpoint_ise_range)); + } + +#ifdef UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS + debug_printf(" -- UASTC HDR block bytes:\n"); + for (uint32_t j = 0; j < minimum(4, it->second.m_blks.size()); j++) + { + basist::astc_blk& blk = it->second.m_blks[j]; + + debug_printf(" - UASTC HDR: { "); + for (uint32_t k = 0; k < 16; k++) + debug_printf("%u%s", ((const uint8_t*)&blk)[k], (k != 15) ? ", " : ""); + debug_printf(" }\n"); + + basist::bc6h_block bc6h_blk; + bool res = astc_hdr_transcode_to_bc6h(blk, bc6h_blk); + assert(res); + if (!res) + { + error_printf("astc_hdr_transcode_to_bc6h() failed!\n"); + return cECFailedEncodeUASTC; + } + + debug_printf(" - BC6H: { "); + for (uint32_t k = 0; k < 16; k++) + debug_printf("%u%s", ((const uint8_t*)&bc6h_blk)[k], (k != 15) ? 
", " : ""); + debug_printf(" }\n"); + } +#endif + + c++; + } + printf("\n"); + + enc_stats.print(); + } + + return cECSuccess; + } + + basis_compressor::error_code basis_compressor::encode_slices_to_uastc_4x4_ldr() + { + debug_printf("basis_compressor::encode_slices_to_uastc_4x4_ldr\n"); + + m_uastc_slice_textures.resize(m_slice_descs.size()); + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + m_uastc_slice_textures[slice_index].init(texture_format::cUASTC4x4, m_slice_descs[slice_index].m_orig_width, m_slice_descs[slice_index].m_orig_height); + + m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cUASTC4x4; + m_uastc_backend_output.m_etc1s = false; + m_uastc_backend_output.m_slice_desc = m_slice_descs; + m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size()); + m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size()); + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + gpu_image& tex = m_uastc_slice_textures[slice_index]; + basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + (void)slice_desc; + + const uint32_t num_blocks_x = tex.get_blocks_x(); + const uint32_t num_blocks_y = tex.get_blocks_y(); + const uint32_t total_blocks = tex.get_total_blocks(); + const image& source_image = m_slice_images[slice_index]; + + std::atomic total_blocks_processed; + total_blocks_processed.store(0); + + const uint32_t N = 256; + for (uint32_t block_index_iter = 0; block_index_iter < total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(total_blocks, block_index_iter + N); + + m_params.m_pJob_pool->add_job([this, first_index, last_index, num_blocks_x, num_blocks_y, total_blocks, &source_image, &tex, &total_blocks_processed] + { + BASISU_NOTE_UNUSED(num_blocks_y); + + uint32_t uastc_flags = m_params.m_pack_uastc_ldr_4x4_flags; + if ((m_params.m_rdo_uastc_ldr_4x4) && 
(m_params.m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode)) + uastc_flags |= cPackUASTCFavorSimplerModes; + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const uint32_t block_x = block_index % num_blocks_x; + const uint32_t block_y = block_index / num_blocks_x; + + color_rgba block_pixels[4][4]; + + source_image.extract_block_clamped((color_rgba*)block_pixels, block_x * 4, block_y * 4, 4, 4); + + basist::uastc_block& dest_block = *(basist::uastc_block*)tex.get_block_ptr(block_x, block_y); + + encode_uastc(&block_pixels[0][0].r, dest_block, uastc_flags); + + total_blocks_processed++; + + uint32_t val = total_blocks_processed; + if (((val & 16383) == 16383) && m_params.m_status_output) + { + debug_printf("basis_compressor::encode_slices_to_uastc_4x4_ldr: %3.1f%% done\n", static_cast(val) * 100.0f / total_blocks); + } + + } + + }); + + } // block_index_iter + + m_params.m_pJob_pool->wait_for_all(); + + if (m_params.m_rdo_uastc_ldr_4x4) + { + uastc_rdo_params rdo_params; + rdo_params.m_lambda = m_params.m_rdo_uastc_ldr_4x4_quality_scalar; + rdo_params.m_max_allowed_rms_increase_ratio = m_params.m_rdo_uastc_ldr_4x4_max_allowed_rms_increase_ratio; + rdo_params.m_skip_block_rms_thresh = m_params.m_rdo_uastc_ldr_4x4_skip_block_rms_thresh; + rdo_params.m_lz_dict_size = m_params.m_rdo_uastc_ldr_4x4_dict_size; + rdo_params.m_smooth_block_max_error_scale = m_params.m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale; + rdo_params.m_max_smooth_block_std_dev = m_params.m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev; + + bool status = uastc_rdo(tex.get_total_blocks(), (basist::uastc_block*)tex.get_ptr(), + (const color_rgba *)m_source_blocks[slice_desc.m_first_block_index].m_pixels, rdo_params, m_params.m_pack_uastc_ldr_4x4_flags, m_params.m_rdo_uastc_ldr_4x4_multithreading ? m_params.m_pJob_pool : nullptr, + (m_params.m_rdo_uastc_ldr_4x4_multithreading && m_params.m_pJob_pool) ? 
basisu::minimum(4, (uint32_t)m_params.m_pJob_pool->get_total_threads()) : 0); + if (!status) + { + return cECFailedUASTCRDOPostProcess; + } + } + + m_uastc_backend_output.m_slice_image_data[slice_index].resize(tex.get_size_in_bytes()); + memcpy(&m_uastc_backend_output.m_slice_image_data[slice_index][0], tex.get_ptr(), tex.get_size_in_bytes()); + + m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(tex.get_ptr(), tex.get_size_in_bytes(), 0); + + } // slice_index + + return cECSuccess; + } + + bool basis_compressor::generate_mipmaps(const imagef& img, basisu::vector& mips, bool has_alpha) + { + debug_printf("basis_compressor::generate_mipmaps\n"); + + interval_timer tm; + tm.start(); + + uint32_t total_levels = 1; + uint32_t w = img.get_width(), h = img.get_height(); + while (maximum(w, h) > (uint32_t)m_params.m_mip_smallest_dimension) + { + w = maximum(w >> 1U, 1U); + h = maximum(h >> 1U, 1U); + total_levels++; + } + + for (uint32_t level = 1; level < total_levels; level++) + { + const uint32_t level_width = maximum(1, img.get_width() >> level); + const uint32_t level_height = maximum(1, img.get_height() >> level); + + imagef& level_img = *enlarge_vector(mips, 1); + level_img.resize(level_width, level_height); + + const imagef* pSource_image = &img; + + if (m_params.m_mip_fast) + { + if (level > 1) + pSource_image = &mips[level - 1]; + } + + bool status = image_resample(*pSource_image, level_img, + //m_params.m_mip_filter.c_str(), + "box", // TODO: negative lobes in the filter are causing negative colors, try Mitchell + m_params.m_mip_scale, m_params.m_mip_wrapping, 0, has_alpha ? 
4 : 3); + if (!status) + { + error_printf("basis_compressor::generate_mipmaps: image_resample() failed!\n"); + return false; + } + + clean_hdr_image(level_img); + } + + if (m_params.m_debug) + debug_printf("Total mipmap generation time: %3.3f secs\n", tm.get_elapsed_secs()); + + return true; + } + + bool basis_compressor::generate_mipmaps(const image &img, basisu::vector &mips, bool has_alpha) + { + debug_printf("basis_compressor::generate_mipmaps\n"); + + interval_timer tm; + tm.start(); + + uint32_t total_levels = 1; + uint32_t w = img.get_width(), h = img.get_height(); + while (maximum(w, h) > (uint32_t)m_params.m_mip_smallest_dimension) + { + w = maximum(w >> 1U, 1U); + h = maximum(h >> 1U, 1U); + total_levels++; + } + +#if BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN + // Requires stb_image_resize + stbir_filter filter = STBIR_FILTER_DEFAULT; + if (m_params.m_mip_filter == "box") + filter = STBIR_FILTER_BOX; + else if (m_params.m_mip_filter == "triangle") + filter = STBIR_FILTER_TRIANGLE; + else if (m_params.m_mip_filter == "cubic") + filter = STBIR_FILTER_CUBICBSPLINE; + else if (m_params.m_mip_filter == "catmull") + filter = STBIR_FILTER_CATMULLROM; + else if (m_params.m_mip_filter == "mitchell") + filter = STBIR_FILTER_MITCHELL; + + for (uint32_t level = 1; level < total_levels; level++) + { + const uint32_t level_width = maximum(1, img.get_width() >> level); + const uint32_t level_height = maximum(1, img.get_height() >> level); + + image &level_img = *enlarge_vector(mips, 1); + level_img.resize(level_width, level_height); + + int result = stbir_resize_uint8_generic( + (const uint8_t *)img.get_ptr(), img.get_width(), img.get_height(), img.get_pitch() * sizeof(color_rgba), + (uint8_t *)level_img.get_ptr(), level_img.get_width(), level_img.get_height(), level_img.get_pitch() * sizeof(color_rgba), + has_alpha ? 4 : 3, has_alpha ? 3 : STBIR_ALPHA_CHANNEL_NONE, m_params.m_mip_premultiplied ? STBIR_FLAG_ALPHA_PREMULTIPLIED : 0, + m_params.m_mip_wrapping ? 
STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR, + nullptr); + + if (result == 0) + { + error_printf("basis_compressor::generate_mipmaps: stbir_resize_uint8_generic() failed!\n"); + return false; + } + + if (m_params.m_mip_renormalize) + level_img.renormalize_normal_map(); + } +#else + for (uint32_t level = 1; level < total_levels; level++) + { + const uint32_t level_width = maximum(1, img.get_width() >> level); + const uint32_t level_height = maximum(1, img.get_height() >> level); + + image& level_img = *enlarge_vector(mips, 1); + level_img.resize(level_width, level_height); + + const image* pSource_image = &img; + + if (m_params.m_mip_fast) + { + if (level > 1) + pSource_image = &mips[level - 1]; + } + + bool status = image_resample(*pSource_image, level_img, m_params.m_mip_srgb, m_params.m_mip_filter.c_str(), m_params.m_mip_scale, m_params.m_mip_wrapping, 0, has_alpha ? 4 : 3); + if (!status) + { + error_printf("basis_compressor::generate_mipmaps: image_resample() failed!\n"); + return false; + } + + if (m_params.m_mip_renormalize) + level_img.renormalize_normal_map(); + } +#endif + + if (m_params.m_debug) + debug_printf("Total mipmap generation time: %3.3f secs\n", tm.get_elapsed_secs()); + + return true; + } + + void basis_compressor::clean_hdr_image(imagef& src_img) + { + const uint32_t width = src_img.get_width(); + const uint32_t height = src_img.get_height(); + + float max_used_val = 0.0f; + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + vec4F& c = src_img(x, y); + for (uint32_t i = 0; i < 3; i++) + max_used_val = maximum(max_used_val, c[i]); + } + } + + double hdr_image_scale = 1.0f; + if (max_used_val > basist::ASTC_HDR_MAX_VAL) + { + hdr_image_scale = max_used_val / basist::ASTC_HDR_MAX_VAL; + + const double inv_hdr_image_scale = basist::ASTC_HDR_MAX_VAL / max_used_val; + + for (uint32_t y = 0; y < src_img.get_height(); y++) + { + for (uint32_t x = 
0; x < src_img.get_width(); x++) + { + vec4F& c = src_img(x, y); + + for (uint32_t i = 0; i < 3; i++) + c[i] = (float)minimum(c[i] * inv_hdr_image_scale, basist::ASTC_HDR_MAX_VAL); + } + } + + printf("Warning: The input HDR image's maximum used float value was %f, which is too high to encode as ASTC HDR. The image's components have been linearly scaled so the maximum used value is %f, by multiplying by %f.\n", + max_used_val, basist::ASTC_HDR_MAX_VAL, inv_hdr_image_scale); + + printf("The decoded ASTC HDR texture will have to be scaled up by %f.\n", hdr_image_scale); + } + + // TODO: Determine a constant scale factor, apply if > MAX_HALF_FLOAT + if (!src_img.clean_astc_hdr_pixels(basist::ASTC_HDR_MAX_VAL)) + printf("Warning: clean_astc_hdr_pixels() had to modify the input image to encode to ASTC HDR - see previous warning(s).\n"); + + m_hdr_image_scale = (float)hdr_image_scale; + + float lowest_nonzero_val = 1e+30f; + float lowest_val = 1e+30f; + float highest_val = -1e+30f; + + for (uint32_t y = 0; y < src_img.get_height(); y++) + { + for (uint32_t x = 0; x < src_img.get_width(); x++) + { + const vec4F& c = src_img(x, y); + + for (uint32_t i = 0; i < 3; i++) + { + lowest_val = basisu::minimum(lowest_val, c[i]); + + if (c[i] != 0.0f) + lowest_nonzero_val = basisu::minimum(lowest_nonzero_val, c[i]); + + highest_val = basisu::maximum(highest_val, c[i]); + } + } + } + + debug_printf("Lowest image value: %e, lowest non-zero value: %e, highest value: %e, dynamic range: %e\n", lowest_val, lowest_nonzero_val, highest_val, highest_val / lowest_nonzero_val); + } + + bool basis_compressor::read_dds_source_images() + { + debug_printf("basis_compressor::read_dds_source_images\n"); + + // Nothing to do if the caller doesn't want us reading source images. + if ((!m_params.m_read_source_images) || (!m_params.m_source_filenames.size())) + return true; + + // Just bail of the caller has specified their own source images. 
+ if (m_params.m_source_images.size() || m_params.m_source_images_hdr.size()) + return true; + + if (m_params.m_source_mipmap_images.size() || m_params.m_source_mipmap_images_hdr.size()) + return true; + + // See if any input filenames are .DDS + bool any_dds = false, all_dds = true; + for (uint32_t i = 0; i < m_params.m_source_filenames.size(); i++) + { + std::string ext(string_get_extension(m_params.m_source_filenames[i])); + if (strcasecmp(ext.c_str(), "dds") == 0) + any_dds = true; + else + all_dds = false; + } + + // Bail if no .DDS files specified. + if (!any_dds) + return true; + + // If any input is .DDS they all must be .DDS, for simplicity. + if (!all_dds) + { + error_printf("If any filename is DDS, all filenames must be DDS.\n"); + return false; + } + + // Can't jam in alpha channel images if any .DDS files specified. + if (m_params.m_source_alpha_filenames.size()) + { + error_printf("Source alpha filenames are not supported in DDS mode.\n"); + return false; + } + + bool any_mipmaps = false; + + // Read each .DDS texture file + for (uint32_t i = 0; i < m_params.m_source_filenames.size(); i++) + { + basisu::vector ldr_mips; + basisu::vector hdr_mips; + bool status = read_uncompressed_dds_file(m_params.m_source_filenames[i].c_str(), ldr_mips, hdr_mips); + if (!status) + return false; + + assert(ldr_mips.size() || hdr_mips.size()); + + if (m_params.m_status_output) + { + printf("Read DDS file \"%s\", %s, %ux%u, %zu mipmap levels\n", + m_params.m_source_filenames[i].c_str(), + ldr_mips.size() ? "LDR" : "HDR", + ldr_mips.size() ? ldr_mips[0].get_width() : hdr_mips[0].get_width(), + ldr_mips.size() ? ldr_mips[0].get_height() : hdr_mips[0].get_height(), + ldr_mips.size() ? 
ldr_mips.size() : hdr_mips.size()); + } + + if (ldr_mips.size()) + { + if (m_params.m_source_images_hdr.size()) + { + error_printf("All DDS files must be of the same type (all LDR, or all HDR)\n"); + return false; + } + + m_params.m_source_images.push_back(ldr_mips[0]); + m_params.m_source_mipmap_images.resize(m_params.m_source_mipmap_images.size() + 1); + + if (ldr_mips.size() > 1) + { + ldr_mips.erase_index(0U); + + m_params.m_source_mipmap_images.back().swap(ldr_mips); + + any_mipmaps = true; + } + } + else + { + if (m_params.m_source_images.size()) + { + error_printf("All DDS files must be of the same type (all LDR, or all HDR)\n"); + return false; + } + + m_params.m_source_images_hdr.push_back(hdr_mips[0]); + m_params.m_source_mipmap_images_hdr.resize(m_params.m_source_mipmap_images_hdr.size() + 1); + + if (hdr_mips.size() > 1) + { + hdr_mips.erase_index(0U); + + m_params.m_source_mipmap_images_hdr.back().swap(hdr_mips); + + any_mipmaps = true; + } + + m_params.m_hdr = true; + m_params.m_uastc = true; + } + } + + m_params.m_read_source_images = false; + m_params.m_source_filenames.clear(); + m_params.m_source_alpha_filenames.clear(); + + if (!any_mipmaps) + { + m_params.m_source_mipmap_images.clear(); + m_params.m_source_mipmap_images_hdr.clear(); + } + + if ((m_params.m_hdr) && (!m_params.m_source_images_hdr.size())) + { + error_printf("HDR mode enabled, but only LDR .DDS files were loaded. HDR mode requires half or float (HDR) .DDS inputs.\n"); + return false; + } + + return true; + } + + bool basis_compressor::read_source_images() + { + debug_printf("basis_compressor::read_source_images\n"); + + const uint32_t total_source_files = m_params.m_read_source_images ? (uint32_t)m_params.m_source_filenames.size() : + (m_params.m_hdr ? 
(uint32_t)m_params.m_source_images_hdr.size() : (uint32_t)m_params.m_source_images.size()); + + if (!total_source_files) + { + debug_printf("basis_compressor::read_source_images: No source images to process\n"); + + return false; + } + + m_stats.resize(0); + m_slice_descs.resize(0); + m_slice_images.resize(0); + m_slice_images_hdr.resize(0); + + m_total_blocks = 0; + uint32_t total_macroblocks = 0; + + m_any_source_image_has_alpha = false; + + basisu::vector source_images; + basisu::vector source_images_hdr; + + basisu::vector source_filenames; + + // TODO: Note HDR images don't support alpha here, currently. + + // First load all source images, and determine if any have an alpha channel. + for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) + { + const char* pSource_filename = ""; + + image file_image; + imagef file_image_hdr; + + if (m_params.m_read_source_images) + { + pSource_filename = m_params.m_source_filenames[source_file_index].c_str(); + + // Load the source image + if (m_params.m_hdr) + { + float upconversion_nit_multiplier = m_params.m_ldr_hdr_upconversion_nit_multiplier; + if (upconversion_nit_multiplier == 0.0f) + { + // Note: We used to use a normalized nit multiplier of 1.0 for UASTC HDR 4x4. We're now writing upconverted output files in absolute luminance (100 nits). + upconversion_nit_multiplier = LDR_TO_HDR_NITS; + } + + m_ldr_to_hdr_upconversion_nit_multiplier = upconversion_nit_multiplier; + if (!is_image_filename_hdr(pSource_filename)) + m_upconverted_any_ldr_images = true; + + if (!load_image_hdr(pSource_filename, file_image_hdr, m_params.m_ldr_hdr_upconversion_srgb_to_linear, upconversion_nit_multiplier, m_params.m_ldr_hdr_upconversion_black_bias)) + { + error_printf("Failed reading source image: %s\n", pSource_filename); + return false; + } + + // TODO: For now, just slam alpha to 1.0f. None of our HDR encoders support alpha yet. 
+ for (uint32_t y = 0; y < file_image_hdr.get_height(); y++) + for (uint32_t x = 0; x < file_image_hdr.get_width(); x++) + file_image_hdr(x, y)[3] = 1.0f; + } + else + { + if (!load_image(pSource_filename, file_image)) + { + error_printf("Failed reading source image: %s\n", pSource_filename); + return false; + } + } + + const uint32_t width = m_params.m_hdr ? file_image_hdr.get_width() : file_image.get_width(); + const uint32_t height = m_params.m_hdr ? file_image_hdr.get_height() : file_image.get_height(); + + if (m_params.m_status_output) + { + printf("Read source image \"%s\", %ux%u\n", pSource_filename, width, height); + } + + if (m_params.m_hdr) + { + clean_hdr_image(file_image_hdr); + } + else + { + // Optionally load another image and put a grayscale version of it into the alpha channel. + if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size())) + { + const char* pSource_alpha_image = m_params.m_source_alpha_filenames[source_file_index].c_str(); + + image alpha_data; + + if (!load_image(pSource_alpha_image, alpha_data)) + { + error_printf("Failed reading source image: %s\n", pSource_alpha_image); + return false; + } + + if (m_params.m_status_output) + printf("Read source alpha image \"%s\", %ux%u\n", pSource_alpha_image, alpha_data.get_width(), alpha_data.get_height()); + + alpha_data.crop(width, height); + + for (uint32_t y = 0; y < height; y++) + for (uint32_t x = 0; x < width; x++) + file_image(x, y).a = (uint8_t)alpha_data(x, y).get_709_luma(); + } + } + } + else + { + if (m_params.m_hdr) + { + file_image_hdr = m_params.m_source_images_hdr[source_file_index]; + clean_hdr_image(file_image_hdr); + } + else + { + file_image = m_params.m_source_images[source_file_index]; + } + } + + if (!m_params.m_hdr) + { + if (m_params.m_renormalize) + file_image.renormalize_normal_map(); + } + + bool alpha_swizzled = false; + + if (m_params.m_swizzle[0] != 0 || + m_params.m_swizzle[1] != 1 || + 
m_params.m_swizzle[2] != 2 || + m_params.m_swizzle[3] != 3) + { + if (!m_params.m_hdr) + { + // Used for XY normal maps in RG - puts X in color, Y in alpha + for (uint32_t y = 0; y < file_image.get_height(); y++) + { + for (uint32_t x = 0; x < file_image.get_width(); x++) + { + const color_rgba& c = file_image(x, y); + file_image(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]); + } + } + + alpha_swizzled = (m_params.m_swizzle[3] != 3); + } + else + { + // Used for XY normal maps in RG - puts X in color, Y in alpha + for (uint32_t y = 0; y < file_image_hdr.get_height(); y++) + { + for (uint32_t x = 0; x < file_image_hdr.get_width(); x++) + { + const vec4F& c = file_image_hdr(x, y); + + // For now, alpha is always 1.0f in UASTC HDR. + file_image_hdr(x, y).set(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], 1.0f); // c[m_params.m_swizzle[3]]); + } + } + } + } + + bool has_alpha = false; + + if (!m_params.m_hdr) + { + if (m_params.m_force_alpha || alpha_swizzled) + has_alpha = true; + else if (!m_params.m_check_for_alpha) + file_image.set_alpha(255); + else if (file_image.has_alpha()) + has_alpha = true; + + if (has_alpha) + m_any_source_image_has_alpha = true; + } + + { + const uint32_t width = m_params.m_hdr ? file_image_hdr.get_width() : file_image.get_width(); + const uint32_t height = m_params.m_hdr ? 
file_image_hdr.get_height() : file_image.get_height(); + + debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, width, height, has_alpha); + } + + if (m_params.m_y_flip) + { + if (m_params.m_hdr) + file_image_hdr.flip_y(); + else + file_image.flip_y(); + } + +#if DEBUG_CROP_TEXTURE_TO_64x64 + if (m_params.m_hdr) + file_image_hdr.resize(64, 64); + else + file_image.resize(64, 64); +#endif + + if ((m_params.m_resample_width > 0) && (m_params.m_resample_height > 0)) + { + int new_width = basisu::minimum(m_params.m_resample_width, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + int new_height = basisu::minimum(m_params.m_resample_height, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + + debug_printf("Resampling to %ix%i\n", new_width, new_height); + + // TODO: A box filter - kaiser looks too sharp on video. Let the caller control this. + if (m_params.m_hdr) + { + imagef temp_img(new_width, new_height); + image_resample(file_image_hdr, temp_img, "box"); // "kaiser"); + clean_hdr_image(temp_img); + temp_img.swap(file_image_hdr); + } + else + { + image temp_img(new_width, new_height); + image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser"); + temp_img.swap(file_image); + } + } + else if (m_params.m_resample_factor > 0.0f) + { + // TODO: A box filter - kaiser looks too sharp on video. Let the caller control this. 
+ if (m_params.m_hdr) + { + int new_width = basisu::minimum(basisu::maximum(1, (int)ceilf(file_image_hdr.get_width() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + int new_height = basisu::minimum(basisu::maximum(1, (int)ceilf(file_image_hdr.get_height() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + + debug_printf("Resampling to %ix%i\n", new_width, new_height); + + imagef temp_img(new_width, new_height); + image_resample(file_image_hdr, temp_img, "box"); // "kaiser"); + clean_hdr_image(temp_img); + temp_img.swap(file_image_hdr); + } + else + { + int new_width = basisu::minimum(basisu::maximum(1, (int)ceilf(file_image.get_width() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + int new_height = basisu::minimum(basisu::maximum(1, (int)ceilf(file_image.get_height() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + + debug_printf("Resampling to %ix%i\n", new_width, new_height); + + image temp_img(new_width, new_height); + image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser"); + temp_img.swap(file_image); + } + } + + const uint32_t width = m_params.m_hdr ? file_image_hdr.get_width() : file_image.get_width(); + const uint32_t height = m_params.m_hdr ? 
file_image_hdr.get_height() : file_image.get_height(); + + if ((!width) || (!height)) + { + error_printf("basis_compressor::read_source_images: Source image has a zero width and/or height!\n"); + return false; + } + + if ((width > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (height > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION)) + { + error_printf("basis_compressor::read_source_images: Source image \"%s\" is too large!\n", pSource_filename); + return false; + } + + if (!m_params.m_hdr) + source_images.enlarge(1)->swap(file_image); + else + source_images_hdr.enlarge(1)->swap(file_image_hdr); + + source_filenames.push_back(pSource_filename); + } + + // Check if the caller has generated their own mipmaps. + if (m_params.m_hdr) + { + if (m_params.m_source_mipmap_images_hdr.size()) + { + // Make sure they've passed us enough mipmap chains. + if ((m_params.m_source_images_hdr.size() != m_params.m_source_mipmap_images_hdr.size()) || (total_source_files != m_params.m_source_images_hdr.size())) + { + error_printf("basis_compressor::read_source_images(): m_params.m_source_mipmap_images_hdr.size() must equal m_params.m_source_images_hdr.size()!\n"); + return false; + } + } + } + else + { + if (m_params.m_source_mipmap_images.size()) + { + // Make sure they've passed us enough mipmap chains. + if ((m_params.m_source_images.size() != m_params.m_source_mipmap_images.size()) || (total_source_files != m_params.m_source_images.size())) + { + error_printf("basis_compressor::read_source_images(): m_params.m_source_mipmap_images.size() must equal m_params.m_source_images.size()!\n"); + return false; + } + + // Check if any of the user-supplied mipmap levels has alpha. 
+ if (!m_any_source_image_has_alpha) + { + for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) + { + for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++) + { + const image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index]; + + // Be sure to take into account any swizzling which will be applied. + if (mip_img.has_alpha(m_params.m_swizzle[3])) + { + m_any_source_image_has_alpha = true; + break; + } + } + + if (m_any_source_image_has_alpha) + break; + } + } + } + } + + debug_printf("Any source image has alpha: %u\n", m_any_source_image_has_alpha); + + // Now, for each source image, create the slices corresponding to that image. + for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) + { + const std::string &source_filename = source_filenames[source_file_index]; + + basisu::vector slices; + basisu::vector slices_hdr; + + slices.reserve(32); + slices_hdr.reserve(32); + + // The first (largest) mipmap level. + image *pFile_image = source_images.size() ? &source_images[source_file_index] : nullptr; + imagef *pFile_image_hdr = source_images_hdr.size() ? &source_images_hdr[source_file_index] : nullptr; + + // Reserve a slot for mip0. + if (m_params.m_hdr) + slices_hdr.resize(1); + else + slices.resize(1); + + if ((!m_params.m_hdr) && (m_params.m_source_mipmap_images.size())) + { + // User-provided mipmaps for each layer or image in the texture array. 
+ for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++) + { + image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index]; + + if ((m_params.m_swizzle[0] != 0) || + (m_params.m_swizzle[1] != 1) || + (m_params.m_swizzle[2] != 2) || + (m_params.m_swizzle[3] != 3)) + { + // Used for XY normal maps in RG - puts X in color, Y in alpha + for (uint32_t y = 0; y < mip_img.get_height(); y++) + { + for (uint32_t x = 0; x < mip_img.get_width(); x++) + { + const color_rgba& c = mip_img(x, y); + mip_img(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]); + } + } + } + + slices.push_back(mip_img); + } + } + else if ((m_params.m_hdr) && (m_params.m_source_mipmap_images_hdr.size())) + { + // User-provided mipmaps for each layer or image in the texture array. + for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images_hdr[source_file_index].size(); mip_index++) + { + imagef& mip_img = m_params.m_source_mipmap_images_hdr[source_file_index][mip_index]; + + if ((m_params.m_swizzle[0] != 0) || + (m_params.m_swizzle[1] != 1) || + (m_params.m_swizzle[2] != 2) || + (m_params.m_swizzle[3] != 3)) + { + // Used for XY normal maps in RG - puts X in color, Y in alpha + for (uint32_t y = 0; y < mip_img.get_height(); y++) + { + for (uint32_t x = 0; x < mip_img.get_width(); x++) + { + const vec4F& c = mip_img(x, y); + + // For now, HDR alpha is always 1.0f. + mip_img(x, y).set(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], 1.0f); // c[m_params.m_swizzle[3]]); + } + } + } + + clean_hdr_image(mip_img); + + slices_hdr.push_back(mip_img); + } + } + else if (m_params.m_mip_gen) + { + // Automatically generate mipmaps. 
+ if (m_params.m_hdr) + { + if (!generate_mipmaps(*pFile_image_hdr, slices_hdr, m_any_source_image_has_alpha)) + return false; + } + else + { + if (!generate_mipmaps(*pFile_image, slices, m_any_source_image_has_alpha)) + return false; + } + } + + // Swap in the largest mipmap level here to avoid copying it, because generate_mips() will change the array. + // NOTE: file_image is now blank. + if (m_params.m_hdr) + slices_hdr[0].swap(*pFile_image_hdr); + else + slices[0].swap(*pFile_image); + + uint_vec mip_indices(m_params.m_hdr ? slices_hdr.size() : slices.size()); + for (uint32_t i = 0; i < (m_params.m_hdr ? slices_hdr.size() : slices.size()); i++) + mip_indices[i] = i; + + if ((!m_params.m_hdr) && (m_any_source_image_has_alpha) && (!m_params.m_uastc)) + { + // For ETC1S, if source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB. + basisu::vector alpha_slices; + uint_vec new_mip_indices; + + alpha_slices.reserve(slices.size() * 2); + + for (uint32_t i = 0; i < slices.size(); i++) + { + image lvl_rgb(slices[i]); + image lvl_a(lvl_rgb); + + for (uint32_t y = 0; y < lvl_a.get_height(); y++) + { + for (uint32_t x = 0; x < lvl_a.get_width(); x++) + { + uint8_t a = lvl_a(x, y).a; + lvl_a(x, y).set_noclamp_rgba(a, a, a, 255); + } + } + + lvl_rgb.set_alpha(255); + + alpha_slices.push_back(lvl_rgb); + new_mip_indices.push_back(i); + + alpha_slices.push_back(lvl_a); + new_mip_indices.push_back(i); + } + + slices.swap(alpha_slices); + mip_indices.swap(new_mip_indices); + } + + if (m_params.m_hdr) + { + assert(slices_hdr.size() == mip_indices.size()); + } + else + { + assert(slices.size() == mip_indices.size()); + } + + for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? slices_hdr.size() : slices.size()); slice_index++) + { + image *pSlice_image = m_params.m_hdr ? nullptr : &slices[slice_index]; + imagef *pSlice_image_hdr = m_params.m_hdr ? &slices_hdr[slice_index] : nullptr; + + const uint32_t orig_width = m_params.m_hdr ? 
pSlice_image_hdr->get_width() : pSlice_image->get_width(); + const uint32_t orig_height = m_params.m_hdr ? pSlice_image_hdr->get_height() : pSlice_image->get_height(); + + bool is_alpha_slice = false; + if ((!m_params.m_hdr) && (m_any_source_image_has_alpha)) + { + if (m_params.m_uastc) + { + is_alpha_slice = pSlice_image->has_alpha(); + } + else + { + is_alpha_slice = (slice_index & 1) != 0; + } + } + + // Enlarge the source image to block boundaries, duplicating edge pixels if necessary to avoid introducing extra colors into blocks. + if (m_params.m_hdr) + { + // Don't pad in 6x6 mode, the lower level compressor handles it. + if (m_params.m_hdr_mode == hdr_modes::cUASTC_HDR_4X4) + { + pSlice_image_hdr->crop_dup_borders(pSlice_image_hdr->get_block_width(get_block_width()) * get_block_width(), pSlice_image_hdr->get_block_height(get_block_height()) * get_block_height()); + } + } + else + { + pSlice_image->crop_dup_borders(pSlice_image->get_block_width(get_block_width()) * get_block_width(), pSlice_image->get_block_height(get_block_height()) * get_block_height()); + } + + if (m_params.m_debug_images) + { + if (m_params.m_hdr) + write_exr(string_format("basis_debug_source_image_%u_slice_%u.exr", source_file_index, slice_index).c_str(), *pSlice_image_hdr, 3, 0); + else + save_png(string_format("basis_debug_source_image_%u_slice_%u.png", source_file_index, slice_index).c_str(), *pSlice_image); + } + + const size_t dest_image_index = (m_params.m_hdr ? 
m_slice_images_hdr.size() : m_slice_images.size()); + + enlarge_vector(m_stats, 1); + + if (m_params.m_hdr) + enlarge_vector(m_slice_images_hdr, 1); + else + enlarge_vector(m_slice_images, 1); + + enlarge_vector(m_slice_descs, 1); + + m_stats[dest_image_index].m_filename = source_filename.c_str(); + m_stats[dest_image_index].m_width = orig_width; + m_stats[dest_image_index].m_height = orig_height; + + debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n", + m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(), + orig_width, orig_height, + m_params.m_hdr ? pSlice_image_hdr->get_width() : pSlice_image->get_width(), + m_params.m_hdr ? pSlice_image_hdr->get_height() : pSlice_image->get_height()); + + basisu_backend_slice_desc& slice_desc = m_slice_descs[dest_image_index]; + + slice_desc.m_first_block_index = m_total_blocks; + + slice_desc.m_orig_width = orig_width; + slice_desc.m_orig_height = orig_height; + + if (m_params.m_hdr) + { + slice_desc.m_width = pSlice_image_hdr->get_width(); + slice_desc.m_height = pSlice_image_hdr->get_height(); + + slice_desc.m_num_blocks_x = pSlice_image_hdr->get_block_width(get_block_width()); + slice_desc.m_num_blocks_y = pSlice_image_hdr->get_block_height(get_block_height()); + } + else + { + slice_desc.m_width = pSlice_image->get_width(); + slice_desc.m_height = pSlice_image->get_height(); + + slice_desc.m_num_blocks_x = pSlice_image->get_block_width(get_block_width()); + slice_desc.m_num_blocks_y = pSlice_image->get_block_height(get_block_height()); + } + + slice_desc.m_num_macroblocks_x = (slice_desc.m_num_blocks_x + 1) >> 1; + slice_desc.m_num_macroblocks_y = (slice_desc.m_num_blocks_y + 1) >> 1; + + slice_desc.m_source_file_index = source_file_index; + + slice_desc.m_mip_index = mip_indices[slice_index]; + + slice_desc.m_alpha = is_alpha_slice; + slice_desc.m_iframe = false; + if (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) + 
{ + slice_desc.m_iframe = (source_file_index == 0); + } + + m_total_blocks += slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; + total_macroblocks += slice_desc.m_num_macroblocks_x * slice_desc.m_num_macroblocks_y; + + // Finally, swap in the slice's image to avoid copying it. + // NOTE: slice_image is now blank. + if (m_params.m_hdr) + m_slice_images_hdr[dest_image_index].swap(*pSlice_image_hdr); + else + m_slice_images[dest_image_index].swap(*pSlice_image); + + } // slice_index + + } // source_file_index + + debug_printf("Total blocks: %u, Total macroblocks: %u\n", m_total_blocks, total_macroblocks); + + // Make sure we don't have too many slices + if (m_slice_descs.size() > BASISU_MAX_SLICES) + { + error_printf("Too many slices!\n"); + return false; + } + + // Basic sanity check on the slices + for (uint32_t i = 1; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc &prev_slice_desc = m_slice_descs[i - 1]; + const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; + + // Make sure images are in order + int image_delta = (int)slice_desc.m_source_file_index - (int)prev_slice_desc.m_source_file_index; + if (image_delta > 1) + return false; + + // Make sure mipmap levels are in order + if (!image_delta) + { + int level_delta = (int)slice_desc.m_mip_index - (int)prev_slice_desc.m_mip_index; + if (level_delta > 1) + return false; + } + } + + if (m_params.m_status_output) + { + printf("Total slices: %u\n", (uint32_t)m_slice_descs.size()); + } + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; + + if (m_params.m_status_output) + { + printf("Slice: %u, alpha: %u, orig width/height: %ux%u, width/height: %ux%u, first_block: %u, image_index: %u, mip_level: %u, iframe: %u\n", + i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height, + slice_desc.m_width, slice_desc.m_height, + slice_desc.m_first_block_index, slice_desc.m_source_file_index, 
slice_desc.m_mip_index, slice_desc.m_iframe); + } + + if (m_any_source_image_has_alpha) + { + // HDR doesn't support alpha yet + if (m_params.m_hdr) + return false; + + if (!m_params.m_uastc) + { + // For ETC1S, alpha slices must be at odd slice indices. + if (slice_desc.m_alpha) + { + if ((i & 1) == 0) + return false; + + const basisu_backend_slice_desc& prev_slice_desc = m_slice_descs[i - 1]; + + // Make sure previous slice has this image's color data + if (prev_slice_desc.m_source_file_index != slice_desc.m_source_file_index) + return false; + if (prev_slice_desc.m_alpha) + return false; + if (prev_slice_desc.m_mip_index != slice_desc.m_mip_index) + return false; + if (prev_slice_desc.m_num_blocks_x != slice_desc.m_num_blocks_x) + return false; + if (prev_slice_desc.m_num_blocks_y != slice_desc.m_num_blocks_y) + return false; + } + else if (i & 1) + return false; + } + } + else if (slice_desc.m_alpha) + { + return false; + } + + if ((slice_desc.m_orig_width > slice_desc.m_width) || (slice_desc.m_orig_height > slice_desc.m_height)) + return false; + + if ((slice_desc.m_source_file_index == 0) && (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)) + { + if (!slice_desc.m_iframe) + return false; + } + } + + return true; + } + + // Do some basic validation for 2D arrays, cubemaps, video, and volumes. + bool basis_compressor::validate_texture_type_constraints() + { + debug_printf("basis_compressor::validate_texture_type_constraints\n"); + + // In 2D mode anything goes (each image may have a different resolution and # of mipmap levels). + if (m_params.m_tex_type == basist::cBASISTexType2D) + return true; + + uint32_t total_basis_images = 0; + + for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? 
m_slice_images_hdr.size() : m_slice_images.size()); slice_index++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; + + total_basis_images = maximum(total_basis_images, slice_desc.m_source_file_index + 1); + } + + if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray) + { + // For cubemaps, validate that the total # of Basis images is a multiple of 6. + if ((total_basis_images % 6) != 0) + { + error_printf("basis_compressor::validate_texture_type_constraints: For cubemaps the total number of input images is not a multiple of 6!\n"); + return false; + } + } + + // Now validate that all the mip0's have the same dimensions, and that each image has the same # of mipmap levels. + uint_vec image_mipmap_levels(total_basis_images); + + int width = -1, height = -1; + for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size()); slice_index++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; + + image_mipmap_levels[slice_desc.m_source_file_index] = maximum(image_mipmap_levels[slice_desc.m_source_file_index], slice_desc.m_mip_index + 1); + + if (slice_desc.m_mip_index != 0) + continue; + + if (width < 0) + { + width = slice_desc.m_orig_width; + height = slice_desc.m_orig_height; + } + else if ((width != (int)slice_desc.m_orig_width) || (height != (int)slice_desc.m_orig_height)) + { + error_printf("basis_compressor::validate_texture_type_constraints: The source image resolutions are not all equal!\n"); + return false; + } + } + + for (size_t i = 1; i < image_mipmap_levels.size(); i++) + { + if (image_mipmap_levels[0] != image_mipmap_levels[i]) + { + error_printf("basis_compressor::validate_texture_type_constraints: Each image must have the same number of mipmap levels!\n"); + return false; + } + } + + return true; + } + + bool basis_compressor::extract_source_blocks() + { + debug_printf("basis_compressor::extract_source_blocks\n"); + + // No need to extract 
blocks in 6x6 mode, but the 4x4 compressors want 4x4 blocks. + if ((m_fmt_mode == basist::basis_tex_format::cASTC_HDR_6x6) || (m_fmt_mode == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)) + return true; + + if (m_params.m_hdr) + m_source_blocks_hdr.resize(m_total_blocks); + else + m_source_blocks.resize(m_total_blocks); + + for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size()); slice_index++) + { + const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + + const uint32_t num_blocks_x = slice_desc.m_num_blocks_x; + const uint32_t num_blocks_y = slice_desc.m_num_blocks_y; + + const image *pSource_image = m_params.m_hdr ? nullptr : &m_slice_images[slice_index]; + const imagef *pSource_image_hdr = m_params.m_hdr ? &m_slice_images_hdr[slice_index] : nullptr; + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + if (m_params.m_hdr) + { + vec4F* pBlock = m_source_blocks_hdr[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(); + + pSource_image_hdr->extract_block_clamped(pBlock, block_x * 4, block_y * 4, 4, 4); + + // Additional (technically optional) early sanity checking of the block texels. 
+ for (uint32_t i = 0; i < 16; i++) + { + for (uint32_t c = 0; c < 3; c++) + { + float v = pBlock[i][c]; + + if (std::isnan(v) || std::isinf(v) || (v < 0.0f) || (v > basist::MAX_HALF_FLOAT)) + { + error_printf("basis_compressor::extract_source_blocks: invalid float component\n"); + return false; + } + } + } + } + else + { + pSource_image->extract_block_clamped(m_source_blocks[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(), block_x * 4, block_y * 4, 4, 4); + } + } + } + } + + return true; + } + + bool basis_compressor::process_frontend() + { + debug_printf("basis_compressor::process_frontend\n"); + +#if 0 + // TODO + basis_etc1_pack_params pack_params; + pack_params.m_quality = cETCQualityMedium; + pack_params.m_perceptual = m_params.m_perceptual; + pack_params.m_use_color4 = false; + + pack_etc1_block_context pack_context; + + std::unordered_set endpoint_hash; + std::unordered_set selector_hash; + + for (uint32_t i = 0; i < m_source_blocks.size(); i++) + { + etc_block blk; + pack_etc1_block(blk, m_source_blocks[i].get_ptr(), pack_params, pack_context); + + const color_rgba c0(blk.get_block_color(0, false)); + endpoint_hash.insert((c0.r | (c0.g << 5) | (c0.b << 10)) | (blk.get_inten_table(0) << 16)); + + const color_rgba c1(blk.get_block_color(1, false)); + endpoint_hash.insert((c1.r | (c1.g << 5) | (c1.b << 10)) | (blk.get_inten_table(1) << 16)); + + selector_hash.insert(blk.get_raw_selector_bits()); + } + + const uint32_t total_unique_endpoints = (uint32_t)endpoint_hash.size(); + const uint32_t total_unique_selectors = (uint32_t)selector_hash.size(); + + if (m_params.m_debug) + { + debug_printf("Unique endpoints: %u, unique selectors: %u\n", total_unique_endpoints, total_unique_selectors); + } +#endif + + const double total_texels = m_total_blocks * 16.0f; + + int endpoint_clusters = m_params.m_etc1s_max_endpoint_clusters; + int selector_clusters = m_params.m_etc1s_max_selector_clusters; + + if (endpoint_clusters > 
basisu_frontend::cMaxEndpointClusters) + { + error_printf("Too many endpoint clusters! (%u but max is %u)\n", endpoint_clusters, basisu_frontend::cMaxEndpointClusters); + return false; + } + if (selector_clusters > basisu_frontend::cMaxSelectorClusters) + { + error_printf("Too many selector clusters! (%u but max is %u)\n", selector_clusters, basisu_frontend::cMaxSelectorClusters); + return false; + } + + if (m_params.m_etc1s_quality_level != -1) + { + const float quality = saturate(m_params.m_etc1s_quality_level / 255.0f); + + const float bits_per_endpoint_cluster = 14.0f; + const float max_desired_endpoint_cluster_bits_per_texel = 1.0f; // .15f + int max_endpoints = static_cast((max_desired_endpoint_cluster_bits_per_texel * total_texels) / bits_per_endpoint_cluster); + + const float mid = 128.0f / 255.0f; + + float color_endpoint_quality = quality; + + const float endpoint_split_point = 0.5f; + + // In v1.2 and in previous versions, the endpoint codebook size at quality 128 was 3072. This wasn't quite large enough. 
+ const int ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE = 4800; + const int MAX_ENDPOINT_CODEBOOK_SIZE = 8192; + + if (color_endpoint_quality <= mid) + { + color_endpoint_quality = lerp(0.0f, endpoint_split_point, powf(color_endpoint_quality / mid, .65f)); + + max_endpoints = clamp(max_endpoints, 256, ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE); + max_endpoints = minimum(max_endpoints, m_total_blocks); + + if (max_endpoints < 64) + max_endpoints = 64; + endpoint_clusters = clamp((uint32_t)(.5f + lerp(32, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); + } + else + { + color_endpoint_quality = powf((color_endpoint_quality - mid) / (1.0f - mid), 1.6f); + + max_endpoints = clamp(max_endpoints, 256, MAX_ENDPOINT_CODEBOOK_SIZE); + max_endpoints = minimum(max_endpoints, m_total_blocks); + + if (max_endpoints < ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE) + max_endpoints = ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE; + endpoint_clusters = clamp((uint32_t)(.5f + lerp(ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); + } + + float bits_per_selector_cluster = 14.0f; + + const float max_desired_selector_cluster_bits_per_texel = 1.0f; // .15f + int max_selectors = static_cast((max_desired_selector_cluster_bits_per_texel * total_texels) / bits_per_selector_cluster); + max_selectors = clamp(max_selectors, 256, basisu_frontend::cMaxSelectorClusters); + max_selectors = minimum(max_selectors, m_total_blocks); + + float color_selector_quality = quality; + //color_selector_quality = powf(color_selector_quality, 1.65f); + color_selector_quality = powf(color_selector_quality, 2.62f); + + if (max_selectors < 96) + max_selectors = 96; + selector_clusters = clamp((uint32_t)(.5f + lerp(96, static_cast(max_selectors), color_selector_quality)), 8, basisu_frontend::cMaxSelectorClusters); + + debug_printf("Max endpoints: %u, max selectors: %u\n", 
endpoint_clusters, selector_clusters); + + if (m_params.m_etc1s_quality_level >= 223) + { + if (!m_params.m_selector_rdo_thresh.was_changed()) + { + if (!m_params.m_endpoint_rdo_thresh.was_changed()) + m_params.m_endpoint_rdo_thresh *= .25f; + + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= .25f; + } + } + else if (m_params.m_etc1s_quality_level >= 192) + { + if (!m_params.m_endpoint_rdo_thresh.was_changed()) + m_params.m_endpoint_rdo_thresh *= .5f; + + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= .5f; + } + else if (m_params.m_etc1s_quality_level >= 160) + { + if (!m_params.m_endpoint_rdo_thresh.was_changed()) + m_params.m_endpoint_rdo_thresh *= .75f; + + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= .75f; + } + else if (m_params.m_etc1s_quality_level >= 129) + { + float l = (quality - 129 / 255.0f) / ((160 - 129) / 255.0f); + + if (!m_params.m_endpoint_rdo_thresh.was_changed()) + m_params.m_endpoint_rdo_thresh *= lerp(1.0f, .75f, l); + + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= lerp(1.0f, .75f, l); + } + } + + basisu_frontend::params p; + p.m_num_source_blocks = m_total_blocks; + p.m_pSource_blocks = &m_source_blocks[0]; + p.m_max_endpoint_clusters = endpoint_clusters; + p.m_max_selector_clusters = selector_clusters; + p.m_perceptual = m_params.m_perceptual; + p.m_debug_stats = m_params.m_debug; + p.m_debug_images = m_params.m_debug_images; + p.m_compression_level = m_params.m_compression_level; + p.m_tex_type = m_params.m_tex_type; + p.m_multithreaded = m_params.m_multithreading; + p.m_disable_hierarchical_endpoint_codebooks = m_params.m_disable_hierarchical_endpoint_codebooks; + p.m_validate = m_params.m_validate_etc1s; + p.m_pJob_pool = m_params.m_pJob_pool; + p.m_pGlobal_codebooks = m_params.m_pGlobal_codebooks; + + // Don't keep trying to use OpenCL if it ever fails. 
+ p.m_pOpenCL_context = !m_opencl_failed ? m_pOpenCL_context : nullptr; + + if (!m_frontend.init(p)) + { + error_printf("basisu_frontend::init() failed!\n"); + return false; + } + + m_frontend.compress(); + + if (m_frontend.get_opencl_failed()) + m_opencl_failed = true; + + if (m_params.m_debug_images) + { + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + char filename[1024]; +#ifdef _WIN32 + sprintf_s(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); +#else + snprintf(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); +#endif + m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, true); + +#ifdef _WIN32 + sprintf_s(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); +#else + snprintf(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); +#endif + m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, false); + } + } + + return true; + } + + bool basis_compressor::extract_frontend_texture_data() + { + if (!m_params.m_compute_stats) + return true; + + debug_printf("basis_compressor::extract_frontend_texture_data\n"); + + m_frontend_output_textures.resize(m_slice_descs.size()); + m_best_etc1s_images.resize(m_slice_descs.size()); + m_best_etc1s_images_unpacked.resize(m_slice_descs.size()); + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; + + const uint32_t num_blocks_x = slice_desc.m_num_blocks_x; + const uint32_t num_blocks_y = slice_desc.m_num_blocks_y; + + const uint32_t width = num_blocks_x * 4; + const uint32_t height = num_blocks_y * 4; + + m_frontend_output_textures[i].init(texture_format::cETC1, width, height); + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t block_x = 0; block_x < 
num_blocks_x; block_x++) + memcpy(m_frontend_output_textures[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_output_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block)); + +#if 0 + if (m_params.m_debug_images) + { + char filename[1024]; + sprintf_s(filename, sizeof(filename), "rdo_etc_frontend_%u_", i); + write_etc1_vis_images(m_frontend_output_textures[i], filename); + } +#endif + + m_best_etc1s_images[i].init(texture_format::cETC1, width, height); + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + memcpy(m_best_etc1s_images[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_etc1s_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block)); + + m_best_etc1s_images[i].unpack(m_best_etc1s_images_unpacked[i]); + } + + return true; + } + + bool basis_compressor::process_backend() + { + debug_printf("basis_compressor::process_backend\n"); + + basisu_backend_params backend_params; + backend_params.m_debug = m_params.m_debug; + backend_params.m_debug_images = m_params.m_debug_images; + backend_params.m_etc1s = true; + backend_params.m_compression_level = m_params.m_compression_level; + + if (!m_params.m_no_endpoint_rdo) + backend_params.m_endpoint_rdo_quality_thresh = m_params.m_endpoint_rdo_thresh; + + if (!m_params.m_no_selector_rdo) + backend_params.m_selector_rdo_quality_thresh = m_params.m_selector_rdo_thresh; + + backend_params.m_used_global_codebooks = m_frontend.get_params().m_pGlobal_codebooks != nullptr; + backend_params.m_validate = m_params.m_validate_output_data; + + m_backend.init(&m_frontend, backend_params, m_slice_descs); + uint32_t total_packed_bytes = m_backend.encode(); + + if (!total_packed_bytes) + { + error_printf("basis_compressor::encode() failed!\n"); + return false; + } + + debug_printf("Total packed bytes (estimated): %u\n", total_packed_bytes); + + return true; + } + + bool 
basis_compressor::create_basis_file_and_transcode() + { + debug_printf("basis_compressor::create_basis_file_and_transcode\n"); + + const basisu_backend_output& encoded_output = m_params.m_uastc ? m_uastc_backend_output : m_backend.get_output(); + + if (!m_basis_file.init(encoded_output, m_params.m_tex_type, m_params.m_userdata0, m_params.m_userdata1, m_params.m_y_flip, m_params.m_us_per_frame)) + { + error_printf("basis_compressor::create_basis_file_and_transcode: basisu_backend:init() failed!\n"); + return false; + } + + const uint8_vec& comp_data = m_basis_file.get_compressed_data(); + + m_output_basis_file = comp_data; + + uint32_t total_orig_pixels = 0; + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc& slice_desc = m_slice_descs[i]; + + total_orig_pixels += slice_desc.m_orig_width * slice_desc.m_orig_height; + } + + m_basis_file_size = (uint32_t)comp_data.size(); + m_basis_bits_per_texel = total_orig_pixels ? (comp_data.size() * 8.0f) / total_orig_pixels : 0; + + debug_printf("Total .basis output file size: %u, %3.3f bits/texel\n", comp_data.size(), comp_data.size() * 8.0f / total_orig_pixels); + + // HDR 6x6 TODO + // HACK HACK + const bool is_hdr_6x6 = m_params.m_hdr && (m_params.m_hdr_mode != hdr_modes::cUASTC_HDR_4X4); + + if (m_params.m_validate_output_data) + { + interval_timer tm; + tm.start(); + + basist::basisu_transcoder_init(); + + debug_printf("basist::basisu_transcoder_init: Took %f ms\n", tm.get_elapsed_ms()); + + // Verify the compressed data by transcoding it to ASTC (or ETC1)/BC7 and validating the CRC's. 
+ basist::basisu_transcoder decoder; + if (!decoder.validate_file_checksums(&comp_data[0], (uint32_t)comp_data.size(), true)) + { + error_printf("decoder.validate_file_checksums() failed!\n"); + return false; + } + + m_decoded_output_textures.resize(m_slice_descs.size()); + + if (m_params.m_hdr) + { + m_decoded_output_textures_bc6h_hdr_unpacked.resize(m_slice_descs.size()); + + m_decoded_output_textures_astc_hdr.resize(m_slice_descs.size()); + m_decoded_output_textures_astc_hdr_unpacked.resize(m_slice_descs.size()); + } + else + { + m_decoded_output_textures_unpacked.resize(m_slice_descs.size()); + + m_decoded_output_textures_bc7.resize(m_slice_descs.size()); + m_decoded_output_textures_unpacked_bc7.resize(m_slice_descs.size()); + } + + tm.start(); + + if (m_params.m_pGlobal_codebooks) + { + decoder.set_global_codebooks(m_params.m_pGlobal_codebooks); + } + + if (!decoder.start_transcoding(&comp_data[0], (uint32_t)comp_data.size())) + { + error_printf("decoder.start_transcoding() failed!\n"); + return false; + } + + double start_transcoding_time = tm.get_elapsed_secs(); + + debug_printf("basisu_compressor::start_transcoding() took %3.3fms\n", start_transcoding_time * 1000.0f); + + double total_time_etc1s_or_astc = 0; + + for (uint32_t slice_iter = 0; slice_iter < m_slice_descs.size(); slice_iter++) + { + // Select either BC6H, UASTC LDR 4x4, or ETC1 + basisu::texture_format tex_format = m_params.m_hdr ? texture_format::cBC6HUnsigned : (m_params.m_uastc ? texture_format::cUASTC4x4 : texture_format::cETC1); + basist::block_format blk_format = m_params.m_hdr ? basist::block_format::cBC6H : (m_params.m_uastc ? 
basist::block_format::cUASTC_4x4 : basist::block_format::cETC1); + + gpu_image decoded_texture; + decoded_texture.init( + tex_format, + m_slice_descs[slice_iter].m_width, m_slice_descs[slice_iter].m_height); + + tm.start(); + + const uint32_t block_size_x = basisu::get_block_width(tex_format); + const uint32_t block_size_y = basisu::get_block_height(tex_format); + const uint32_t num_dst_blocks_x = (m_slice_descs[slice_iter].m_orig_width + block_size_x - 1) / block_size_x; + const uint32_t num_dst_blocks_y = (m_slice_descs[slice_iter].m_orig_height + block_size_y - 1) / block_size_y; + const uint32_t total_dst_blocks = num_dst_blocks_x * num_dst_blocks_y; + + uint32_t bytes_per_block = m_params.m_uastc ? 16 : 8; + + if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), slice_iter, + reinterpret_cast(decoded_texture.get_ptr()), total_dst_blocks, blk_format, bytes_per_block)) + { + error_printf("Transcoding failed on slice %u!\n", slice_iter); + return false; + } + + total_time_etc1s_or_astc += tm.get_elapsed_secs(); + + if (encoded_output.m_tex_format == basist::basis_tex_format::cETC1S) + { + uint32_t image_crc16 = basist::crc16(decoded_texture.get_ptr(), decoded_texture.get_size_in_bytes(), 0); + if (image_crc16 != encoded_output.m_slice_image_crcs[slice_iter]) + { + error_printf("Decoded image data CRC check failed on slice %u!\n", slice_iter); + return false; + } + debug_printf("Decoded image data CRC check succeeded on slice %i\n", slice_iter); + } + + m_decoded_output_textures[slice_iter] = decoded_texture; + } + + double total_alt_transcode_time = 0; + tm.start(); + + if (m_params.m_hdr) + { + if (is_hdr_6x6) + { + assert(basist::basis_is_format_supported(basist::transcoder_texture_format::cTFASTC_HDR_6x6_RGBA, basist::basis_tex_format::cASTC_HDR_6x6)); + assert(basist::basis_is_format_supported(basist::transcoder_texture_format::cTFASTC_HDR_6x6_RGBA, basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)); + + for (uint32_t i = 0; i < 
m_slice_descs.size(); i++) + { + gpu_image decoded_texture; + decoded_texture.init(texture_format::cASTC_HDR_6x6, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + + if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, + reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cASTC_HDR_6x6, 16)) + { + error_printf("Transcoding failed to ASTC HDR on slice %u!\n", i); + return false; + } + + m_decoded_output_textures_astc_hdr[i] = decoded_texture; + } + } + else + { + assert(basist::basis_is_format_supported(basist::transcoder_texture_format::cTFASTC_HDR_4x4_RGBA, basist::basis_tex_format::cUASTC_HDR_4x4)); + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + gpu_image decoded_texture; + decoded_texture.init(texture_format::cASTC_HDR_4x4, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + + if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, + reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cASTC_HDR_4x4, 16)) + { + error_printf("Transcoding failed to ASTC HDR on slice %u!\n", i); + return false; + } + + m_decoded_output_textures_astc_hdr[i] = decoded_texture; + } + } + } + else + { + if (basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cUASTC4x4) && + basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cETC1S)) + { + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + gpu_image decoded_texture; + decoded_texture.init(texture_format::cBC7, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + + if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, + reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cBC7, 16)) + { + 
error_printf("Transcoding failed to BC7 on slice %u!\n", i); + return false; + } + + m_decoded_output_textures_bc7[i] = decoded_texture; + } + } + } + + total_alt_transcode_time = tm.get_elapsed_secs(); + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + if (m_params.m_hdr) + { + // BC6H + bool status = m_decoded_output_textures[i].unpack_hdr(m_decoded_output_textures_bc6h_hdr_unpacked[i]); + assert(status); + BASISU_NOTE_UNUSED(status); + + // ASTC HDR + status = m_decoded_output_textures_astc_hdr[i].unpack_hdr(m_decoded_output_textures_astc_hdr_unpacked[i]); + assert(status); + } + else + { + bool status = m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i]); + assert(status); + BASISU_NOTE_UNUSED(status); + + if (m_decoded_output_textures_bc7[i].get_pixel_width()) + { + status = m_decoded_output_textures_bc7[i].unpack(m_decoded_output_textures_unpacked_bc7[i]); + assert(status); + } + } + } + + debug_printf("Transcoded to %s in %3.3fms, %f texels/sec\n", + m_params.m_hdr ? "BC6H" : (m_params.m_uastc ? 
"ASTC" : "ETC1"), + total_time_etc1s_or_astc * 1000.0f, total_orig_pixels / total_time_etc1s_or_astc); + + if (total_alt_transcode_time != 0) + debug_printf("Alternate transcode in %3.3fms, %f texels/sec\n", total_alt_transcode_time * 1000.0f, total_orig_pixels / total_alt_transcode_time); + + if (!is_hdr_6x6) + { + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + + const uint32_t total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; + BASISU_NOTE_UNUSED(total_blocks); + + assert(m_decoded_output_textures[slice_index].get_total_blocks() == total_blocks); + } + } + + } // if (m_params.m_validate_output_data) + + return true; + } + + bool basis_compressor::write_hdr_debug_images(const char* pBasename, const imagef& orig_hdr_img, uint32_t width, uint32_t height) + { + // Copy image to account for 4x4 block expansion + imagef hdr_img(orig_hdr_img); + hdr_img.resize(width, height); + + image srgb_img(width, height); + + const float inv_upconversion_scale = (m_ldr_to_hdr_upconversion_nit_multiplier > 0.0f) ? 
(1.0f / m_ldr_to_hdr_upconversion_nit_multiplier) : 1.0f; + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + vec4F p(hdr_img(x, y)); + + p[0] = clamp(p[0] * inv_upconversion_scale, 0.0f, 1.0f); + p[1] = clamp(p[1] * inv_upconversion_scale, 0.0f, 1.0f); + p[2] = clamp(p[2] * inv_upconversion_scale, 0.0f, 1.0f); + + int rc = (int)std::round(linear_to_srgb(p[0]) * 255.0f); + int gc = (int)std::round(linear_to_srgb(p[1]) * 255.0f); + int bc = (int)std::round(linear_to_srgb(p[2]) * 255.0f); + + srgb_img.set_clipped(x, y, color_rgba(rc, gc, bc, 255)); + } + } + + { + const std::string filename(string_format("%s_linear_clamped_to_srgb.png", pBasename)); + save_png(filename.c_str(), srgb_img); + printf("Wrote .PNG file %s\n", filename.c_str()); + } + + { + const std::string filename(string_format("%s_compressive_tonemapped.png", pBasename)); + image compressive_tonemapped_img; + + bool status = tonemap_image_compressive(compressive_tonemapped_img, hdr_img); + if (!status) + { + error_printf("basis_compressor::write_hdr_debug_images: tonemap_image_compressive() failed (invalid half-float input)\n"); + } + else + { + save_png(filename.c_str(), compressive_tonemapped_img); + printf("Wrote .PNG file %s\n", filename.c_str()); + } + } + + image tonemapped_img; + + for (int e = -5; e <= 5; e++) + { + const float scale = powf(2.0f, (float)e); + + tonemap_image_reinhard(tonemapped_img, hdr_img, scale); + + std::string filename(string_format("%s_reinhard_tonemapped_scale_%f.png", pBasename, scale)); + save_png(filename.c_str(), tonemapped_img, cImageSaveIgnoreAlpha); + printf("Wrote .PNG file %s\n", filename.c_str()); + } + + return true; + } + + bool basis_compressor::write_output_files_and_compute_stats() + { + debug_printf("basis_compressor::write_output_files_and_compute_stats\n"); + + const uint8_vec& comp_data = m_params.m_create_ktx2_file ? 
m_output_ktx2_file : m_basis_file.get_compressed_data(); + if (m_params.m_write_output_basis_or_ktx2_files) + { + const std::string& output_filename = m_params.m_out_filename; + + if (!write_vec_to_file(output_filename.c_str(), comp_data)) + { + error_printf("Failed writing output data to file \"%s\"\n", output_filename.c_str()); + return false; + } + + if (m_params.m_status_output) + { + printf("Wrote output .basis/.ktx2 file \"%s\"\n", output_filename.c_str()); + } + } + + size_t comp_size = 0; + if ((m_params.m_compute_stats) && (m_params.m_uastc) && (comp_data.size())) + { + void* pComp_data = tdefl_compress_mem_to_heap(&comp_data[0], comp_data.size(), &comp_size, TDEFL_MAX_PROBES_MASK);// TDEFL_DEFAULT_MAX_PROBES); + size_t decomp_size = 0; + void* pDecomp_data = tinfl_decompress_mem_to_heap(pComp_data, comp_size, &decomp_size, 0); + if ((decomp_size != comp_data.size()) || (memcmp(pDecomp_data, &comp_data[0], decomp_size) != 0)) + { + printf("basis_compressor::create_basis_file_and_transcode:: miniz compression or decompression failed!\n"); + return false; + } + + mz_free(pComp_data); + mz_free(pDecomp_data); + + uint32_t total_texels = 0; + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + total_texels += (m_slice_descs[i].m_orig_width * m_slice_descs[i].m_orig_height); + + m_basis_bits_per_texel = ((float)comp_size * 8.0f) / total_texels; + + fmt_debug_printf("Output file size: {}, {3.2} bits/texel, LZ compressed file size: {}, {3.2} bits/texel\n", + (uint64_t)comp_data.size(), ((float)comp_data.size() * 8.0f) / total_texels, + (uint64_t)comp_size, m_basis_bits_per_texel); + } + + m_stats.resize(m_slice_descs.size()); + + if (m_params.m_validate_output_data) + { + if (m_params.m_hdr) + { + if (m_params.m_print_stats) + { + printf("ASTC/BC6H half float space error metrics (a piecewise linear approximation of log2 error):\n"); + } + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc& 
slice_desc = m_slice_descs[slice_index]; + + if (m_params.m_compute_stats) + { + image_stats& s = m_stats[slice_index]; + + if (m_params.m_print_stats) + { + printf("Slice: %u\n", slice_index); + } + + image_metrics im; + + if (m_params.m_print_stats) + { + printf("\nASTC channels:\n"); + for (uint32_t i = 0; i < 3; i++) + { + im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], i, 1, true); + + printf("%c: ", "RGB"[i]); + im.print_hp(); + } + + printf("BC6H channels:\n"); + for (uint32_t i = 0; i < 3; i++) + { + im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], i, 1, true); + + printf("%c: ", "RGB"[i]); + im.print_hp(); + } + } + + im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], 0, 3, true); + s.m_basis_rgb_avg_psnr = (float)im.m_psnr; + + if (m_params.m_print_stats) + { + printf("\nASTC RGB: "); + im.print_hp(); +#if 0 + // Validation + im.calc_half2(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], 0, 3, true); + printf("\nASTC RGB (Alt): "); + im.print_hp(); +#endif + } + + im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], 0, 3, true); + s.m_basis_rgb_avg_bc6h_psnr = (float)im.m_psnr; + + if (m_params.m_print_stats) + { + printf("BC6H RGB: "); + im.print_hp(); + //printf("\n"); + } + + im.calc(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], 0, 3, true, true); + s.m_basis_rgb_avg_log2_psnr = (float)im.m_psnr; + + if (m_params.m_print_stats) + { + printf("\nASTC Log2 RGB: "); + im.print_hp(); + } + + im.calc(m_slice_images_hdr[slice_index], m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], 0, 3, true, true); + s.m_basis_rgb_avg_bc6h_log2_psnr = (float)im.m_psnr; + + if (m_params.m_print_stats) + { + printf("BC6H Log2 RGB: "); + im.print_hp(); + + printf("\n"); + 
} + } + + if (m_params.m_debug_images) + { + std::string out_basename; + if (m_params.m_out_filename.size()) + string_get_filename(m_params.m_out_filename.c_str(), out_basename); + else if (m_params.m_source_filenames.size()) + string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename); + + string_remove_extension(out_basename); + out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index); + + // Write BC6H .DDS file. + { + gpu_image bc6h_tex(m_decoded_output_textures[slice_index]); + bc6h_tex.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + + std::string filename(out_basename + "_bc6h.dds"); + write_compressed_texture_file(filename.c_str(), bc6h_tex, true); + printf("Wrote .DDS file %s\n", filename.c_str()); + } + + // Write ASTC .KTX/.astc files. ("astcenc -dh input.astc output.exr" to decode the astc file.) + { + gpu_image astc_tex(m_decoded_output_textures_astc_hdr[slice_index]); + astc_tex.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + + std::string filename1(out_basename + "_astc.astc"); + + uint32_t block_width = 4, block_height = 4; + if ((m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) || (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE)) + { + block_width = 6; + block_height = 6; + } + + write_astc_file(filename1.c_str(), astc_tex.get_ptr(), block_width, block_height, slice_desc.m_orig_width, slice_desc.m_orig_height); + printf("Wrote .ASTC file %s\n", filename1.c_str()); + + std::string filename2(out_basename + "_astc.ktx"); + write_compressed_texture_file(filename2.c_str(), astc_tex, true); + printf("Wrote .KTX file %s\n", filename2.c_str()); + } + + // Write unpacked ASTC image to .EXR + { + imagef astc_img(m_decoded_output_textures_astc_hdr_unpacked[slice_index]); + astc_img.resize(slice_desc.m_orig_width, slice_desc.m_orig_height); + + std::string filename(out_basename + "_unpacked_astc.exr"); + 
write_exr(filename.c_str(), astc_img, 3, 0); + printf("Wrote .EXR file %s\n", filename.c_str()); + } + + // Write unpacked BC6H image to .EXR + { + imagef bc6h_img(m_decoded_output_textures_bc6h_hdr_unpacked[slice_index]); + bc6h_img.resize(slice_desc.m_orig_width, slice_desc.m_orig_height); + + std::string filename(out_basename + "_unpacked_bc6h.exr"); + write_exr(filename.c_str(), bc6h_img, 3, 0); + printf("Wrote .EXR file %s\n", filename.c_str()); + } + + // Write tonemapped/srgb images + write_hdr_debug_images((out_basename + "_source").c_str(), m_slice_images_hdr[slice_index], slice_desc.m_orig_width, slice_desc.m_orig_height); + write_hdr_debug_images((out_basename + "_unpacked_astc").c_str(), m_decoded_output_textures_astc_hdr_unpacked[slice_index], slice_desc.m_orig_width, slice_desc.m_orig_height); + write_hdr_debug_images((out_basename + "_unpacked_bc6h").c_str(), m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], slice_desc.m_orig_width, slice_desc.m_orig_height); + } + } + } + else + { + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + + if (m_params.m_compute_stats) + { + if (m_params.m_print_stats) + printf("Slice: %u\n", slice_index); + + image_stats& s = m_stats[slice_index]; + + image_metrics em; + + // ---- .basis stats + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3); + if (m_params.m_print_stats) + em.print(".basis RGB Avg: "); + s.m_basis_rgb_avg_psnr = (float)em.m_psnr; + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 4); + if (m_params.m_print_stats) + em.print(".basis RGBA Avg: "); + s.m_basis_rgba_avg_psnr = (float)em.m_psnr; + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 1); + if (m_params.m_print_stats) + em.print(".basis R Avg: "); + + em.calc(m_slice_images[slice_index], 
m_decoded_output_textures_unpacked[slice_index], 1, 1);
+ if (m_params.m_print_stats)
+ em.print(".basis G Avg: ");
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 2, 1);
+ if (m_params.m_print_stats)
+ em.print(".basis B Avg: ");
+
+ if (m_params.m_uastc)
+ {
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 3, 1);
+ if (m_params.m_print_stats)
+ em.print(".basis A Avg: ");
+
+ s.m_basis_a_avg_psnr = (float)em.m_psnr;
+ }
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0);
+ if (m_params.m_print_stats)
+ em.print(".basis 709 Luma: ");
+ s.m_basis_luma_709_psnr = static_cast<float>(em.m_psnr);
+ s.m_basis_luma_709_ssim = static_cast<float>(em.m_ssim);
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0, true, true);
+ if (m_params.m_print_stats)
+ em.print(".basis 601 Luma: ");
+ s.m_basis_luma_601_psnr = static_cast<float>(em.m_psnr);
+
+ if (m_slice_descs.size() == 1)
+ {
+ const uint32_t output_size = comp_size ?
(uint32_t)comp_size : (uint32_t)comp_data.size();
+ if (m_params.m_print_stats)
+ {
+ debug_printf(".basis RGB PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_rgb_avg_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height)));
+ debug_printf(".basis Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_luma_709_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height)));
+ }
+ }
+
+ if (m_decoded_output_textures_unpacked_bc7[slice_index].get_width())
+ {
+ // ---- BC7 stats
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 3);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 RGB Avg: ");
+ s.m_bc7_rgb_avg_psnr = (float)em.m_psnr;
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 4);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 RGBA Avg: ");
+ s.m_bc7_rgba_avg_psnr = (float)em.m_psnr;
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 1);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 R Avg: ");
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 1, 1);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 G Avg: ");
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 2, 1);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 B Avg: ");
+
+ if (m_params.m_uastc)
+ {
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 3, 1);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 A Avg: ");
+
+ s.m_bc7_a_avg_psnr = (float)em.m_psnr;
+ }
+
+ em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 709 Luma: ");
+ s.m_bc7_luma_709_psnr = static_cast<float>(em.m_psnr);
+ s.m_bc7_luma_709_ssim = static_cast<float>(em.m_ssim);
+
+ 
em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0, true, true);
+ //if (m_params.m_print_stats)
+ // em.print("BC7 601 Luma: ");
+ s.m_bc7_luma_601_psnr = static_cast<float>(em.m_psnr);
+ }
+
+ if (!m_params.m_uastc)
+ {
+ // ---- Nearly best possible ETC1S stats
+ em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 3);
+ //if (m_params.m_print_stats)
+ // em.print("Unquantized ETC1S RGB Avg: ");
+ s.m_best_etc1s_rgb_avg_psnr = static_cast<float>(em.m_psnr);
+
+ em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0);
+ //if (m_params.m_print_stats)
+ // em.print("Unquantized ETC1S 709 Luma: ");
+ s.m_best_etc1s_luma_709_psnr = static_cast<float>(em.m_psnr);
+ s.m_best_etc1s_luma_709_ssim = static_cast<float>(em.m_ssim);
+
+ em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0, true, true);
+ //if (m_params.m_print_stats)
+ // em.print("Unquantized ETC1S 601 Luma: ");
+ s.m_best_etc1s_luma_601_psnr = static_cast<float>(em.m_psnr);
+ }
+ }
+
+ std::string out_basename;
+ if (m_params.m_out_filename.size())
+ string_get_filename(m_params.m_out_filename.c_str(), out_basename);
+ else if (m_params.m_source_filenames.size())
+ string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename);
+
+ string_remove_extension(out_basename);
+ out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index);
+
+ if ((!m_params.m_uastc) && (m_frontend.get_params().m_debug_images))
+ {
+ // Write "best" ETC1S debug images
+ if (!m_params.m_uastc)
+ {
+ gpu_image best_etc1s_gpu_image(m_best_etc1s_images[slice_index]);
+ best_etc1s_gpu_image.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);
+ write_compressed_texture_file((out_basename + "_best_etc1s.ktx").c_str(), best_etc1s_gpu_image, true);
+
+ image best_etc1s_unpacked;
+ best_etc1s_gpu_image.unpack(best_etc1s_unpacked);
+ save_png(out_basename + 
"_best_etc1s.png", best_etc1s_unpacked); + } + } + + if (m_params.m_debug_images) + { + // Write decoded ETC1S/ASTC debug images + { + gpu_image decoded_etc1s_or_astc(m_decoded_output_textures[slice_index]); + decoded_etc1s_or_astc.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file((out_basename + "_transcoded_etc1s_or_astc.ktx").c_str(), decoded_etc1s_or_astc, true); + + image temp(m_decoded_output_textures_unpacked[slice_index]); + temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); + save_png(out_basename + "_transcoded_etc1s_or_astc.png", temp); + } + + // Write decoded BC7 debug images + if (m_decoded_output_textures_bc7[slice_index].get_pixel_width()) + { + gpu_image decoded_bc7(m_decoded_output_textures_bc7[slice_index]); + decoded_bc7.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file((out_basename + "_transcoded_bc7.ktx").c_str(), decoded_bc7, true); + + image temp(m_decoded_output_textures_unpacked_bc7[slice_index]); + temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); + save_png(out_basename + "_transcoded_bc7.png", temp); + } + } + } + } // if (m_params.m_hdr) + + } // if (m_params.m_validate_output_data) + + return true; + } + + // Make sure all the mip 0's have the same dimensions and number of mipmap levels, or we can't encode the KTX2 file. 
+ bool basis_compressor::validate_ktx2_constraints()
+ {
+ uint32_t base_width = 0, base_height = 0;
+ uint32_t total_layers = 0;
+ for (uint32_t i = 0; i < m_slice_descs.size(); i++)
+ {
+ if (m_slice_descs[i].m_mip_index == 0)
+ {
+ if (!base_width)
+ {
+ base_width = m_slice_descs[i].m_orig_width;
+ base_height = m_slice_descs[i].m_orig_height;
+ }
+ else
+ {
+ if ((m_slice_descs[i].m_orig_width != base_width) || (m_slice_descs[i].m_orig_height != base_height))
+ {
+ return false;
+ }
+ }
+
+ total_layers = maximum(total_layers, m_slice_descs[i].m_source_file_index + 1);
+ }
+ }
+
+ basisu::vector<uint32_t> total_mips(total_layers);
+ for (uint32_t i = 0; i < m_slice_descs.size(); i++)
+ total_mips[m_slice_descs[i].m_source_file_index] = maximum(total_mips[m_slice_descs[i].m_source_file_index], m_slice_descs[i].m_mip_index + 1);
+
+ for (uint32_t i = 1; i < total_layers; i++)
+ {
+ if (total_mips[0] != total_mips[i])
+ {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ // colorModel=KTX2_KDF_DF_MODEL_ETC1S (0xA3)
+ // LDR ETC1S texture data in a custom format, with global codebooks
+ static uint8_t g_ktx2_etc1s_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF };
+ static uint8_t g_ktx2_etc1s_alpha_dfd[60] = { 0x3C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x38,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF,0x40,0x0,0x3F,0xF,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF };
+
+ // colorModel=KTX2_KDF_DF_MODEL_UASTC_LDR_4X4 (0xA6)
+ // LDR UASTC 4x4 texture data in a custom block format
+ static uint8_t g_ktx2_uastc_ldr_4x4_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x4,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF };
+ 
static uint8_t g_ktx2_uastc_ldr_4x4_alpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + + // colorModel=KTX2_KDF_DF_MODEL_UASTC_HDR_4X4 (0xA7) + // Standard ASTC HDR 4x4 texture data but constrained for easy transcoding to BC6H, either highest quality or RDO optimized. + static uint8_t g_ktx2_uastc_hdr_4x4_nonalpha_dfd[44] = + { + 0x2C,0x0,0x0,0x0, // 0 totalSize + 0x0,0x0,0x0,0x0, // 1 descriptorType/vendorId + 0x2,0x0,0x28,0x0, // 2 descriptorBlockSize/versionNumber + 0xA7,0x1,0x1,0x0, // 3 flags, transferFunction, colorPrimaries, colorModel (KTX2_KDF_DF_MODEL_UASTC_HDR_4X4) + 0x3,0x3,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension3 + 0x10,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane3 + 0x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane7 + 0x0,0x0,0x7F,0x80, // 7 bitLength/bitOffset/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.) + 0x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition3 + 0x0,0x0,0x0,0x0, // 9 sampleLower (0.0) + 0x00, 0x00, 0x80, 0x3F // 10 sampleHigher (1.0) + }; + + // colorModel=KTX2_KDF_DF_MODEL_ASTC (0xA2) + // Standard ASTC HDR 6x6 texture data, either highest quality or RDO optimized. + static uint8_t g_ktx2_astc_hdr_6x6_nonalpha_dfd[44] = + { + 0x2C,0x0,0x0,0x0, // 0 totalSize + 0x0,0x0,0x0,0x0, // 1 descriptorType/vendorId + 0x2,0x0,0x28,0x0, // 2 descriptorBlockSize/versionNumber + 0xA2,0x1,0x1,0x0, // 3 flags, transferFunction, colorPrimaries, colorModel (0xA2/162, standard ASTC, KTX2_KDF_DF_MODEL_ASTC) + 0x5,0x5,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension3 + 0x10,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane3 + 0x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane7 + 0x0,0x0,0x7F,0x80, // 7 bitLength/bitOffset/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.) 
+ 0x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition3 + 0x0,0x0,0x0,0x0, // 9 sampleLower (0.0) + 0x00, 0x00, 0x80, 0x3F // 10 sampleHigher (1.0) + }; + + // colorModel=KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE (0xA8) + // Our custom intermediate format that when decoded directly outputs ASTC HDR 6x6 + static uint8_t g_ktx2_astc_hdr_6x6_intermediate_nonalpha_dfd[44] = + { + 0x2C,0x0,0x0,0x0, // 0 totalSize + 0x0,0x0,0x0,0x0, // 1 descriptorType/vendorId + 0x2,0x0,0x28,0x0, // 2 descriptorBlockSize/versionNumber + 0xA8,0x1,0x1,0x0, // 3 flags, transferFunction, colorPrimaries, colorModel (KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE) + 0x5,0x5,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension3 + 0x10,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane3 + 0x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane7 + 0x0,0x0,0x7F,0x80, // 7 bitLength/bitOffset/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.) + 0x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition3 + 0x0,0x0,0x0,0x0, // 9 sampleLower (0.0) + 0x00, 0x00, 0x80, 0x3F // 10 sampleHigher (1.0) + }; + + bool basis_compressor::get_dfd(uint8_vec &dfd, const basist::ktx2_header &header) + { + const uint8_t* pDFD; + uint32_t dfd_len; + + if (m_params.m_uastc) + { + if (m_params.m_hdr) + { + switch (m_params.m_hdr_mode) + { + case hdr_modes::cUASTC_HDR_4X4: + { + pDFD = g_ktx2_uastc_hdr_4x4_nonalpha_dfd; + dfd_len = sizeof(g_ktx2_uastc_hdr_4x4_nonalpha_dfd); + break; + } + case hdr_modes::cASTC_HDR_6X6: + { + pDFD = g_ktx2_astc_hdr_6x6_nonalpha_dfd; + dfd_len = sizeof(g_ktx2_astc_hdr_6x6_nonalpha_dfd); + break; + } + case hdr_modes::cASTC_HDR_6X6_INTERMEDIATE: + { + pDFD = g_ktx2_astc_hdr_6x6_intermediate_nonalpha_dfd; + dfd_len = sizeof(g_ktx2_astc_hdr_6x6_intermediate_nonalpha_dfd); + break; + } + default: + { + assert(0); + return false; + } + } + } + // Must be LDR UASTC 4x4 + else if (m_any_source_image_has_alpha) + { + pDFD = g_ktx2_uastc_ldr_4x4_alpha_dfd; + dfd_len = 
sizeof(g_ktx2_uastc_ldr_4x4_alpha_dfd); + } + else + { + pDFD = g_ktx2_uastc_ldr_4x4_nonalpha_dfd; + dfd_len = sizeof(g_ktx2_uastc_ldr_4x4_nonalpha_dfd); + } + } + else + { + // Must be ETC1S. + assert(!m_params.m_hdr); + + if (m_any_source_image_has_alpha) + { + pDFD = g_ktx2_etc1s_alpha_dfd; + dfd_len = sizeof(g_ktx2_etc1s_alpha_dfd); + } + else + { + pDFD = g_ktx2_etc1s_nonalpha_dfd; + dfd_len = sizeof(g_ktx2_etc1s_nonalpha_dfd); + } + } + + assert(dfd_len >= 44); + + dfd.resize(dfd_len); + memcpy(dfd.data(), pDFD, dfd_len); + + uint32_t dfd_bits = basisu::read_le_dword(dfd.data() + 3 * sizeof(uint32_t)); + + // Color primaries + if ((m_params.m_hdr) && (m_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut)) + { + dfd_bits &= ~(0xFF << 8); + dfd_bits |= (basist::KTX2_DF_PRIMARIES_BT2020 << 8); + } + + // Transfer function + dfd_bits &= ~(0xFF << 16); + + if (m_params.m_hdr) + { + // TODO: In HDR mode, always write linear for now. + dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_LINEAR << 16); + } + else + { + if (m_params.m_ktx2_srgb_transfer_func) + dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_SRGB << 16); + else + dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_LINEAR << 16); + } + + basisu::write_le_dword(dfd.data() + 3 * sizeof(uint32_t), dfd_bits); + + if (header.m_supercompression_scheme != basist::KTX2_SS_NONE) + { + uint32_t plane_bits = basisu::read_le_dword(dfd.data() + 5 * sizeof(uint32_t)); + + plane_bits &= ~0xFF; + + basisu::write_le_dword(dfd.data() + 5 * sizeof(uint32_t), plane_bits); + } + + // Fix up the DFD channel(s) + uint32_t dfd_chan0 = basisu::read_le_dword(dfd.data() + 7 * sizeof(uint32_t)); + + if (m_params.m_uastc) + { + dfd_chan0 &= ~(0xF << 24); + + // TODO: Allow the caller to override this + if (m_any_source_image_has_alpha) + dfd_chan0 |= (basist::KTX2_DF_CHANNEL_UASTC_RGBA << 24); + else + dfd_chan0 |= (basist::KTX2_DF_CHANNEL_UASTC_RGB << 24); + } + + basisu::write_le_dword(dfd.data() + 7 * sizeof(uint32_t), dfd_chan0); + + return 
true; + } + + bool basis_compressor::create_ktx2_file() + { + //bool needs_global_data = false; + bool can_use_zstd = false; + + switch (m_fmt_mode) + { + case basist::basis_tex_format::cETC1S: + { + //needs_global_data = true; + break; + } + case basist::basis_tex_format::cUASTC4x4: + { + can_use_zstd = true; + break; + } + case basist::basis_tex_format::cUASTC_HDR_4x4: + { + can_use_zstd = true; + break; + } + case basist::basis_tex_format::cASTC_HDR_6x6: + { + can_use_zstd = true; + break; + } + case basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: + { + //needs_global_data = true; + break; + } + default: + assert(0); + //fmt_debug_printf("HERE 1\n"); + return false; + } + + if (can_use_zstd) + { + if ((m_params.m_ktx2_uastc_supercompression != basist::KTX2_SS_NONE) && (m_params.m_ktx2_uastc_supercompression != basist::KTX2_SS_ZSTANDARD)) + { + //fmt_debug_printf("HERE 2\n"); + return false; + } + } + + const basisu_backend_output& backend_output = m_backend.get_output(); + + // Determine the width/height, number of array layers, mipmap levels, and the number of faces (1 for 2D, 6 for cubemap). + // This does not support 1D or 3D. 
+ uint32_t base_width = 0, base_height = 0, total_layers = 0, total_levels = 0, total_faces = 1; + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + if ((m_slice_descs[i].m_mip_index == 0) && (!base_width)) + { + base_width = m_slice_descs[i].m_orig_width; + base_height = m_slice_descs[i].m_orig_height; + } + + total_layers = maximum(total_layers, m_slice_descs[i].m_source_file_index + 1); + + if (!m_slice_descs[i].m_source_file_index) + total_levels = maximum(total_levels, m_slice_descs[i].m_mip_index + 1); + } + + if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray) + { + assert((total_layers % 6) == 0); + + total_layers /= 6; + assert(total_layers >= 1); + + total_faces = 6; + } + + basist::ktx2_header header; + memset((void *)&header, 0, sizeof(header)); + + memcpy(header.m_identifier, basist::g_ktx2_file_identifier, sizeof(basist::g_ktx2_file_identifier)); + header.m_pixel_width = base_width; + header.m_pixel_height = base_height; + header.m_face_count = total_faces; + + if (m_params.m_hdr) + { + if (m_params.m_hdr_mode == hdr_modes::cUASTC_HDR_4X4) + header.m_vk_format = basist::KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK; + else if (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) + header.m_vk_format = basist::KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK; + else + { + assert(m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE); + + header.m_vk_format = basist::KTX2_VK_FORMAT_UNDEFINED; + } + } + else + { + // Either ETC1S or UASTC LDR 4x4. + assert((m_fmt_mode == basist::basis_tex_format::cETC1S) || (m_fmt_mode == basist::basis_tex_format::cUASTC4x4)); + + header.m_vk_format = basist::KTX2_VK_FORMAT_UNDEFINED; + } + + header.m_type_size = 1; + header.m_level_count = total_levels; + header.m_layer_count = (total_layers > 1) ? 
total_layers : 0;
+
+ if (can_use_zstd)
+ {
+ switch (m_params.m_ktx2_uastc_supercompression)
+ {
+ case basist::KTX2_SS_NONE:
+ {
+ header.m_supercompression_scheme = basist::KTX2_SS_NONE;
+ break;
+ }
+ case basist::KTX2_SS_ZSTANDARD:
+ {
+#if BASISD_SUPPORT_KTX2_ZSTD
+ header.m_supercompression_scheme = basist::KTX2_SS_ZSTANDARD;
+#else
+ header.m_supercompression_scheme = basist::KTX2_SS_NONE;
+#endif
+ break;
+ }
+ default:
+ assert(0);
+ //fmt_debug_printf("HERE 3\n");
+ return false;
+ }
+ }
+
+ basisu::vector<uint8_vec> level_data_bytes(total_levels);
+ basisu::vector<uint8_vec> compressed_level_data_bytes(total_levels);
+ size_t_vec slice_level_offsets(m_slice_descs.size());
+
+ // This will append the texture data in the correct order (for each level: layer, then face).
+ for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)
+ {
+ const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];
+
+ slice_level_offsets[slice_index] = level_data_bytes[slice_desc.m_mip_index].size();
+
+ if (m_fmt_mode == basist::basis_tex_format::cETC1S)
+ {
+ append_vector(level_data_bytes[slice_desc.m_mip_index], backend_output.m_slice_image_data[slice_index]);
+ }
+ else
+ {
+ append_vector(level_data_bytes[slice_desc.m_mip_index], m_uastc_backend_output.m_slice_image_data[slice_index]);
+ }
+ }
+
+ // Zstd Supercompression
+ if ((can_use_zstd) && (header.m_supercompression_scheme == basist::KTX2_SS_ZSTANDARD))
+ {
+#if BASISD_SUPPORT_KTX2_ZSTD
+ for (uint32_t level_index = 0; level_index < total_levels; level_index++)
+ {
+ compressed_level_data_bytes[level_index].resize(ZSTD_compressBound(level_data_bytes[level_index].size()));
+
+ size_t result = ZSTD_compress(compressed_level_data_bytes[level_index].data(), compressed_level_data_bytes[level_index].size(),
+ level_data_bytes[level_index].data(), level_data_bytes[level_index].size(),
+ m_params.m_ktx2_zstd_supercompression_level);
+
+ if (ZSTD_isError(result))
+ {
+ //fmt_debug_printf("HERE 5\n");
+ 
return false;
+ }
+
+ compressed_level_data_bytes[level_index].resize(result);
+ }
+#else
+ // Can't get here
+ assert(0);
+ //fmt_debug_printf("HERE 6\n");
+ return false;
+#endif
+ }
+ else
+ {
+ // No supercompression
+ compressed_level_data_bytes = level_data_bytes;
+ }
+
+ uint8_vec ktx2_global_data;
+
+ // Create ETC1S global supercompressed data
+ if (m_fmt_mode == basist::basis_tex_format::cETC1S)
+ {
+ basist::ktx2_etc1s_global_data_header etc1s_global_data_header;
+ clear_obj(etc1s_global_data_header);
+
+ etc1s_global_data_header.m_endpoint_count = backend_output.m_num_endpoints;
+ etc1s_global_data_header.m_selector_count = backend_output.m_num_selectors;
+ etc1s_global_data_header.m_endpoints_byte_length = backend_output.m_endpoint_palette.size();
+ etc1s_global_data_header.m_selectors_byte_length = backend_output.m_selector_palette.size();
+ etc1s_global_data_header.m_tables_byte_length = backend_output.m_slice_image_tables.size();
+
+ basisu::vector<basist::ktx2_etc1s_image_desc> etc1s_image_descs(total_levels * total_layers * total_faces);
+ memset((void *)etc1s_image_descs.data(), 0, etc1s_image_descs.size_in_bytes());
+
+ for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)
+ {
+ const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];
+
+ const uint32_t level_index = slice_desc.m_mip_index;
+ uint32_t layer_index = slice_desc.m_source_file_index;
+ uint32_t face_index = 0;
+
+ if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray)
+ {
+ face_index = layer_index % 6;
+ layer_index /= 6;
+ }
+
+ const uint32_t etc1s_image_index = level_index * (total_layers * total_faces) + layer_index * total_faces + face_index;
+
+ if (slice_desc.m_alpha)
+ {
+ etc1s_image_descs[etc1s_image_index].m_alpha_slice_byte_length = backend_output.m_slice_image_data[slice_index].size();
+ etc1s_image_descs[etc1s_image_index].m_alpha_slice_byte_offset = slice_level_offsets[slice_index];
+ }
+ else
+ {
+ if (m_params.m_tex_type == 
basist::cBASISTexTypeVideoFrames)
+ etc1s_image_descs[etc1s_image_index].m_image_flags = !slice_desc.m_iframe ? basist::KTX2_IMAGE_IS_P_FRAME : 0;
+
+ etc1s_image_descs[etc1s_image_index].m_rgb_slice_byte_length = backend_output.m_slice_image_data[slice_index].size();
+ etc1s_image_descs[etc1s_image_index].m_rgb_slice_byte_offset = slice_level_offsets[slice_index];
+ }
+ } // slice_index
+
+ append_vector(ktx2_global_data, (const uint8_t*)&etc1s_global_data_header, sizeof(etc1s_global_data_header));
+ append_vector(ktx2_global_data, (const uint8_t*)etc1s_image_descs.data(), etc1s_image_descs.size_in_bytes());
+ append_vector(ktx2_global_data, backend_output.m_endpoint_palette);
+ append_vector(ktx2_global_data, backend_output.m_selector_palette);
+ append_vector(ktx2_global_data, backend_output.m_slice_image_tables);
+
+ header.m_supercompression_scheme = basist::KTX2_SS_BASISLZ;
+ }
+ else if (m_fmt_mode == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)
+ {
+ basisu::vector<basist::ktx2_astc_hdr_6x6_intermediate_image_desc> image_descs(total_levels * total_layers * total_faces);
+ memset((void *)image_descs.data(), 0, image_descs.size_in_bytes());
+
+ for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)
+ {
+ const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];
+
+ const uint32_t level_index = slice_desc.m_mip_index;
+ uint32_t layer_index = slice_desc.m_source_file_index;
+ uint32_t face_index = 0;
+
+ if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray)
+ {
+ face_index = layer_index % 6;
+ layer_index /= 6;
+ }
+
+ const uint32_t output_image_index = level_index * (total_layers * total_faces) + layer_index * total_faces + face_index;
+
+ image_descs[output_image_index].m_rgb_slice_byte_length = m_uastc_backend_output.m_slice_image_data[slice_index].size();
+ image_descs[output_image_index].m_rgb_slice_byte_offset = slice_level_offsets[slice_index];
+
+ } // slice_index
+
+ append_vector(ktx2_global_data, (const uint8_t*)image_descs.data(), 
image_descs.size_in_bytes()); + + header.m_supercompression_scheme = basist::KTX2_SS_BASISLZ; + } + + // Key values + basist::ktx2_transcoder::key_value_vec key_values(m_params.m_ktx2_key_values); + + basist::ktx2_add_key_value(key_values, "KTXwriter", fmt_string("Basis Universal {}", BASISU_LIB_VERSION_STRING)); + + if (m_params.m_hdr) + { + if (m_upconverted_any_ldr_images) + basist::ktx2_add_key_value(key_values, "LDRUpconversionMultiplier", fmt_string("{}", m_ldr_to_hdr_upconversion_nit_multiplier)); + + if (m_params.m_ldr_hdr_upconversion_srgb_to_linear) + basist::ktx2_add_key_value(key_values, "LDRUpconversionSRGBToLinear", "1"); + } + + key_values.sort(); + +#if BASISU_DISABLE_KTX2_KEY_VALUES + // HACK HACK - Clear the key values array, which causes no key values to be written (triggering the ktx2check validator bug). + key_values.clear(); +#endif + + uint8_vec key_value_data; + + // DFD + uint8_vec dfd; + if (!get_dfd(dfd, header)) + { + //fmt_debug_printf("HERE 7\n"); + return false; + } + + const uint32_t kvd_file_offset = sizeof(header) + sizeof(basist::ktx2_level_index) * total_levels + (uint32_t)dfd.size(); + + for (uint32_t pass = 0; pass < 2; pass++) + { + for (uint32_t i = 0; i < key_values.size(); i++) + { + if (key_values[i].m_key.size() < 2) + { + //fmt_debug_printf("HERE 8\n"); + return false; + } + + if (key_values[i].m_key.back() != 0) + { + //fmt_debug_printf("HERE 9\n"); + return false; + } + + const uint64_t total_len = (uint64_t)key_values[i].m_key.size() + (uint64_t)key_values[i].m_value.size(); + if (total_len >= UINT32_MAX) + { + //fmt_debug_printf("HERE 10\n"); + return false; + } + + packed_uint<4> le_len((uint32_t)total_len); + append_vector(key_value_data, (const uint8_t*)&le_len, sizeof(le_len)); + + append_vector(key_value_data, key_values[i].m_key); + append_vector(key_value_data, key_values[i].m_value); + + const uint32_t ofs = key_value_data.size() & 3; + const uint32_t padding = (4 - ofs) & 3; + for (uint32_t p = 0; p < 
padding; p++) + key_value_data.push_back(0); + } + + if (header.m_supercompression_scheme != basist::KTX2_SS_NONE) + break; + +#if BASISU_DISABLE_KTX2_ALIGNMENT_WORKAROUND + break; +#endif + + // Hack to ensure the KVD block ends on a 16 byte boundary, because we have no other official way of aligning the data. + uint32_t kvd_end_file_offset = kvd_file_offset + (uint32_t)key_value_data.size(); + uint32_t bytes_needed_to_pad = (16 - (kvd_end_file_offset & 15)) & 15; + if (!bytes_needed_to_pad) + { + // We're good. No need to add a dummy key. + break; + } + + assert(!pass); + if (pass) + { + //fmt_debug_printf("HERE 11\n"); + return false; + } + + if (bytes_needed_to_pad < 6) + bytes_needed_to_pad += 16; + + // Just add the padding. It's likely not necessary anymore, but can't really hurt. + //printf("WARNING: Due to a KTX2 validator bug related to mipPadding, we must insert a dummy key into the KTX2 file of %u bytes\n", bytes_needed_to_pad); + + // We're not good - need to add a dummy key large enough to force file alignment so the mip level array gets aligned. + // We can't just add some bytes before the mip level array because ktx2check will see that as extra data in the file that shouldn't be there in ktxValidator::validateDataSize(). 
+ key_values.enlarge(1); + for (uint32_t i = 0; i < (bytes_needed_to_pad - 4 - 1 - 1); i++) + key_values.back().m_key.push_back(127); + + key_values.back().m_key.push_back(0); + + key_values.back().m_value.push_back(0); + + key_values.sort(); + + key_value_data.resize(0); + + // Try again + } + + basisu::vector<basist::ktx2_level_index> level_index_array(total_levels); + memset((void *)level_index_array.data(), 0, level_index_array.size_in_bytes()); + + m_output_ktx2_file.clear(); + m_output_ktx2_file.reserve(m_output_basis_file.size()); + + // Dummy header + m_output_ktx2_file.resize(sizeof(header)); + + // Level index array + append_vector(m_output_ktx2_file, (const uint8_t*)level_index_array.data(), level_index_array.size_in_bytes()); + + // DFD + const uint8_t* pDFD = dfd.data(); + uint32_t dfd_len = (uint32_t)dfd.size(); + + header.m_dfd_byte_offset = m_output_ktx2_file.size(); + header.m_dfd_byte_length = dfd_len; + append_vector(m_output_ktx2_file, pDFD, dfd_len); + + // Key value data + if (key_value_data.size()) + { + assert(kvd_file_offset == m_output_ktx2_file.size()); + + header.m_kvd_byte_offset = m_output_ktx2_file.size(); + header.m_kvd_byte_length = key_value_data.size(); + append_vector(m_output_ktx2_file, key_value_data); + } + + // Global Supercompressed Data + if (ktx2_global_data.size()) + { + uint32_t ofs = m_output_ktx2_file.size() & 7; + uint32_t padding = (8 - ofs) & 7; + for (uint32_t i = 0; i < padding; i++) + m_output_ktx2_file.push_back(0); + + header.m_sgd_byte_length = ktx2_global_data.size(); + header.m_sgd_byte_offset = m_output_ktx2_file.size(); + + append_vector(m_output_ktx2_file, ktx2_global_data); + } + + // mipPadding + if (header.m_supercompression_scheme == basist::KTX2_SS_NONE) + { + // We currently can't do this or the validator will incorrectly give an error. + uint32_t ofs = m_output_ktx2_file.size() & 15; + uint32_t padding = (16 - ofs) & 15; + + // Make sure we're always aligned here (due to a validator bug). 
+ if (padding) + { + printf("Warning: KTX2 mip level data is not 16-byte aligned. This may trigger a ktx2check validation bug. Writing %u bytes of mipPadding.\n", padding); + } + + for (uint32_t i = 0; i < padding; i++) + m_output_ktx2_file.push_back(0); + } + + // Level data - write the smallest mipmap first. + for (int level = total_levels - 1; level >= 0; level--) + { + level_index_array[level].m_byte_length = compressed_level_data_bytes[level].size(); + + //if (m_params.m_uastc) + if (can_use_zstd) + { + level_index_array[level].m_uncompressed_byte_length = level_data_bytes[level].size(); + } + + level_index_array[level].m_byte_offset = m_output_ktx2_file.size(); + append_vector(m_output_ktx2_file, compressed_level_data_bytes[level]); + } + + // Write final header + memcpy(m_output_ktx2_file.data(), &header, sizeof(header)); + + // Write final level index array + memcpy(m_output_ktx2_file.data() + sizeof(header), level_index_array.data(), level_index_array.size_in_bytes()); + + uint32_t total_orig_pixels = 0; + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc& slice_desc = m_slice_descs[i]; + total_orig_pixels += slice_desc.m_orig_width * slice_desc.m_orig_height; + } + + debug_printf("Total .ktx2 output file size: %u, %3.3f bits/texel\n", m_output_ktx2_file.size(), ((float)m_output_ktx2_file.size() * 8.0f) / total_orig_pixels); + + return true; + } + + bool basis_parallel_compress( + uint32_t total_threads, + const basisu::vector<basis_compressor_params>& params_vec, + basisu::vector< parallel_results >& results_vec) + { + assert(g_library_initialized); + if (!g_library_initialized) + { + error_printf("basis_parallel_compress: basisu_encoder_init() MUST be called before using any encoder functionality!\n"); + return false; + } + + assert(total_threads >= 1); + total_threads = basisu::maximum<uint32_t>(total_threads, 1); + + job_pool jpool(total_threads); + + results_vec.resize(0); + results_vec.resize(params_vec.size()); + + std::atomic<bool> result; + 
result.store(true); + + std::atomic<bool> opencl_failed; + opencl_failed.store(false); + + for (uint32_t pindex = 0; pindex < params_vec.size(); pindex++) + { + jpool.add_job([pindex, &params_vec, &results_vec, &result, &opencl_failed] { + + basis_compressor_params params = params_vec[pindex]; + parallel_results& results = results_vec[pindex]; + + interval_timer tm; + tm.start(); + + basis_compressor c; + + // Dummy job pool + job_pool task_jpool(1); + params.m_pJob_pool = &task_jpool; + // TODO: Remove this flag entirely + params.m_multithreading = true; + + // Stop using OpenCL if a failure ever occurs. + if (opencl_failed) + params.m_use_opencl = false; + + bool status = c.init(params); + + if (c.get_opencl_failed()) + opencl_failed.store(true); + + if (status) + { + basis_compressor::error_code ec = c.process(); + + if (c.get_opencl_failed()) + opencl_failed.store(true); + + results.m_error_code = ec; + + if (ec == basis_compressor::cECSuccess) + { + results.m_basis_file = c.get_output_basis_file(); + results.m_ktx2_file = c.get_output_ktx2_file(); + results.m_stats = c.get_stats(); + results.m_basis_bits_per_texel = c.get_basis_bits_per_texel(); + results.m_any_source_image_has_alpha = c.get_any_source_image_has_alpha(); + } + else + { + result = false; + } + } + else + { + results.m_error_code = basis_compressor::cECFailedInitializing; + + result = false; + } + + results.m_total_time = tm.get_elapsed_secs(); + } ); + + } // pindex + + jpool.wait_for_all(); + + if (opencl_failed) + error_printf("An OpenCL error occured sometime during compression. 
The compressor fell back to CPU processing after the failure.\n"); + + return result; + } + + static void* basis_compress( + basist::basis_tex_format mode, + const basisu::vector<image> *pSource_images, + const basisu::vector<imagef> *pSource_images_hdr, + uint32_t flags_and_quality, float uastc_rdo_quality, + size_t* pSize, + image_stats* pStats) + { + assert((pSource_images != nullptr) || (pSource_images_hdr != nullptr)); + assert(!((pSource_images != nullptr) && (pSource_images_hdr != nullptr))); + + // Check input parameters + if (pSource_images) + { + if ((!pSource_images->size()) || (!pSize)) + { + error_printf("basis_compress: Invalid parameter\n"); + assert(0); + return nullptr; + } + } + else + { + if ((!pSource_images_hdr->size()) || (!pSize)) + { + error_printf("basis_compress: Invalid parameter\n"); + assert(0); + return nullptr; + } + } + + *pSize = 0; + + // Initialize a job pool + uint32_t num_threads = 1; + if (flags_and_quality & cFlagThreaded) + num_threads = basisu::maximum<uint32_t>(1, std::thread::hardware_concurrency()); + + job_pool jp(num_threads); + + // Initialize the compressor parameter struct + basis_compressor_params comp_params; + comp_params.set_format_mode(mode); + + comp_params.m_pJob_pool = &jp; + + comp_params.m_y_flip = (flags_and_quality & cFlagYFlip) != 0; + comp_params.m_debug = (flags_and_quality & cFlagDebug) != 0; + comp_params.m_debug_images = (flags_and_quality & cFlagDebugImages) != 0; + + // Copy the largest mipmap level + if (pSource_images) + { + comp_params.m_source_images.resize(1); + comp_params.m_source_images[0] = (*pSource_images)[0]; + + // Copy the smaller mipmap levels, if any + if (pSource_images->size() > 1) + { + comp_params.m_source_mipmap_images.resize(1); + comp_params.m_source_mipmap_images[0].resize(pSource_images->size() - 1); + + for (uint32_t i = 1; i < pSource_images->size(); i++) + comp_params.m_source_mipmap_images[0][i - 1] = (*pSource_images)[i]; + } + } + else + { + comp_params.m_source_images_hdr.resize(1); + 
comp_params.m_source_images_hdr[0] = (*pSource_images_hdr)[0]; + + // Copy the smaller mipmap levels, if any + if (pSource_images_hdr->size() > 1) + { + comp_params.m_source_mipmap_images_hdr.resize(1); + comp_params.m_source_mipmap_images_hdr[0].resize(pSource_images_hdr->size() - 1); + + for (uint32_t i = 1; i < pSource_images_hdr->size(); i++) + comp_params.m_source_mipmap_images_hdr[0][i - 1] = (*pSource_images_hdr)[i]; + } + } + + comp_params.m_multithreading = (flags_and_quality & cFlagThreaded) != 0; + comp_params.m_use_opencl = (flags_and_quality & cFlagUseOpenCL) != 0; + + comp_params.m_write_output_basis_or_ktx2_files = false; + + comp_params.m_perceptual = (flags_and_quality & cFlagSRGB) != 0; + comp_params.m_mip_srgb = comp_params.m_perceptual; + comp_params.m_mip_gen = (flags_and_quality & (cFlagGenMipsWrap | cFlagGenMipsClamp)) != 0; + comp_params.m_mip_wrapping = (flags_and_quality & cFlagGenMipsWrap) != 0; + + if (mode == basist::basis_tex_format::cUASTC4x4) + { + comp_params.m_pack_uastc_ldr_4x4_flags = flags_and_quality & cPackUASTCLevelMask; + comp_params.m_rdo_uastc_ldr_4x4 = (flags_and_quality & cFlagUASTCRDO) != 0; + comp_params.m_rdo_uastc_ldr_4x4_quality_scalar = uastc_rdo_quality; + } + else if (mode == basist::basis_tex_format::cETC1S) + { + comp_params.m_etc1s_quality_level = basisu::maximum<uint32_t>(1, flags_and_quality & 255); + } + + comp_params.m_create_ktx2_file = (flags_and_quality & cFlagKTX2) != 0; + + if (comp_params.m_create_ktx2_file) + { + // Set KTX2 specific parameters. 
+ if ((flags_and_quality & cFlagKTX2UASTCSuperCompression) && (comp_params.m_uastc)) + comp_params.m_ktx2_uastc_supercompression = basist::KTX2_SS_ZSTANDARD; + + comp_params.m_ktx2_srgb_transfer_func = comp_params.m_perceptual; + } + + comp_params.m_compute_stats = (pStats != nullptr); + comp_params.m_print_stats = (flags_and_quality & cFlagPrintStats) != 0; + comp_params.m_status_output = (flags_and_quality & cFlagPrintStatus) != 0; + + if (mode == basist::basis_tex_format::cUASTC_HDR_4x4) + { + comp_params.m_uastc_hdr_4x4_options.set_quality_level(flags_and_quality & cPackUASTCLevelMask); + } + else if ((mode == basist::basis_tex_format::cASTC_HDR_6x6) || (mode == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)) + { + comp_params.m_astc_hdr_6x6_options.set_user_level(flags_and_quality & cPackUASTCLevelMask); + comp_params.m_astc_hdr_6x6_options.m_lambda = uastc_rdo_quality; + comp_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut = (flags_and_quality & cFlagREC2020) != 0; + } + + // Create the compressor, initialize it, and process the input + basis_compressor comp; + if (!comp.init(comp_params)) + { + error_printf("basis_compress: basis_compressor::init() failed!\n"); + return nullptr; + } + + basis_compressor::error_code ec = comp.process(); + + if (ec != basis_compressor::cECSuccess) + { + error_printf("basis_compress: basis_compressor::process() failed with error code %u\n", (uint32_t)ec); + return nullptr; + } + + if ((pStats) && (comp.get_opencl_failed())) + { + pStats->m_opencl_failed = true; + } + + // Get the output file data and return it to the caller + void* pFile_data = nullptr; + const uint8_vec* pFile_data_vec = comp_params.m_create_ktx2_file ? 
&comp.get_output_ktx2_file() : &comp.get_output_basis_file(); + + pFile_data = malloc(pFile_data_vec->size()); + if (!pFile_data) + { + error_printf("basis_compress: Out of memory\n"); + return nullptr; + } + memcpy(pFile_data, pFile_data_vec->get_ptr(), pFile_data_vec->size()); + + *pSize = pFile_data_vec->size(); + + if ((pStats) && (comp.get_stats().size())) + { + *pStats = comp.get_stats()[0]; + } + + return pFile_data; + } + + void* basis_compress( + basist::basis_tex_format mode, + const basisu::vector<image>& source_images, + uint32_t flags_and_quality, float uastc_rdo_quality, + size_t* pSize, + image_stats* pStats) + { + return basis_compress(mode, &source_images, nullptr, flags_and_quality, uastc_rdo_quality, pSize, pStats); + } + + void* basis_compress( + basist::basis_tex_format mode, + const basisu::vector<imagef>& source_images_hdr, + uint32_t flags_and_quality, float lambda, + size_t* pSize, + image_stats* pStats) + { + return basis_compress(mode, nullptr, &source_images_hdr, flags_and_quality, lambda, pSize, pStats); + } + + void* basis_compress( + basist::basis_tex_format mode, + const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels, + uint32_t flags_and_quality, float uastc_rdo_quality, + size_t* pSize, + image_stats* pStats) + { + if (!pitch_in_pixels) + pitch_in_pixels = width; + + if ((!pImageRGBA) || (!width) || (!height) || (pitch_in_pixels < width) || (!pSize)) + { + error_printf("basis_compress: Invalid parameter\n"); + assert(0); + return nullptr; + } + + *pSize = 0; + + if ((width > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (height > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION)) + { + error_printf("basis_compress: Image too large\n"); + return nullptr; + } + + // Copy the source image + basisu::vector<image> source_image(1); + source_image[0].crop(width, height, width, g_black_color, false); + for (uint32_t y = 0; y < height; y++) + memcpy(source_image[0].get_ptr() + y * width, (const color_rgba*)pImageRGBA + y * pitch_in_pixels, 
width * sizeof(color_rgba)); + + return basis_compress(mode, source_image, flags_and_quality, uastc_rdo_quality, pSize, pStats); + } + + void basis_free_data(void* p) + { + free(p); + } + + bool basis_benchmark_etc1s_opencl(bool* pOpenCL_failed) + { + if (pOpenCL_failed) + *pOpenCL_failed = false; + + if (!opencl_is_available()) + { + error_printf("basis_benchmark_etc1s_opencl: OpenCL support must be enabled first!\n"); + return false; + } + + const uint32_t W = 1024, H = 1024; + basisu::vector<image> images; + image& img = images.enlarge(1)->resize(W, H); + + const uint32_t NUM_RAND_LETTERS = 6000;// 40000; + + rand r; + r.seed(200); + + for (uint32_t i = 0; i < NUM_RAND_LETTERS; i++) + { + uint32_t x = r.irand(0, W - 1), y = r.irand(0, H - 1); + uint32_t sx = r.irand(1, 4), sy = r.irand(1, 4); + color_rgba c(r.byte(), r.byte(), r.byte(), 255); + + img.debug_text(x, y, sx, sy, c, nullptr, false, "%c", static_cast<char>(r.irand(32, 127))); + } + + //save_png("test.png", img); + + image_stats stats; + + uint32_t flags_and_quality = cFlagSRGB | cFlagThreaded | 255; + size_t comp_size = 0; + + double best_cpu_time = 1e+9f, best_gpu_time = 1e+9f; + + const uint32_t TIMES_TO_ENCODE = 2; + interval_timer tm; + + for (uint32_t i = 0; i < TIMES_TO_ENCODE; i++) + { + tm.start(); + void* pComp_data = basis_compress( + basist::basis_tex_format::cETC1S, + images, + flags_and_quality, 1.0f, + &comp_size, + &stats); + double cpu_time = tm.get_elapsed_secs(); + if (!pComp_data) + { + error_printf("basis_benchmark_etc1s_opencl: basis_compress() failed (CPU)!\n"); + return false; + } + + best_cpu_time = minimum(best_cpu_time, cpu_time); + + basis_free_data(pComp_data); + } + + printf("Best CPU time: %3.3f\n", best_cpu_time); + + for (uint32_t i = 0; i < TIMES_TO_ENCODE; i++) + { + tm.start(); + void* pComp_data = basis_compress( + basist::basis_tex_format::cETC1S, + images, + flags_and_quality | cFlagUseOpenCL, 1.0f, + &comp_size, + &stats); + + if (stats.m_opencl_failed) + { + 
error_printf("basis_benchmark_etc1s_opencl: OpenCL failed!\n"); + + basis_free_data(pComp_data); + + if (pOpenCL_failed) + *pOpenCL_failed = true; + + return false; + } + + double gpu_time = tm.get_elapsed_secs(); + if (!pComp_data) + { + error_printf("basis_benchmark_etc1s_opencl: basis_compress() failed (GPU)!\n"); + return false; + } + + best_gpu_time = minimum(best_gpu_time, gpu_time); + + basis_free_data(pComp_data); + } + + printf("Best GPU time: %3.3f\n", best_gpu_time); + + return best_gpu_time < best_cpu_time; + } + +} // namespace basisu + + + diff --git a/thirdparty/basisu/encoder/basisu_comp.h b/thirdparty/basisu/encoder/basisu_comp.h new file mode 100644 index 000000000..ffa1fdf2a --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_comp.h @@ -0,0 +1,868 @@ +// basisu_comp.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "basisu_frontend.h" +#include "basisu_backend.h" +#include "basisu_basis_file.h" +#include "../transcoder/basisu_transcoder.h" +#include "basisu_uastc_enc.h" +#include "basisu_uastc_hdr_4x4_enc.h" +#include "basisu_astc_hdr_6x6_enc.h" + +#define BASISU_LIB_VERSION 160 +#define BASISU_LIB_VERSION_STRING "1.60" + +#ifndef BASISD_SUPPORT_KTX2 + #error BASISD_SUPPORT_KTX2 is undefined +#endif +#ifndef BASISD_SUPPORT_KTX2_ZSTD + #error BASISD_SUPPORT_KTX2_ZSTD is undefined +#endif + +#if !BASISD_SUPPORT_KTX2 + #error BASISD_SUPPORT_KTX2 must be enabled when building the encoder. To reduce code size if KTX2 support is not needed, set BASISD_SUPPORT_KTX2_ZSTD to 0 +#endif + +namespace basisu +{ + struct opencl_context; + typedef opencl_context* opencl_context_ptr; + + const uint32_t BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION = 16384; + + // Allow block's color distance to increase by 1.5 while searching for an alternative nearby endpoint. + const float BASISU_DEFAULT_ENDPOINT_RDO_THRESH = 1.5f; + + // Allow block's color distance to increase by 1.25 while searching the selector history buffer for a close enough match. 
+ const float BASISU_DEFAULT_SELECTOR_RDO_THRESH = 1.25f; + + const int BASISU_DEFAULT_QUALITY = 128; + const float BASISU_DEFAULT_HYBRID_SEL_CB_QUALITY_THRESH = 2.0f; + + const uint32_t BASISU_MAX_IMAGE_DIMENSION = 16384; + const uint32_t BASISU_QUALITY_MIN = 1; + const uint32_t BASISU_QUALITY_MAX = 255; + const uint32_t BASISU_MAX_ENDPOINT_CLUSTERS = basisu_frontend::cMaxEndpointClusters; + const uint32_t BASISU_MAX_SELECTOR_CLUSTERS = basisu_frontend::cMaxSelectorClusters; + + const uint32_t BASISU_MAX_SLICES = 0xFFFFFF; + + const int BASISU_RDO_UASTC_DICT_SIZE_DEFAULT = 4096; // 32768; + const int BASISU_RDO_UASTC_DICT_SIZE_MIN = 64; + const int BASISU_RDO_UASTC_DICT_SIZE_MAX = 65536; + + struct image_stats + { + image_stats() + { + clear(); + } + + void clear() + { + m_filename.clear(); + m_width = 0; + m_height = 0; + + m_basis_rgb_avg_psnr = 0.0f; + m_basis_rgb_avg_log2_psnr = 0.0f; + + m_basis_rgba_avg_psnr = 0.0f; + m_basis_a_avg_psnr = 0.0f; + m_basis_luma_709_psnr = 0.0f; + m_basis_luma_601_psnr = 0.0f; + m_basis_luma_709_ssim = 0.0f; + + m_basis_rgb_avg_bc6h_psnr = 0.0f; + m_basis_rgb_avg_bc6h_log2_psnr = 0.0f; + + m_bc7_rgb_avg_psnr = 0.0f; + m_bc7_rgba_avg_psnr = 0.0f; + m_bc7_a_avg_psnr = 0.0f; + m_bc7_luma_709_psnr = 0.0f; + m_bc7_luma_601_psnr = 0.0f; + m_bc7_luma_709_ssim = 0.0f; + + m_best_etc1s_rgb_avg_psnr = 0.0f; + m_best_etc1s_luma_709_psnr = 0.0f; + m_best_etc1s_luma_601_psnr = 0.0f; + m_best_etc1s_luma_709_ssim = 0.0f; + + m_opencl_failed = false; + } + + std::string m_filename; + uint32_t m_width; + uint32_t m_height; + + // .basis/.ktx2 compressed (LDR: ETC1S or UASTC statistics, HDR: transcoded BC6H statistics) + float m_basis_rgb_avg_psnr; + float m_basis_rgb_avg_log2_psnr; + + float m_basis_rgba_avg_psnr; + float m_basis_a_avg_psnr; + float m_basis_luma_709_psnr; + float m_basis_luma_601_psnr; + float m_basis_luma_709_ssim; + + // UASTC HDR only. 
+ float m_basis_rgb_avg_bc6h_psnr; + float m_basis_rgb_avg_bc6h_log2_psnr; + + // LDR: BC7 statistics + float m_bc7_rgb_avg_psnr; + float m_bc7_rgba_avg_psnr; + float m_bc7_a_avg_psnr; + float m_bc7_luma_709_psnr; + float m_bc7_luma_601_psnr; + float m_bc7_luma_709_ssim; + + // LDR: Highest achievable quality ETC1S statistics + float m_best_etc1s_rgb_avg_psnr; + float m_best_etc1s_luma_709_psnr; + float m_best_etc1s_luma_601_psnr; + float m_best_etc1s_luma_709_ssim; + + bool m_opencl_failed; + }; + + enum class hdr_modes + { + // standard but constrained ASTC HDR 4x4 tex data that can be rapidly transcoded to BC6H + cUASTC_HDR_4X4, + // standard RDO optimized or non-RDO (highest quality) ASTC HDR 6x6 tex data that can be rapidly re-encoded to BC6H + cASTC_HDR_6X6, + // a custom intermediate format based off ASTC HDR that can be rapidly decoded straight to ASTC HDR or re-encoded to BC6H + cASTC_HDR_6X6_INTERMEDIATE, + cTotal + }; + + template + struct bool_param + { + bool_param() : + m_value(def), + m_changed(false) + { + } + + void clear() + { + m_value = def; + m_changed = false; + } + + operator bool() const + { + return m_value; + } + + bool operator= (bool v) + { + m_value = v; + m_changed = true; + return m_value; + } + + bool was_changed() const { return m_changed; } + void set_changed(bool flag) { m_changed = flag; } + + bool m_value; + bool m_changed; + }; + + template + struct param + { + param(T def, T min_v, T max_v) : + m_value(def), + m_def(def), + m_min(min_v), + m_max(max_v), + m_changed(false) + { + } + + void clear() + { + m_value = m_def; + m_changed = false; + } + + operator T() const + { + return m_value; + } + + T operator= (T v) + { + m_value = clamp(v, m_min, m_max); + m_changed = true; + return m_value; + } + + T operator *= (T v) + { + m_value *= v; + m_changed = true; + return m_value; + } + + bool was_changed() const { return m_changed; } + void set_changed(bool flag) { m_changed = flag; } + + T m_value; + T m_def; + T m_min; + T m_max; 
+ bool m_changed; + }; + + struct basis_compressor_params + { + basis_compressor_params() : + m_compression_level((int)BASISU_DEFAULT_COMPRESSION_LEVEL, 0, (int)BASISU_MAX_COMPRESSION_LEVEL), + m_selector_rdo_thresh(BASISU_DEFAULT_SELECTOR_RDO_THRESH, 0.0f, 1e+10f), + m_endpoint_rdo_thresh(BASISU_DEFAULT_ENDPOINT_RDO_THRESH, 0.0f, 1e+10f), + m_mip_scale(1.0f, .000125f, 4.0f), + m_mip_smallest_dimension(1, 1, 16384), + m_etc1s_max_endpoint_clusters(512), + m_etc1s_max_selector_clusters(512), + m_etc1s_quality_level(-1), + m_pack_uastc_ldr_4x4_flags(cPackUASTCLevelDefault), + m_rdo_uastc_ldr_4x4_quality_scalar(1.0f, 0.001f, 50.0f), + m_rdo_uastc_ldr_4x4_dict_size(BASISU_RDO_UASTC_DICT_SIZE_DEFAULT, BASISU_RDO_UASTC_DICT_SIZE_MIN, BASISU_RDO_UASTC_DICT_SIZE_MAX), + m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale(UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE, 1.0f, 300.0f), + m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev(UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV, .01f, 65536.0f), + m_rdo_uastc_ldr_4x4_max_allowed_rms_increase_ratio(UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO, .01f, 100.0f), + m_rdo_uastc_ldr_4x4_skip_block_rms_thresh(UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH, .01f, 100.0f), + m_resample_width(0, 1, 16384), + m_resample_height(0, 1, 16384), + m_resample_factor(0.0f, .00125f, 100.0f), + m_ktx2_uastc_supercompression(basist::KTX2_SS_NONE), + m_ktx2_zstd_supercompression_level(6, INT_MIN, INT_MAX), + m_ldr_hdr_upconversion_nit_multiplier(0.0f, 0.0f, basist::MAX_HALF_FLOAT), + m_ldr_hdr_upconversion_black_bias(0.0f, 0.0f, 1.0f), + m_pJob_pool(nullptr) + { + clear(); + } + + void clear() + { + m_uastc.clear(); + m_hdr.clear(); + m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; + + m_use_opencl.clear(); + m_status_output.clear(); + + m_source_filenames.clear(); + m_source_alpha_filenames.clear(); + + m_source_images.clear(); + m_source_mipmap_images.clear(); + + m_out_filename.clear(); + + m_y_flip.clear(); + m_debug.clear(); + m_validate_etc1s.clear(); + 
m_debug_images.clear(); + m_perceptual.clear(); + m_no_selector_rdo.clear(); + m_selector_rdo_thresh.clear(); + m_read_source_images.clear(); + m_write_output_basis_or_ktx2_files.clear(); + m_compression_level.clear(); + m_compute_stats.clear(); + m_print_stats.clear(); + m_check_for_alpha.clear(); + m_force_alpha.clear(); + m_multithreading.clear(); + m_swizzle[0] = 0; + m_swizzle[1] = 1; + m_swizzle[2] = 2; + m_swizzle[3] = 3; + m_renormalize.clear(); + m_disable_hierarchical_endpoint_codebooks.clear(); + + m_no_endpoint_rdo.clear(); + m_endpoint_rdo_thresh.clear(); + + m_mip_gen.clear(); + m_mip_scale.clear(); + m_mip_filter = "kaiser"; + m_mip_scale = 1.0f; + m_mip_srgb.clear(); + m_mip_premultiplied.clear(); + m_mip_renormalize.clear(); + m_mip_wrapping.clear(); + m_mip_fast.clear(); + m_mip_smallest_dimension.clear(); + + m_etc1s_max_endpoint_clusters = 0; + m_etc1s_max_selector_clusters = 0; + m_etc1s_quality_level = -1; + + m_tex_type = basist::cBASISTexType2D; + m_userdata0 = 0; + m_userdata1 = 0; + m_us_per_frame = 0; + + m_pack_uastc_ldr_4x4_flags = cPackUASTCLevelDefault; + m_rdo_uastc_ldr_4x4.clear(); + m_rdo_uastc_ldr_4x4_quality_scalar.clear(); + m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale.clear(); + m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev.clear(); + m_rdo_uastc_ldr_4x4_max_allowed_rms_increase_ratio.clear(); + m_rdo_uastc_ldr_4x4_skip_block_rms_thresh.clear(); + m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode.clear(); + m_rdo_uastc_ldr_4x4_multithreading.clear(); + + m_resample_width.clear(); + m_resample_height.clear(); + m_resample_factor.clear(); + + m_pGlobal_codebooks = nullptr; + + m_create_ktx2_file.clear(); + m_ktx2_uastc_supercompression = basist::KTX2_SS_NONE; + m_ktx2_key_values.clear(); + m_ktx2_zstd_supercompression_level.clear(); + m_ktx2_srgb_transfer_func.clear(); + + m_validate_output_data.clear(); + + m_ldr_hdr_upconversion_srgb_to_linear.clear(); + + m_hdr_favor_astc.clear(); + + m_uastc_hdr_4x4_options.init(); + 
m_astc_hdr_6x6_options.clear(); + + m_ldr_hdr_upconversion_nit_multiplier.clear(); + m_ldr_hdr_upconversion_black_bias.clear(); + + m_pJob_pool = nullptr; + } + + // Configures the compressor's mode by setting the proper parameters (which were preserved for backwards compatibility with old code). + void set_format_mode(basist::basis_tex_format m) + { + switch (m) + { + case basist::basis_tex_format::cETC1S: + { + m_hdr = false; + m_uastc = false; + m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; // doesn't matter + break; + } + case basist::basis_tex_format::cUASTC4x4: + { + m_hdr = false; + m_uastc = true; + m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; // doesn't matter + break; + } + case basist::basis_tex_format::cUASTC_HDR_4x4: + { + m_hdr = true; + m_uastc = true; + m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; + break; + } + case basist::basis_tex_format::cASTC_HDR_6x6: + { + m_hdr = true; + m_uastc = true; + m_hdr_mode = hdr_modes::cASTC_HDR_6X6; + break; + } + case basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: + { + m_hdr = true; + m_uastc = true; + m_hdr_mode = hdr_modes::cASTC_HDR_6X6_INTERMEDIATE; + break; + } + default: + assert(0); + break; + } + } + + // By default we generate LDR ETC1S data. + // if m_uastc is true but m_hdr is not true, we generate UASTC 4x4 LDR data (8bpp with or without RDO). + // if m_uastc is true and m_hdr is true, we generate 4x4 or 6x6 HDR data (either standard ASTC, constrained ASTC, RDO ASTC, or intermediate), controlled by m_hdr_mode. + + // True to generate UASTC .basis/.KTX2 file data, otherwise ETC1S. + // Should be true for any non-ETC1S format (UASTC 4x4 LDR, UASTC 4x4 HDR, RDO ASTC 6x6 HDR, and ASTC 6x6 HDR intermediate). + bool_param m_uastc; + + // Set m_hdr to true to switch to UASTC HDR mode. m_hdr_mode then controls which format is output. + // m_hdr_mode then controls which format is output (4x4, 6x6, or 6x6 intermediate). 
+ bool_param m_hdr; + + // If m_hdr is true, this specifies which mode we operate in (currently UASTC 4x4 HDR or ASTC 6x6 HDR). Defaults to UASTC 4x4 HDR for backwards compatibility. + hdr_modes m_hdr_mode; + + bool_param m_use_opencl; + + // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG etc. images to read. + // Otherwise, the compressor processes the images in m_source_images or m_source_images_hdr. + basisu::vector m_source_filenames; + basisu::vector m_source_alpha_filenames; + + basisu::vector m_source_images; + + basisu::vector m_source_images_hdr; + + // Stores mipmaps starting from level 1. Level 0 is still stored in m_source_images, as usual. + // If m_source_mipmaps isn't empty, automatic mipmap generation isn't done. m_source_mipmaps.size() MUST equal m_source_images.size() or the compressor returns an error. + // The compressor applies the user-provided swizzling (in m_swizzle) to these images. + basisu::vector< basisu::vector > m_source_mipmap_images; + + basisu::vector< basisu::vector > m_source_mipmap_images_hdr; + + // Filename of the output basis/ktx2 file + std::string m_out_filename; + + // The params are done this way so we can detect when the user has explictly changed them. + + // Flip images across Y axis + bool_param m_y_flip; + + // If true, the compressor will print basis status to stdout during compression. + bool_param m_status_output; + + // Output debug information during compression + bool_param m_debug; + bool_param m_validate_etc1s; + + // m_debug_images is pretty slow + bool_param m_debug_images; + + // ETC1S compression level, from 0 to BASISU_MAX_COMPRESSION_LEVEL (higher is slower). + // This parameter controls numerous internal encoding speed vs. compression efficiency/performance tradeoffs. + // Note this is NOT the same as the ETC1S quality level, and most users shouldn't change this. 
+ param m_compression_level; + + // Use perceptual sRGB colorspace metrics instead of linear + bool_param m_perceptual; + + // Disable selector RDO, for faster compression but larger files + bool_param m_no_selector_rdo; + param m_selector_rdo_thresh; + + bool_param m_no_endpoint_rdo; + param m_endpoint_rdo_thresh; + + // Read source images from m_source_filenames/m_source_alpha_filenames + bool_param m_read_source_images; + + // Write the output basis/ktx2 file to disk using m_out_filename + bool_param m_write_output_basis_or_ktx2_files; + + // Compute and display image metrics + bool_param m_compute_stats; + + // Print stats to stdout, if m_compute_stats is true. + bool_param m_print_stats; + + // Check to see if any input image has an alpha channel, if so then the output basis/ktx2 file will have alpha channels + bool_param m_check_for_alpha; + + // Always put alpha slices in the output basis/ktx2 file, even when the input doesn't have alpha + bool_param m_force_alpha; + bool_param m_multithreading; + + // Split the R channel to RGB and the G channel to alpha, then write a basis/ktx2 file with alpha channels + uint8_t m_swizzle[4]; + + bool_param m_renormalize; + + // If true the front end will not use 2 level endpoint codebook searching, for slightly higher quality but much slower execution. + // Note some m_compression_level's disable this automatically. + bool_param m_disable_hierarchical_endpoint_codebooks; + + // mipmap generation parameters + bool_param m_mip_gen; + param m_mip_scale; + std::string m_mip_filter; + bool_param m_mip_srgb; + bool_param m_mip_premultiplied; // not currently supported + bool_param m_mip_renormalize; + bool_param m_mip_wrapping; + bool_param m_mip_fast; + param m_mip_smallest_dimension; + + // ETC1S codebook size (quality) control. + // If m_etc1s_quality_level != -1, it controls the quality level. It ranges from [1,255] or [BASISU_QUALITY_MIN, BASISU_QUALITY_MAX]. 
+ // Otherwise m_max_endpoint_clusters/m_max_selector_clusters controls the codebook sizes directly. + uint32_t m_etc1s_max_endpoint_clusters; + uint32_t m_etc1s_max_selector_clusters; + int m_etc1s_quality_level; + + // m_tex_type, m_userdata0, m_userdata1, m_framerate - These fields go directly into the .basis file header. + basist::basis_texture_type m_tex_type; + uint32_t m_userdata0; + uint32_t m_userdata1; + uint32_t m_us_per_frame; + + // UASTC LDR 4x4 parameters + // cPackUASTCLevelDefault, etc. + uint32_t m_pack_uastc_ldr_4x4_flags; + bool_param m_rdo_uastc_ldr_4x4; + param m_rdo_uastc_ldr_4x4_quality_scalar; + param m_rdo_uastc_ldr_4x4_dict_size; + param m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale; + param m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev; + param m_rdo_uastc_ldr_4x4_max_allowed_rms_increase_ratio; + param m_rdo_uastc_ldr_4x4_skip_block_rms_thresh; + bool_param m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode; + bool_param m_rdo_uastc_ldr_4x4_multithreading; + + param m_resample_width; + param m_resample_height; + param m_resample_factor; + + const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; + + // KTX2 specific parameters. + // Internally, the compressor always creates a .basis file then it converts that lossless to KTX2. + bool_param m_create_ktx2_file; + basist::ktx2_supercompression m_ktx2_uastc_supercompression; + basist::ktx2_transcoder::key_value_vec m_ktx2_key_values; + param m_ktx2_zstd_supercompression_level; + bool_param m_ktx2_srgb_transfer_func; + + uastc_hdr_4x4_codec_options m_uastc_hdr_4x4_options; + astc_6x6_hdr::astc_hdr_6x6_global_config m_astc_hdr_6x6_options; + + bool_param m_validate_output_data; + + // LDR->HDR upconversion parameters. + // + // If true, LDR images (such as PNG) will be converted to normalized [0,1] linear light (via a sRGB->Linear conversion), or absolute luminance (nits or candelas per meter squared), and then processed as HDR. 
+ // Otherwise, LDR images are assumed to already be in linear light (i.e. they don't use the sRGB transfer function). + bool_param m_ldr_hdr_upconversion_srgb_to_linear; + + // m_ldr_hdr_upconversion_nit_multiplier is only used when loading SDR/LDR images and compressing to an HDR output format. + // By default m_ldr_hdr_upconversion_nit_multiplier is 0. It's an override for the default. + // When loading LDR images, a default multiplier of 1.0 will be used in UASTC 4x4 HDR mode. Partially for backwards compatibility with previous library releases, and also because it doesn't really matter with this encoder what the multiplier is. + // With the 6x6 HDR encoder it does matter because it expects inputs in absolute nits, so the LDR upconversion luminance multiplier default will be 100 nits. (Most SDR monitors were/are 80-100 nits or so.) + param m_ldr_hdr_upconversion_nit_multiplier; + + // The optional sRGB space bias to use during LDR->HDR upconversion. Should be between [0,.49] or so. Only applied on black (0.0) color components. + // Defaults to no bias (0.0f). + param m_ldr_hdr_upconversion_black_bias; + + // If true, ASTC HDR quality is favored more than BC6H quality. Otherwise it's a rough balance. + bool_param m_hdr_favor_astc; + + job_pool *m_pJob_pool; + }; + + // Important: basisu_encoder_init() MUST be called first before using this class. + class basis_compressor + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basis_compressor); + + public: + basis_compressor(); + ~basis_compressor(); + + // Note it *should* be possible to call init() multiple times with different inputs, but this scenario isn't well tested. Ideally, create 1 object, compress, then delete it. 
+ bool init(const basis_compressor_params ¶ms); + + enum error_code + { + cECSuccess = 0, + cECFailedInitializing, + cECFailedReadingSourceImages, + cECFailedValidating, + cECFailedEncodeUASTC, + cECFailedFrontEnd, + cECFailedFontendExtract, + cECFailedBackend, + cECFailedCreateBasisFile, + cECFailedWritingOutput, + cECFailedUASTCRDOPostProcess, + cECFailedCreateKTX2File + }; + + error_code process(); + + // The output .basis file will always be valid of process() succeeded. + const uint8_vec &get_output_basis_file() const { return m_output_basis_file; } + + // The output .ktx2 file will only be valid if m_create_ktx2_file was true and process() succeeded. + const uint8_vec& get_output_ktx2_file() const { return m_output_ktx2_file; } + + const basisu::vector &get_stats() const { return m_stats; } + + uint32_t get_basis_file_size() const { return m_basis_file_size; } + double get_basis_bits_per_texel() const { return m_basis_bits_per_texel; } + + bool get_any_source_image_has_alpha() const { return m_any_source_image_has_alpha; } + + bool get_opencl_failed() const { return m_opencl_failed; } + + private: + basis_compressor_params m_params; + + opencl_context_ptr m_pOpenCL_context; + + basist::basis_tex_format m_fmt_mode; + + basisu::vector m_slice_images; + basisu::vector m_slice_images_hdr; + + basisu::vector m_stats; + + uint32_t m_basis_file_size; + double m_basis_bits_per_texel; + + basisu_backend_slice_desc_vec m_slice_descs; + + uint32_t m_total_blocks; + + basisu_frontend m_frontend; + + // These are 4x4 blocks. 
+ pixel_block_vec m_source_blocks; + pixel_block_hdr_vec m_source_blocks_hdr; + + basisu::vector m_frontend_output_textures; + + basisu::vector m_best_etc1s_images; + basisu::vector m_best_etc1s_images_unpacked; + + basisu_backend m_backend; + + basisu_file m_basis_file; + + basisu::vector m_decoded_output_textures; // BC6H in HDR mode + basisu::vector m_decoded_output_textures_unpacked; + + basisu::vector m_decoded_output_textures_bc7; + basisu::vector m_decoded_output_textures_unpacked_bc7; + + basisu::vector m_decoded_output_textures_bc6h_hdr_unpacked; // BC6H in HDR mode + + basisu::vector m_decoded_output_textures_astc_hdr; + basisu::vector m_decoded_output_textures_astc_hdr_unpacked; + + uint8_vec m_output_basis_file; + uint8_vec m_output_ktx2_file; + + basisu::vector m_uastc_slice_textures; + basisu_backend_output m_uastc_backend_output; + + // The amount the HDR input has to be scaled up in case it had to be rescaled to fit into half floats. + float m_hdr_image_scale; + + // The upconversion multiplier used to load LDR images in HDR mode. + float m_ldr_to_hdr_upconversion_nit_multiplier; + + // True if any loaded source images were LDR and upconverted to HDR. 
+ bool m_upconverted_any_ldr_images; + + bool m_any_source_image_has_alpha; + + bool m_opencl_failed; + + void check_for_hdr_inputs(); + bool sanity_check_input_params(); + void clean_hdr_image(imagef& src_img); + bool read_dds_source_images(); + bool read_source_images(); + bool extract_source_blocks(); + bool process_frontend(); + bool extract_frontend_texture_data(); + bool process_backend(); + bool create_basis_file_and_transcode(); + bool write_hdr_debug_images(const char* pBasename, const imagef& img, uint32_t width, uint32_t height); + bool write_output_files_and_compute_stats(); + error_code encode_slices_to_astc_6x6_hdr(); + error_code encode_slices_to_uastc_4x4_hdr(); + error_code encode_slices_to_uastc_4x4_ldr(); + bool generate_mipmaps(const imagef& img, basisu::vector& mips, bool has_alpha); + bool generate_mipmaps(const image &img, basisu::vector &mips, bool has_alpha); + bool validate_texture_type_constraints(); + bool validate_ktx2_constraints(); + bool get_dfd(uint8_vec& dfd, const basist::ktx2_header& hdr); + bool create_ktx2_file(); + void pick_format_mode(); + + uint32_t get_block_width() const + { + if (m_params.m_hdr) + { + switch (m_params.m_hdr_mode) + { + case hdr_modes::cASTC_HDR_6X6: + case hdr_modes::cASTC_HDR_6X6_INTERMEDIATE: + return 6; + default: + break; + } + } + return 4; + } + + uint32_t get_block_height() const + { + if (m_params.m_hdr) + { + switch (m_params.m_hdr_mode) + { + case hdr_modes::cASTC_HDR_6X6: + case hdr_modes::cASTC_HDR_6X6_INTERMEDIATE: + return 6; + default: + break; + } + } + return 4; + } + }; + + // Alternative simple C-style wrapper API around the basis_compressor class. + // This doesn't expose every encoder feature, but it's enough to get going. + // Important: basisu_encoder_init() MUST be called first before calling these functions. + // + // Input parameters: + // source_images: Array of "image" objects, one per mipmap level, largest mipmap level first. 
+ // OR + // pImageRGBA: pointer to a 32-bpp RGBx or RGBA raster image, R first in memory, A last. Top scanline first in memory. + // width/height/pitch_in_pixels: dimensions of pImageRGBA + // + // flags_and_quality: Combination of the above flags logically OR'd with the ETC1S or UASTC level, i.e. "cFlagSRGB | cFlagGenMipsClamp | cFlagThreaded | 128" or "cFlagSRGB | cFlagGenMipsClamp | cFlagUASTC | cFlagThreaded | cPackUASTCLevelDefault". + // In ETC1S mode, the lower 8-bits are the ETC1S quality level which ranges from [1,255] (higher=better quality/larger files) + // In UASTC mode, the lower 8-bits are the UASTC LDR/HDR pack level (see cPackUASTCLevelFastest, etc.). Fastest/lowest quality is 0, so be sure to set it correctly. Valid values are [0,4] for both LDR/HDR. + // In UASTC mode, be sure to set this, otherwise it defaults to 0 (fastest/lowest quality). + // + // uastc_rdo_quality: Float UASTC RDO quality level (0=no change, higher values lower quality but increase compressibility, initially try .5-1.5) + // + // pSize: Returns the output data's compressed size in bytes + // + // Return value is the compressed .basis or .ktx2 file data, or nullptr on failure. Must call basis_free() to free it. 
+ enum + { + cFlagUseOpenCL = 1 << 8, // use OpenCL if available + cFlagThreaded = 1 << 9, // use multiple threads for compression + cFlagDebug = 1 << 10, // enable debug output + + cFlagKTX2 = 1 << 11, // generate a KTX2 file + cFlagKTX2UASTCSuperCompression = 1 << 12, // use KTX2 Zstd supercompression on UASTC files + + cFlagSRGB = 1 << 13, // input texture is sRGB, use perceptual colorspace metrics, also use sRGB filtering during mipmap gen, and also sets KTX2 output transfer func to sRGB + cFlagGenMipsClamp = 1 << 14, // generate mipmaps with clamp addressing + cFlagGenMipsWrap = 1 << 15, // generate mipmaps with wrap addressing + + cFlagYFlip = 1 << 16, // flip source image on Y axis before compression + + cFlagUASTCRDO = 1 << 17, // use RDO postprocessing when generating UASTC files (must set uastc_rdo_quality to the quality scalar) + + cFlagPrintStats = 1 << 18, // print image stats to stdout + cFlagPrintStatus = 1 << 19, // print status to stdout + + cFlagDebugImages = 1 << 20, // enable status output + + cFlagREC2020 = 1 << 21 // ASTC 6x6 modes: treat input as REC 2020 vs. the default 709 + }; + + // This function accepts an array of source images. + // If more than one image is provided, it's assumed the images form a mipmap pyramid and automatic mipmap generation is disabled. + // Returns a pointer to the compressed .basis or .ktx2 file data. *pSize is the size of the compressed data. + // Important: The returned block MUST be manually freed using basis_free_data(). + // basisu_encoder_init() MUST be called first! + // LDR version. To compress the LDR source image as HDR: Use the cFlagHDR flag. + void* basis_compress( + basist::basis_tex_format mode, + const basisu::vector &source_images, + uint32_t flags_and_quality, float uastc_rdo_quality, + size_t* pSize, + image_stats* pStats = nullptr); + + // HDR-only version. + // Important: The returned block MUST be manually freed using basis_free_data(). 
+ void* basis_compress( + basist::basis_tex_format mode, + const basisu::vector& source_images_hdr, + uint32_t flags_and_quality, float lambda, + size_t* pSize, + image_stats* pStats = nullptr); + + // This function only accepts a single LDR source image. It's just a wrapper for basis_compress() above. + // Important: The returned block MUST be manually freed using basis_free_data(). + void* basis_compress( + basist::basis_tex_format mode, + const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels, + uint32_t flags_and_quality, float uastc_rdo_quality, + size_t* pSize, + image_stats* pStats = nullptr); + + // Frees the dynamically allocated file data returned by basis_compress(). + // This MUST be called on the pointer returned by basis_compress() when you're done with it. + void basis_free_data(void* p); + + // Runs a short benchmark using synthetic image data to time OpenCL encoding vs. CPU encoding, with multithreading enabled. + // Returns true if opencl is worth using on this system, otherwise false. + // If pOpenCL_failed is not null, it will be set to true if OpenCL encoding failed *on this particular machine/driver/BasisU version* and the encoder falled back to CPU encoding. + // basisu_encoder_init() MUST be called first. If OpenCL support wasn't enabled this always returns false. 
+ bool basis_benchmark_etc1s_opencl(bool *pOpenCL_failed = nullptr); + + // Parallel compression API + struct parallel_results + { + double m_total_time; + basis_compressor::error_code m_error_code; + uint8_vec m_basis_file; + uint8_vec m_ktx2_file; + basisu::vector m_stats; + double m_basis_bits_per_texel; + bool m_any_source_image_has_alpha; + + parallel_results() + { + clear(); + } + + void clear() + { + m_total_time = 0.0f; + m_error_code = basis_compressor::cECFailedInitializing; + m_basis_file.clear(); + m_ktx2_file.clear(); + m_stats.clear(); + m_basis_bits_per_texel = 0.0f; + m_any_source_image_has_alpha = false; + } + }; + + // Compresses an array of input textures across total_threads threads using the basis_compressor class. + // Compressing multiple textures at a time is substantially more efficient than just compressing one at a time. + // total_threads must be >= 1. + bool basis_parallel_compress( + uint32_t total_threads, + const basisu::vector ¶ms_vec, + basisu::vector< parallel_results > &results_vec); + +} // namespace basisu + diff --git a/thirdparty/basisu/encoder/basisu_enc.cpp b/thirdparty/basisu/encoder/basisu_enc.cpp new file mode 100644 index 000000000..2c314740c --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_enc.cpp @@ -0,0 +1,3991 @@ +// basisu_enc.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_enc.h" +#include "basisu_resampler.h" +#include "basisu_resampler_filters.h" +#include "basisu_etc.h" +#include "../transcoder/basisu_transcoder.h" +#include "basisu_bc7enc.h" +#include "jpgd.h" +#include "pvpngreader.h" +#include "basisu_opencl.h" +#include "basisu_uastc_hdr_4x4_enc.h" +#include "basisu_astc_hdr_6x6_enc.h" + +#include + +#ifndef TINYEXR_USE_ZFP +#define TINYEXR_USE_ZFP (1) +#endif +#include "3rdparty/tinyexr.h" + +#ifndef MINIZ_HEADER_FILE_ONLY +#define MINIZ_HEADER_FILE_ONLY +#endif +#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES +#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES +#endif +#include "basisu_miniz.h" + +#define QOI_IMPLEMENTATION +#include "3rdparty/qoi.h" + +#if defined(_WIN32) +// For QueryPerformanceCounter/QueryPerformanceFrequency +#define WIN32_LEAN_AND_MEAN +#include +#endif + +namespace basisu +{ + uint64_t interval_timer::g_init_ticks, interval_timer::g_freq; + double interval_timer::g_timer_freq; + +#if BASISU_SUPPORT_SSE + bool g_cpu_supports_sse41; +#endif + + fast_linear_to_srgb g_fast_linear_to_srgb; + + uint8_t g_hamming_dist[256] = + { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 + }; + + // This is a Public Domain 8x8 font from here: + // 
https://github.com/dhepper/font8x8/blob/master/font8x8_basic.h + const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] = + { + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0020 ( ) + { 0x18, 0x3C, 0x3C, 0x18, 0x18, 0x00, 0x18, 0x00}, // U+0021 (!) + { 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0022 (") + { 0x36, 0x36, 0x7F, 0x36, 0x7F, 0x36, 0x36, 0x00}, // U+0023 (#) + { 0x0C, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x0C, 0x00}, // U+0024 ($) + { 0x00, 0x63, 0x33, 0x18, 0x0C, 0x66, 0x63, 0x00}, // U+0025 (%) + { 0x1C, 0x36, 0x1C, 0x6E, 0x3B, 0x33, 0x6E, 0x00}, // U+0026 (&) + { 0x06, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0027 (') + { 0x18, 0x0C, 0x06, 0x06, 0x06, 0x0C, 0x18, 0x00}, // U+0028 (() + { 0x06, 0x0C, 0x18, 0x18, 0x18, 0x0C, 0x06, 0x00}, // U+0029 ()) + { 0x00, 0x66, 0x3C, 0xFF, 0x3C, 0x66, 0x00, 0x00}, // U+002A (*) + { 0x00, 0x0C, 0x0C, 0x3F, 0x0C, 0x0C, 0x00, 0x00}, // U+002B (+) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+002C (,) + { 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x00}, // U+002D (-) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+002E (.) 
+ { 0x60, 0x30, 0x18, 0x0C, 0x06, 0x03, 0x01, 0x00}, // U+002F (/) + { 0x3E, 0x63, 0x73, 0x7B, 0x6F, 0x67, 0x3E, 0x00}, // U+0030 (0) + { 0x0C, 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x3F, 0x00}, // U+0031 (1) + { 0x1E, 0x33, 0x30, 0x1C, 0x06, 0x33, 0x3F, 0x00}, // U+0032 (2) + { 0x1E, 0x33, 0x30, 0x1C, 0x30, 0x33, 0x1E, 0x00}, // U+0033 (3) + { 0x38, 0x3C, 0x36, 0x33, 0x7F, 0x30, 0x78, 0x00}, // U+0034 (4) + { 0x3F, 0x03, 0x1F, 0x30, 0x30, 0x33, 0x1E, 0x00}, // U+0035 (5) + { 0x1C, 0x06, 0x03, 0x1F, 0x33, 0x33, 0x1E, 0x00}, // U+0036 (6) + { 0x3F, 0x33, 0x30, 0x18, 0x0C, 0x0C, 0x0C, 0x00}, // U+0037 (7) + { 0x1E, 0x33, 0x33, 0x1E, 0x33, 0x33, 0x1E, 0x00}, // U+0038 (8) + { 0x1E, 0x33, 0x33, 0x3E, 0x30, 0x18, 0x0E, 0x00}, // U+0039 (9) + { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+003A (:) + { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+003B (;) + { 0x18, 0x0C, 0x06, 0x03, 0x06, 0x0C, 0x18, 0x00}, // U+003C (<) + { 0x00, 0x00, 0x3F, 0x00, 0x00, 0x3F, 0x00, 0x00}, // U+003D (=) + { 0x06, 0x0C, 0x18, 0x30, 0x18, 0x0C, 0x06, 0x00}, // U+003E (>) + { 0x1E, 0x33, 0x30, 0x18, 0x0C, 0x00, 0x0C, 0x00}, // U+003F (?) 
+ { 0x3E, 0x63, 0x7B, 0x7B, 0x7B, 0x03, 0x1E, 0x00}, // U+0040 (@) + { 0x0C, 0x1E, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x00}, // U+0041 (A) + { 0x3F, 0x66, 0x66, 0x3E, 0x66, 0x66, 0x3F, 0x00}, // U+0042 (B) + { 0x3C, 0x66, 0x03, 0x03, 0x03, 0x66, 0x3C, 0x00}, // U+0043 (C) + { 0x1F, 0x36, 0x66, 0x66, 0x66, 0x36, 0x1F, 0x00}, // U+0044 (D) + { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x46, 0x7F, 0x00}, // U+0045 (E) + { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x06, 0x0F, 0x00}, // U+0046 (F) + { 0x3C, 0x66, 0x03, 0x03, 0x73, 0x66, 0x7C, 0x00}, // U+0047 (G) + { 0x33, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x33, 0x00}, // U+0048 (H) + { 0x1E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0049 (I) + { 0x78, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E, 0x00}, // U+004A (J) + { 0x67, 0x66, 0x36, 0x1E, 0x36, 0x66, 0x67, 0x00}, // U+004B (K) + { 0x0F, 0x06, 0x06, 0x06, 0x46, 0x66, 0x7F, 0x00}, // U+004C (L) + { 0x63, 0x77, 0x7F, 0x7F, 0x6B, 0x63, 0x63, 0x00}, // U+004D (M) + { 0x63, 0x67, 0x6F, 0x7B, 0x73, 0x63, 0x63, 0x00}, // U+004E (N) + { 0x1C, 0x36, 0x63, 0x63, 0x63, 0x36, 0x1C, 0x00}, // U+004F (O) + { 0x3F, 0x66, 0x66, 0x3E, 0x06, 0x06, 0x0F, 0x00}, // U+0050 (P) + { 0x1E, 0x33, 0x33, 0x33, 0x3B, 0x1E, 0x38, 0x00}, // U+0051 (Q) + { 0x3F, 0x66, 0x66, 0x3E, 0x36, 0x66, 0x67, 0x00}, // U+0052 (R) + { 0x1E, 0x33, 0x07, 0x0E, 0x38, 0x33, 0x1E, 0x00}, // U+0053 (S) + { 0x3F, 0x2D, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0054 (T) + { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x3F, 0x00}, // U+0055 (U) + { 0x33, 0x33, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0056 (V) + { 0x63, 0x63, 0x63, 0x6B, 0x7F, 0x77, 0x63, 0x00}, // U+0057 (W) + { 0x63, 0x63, 0x36, 0x1C, 0x1C, 0x36, 0x63, 0x00}, // U+0058 (X) + { 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x0C, 0x1E, 0x00}, // U+0059 (Y) + { 0x7F, 0x63, 0x31, 0x18, 0x4C, 0x66, 0x7F, 0x00}, // U+005A (Z) + { 0x1E, 0x06, 0x06, 0x06, 0x06, 0x06, 0x1E, 0x00}, // U+005B ([) + { 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0x40, 0x00}, // U+005C (\) + { 0x1E, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1E, 0x00}, // 
U+005D (]) + { 0x08, 0x1C, 0x36, 0x63, 0x00, 0x00, 0x00, 0x00}, // U+005E (^) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF}, // U+005F (_) + { 0x0C, 0x0C, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0060 (`) + { 0x00, 0x00, 0x1E, 0x30, 0x3E, 0x33, 0x6E, 0x00}, // U+0061 (a) + { 0x07, 0x06, 0x06, 0x3E, 0x66, 0x66, 0x3B, 0x00}, // U+0062 (b) + { 0x00, 0x00, 0x1E, 0x33, 0x03, 0x33, 0x1E, 0x00}, // U+0063 (c) + { 0x38, 0x30, 0x30, 0x3e, 0x33, 0x33, 0x6E, 0x00}, // U+0064 (d) + { 0x00, 0x00, 0x1E, 0x33, 0x3f, 0x03, 0x1E, 0x00}, // U+0065 (e) + { 0x1C, 0x36, 0x06, 0x0f, 0x06, 0x06, 0x0F, 0x00}, // U+0066 (f) + { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0067 (g) + { 0x07, 0x06, 0x36, 0x6E, 0x66, 0x66, 0x67, 0x00}, // U+0068 (h) + { 0x0C, 0x00, 0x0E, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0069 (i) + { 0x30, 0x00, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E}, // U+006A (j) + { 0x07, 0x06, 0x66, 0x36, 0x1E, 0x36, 0x67, 0x00}, // U+006B (k) + { 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+006C (l) + { 0x00, 0x00, 0x33, 0x7F, 0x7F, 0x6B, 0x63, 0x00}, // U+006D (m) + { 0x00, 0x00, 0x1F, 0x33, 0x33, 0x33, 0x33, 0x00}, // U+006E (n) + { 0x00, 0x00, 0x1E, 0x33, 0x33, 0x33, 0x1E, 0x00}, // U+006F (o) + { 0x00, 0x00, 0x3B, 0x66, 0x66, 0x3E, 0x06, 0x0F}, // U+0070 (p) + { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x78}, // U+0071 (q) + { 0x00, 0x00, 0x3B, 0x6E, 0x66, 0x06, 0x0F, 0x00}, // U+0072 (r) + { 0x00, 0x00, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x00}, // U+0073 (s) + { 0x08, 0x0C, 0x3E, 0x0C, 0x0C, 0x2C, 0x18, 0x00}, // U+0074 (t) + { 0x00, 0x00, 0x33, 0x33, 0x33, 0x33, 0x6E, 0x00}, // U+0075 (u) + { 0x00, 0x00, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0076 (v) + { 0x00, 0x00, 0x63, 0x6B, 0x7F, 0x7F, 0x36, 0x00}, // U+0077 (w) + { 0x00, 0x00, 0x63, 0x36, 0x1C, 0x36, 0x63, 0x00}, // U+0078 (x) + { 0x00, 0x00, 0x33, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0079 (y) + { 0x00, 0x00, 0x3F, 0x19, 0x0C, 0x26, 0x3F, 0x00}, // U+007A (z) + { 0x38, 0x0C, 0x0C, 0x07, 0x0C, 0x0C, 0x38, 
0x00}, // U+007B ({) + { 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x00}, // U+007C (|) + { 0x07, 0x0C, 0x0C, 0x38, 0x0C, 0x0C, 0x07, 0x00}, // U+007D (}) + { 0x6E, 0x3B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+007E (~) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} // U+007F + }; + + bool g_library_initialized; + std::mutex g_encoder_init_mutex; + + // Encoder library initialization (just call once at startup) + bool basisu_encoder_init(bool use_opencl, bool opencl_force_serialization) + { + std::lock_guard lock(g_encoder_init_mutex); + + if (g_library_initialized) + return true; + + detect_sse41(); + + basist::basisu_transcoder_init(); + pack_etc1_solid_color_init(); + //uastc_init(); + bc7enc_compress_block_init(); // must be after uastc_init() + + // Don't bother initializing the OpenCL module at all if it's been completely disabled. + if (use_opencl) + { + opencl_init(opencl_force_serialization); + } + + interval_timer::init(); // make sure interval_timer globals are initialized from main thread to avoid TSAN reports + + astc_hdr_enc_init(); + basist::bc6h_enc_init(); + astc_6x6_hdr::global_init(); + + g_library_initialized = true; + return true; + } + + void basisu_encoder_deinit() + { + opencl_deinit(); + + g_library_initialized = false; + } + + void error_vprintf(const char* pFmt, va_list args) + { + const uint32_t BUF_SIZE = 256; + char buf[BUF_SIZE]; + + va_list args_copy; + va_copy(args_copy, args); + int total_chars = vsnprintf(buf, sizeof(buf), pFmt, args_copy); + va_end(args_copy); + + if (total_chars < 0) + { + assert(0); + return; + } + + if (total_chars >= (int)BUF_SIZE) + { + basisu::vector var_buf(total_chars + 1); + + va_copy(args_copy, args); + int total_chars_retry = vsnprintf(var_buf.data(), var_buf.size(), pFmt, args_copy); + va_end(args_copy); + + if (total_chars_retry < 0) + { + assert(0); + return; + } + + fprintf(stderr, "ERROR: %s", var_buf.data()); + } + else + { + fprintf(stderr, "ERROR: %s", buf); + } + } + + void 
error_printf(const char *pFmt, ...) + { + va_list args; + va_start(args, pFmt); + error_vprintf(pFmt, args); + va_end(args); + } + +#if defined(_WIN32) + void platform_sleep(uint32_t ms) + { + Sleep(ms); + } +#else + void platform_sleep(uint32_t ms) + { + // TODO + } +#endif + +#if defined(_WIN32) + inline void query_counter(timer_ticks* pTicks) + { + QueryPerformanceCounter(reinterpret_cast(pTicks)); + } + inline void query_counter_frequency(timer_ticks* pTicks) + { + QueryPerformanceFrequency(reinterpret_cast(pTicks)); + } +#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__EMSCRIPTEN__) +#include + inline void query_counter(timer_ticks* pTicks) + { + struct timeval cur_time; + gettimeofday(&cur_time, NULL); + *pTicks = static_cast(cur_time.tv_sec) * 1000000ULL + static_cast(cur_time.tv_usec); + } + inline void query_counter_frequency(timer_ticks* pTicks) + { + *pTicks = 1000000; + } +#elif defined(__GNUC__) +#include + inline void query_counter(timer_ticks* pTicks) + { + struct timeval cur_time; + gettimeofday(&cur_time, NULL); + *pTicks = static_cast(cur_time.tv_sec) * 1000000ULL + static_cast(cur_time.tv_usec); + } + inline void query_counter_frequency(timer_ticks* pTicks) + { + *pTicks = 1000000; + } +#else +#error TODO +#endif + + interval_timer::interval_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false) + { + if (!g_timer_freq) + init(); + } + + void interval_timer::start() + { + query_counter(&m_start_time); + m_started = true; + m_stopped = false; + } + + void interval_timer::stop() + { + assert(m_started); + query_counter(&m_stop_time); + m_stopped = true; + } + + double interval_timer::get_elapsed_secs() const + { + assert(m_started); + if (!m_started) + return 0; + + timer_ticks stop_time = m_stop_time; + if (!m_stopped) + query_counter(&stop_time); + + timer_ticks delta = stop_time - m_start_time; + return delta * g_timer_freq; + } + + void interval_timer::init() + { + if (!g_timer_freq) + 
{ + query_counter_frequency(&g_freq); + g_timer_freq = 1.0f / g_freq; + query_counter(&g_init_ticks); + } + } + + timer_ticks interval_timer::get_ticks() + { + if (!g_timer_freq) + init(); + timer_ticks ticks; + query_counter(&ticks); + return ticks - g_init_ticks; + } + + double interval_timer::ticks_to_secs(timer_ticks ticks) + { + if (!g_timer_freq) + init(); + return ticks * g_timer_freq; + } + + // Note this is linear<->sRGB, NOT REC709 which uses slightly different equations/transfer functions. + // However the gamuts/white points of REC709 and sRGB are the same. + float linear_to_srgb(float l) + { + assert(l >= 0.0f && l <= 1.0f); + if (l < .0031308f) + return saturate(l * 12.92f); + else + return saturate(1.055f * powf(l, 1.0f / 2.4f) - .055f); + } + + float srgb_to_linear(float s) + { + assert(s >= 0.0f && s <= 1.0f); + if (s < .04045f) + return saturate(s * (1.0f / 12.92f)); + else + return saturate(powf((s + .055f) * (1.0f / 1.055f), 2.4f)); + } + + const uint32_t MAX_32BIT_ALLOC_SIZE = 250000000; + + bool load_tga(const char* pFilename, image& img) + { + int w = 0, h = 0, n_chans = 0; + uint8_t* pImage_data = read_tga(pFilename, w, h, n_chans); + + if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4))) + { + error_printf("Failed loading .TGA image \"%s\"!\n", pFilename); + + if (pImage_data) + free(pImage_data); + + return false; + } + + if (sizeof(void *) == sizeof(uint32_t)) + { + if (((uint64_t)w * h * n_chans) > MAX_32BIT_ALLOC_SIZE) + { + error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h); + + if (pImage_data) + free(pImage_data); + + return false; + } + } + + img.resize(w, h); + + const uint8_t *pSrc = pImage_data; + for (int y = 0; y < h; y++) + { + color_rgba *pDst = &img(0, y); + + for (int x = 0; x < w; x++) + { + pDst->r = pSrc[0]; + pDst->g = pSrc[1]; + pDst->b = pSrc[2]; + pDst->a = (n_chans == 3) ? 
255 : pSrc[3]; + + pSrc += n_chans; + ++pDst; + } + } + + free(pImage_data); + + return true; + } + + bool load_qoi(const char* pFilename, image& img) + { + qoi_desc desc; + clear_obj(desc); + + void* p = qoi_read(pFilename, &desc, 4); + if (!p) + return false; + + img.grant_ownership(static_cast(p), desc.width, desc.height); + + return true; + } + + bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename) + { + interval_timer tm; + tm.start(); + + if (!buf_size) + return false; + + uint32_t width = 0, height = 0, num_chans = 0; + void* pImage = pv_png::load_png(pBuf, buf_size, 4, width, height, num_chans); + + if (!pImage) + { + error_printf("pv_png::load_png failed while loading image \"%s\"\n", pFilename); + return false; + } + + img.grant_ownership(reinterpret_cast(pImage), width, height); + + //debug_printf("Total load_png() time: %3.3f secs\n", tm.get_elapsed_secs()); + + return true; + } + + bool load_png(const char* pFilename, image& img) + { + uint8_vec buffer; + if (!read_file_to_vec(pFilename, buffer)) + { + error_printf("load_png: Failed reading file \"%s\"!\n", pFilename); + return false; + } + + return load_png(buffer.data(), buffer.size(), img, pFilename); + } + + bool load_jpg(const char *pFilename, image& img) + { + int width = 0, height = 0, actual_comps = 0; + uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering); + if (!pImage_data) + return false; + + img.init(pImage_data, width, height, 4); + + free(pImage_data); + + return true; + } + + bool load_jpg(const uint8_t* pBuf, size_t buf_size, image& img) + { + if (buf_size > INT_MAX) + { + assert(0); + return false; + } + + int width = 0, height = 0, actual_comps = 0; + uint8_t* pImage_data = jpgd::decompress_jpeg_image_from_memory(pBuf, (int)buf_size, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering); + if (!pImage_data) + return 
false; + + img.init(pImage_data, width, height, 4); + + free(pImage_data); + + return true; + } + + bool load_image(const char* pFilename, image& img) + { + std::string ext(string_get_extension(std::string(pFilename))); + + if (ext.length() == 0) + return false; + + const char *pExt = ext.c_str(); + + if (strcasecmp(pExt, "png") == 0) + return load_png(pFilename, img); + if (strcasecmp(pExt, "tga") == 0) + return load_tga(pFilename, img); + if (strcasecmp(pExt, "qoi") == 0) + return load_qoi(pFilename, img); + if ( (strcasecmp(pExt, "jpg") == 0) || (strcasecmp(pExt, "jfif") == 0) || (strcasecmp(pExt, "jpeg") == 0) ) + return load_jpg(pFilename, img); + + return false; + } + + static void convert_ldr_to_hdr_image(imagef &img, const image &ldr_img, bool ldr_srgb_to_linear, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f) + { + img.resize(ldr_img.get_width(), ldr_img.get_height()); + + for (uint32_t y = 0; y < ldr_img.get_height(); y++) + { + for (uint32_t x = 0; x < ldr_img.get_width(); x++) + { + const color_rgba& c = ldr_img(x, y); + + vec4F& d = img(x, y); + if (ldr_srgb_to_linear) + { + float r = (float)c[0]; + float g = (float)c[1]; + float b = (float)c[2]; + + if (ldr_black_bias > 0.0f) + { + // ASTC HDR is noticeably weaker dealing with blocks containing some pixels with components set to 0. + // Add a very slight bias less than .5 to avoid this difficulity. When the HDR image is mapped to SDR sRGB and rounded back to 8-bits, this bias will still result in zero. + // (FWIW, in reality, a physical monitor would be unlikely to have a perfectly zero black level.) + // This is purely optional and on most images it doesn't matter visually. + if (r == 0.0f) + r = ldr_black_bias; + if (g == 0.0f) + g = ldr_black_bias; + if (b == 0.0f) + b = ldr_black_bias; + } + + // Compute how much linear light would be emitted by a SDR 80-100 nit monitor. 
+ d[0] = srgb_to_linear(r * (1.0f / 255.0f)) * linear_nit_multiplier; + d[1] = srgb_to_linear(g * (1.0f / 255.0f)) * linear_nit_multiplier; + d[2] = srgb_to_linear(b * (1.0f / 255.0f)) * linear_nit_multiplier; + } + else + { + d[0] = c[0] * (1.0f / 255.0f) * linear_nit_multiplier; + d[1] = c[1] * (1.0f / 255.0f) * linear_nit_multiplier; + d[2] = c[2] * (1.0f / 255.0f) * linear_nit_multiplier; + } + d[3] = c[3] * (1.0f / 255.0f); + } + } + } + + bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear, float linear_nit_multiplier, float ldr_black_bias) + { + if ((!pMem) || (!mem_size)) + { + assert(0); + return false; + } + + switch (img_type) + { + case hdr_image_type::cHITRGBAHalfFloat: + { + if (mem_size != width * height * sizeof(basist::half_float) * 4) + { + assert(0); + return false; + } + + if ((!width) || (!height)) + { + assert(0); + return false; + } + + const basist::half_float* pSrc_image_h = static_cast(pMem); + + img.resize(width, height); + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const basist::half_float* pSrc_pixel = &pSrc_image_h[x * 4]; + + vec4F& dst = img(x, y); + dst[0] = basist::half_to_float(pSrc_pixel[0]); + dst[1] = basist::half_to_float(pSrc_pixel[1]); + dst[2] = basist::half_to_float(pSrc_pixel[2]); + dst[3] = basist::half_to_float(pSrc_pixel[3]); + } + + pSrc_image_h += (width * 4); + } + + break; + } + case hdr_image_type::cHITRGBAFloat: + { + if (mem_size != width * height * sizeof(float) * 4) + { + assert(0); + return false; + } + + if ((!width) || (!height)) + { + assert(0); + return false; + } + + img.resize(width, height); + memcpy((void *)img.get_ptr(), pMem, width * height * sizeof(float) * 4); + + break; + } + case hdr_image_type::cHITJPGImage: + { + image ldr_img; + if (!load_jpg(static_cast(pMem), mem_size, ldr_img)) + return false; + + convert_ldr_to_hdr_image(img, ldr_img, 
ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias); + break; + } + case hdr_image_type::cHITPNGImage: + { + image ldr_img; + if (!load_png(static_cast(pMem), mem_size, ldr_img)) + return false; + + convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias); + break; + } + case hdr_image_type::cHITEXRImage: + { + if (!read_exr(pMem, mem_size, img)) + return false; + + break; + } + case hdr_image_type::cHITHDRImage: + { + uint8_vec buf(mem_size); + memcpy(buf.get_ptr(), pMem, mem_size); + + rgbe_header_info hdr; + if (!read_rgbe(buf, img, hdr)) + return false; + + break; + } + default: + assert(0); + return false; + } + + return true; + } + + bool is_image_filename_hdr(const char *pFilename) + { + std::string ext(string_get_extension(std::string(pFilename))); + + if (ext.length() == 0) + return false; + + const char* pExt = ext.c_str(); + + return ((strcasecmp(pExt, "hdr") == 0) || (strcasecmp(pExt, "exr") == 0)); + } + + // TODO: move parameters to struct, add a HDR clean flag to eliminate NaN's/Inf's + bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear, float linear_nit_multiplier, float ldr_black_bias) + { + std::string ext(string_get_extension(std::string(pFilename))); + + if (ext.length() == 0) + return false; + + const char* pExt = ext.c_str(); + + if (strcasecmp(pExt, "hdr") == 0) + { + rgbe_header_info rgbe_info; + if (!read_rgbe(pFilename, img, rgbe_info)) + return false; + return true; + } + + if (strcasecmp(pExt, "exr") == 0) + { + int n_chans = 0; + if (!read_exr(pFilename, img, n_chans)) + return false; + return true; + } + + // Try loading image as LDR, then optionally convert to linear light. 
+ { + image ldr_img; + if (!load_image(pFilename, ldr_img)) + return false; + + convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias); + } + + return true; + } + + bool save_png(const char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp) + { + if (!img.get_total_pixels()) + return false; + + void* pPNG_data = nullptr; + size_t PNG_data_size = 0; + + if (image_save_flags & cImageSaveGrayscale) + { + uint8_vec g_pixels(img.get_total_pixels()); + uint8_t* pDst = &g_pixels[0]; + + for (uint32_t y = 0; y < img.get_height(); y++) + for (uint32_t x = 0; x < img.get_width(); x++) + *pDst++ = img(x, y)[grayscale_comp]; + + pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(g_pixels.data(), img.get_width(), img.get_height(), 1, &PNG_data_size, 1, false); + } + else + { + bool has_alpha = false; + + if ((image_save_flags & cImageSaveIgnoreAlpha) == 0) + has_alpha = img.has_alpha(); + + if (!has_alpha) + { + uint8_vec rgb_pixels(img.get_total_pixels() * 3); + uint8_t* pDst = &rgb_pixels[0]; + + for (uint32_t y = 0; y < img.get_height(); y++) + { + const color_rgba* pSrc = &img(0, y); + for (uint32_t x = 0; x < img.get_width(); x++) + { + pDst[0] = pSrc->r; + pDst[1] = pSrc->g; + pDst[2] = pSrc->b; + + pSrc++; + pDst += 3; + } + } + + pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(rgb_pixels.data(), img.get_width(), img.get_height(), 3, &PNG_data_size, 1, false); + } + else + { + pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(img.get_ptr(), img.get_width(), img.get_height(), 4, &PNG_data_size, 1, false); + } + } + + if (!pPNG_data) + return false; + + bool status = write_data_to_file(pFilename, pPNG_data, PNG_data_size); + if (!status) + { + error_printf("save_png: Failed writing to filename \"%s\"!\n", pFilename); + } + + free(pPNG_data); + + return status; + } + + bool read_file_to_vec(const char* pFilename, uint8_vec& data) + { + FILE* pFile = nullptr; 
+#ifdef _WIN32 + fopen_s(&pFile, pFilename, "rb"); +#else + pFile = fopen(pFilename, "rb"); +#endif + if (!pFile) + return false; + + fseek(pFile, 0, SEEK_END); +#ifdef _WIN32 + int64_t filesize = _ftelli64(pFile); +#else + int64_t filesize = ftello(pFile); +#endif + if (filesize < 0) + { + fclose(pFile); + return false; + } + fseek(pFile, 0, SEEK_SET); + + if (sizeof(size_t) == sizeof(uint32_t)) + { + if (filesize > 0x70000000) + { + // File might be too big to load safely in one alloc + fclose(pFile); + return false; + } + } + + if (!data.try_resize((size_t)filesize)) + { + fclose(pFile); + return false; + } + + if (filesize) + { + if (fread(&data[0], 1, (size_t)filesize, pFile) != (size_t)filesize) + { + fclose(pFile); + return false; + } + } + + fclose(pFile); + return true; + } + + bool read_file_to_data(const char* pFilename, void *pData, size_t len) + { + assert(pData && len); + if ((!pData) || (!len)) + return false; + + FILE* pFile = nullptr; +#ifdef _WIN32 + fopen_s(&pFile, pFilename, "rb"); +#else + pFile = fopen(pFilename, "rb"); +#endif + if (!pFile) + return false; + + fseek(pFile, 0, SEEK_END); +#ifdef _WIN32 + int64_t filesize = _ftelli64(pFile); +#else + int64_t filesize = ftello(pFile); +#endif + + if ((filesize < 0) || ((size_t)filesize < len)) + { + fclose(pFile); + return false; + } + fseek(pFile, 0, SEEK_SET); + + if (fread(pData, 1, (size_t)len, pFile) != (size_t)len) + { + fclose(pFile); + return false; + } + + fclose(pFile); + return true; + } + + bool write_data_to_file(const char* pFilename, const void* pData, size_t len) + { + FILE* pFile = nullptr; +#ifdef _WIN32 + fopen_s(&pFile, pFilename, "wb"); +#else + pFile = fopen(pFilename, "wb"); +#endif + if (!pFile) + return false; + + if (len) + { + if (fwrite(pData, 1, len, pFile) != len) + { + fclose(pFile); + return false; + } + } + + return fclose(pFile) != EOF; + } + + bool image_resample(const image &src, image &dst, bool srgb, + const char *pFilter, float filter_scale, + bool 
wrapping, + uint32_t first_comp, uint32_t num_comps) + { + assert((first_comp + num_comps) <= 4); + + const int cMaxComps = 4; + + const uint32_t src_w = src.get_width(), src_h = src.get_height(); + const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height(); + + if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION) + { + printf("Image is too large!\n"); + return false; + } + + if (!src_w || !src_h || !dst_w || !dst_h) + return false; + + if ((num_comps < 1) || (num_comps > cMaxComps)) + return false; + + if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION)) + { + printf("Image is too large!\n"); + return false; + } + + if ((src_w == dst_w) && (src_h == dst_h)) + { + dst = src; + return true; + } + + float srgb_to_linear_table[256]; + if (srgb) + { + for (int i = 0; i < 256; ++i) + srgb_to_linear_table[i] = srgb_to_linear((float)i * (1.0f/255.0f)); + } + + const int LINEAR_TO_SRGB_TABLE_SIZE = 8192; + uint8_t linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE]; + + if (srgb) + { + for (int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i) + linear_to_srgb_table[i] = (uint8_t)clamp((int)(255.0f * linear_to_srgb((float)i * (1.0f / (LINEAR_TO_SRGB_TABLE_SIZE - 1))) + .5f), 0, 255); + } + + std::vector samples[cMaxComps]; + Resampler *resamplers[cMaxComps]; + + resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h, + wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, + pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0); + samples[0].resize(src_w); + + for (uint32_t i = 1; i < num_comps; ++i) + { + resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h, + wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, + pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0); + samples[i].resize(src_w); + } + + uint32_t dst_y = 0; + + for (uint32_t src_y = 0; src_y < src_h; ++src_y) + { + const color_rgba *pSrc = &src(0, src_y); + + // Put source lines into resampler(s) + for (uint32_t x = 0; x < src_w; ++x) + { + for (uint32_t c = 0; c < num_comps; ++c) + { + const uint32_t comp_index = first_comp + c; + const uint32_t v = (*pSrc)[comp_index]; + + if (!srgb || (comp_index == 3)) + samples[c][x] = v * (1.0f / 255.0f); + else + samples[c][x] = srgb_to_linear_table[v]; + } + + pSrc++; + } + + for (uint32_t c = 0; c < num_comps; ++c) + { + if (!resamplers[c]->put_line(&samples[c][0])) + { + for (uint32_t i = 0; i < num_comps; i++) + delete resamplers[i]; + return false; + } + } + + // Now retrieve any output lines + for (;;) + { + uint32_t c; + for (c = 0; c < num_comps; ++c) + { + const uint32_t comp_index = first_comp + c; + + const float *pOutput_samples = resamplers[c]->get_line(); + if (!pOutput_samples) + break; + + const bool linear_flag = !srgb || (comp_index == 3); + + color_rgba *pDst = &dst(0, dst_y); + + for (uint32_t x = 0; x < dst_w; x++) + { + // TODO: Add dithering + if (linear_flag) + { + int j = (int)(255.0f * pOutput_samples[x] + .5f); + (*pDst)[comp_index] = (uint8_t)clamp(j, 0, 255); + } + else + { + int j = (int)((LINEAR_TO_SRGB_TABLE_SIZE - 1) * pOutput_samples[x] + .5f); + (*pDst)[comp_index] = linear_to_srgb_table[clamp(j, 0, LINEAR_TO_SRGB_TABLE_SIZE - 1)]; + } + + pDst++; + } + } + if (c < num_comps) + break; + + ++dst_y; + } + } + + for (uint32_t i = 0; i < num_comps; ++i) + delete resamplers[i]; + + return true; + } + + bool image_resample(const imagef& src, imagef& dst, + const char* pFilter, float filter_scale, + bool wrapping, + uint32_t first_comp, uint32_t num_comps) + { + assert((first_comp + num_comps) <= 4); + + const int cMaxComps = 
4; + + const uint32_t src_w = src.get_width(), src_h = src.get_height(); + const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height(); + + if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION) + { + printf("Image is too large!\n"); + return false; + } + + if (!src_w || !src_h || !dst_w || !dst_h) + return false; + + if ((num_comps < 1) || (num_comps > cMaxComps)) + return false; + + if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION)) + { + printf("Image is too large!\n"); + return false; + } + + if ((src_w == dst_w) && (src_h == dst_h) && (filter_scale == 1.0f)) + { + dst = src; + return true; + } + + std::vector samples[cMaxComps]; + Resampler* resamplers[cMaxComps]; + + resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h, + wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 1.0f, 0.0f, // no clamping + pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0); + samples[0].resize(src_w); + + for (uint32_t i = 1; i < num_comps; ++i) + { + resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h, + wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 1.0f, 0.0f, // no clamping + pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0); + samples[i].resize(src_w); + } + + uint32_t dst_y = 0; + + for (uint32_t src_y = 0; src_y < src_h; ++src_y) + { + const vec4F* pSrc = &src(0, src_y); + + // Put source lines into resampler(s) + for (uint32_t x = 0; x < src_w; ++x) + { + for (uint32_t c = 0; c < num_comps; ++c) + { + const uint32_t comp_index = first_comp + c; + const float v = (*pSrc)[comp_index]; + + samples[c][x] = v; + } + + pSrc++; + } + + for (uint32_t c = 0; c < num_comps; ++c) + { + if (!resamplers[c]->put_line(&samples[c][0])) + { + for (uint32_t i = 0; i < num_comps; i++) + delete resamplers[i]; + return false; + } + } + + // Now retrieve any output lines + for (;;) + { + uint32_t c; + for (c = 0; c < num_comps; ++c) + { + const uint32_t comp_index = first_comp + c; + + const float* pOutput_samples = resamplers[c]->get_line(); + if (!pOutput_samples) + break; + + vec4F* pDst = &dst(0, dst_y); + + for (uint32_t x = 0; x < dst_w; x++) + { + (*pDst)[comp_index] = pOutput_samples[x]; + pDst++; + } + } + if (c < num_comps) + break; + + ++dst_y; + } + } + + for (uint32_t i = 0; i < num_comps; ++i) + delete resamplers[i]; + + return true; + } + + void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms) + { + // See the paper "In-Place Calculation of Minimum Redundancy Codes" by Moffat and Katajainen + if (!num_syms) + return; + + if (1 == num_syms) + { + A[0].m_key = 1; + return; + } + + A[0].m_key += A[1].m_key; + + int s = 2, r = 0, next; + for (next = 1; next < (num_syms - 1); ++next) + { + if ((s >= num_syms) || (A[r].m_key < A[s].m_key)) + { + A[next].m_key = A[r].m_key; + A[r].m_key = next; + ++r; + } + else + { + A[next].m_key = A[s].m_key; + ++s; + } + + if ((s >= num_syms) || ((r < next) && A[r].m_key < A[s].m_key)) + { + A[next].m_key = A[next].m_key + A[r].m_key; + A[r].m_key = 
next; + ++r; + } + else + { + A[next].m_key = A[next].m_key + A[s].m_key; + ++s; + } + } + A[num_syms - 2].m_key = 0; + + for (next = num_syms - 3; next >= 0; --next) + { + A[next].m_key = 1 + A[A[next].m_key].m_key; + } + + int num_avail = 1, num_used = 0, depth = 0; + r = num_syms - 2; + next = num_syms - 1; + while (num_avail > 0) + { + for ( ; (r >= 0) && ((int)A[r].m_key == depth); ++num_used, --r ) + ; + + for ( ; num_avail > num_used; --next, --num_avail) + A[next].m_key = depth; + + num_avail = 2 * num_used; + num_used = 0; + ++depth; + } + } + + void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) + { + int i; + uint32_t total = 0; + if (code_list_len <= 1) + return; + + for (i = max_code_size + 1; i <= cHuffmanMaxSupportedInternalCodeSize; i++) + pNum_codes[max_code_size] += pNum_codes[i]; + + for (i = max_code_size; i > 0; i--) + total += (((uint32_t)pNum_codes[i]) << (max_code_size - i)); + + while (total != (1UL << max_code_size)) + { + pNum_codes[max_code_size]--; + for (i = max_code_size - 1; i > 0; i--) + { + if (pNum_codes[i]) + { + pNum_codes[i]--; + pNum_codes[i + 1] += 2; + break; + } + } + + total--; + } + } + + sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1) + { + uint32_t total_passes = 2, pass_shift, pass, i, hist[256 * 2]; + sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1; + + clear_obj(hist); + + for (i = 0; i < num_syms; i++) + { + uint32_t freq = pSyms0[i].m_key; + + // We scale all input frequencies to 16-bits. 
+ assert(freq <= UINT16_MAX); + + hist[freq & 0xFF]++; + hist[256 + ((freq >> 8) & 0xFF)]++; + } + + while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) + total_passes--; + + for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) + { + const uint32_t *pHist = &hist[pass << 8]; + uint32_t offsets[256], cur_ofs = 0; + for (i = 0; i < 256; i++) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + } + + for (i = 0; i < num_syms; i++) + pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; + + sym_freq *t = pCur_syms; + pCur_syms = pNew_syms; + pNew_syms = t; + } + + return pCur_syms; + } + + bool huffman_encoding_table::init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size) + { + if (max_code_size > cHuffmanMaxSupportedCodeSize) + return false; + if ((!num_syms) || (num_syms > cHuffmanMaxSyms)) + return false; + + uint32_t total_used_syms = 0; + for (uint32_t i = 0; i < num_syms; i++) + if (pFreq[i]) + total_used_syms++; + + if (!total_used_syms) + return false; + + std::vector sym_freq0(total_used_syms), sym_freq1(total_used_syms); + for (uint32_t i = 0, j = 0; i < num_syms; i++) + { + if (pFreq[i]) + { + sym_freq0[j].m_key = pFreq[i]; + sym_freq0[j++].m_sym_index = static_cast(i); + } + } + + sym_freq *pSym_freq = canonical_huffman_radix_sort_syms(total_used_syms, &sym_freq0[0], &sym_freq1[0]); + + canonical_huffman_calculate_minimum_redundancy(pSym_freq, total_used_syms); + + int num_codes[cHuffmanMaxSupportedInternalCodeSize + 1]; + clear_obj(num_codes); + + for (uint32_t i = 0; i < total_used_syms; i++) + { + if (pSym_freq[i].m_key > cHuffmanMaxSupportedInternalCodeSize) + return false; + + num_codes[pSym_freq[i].m_key]++; + } + + canonical_huffman_enforce_max_code_size(num_codes, total_used_syms, max_code_size); + + m_code_sizes.resize(0); + m_code_sizes.resize(num_syms); + + m_codes.resize(0); + m_codes.resize(num_syms); + + for (uint32_t i = 1, j = total_used_syms; i <= 
max_code_size; i++) + for (uint32_t l = num_codes[i]; l > 0; l--) + m_code_sizes[pSym_freq[--j].m_sym_index] = static_cast(i); + + uint32_t next_code[cHuffmanMaxSupportedInternalCodeSize + 1]; + + next_code[1] = 0; + for (uint32_t j = 0, i = 2; i <= max_code_size; i++) + next_code[i] = j = ((j + num_codes[i - 1]) << 1); + + for (uint32_t i = 0; i < num_syms; i++) + { + uint32_t rev_code = 0, code, code_size; + if ((code_size = m_code_sizes[i]) == 0) + continue; + if (code_size > cHuffmanMaxSupportedInternalCodeSize) + return false; + code = next_code[code_size]++; + for (uint32_t l = code_size; l > 0; l--, code >>= 1) + rev_code = (rev_code << 1) | (code & 1); + m_codes[i] = static_cast(rev_code); + } + + return true; + } + + bool huffman_encoding_table::init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size) + { + if ((!num_syms) || (num_syms > cHuffmanMaxSyms)) + return false; + + uint16_vec sym_freq(num_syms); + + uint32_t max_freq = 0; + for (uint32_t i = 0; i < num_syms; i++) + max_freq = maximum(max_freq, pSym_freq[i]); + + if (max_freq < UINT16_MAX) + { + for (uint32_t i = 0; i < num_syms; i++) + sym_freq[i] = static_cast(pSym_freq[i]); + } + else + { + for (uint32_t i = 0; i < num_syms; i++) + { + if (pSym_freq[i]) + { + uint32_t f = static_cast((static_cast(pSym_freq[i]) * 65534U + (max_freq >> 1)) / max_freq); + sym_freq[i] = static_cast(clamp(f, 1, 65534)); + } + } + } + + return init(num_syms, &sym_freq[0], max_code_size); + } + + void bitwise_coder::end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len) + { + if (run_size) + { + if (run_size < cHuffmanSmallRepeatSizeMin) + { + while (run_size--) + syms.push_back(static_cast(len)); + } + else if (run_size <= cHuffmanSmallRepeatSizeMax) + { + syms.push_back(static_cast(cHuffmanSmallRepeatCode | ((run_size - cHuffmanSmallRepeatSizeMin) << 6))); + } + else + { + assert((run_size >= cHuffmanBigRepeatSizeMin) && (run_size <= cHuffmanBigRepeatSizeMax)); + 
syms.push_back(static_cast(cHuffmanBigRepeatCode | ((run_size - cHuffmanBigRepeatSizeMin) << 6))); + } + } + + run_size = 0; + } + + void bitwise_coder::end_zero_run(uint16_vec &syms, uint32_t &run_size) + { + if (run_size) + { + if (run_size < cHuffmanSmallZeroRunSizeMin) + { + while (run_size--) + syms.push_back(0); + } + else if (run_size <= cHuffmanSmallZeroRunSizeMax) + { + syms.push_back(static_cast(cHuffmanSmallZeroRunCode | ((run_size - cHuffmanSmallZeroRunSizeMin) << 6))); + } + else + { + assert((run_size >= cHuffmanBigZeroRunSizeMin) && (run_size <= cHuffmanBigZeroRunSizeMax)); + syms.push_back(static_cast(cHuffmanBigZeroRunCode | ((run_size - cHuffmanBigZeroRunSizeMin) << 6))); + } + } + + run_size = 0; + } + + uint32_t bitwise_coder::emit_huffman_table(const huffman_encoding_table &tab) + { + const uint64_t start_bits = m_total_bits; + + const uint8_vec &code_sizes = tab.get_code_sizes(); + + uint32_t total_used = tab.get_total_used_codes(); + put_bits(total_used, cHuffmanMaxSymsLog2); + + if (!total_used) + return 0; + + uint16_vec syms; + syms.reserve(total_used + 16); + + uint32_t prev_code_len = UINT_MAX, zero_run_size = 0, nonzero_run_size = 0; + + for (uint32_t i = 0; i <= total_used; ++i) + { + const uint32_t code_len = (i == total_used) ? 
0xFF : code_sizes[i]; + assert((code_len == 0xFF) || (code_len <= 16)); + + if (code_len) + { + end_zero_run(syms, zero_run_size); + + if (code_len != prev_code_len) + { + end_nonzero_run(syms, nonzero_run_size, prev_code_len); + if (code_len != 0xFF) + syms.push_back(static_cast(code_len)); + } + else if (++nonzero_run_size == cHuffmanBigRepeatSizeMax) + end_nonzero_run(syms, nonzero_run_size, prev_code_len); + } + else + { + end_nonzero_run(syms, nonzero_run_size, prev_code_len); + + if (++zero_run_size == cHuffmanBigZeroRunSizeMax) + end_zero_run(syms, zero_run_size); + } + + prev_code_len = code_len; + } + + histogram h(cHuffmanTotalCodelengthCodes); + for (uint32_t i = 0; i < syms.size(); i++) + h.inc(syms[i] & 63); + + huffman_encoding_table ct; + if (!ct.init(h, 7)) + return 0; + + assert(cHuffmanTotalSortedCodelengthCodes == cHuffmanTotalCodelengthCodes); + + uint32_t total_codelength_codes; + for (total_codelength_codes = cHuffmanTotalSortedCodelengthCodes; total_codelength_codes > 0; total_codelength_codes--) + if (ct.get_code_sizes()[g_huffman_sorted_codelength_codes[total_codelength_codes - 1]]) + break; + + assert(total_codelength_codes); + + put_bits(total_codelength_codes, 5); + for (uint32_t i = 0; i < total_codelength_codes; i++) + put_bits(ct.get_code_sizes()[g_huffman_sorted_codelength_codes[i]], 3); + + for (uint32_t i = 0; i < syms.size(); ++i) + { + const uint32_t l = syms[i] & 63, e = syms[i] >> 6; + + put_code(l, ct); + + if (l == cHuffmanSmallZeroRunCode) + put_bits(e, cHuffmanSmallZeroRunExtraBits); + else if (l == cHuffmanBigZeroRunCode) + put_bits(e, cHuffmanBigZeroRunExtraBits); + else if (l == cHuffmanSmallRepeatCode) + put_bits(e, cHuffmanSmallRepeatExtraBits); + else if (l == cHuffmanBigRepeatCode) + put_bits(e, cHuffmanBigRepeatExtraBits); + } + + return (uint32_t)(m_total_bits - start_bits); + } + + bool huffman_test(int rand_seed) + { + histogram h(19); + + // Feed in a fibonacci sequence to force large codesizes + h[0] += 1; h[1] 
+= 1; h[2] += 2; h[3] += 3; + h[4] += 5; h[5] += 8; h[6] += 13; h[7] += 21; + h[8] += 34; h[9] += 55; h[10] += 89; h[11] += 144; + h[12] += 233; h[13] += 377; h[14] += 610; h[15] += 987; + h[16] += 1597; h[17] += 2584; h[18] += 4181; + + huffman_encoding_table etab; + etab.init(h, 16); + + { + bitwise_coder c; + c.init(1024); + + c.emit_huffman_table(etab); + for (int i = 0; i < 19; i++) + c.put_code(i, etab); + + c.flush(); + + basist::bitwise_decoder d; + d.init(&c.get_bytes()[0], static_cast(c.get_bytes().size())); + + basist::huffman_decoding_table dtab; + bool success = d.read_huffman_table(dtab); + if (!success) + { + assert(0); + printf("Failure 5\n"); + return false; + } + + for (uint32_t i = 0; i < 19; i++) + { + uint32_t s = d.decode_huffman(dtab); + if (s != i) + { + assert(0); + printf("Failure 5\n"); + return false; + } + } + } + + basisu::rand r; + r.seed(rand_seed); + + for (int iter = 0; iter < 500000; iter++) + { + printf("%u\n", iter); + + uint32_t max_sym = r.irand(0, 8193); + uint32_t num_codes = r.irand(1, 10000); + uint_vec syms(num_codes); + + for (uint32_t i = 0; i < num_codes; i++) + { + if (r.bit()) + syms[i] = r.irand(0, max_sym); + else + { + int s = (int)(r.gaussian((float)max_sym / 2, (float)maximum(1, max_sym / 2)) + .5f); + s = basisu::clamp(s, 0, max_sym); + + syms[i] = s; + } + + } + + histogram h1(max_sym + 1); + for (uint32_t i = 0; i < num_codes; i++) + h1[syms[i]]++; + + huffman_encoding_table etab2; + if (!etab2.init(h1, 16)) + { + assert(0); + printf("Failed 0\n"); + return false; + } + + bitwise_coder c; + c.init(1024); + + c.emit_huffman_table(etab2); + + for (uint32_t i = 0; i < num_codes; i++) + c.put_code(syms[i], etab2); + + c.flush(); + + basist::bitwise_decoder d; + d.init(&c.get_bytes()[0], (uint32_t)c.get_bytes().size()); + + basist::huffman_decoding_table dtab; + bool success = d.read_huffman_table(dtab); + if (!success) + { + assert(0); + printf("Failed 2\n"); + return false; + } + + for (uint32_t i = 0; i < 
num_codes; i++) + { + uint32_t s = d.decode_huffman(dtab); + if (s != syms[i]) + { + assert(0); + printf("Failed 4\n"); + return false; + } + } + + } + return true; + } + + void palette_index_reorderer::init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight) + { + assert((num_syms > 0) && (num_indices > 0)); + assert((dist_func_weight >= 0.0f) && (dist_func_weight <= 1.0f)); + + clear(); + + m_remap_table.resize(num_syms); + m_entries_picked.reserve(num_syms); + m_total_count_to_picked.resize(num_syms); + + if (num_indices <= 1) + return; + + prepare_hist(num_syms, num_indices, pIndices); + find_initial(num_syms); + + while (m_entries_to_do.size()) + { + // Find the best entry to move into the picked list. + uint32_t best_entry; + double best_count; + find_next_entry(best_entry, best_count, pDist_func, pCtx, dist_func_weight); + + // We now have chosen an entry to place in the picked list, now determine which side it goes on. 
+ const uint32_t entry_to_move = m_entries_to_do[best_entry]; + + float side = pick_side(num_syms, entry_to_move, pDist_func, pCtx, dist_func_weight); + + // Put entry_to_move either on the "left" or "right" side of the picked entries + if (side <= 0) + m_entries_picked.push_back(entry_to_move); + else + m_entries_picked.insert(m_entries_picked.begin(), entry_to_move); + + // Erase best_entry from the todo list + m_entries_to_do.erase(m_entries_to_do.begin() + best_entry); + + // We've just moved best_entry to the picked list, so now we need to update m_total_count_to_picked[] to factor the additional count to best_entry + for (uint32_t i = 0; i < m_entries_to_do.size(); i++) + m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], entry_to_move, num_syms); + } + + for (uint32_t i = 0; i < num_syms; i++) + m_remap_table[m_entries_picked[i]] = i; + } + + void palette_index_reorderer::prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices) + { + m_hist.resize(0); + m_hist.resize(num_syms * num_syms); + + for (uint32_t i = 0; i < num_indices; i++) + { + const uint32_t idx = pIndices[i]; + inc_hist(idx, (i < (num_indices - 1)) ? pIndices[i + 1] : -1, num_syms); + inc_hist(idx, (i > 0) ? 
pIndices[i - 1] : -1, num_syms); + } + } + + void palette_index_reorderer::find_initial(uint32_t num_syms) + { + uint32_t max_count = 0, max_index = 0; + for (uint32_t i = 0; i < num_syms * num_syms; i++) + if (m_hist[i] > max_count) + max_count = m_hist[i], max_index = i; + + uint32_t a = max_index / num_syms, b = max_index % num_syms; + + const size_t ofs = m_entries_picked.size(); + + m_entries_picked.push_back(a); + m_entries_picked.push_back(b); + + for (uint32_t i = 0; i < num_syms; i++) + if ((i != m_entries_picked[ofs + 1]) && (i != m_entries_picked[ofs])) + m_entries_to_do.push_back(i); + + for (uint32_t i = 0; i < m_entries_to_do.size(); i++) + for (uint32_t j = 0; j < m_entries_picked.size(); j++) + m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], m_entries_picked[j], num_syms); + } + + void palette_index_reorderer::find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight) + { + best_entry = 0; + best_count = 0; + + for (uint32_t i = 0; i < m_entries_to_do.size(); i++) + { + const uint32_t u = m_entries_to_do[i]; + double total_count = m_total_count_to_picked[u]; + + if (pDist_func) + { + float w = maximum((*pDist_func)(u, m_entries_picked.front(), pCtx), (*pDist_func)(u, m_entries_picked.back(), pCtx)); + assert((w >= 0.0f) && (w <= 1.0f)); + total_count = (total_count + 1.0f) * lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, w); + } + + if (total_count <= best_count) + continue; + + best_entry = i; + best_count = total_count; + } + } + + float palette_index_reorderer::pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight) + { + float which_side = 0; + + int l_count = 0, r_count = 0; + for (uint32_t j = 0; j < m_entries_picked.size(); j++) + { + const int count = get_hist(entry_to_move, m_entries_picked[j], num_syms), r = ((int)m_entries_picked.size() + 1 - 2 * (j + 1)); + which_side += 
static_cast(r * count); + if (r >= 0) + l_count += r * count; + else + r_count += -r * count; + } + + if (pDist_func) + { + float w_left = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.front(), pCtx)); + float w_right = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.back(), pCtx)); + which_side = w_left * l_count - w_right * r_count; + } + return which_side; + } + + void image_metrics::calc(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool log) + { + assert((first_chan < 4U) && (first_chan + total_chans <= 4U)); + + const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); + const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); + + double max_e = -1e+30f; + double sum = 0.0f, sum_sqr = 0.0f; + + m_has_neg = false; + m_any_abnormal = false; + m_hf_mag_overflow = false; + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const vec4F& ca = a(x, y), &cb = b(x, y); + + if (total_chans) + { + for (uint32_t c = 0; c < total_chans; c++) + { + float fa = ca[first_chan + c], fb = cb[first_chan + c]; + + if ((fabs(fa) > basist::MAX_HALF_FLOAT) || (fabs(fb) > basist::MAX_HALF_FLOAT)) + m_hf_mag_overflow = true; + + if ((fa < 0.0f) || (fb < 0.0f)) + m_has_neg = true; + + if (std::isinf(fa) || std::isinf(fb) || std::isnan(fa) || std::isnan(fb)) + m_any_abnormal = true; + + const double delta = fabs(fa - fb); + max_e = basisu::maximum(max_e, delta); + + if (log) + { + double log2_delta = log2f(basisu::maximum(0.0f, fa) + 1.0f) - log2f(basisu::maximum(0.0f, fb) + 1.0f); + + sum += fabs(log2_delta); + sum_sqr += log2_delta * log2_delta; + } + else + { + sum += fabs(delta); + sum_sqr += delta * delta; + } + } + } + else + { + for (uint32_t c = 0; c < 3; c++) + { + float fa = ca[c], fb = cb[c]; + + if ((fabs(fa) > basist::MAX_HALF_FLOAT) || (fabs(fb) > 
basist::MAX_HALF_FLOAT)) + m_hf_mag_overflow = true; + + if ((fa < 0.0f) || (fb < 0.0f)) + m_has_neg = true; + + if (std::isinf(fa) || std::isinf(fb) || std::isnan(fa) || std::isnan(fb)) + m_any_abnormal = true; + } + + double ca_l = get_luminance(ca), cb_l = get_luminance(cb); + + double delta = fabs(ca_l - cb_l); + max_e = basisu::maximum(max_e, delta); + + if (log) + { + double log2_delta = log2(basisu::maximum(0.0f, ca_l) + 1.0f) - log2(basisu::maximum(0.0f, cb_l) + 1.0f); + + sum += fabs(log2_delta); + sum_sqr += log2_delta * log2_delta; + } + else + { + sum += delta; + sum_sqr += delta * delta; + } + } + } + } + + m_max = (double)(max_e); + + double total_values = (double)width * (double)height; + if (avg_comp_error) + total_values *= (double)clamp(total_chans, 1, 4); + + m_mean = (float)(sum / total_values); + m_mean_squared = (float)(sum_sqr / total_values); + m_rms = (float)sqrt(sum_sqr / total_values); + + const double max_val = 1.0f; + m_psnr = m_rms ? (float)clamp(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f; + } + + void image_metrics::calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error) + { + assert(total_chans); + assert((first_chan < 4U) && (first_chan + total_chans <= 4U)); + + const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); + const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); + + m_has_neg = false; + m_hf_mag_overflow = false; + m_any_abnormal = false; + + uint_vec hist(65536); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const vec4F& ca = a(x, y), &cb = b(x, y); + + for (uint32_t i = 0; i < 4; i++) + { + if ((ca[i] < 0.0f) || (cb[i] < 0.0f)) + m_has_neg = true; + + if ((fabs(ca[i]) > basist::MAX_HALF_FLOAT) || (fabs(cb[i]) > basist::MAX_HALF_FLOAT)) + m_hf_mag_overflow = true; + + if (std::isnan(ca[i]) || std::isnan(cb[i]) || std::isinf(ca[i]) || std::isinf(cb[i])) + m_any_abnormal = true; + 
} + + int cah[4] = { basist::float_to_half(ca[0]), basist::float_to_half(ca[1]), basist::float_to_half(ca[2]), basist::float_to_half(ca[3]) }; + int cbh[4] = { basist::float_to_half(cb[0]), basist::float_to_half(cb[1]), basist::float_to_half(cb[2]), basist::float_to_half(cb[3]) }; + + for (uint32_t c = 0; c < total_chans; c++) + hist[iabs(cah[first_chan + c] - cbh[first_chan + c]) & 65535]++; + + } // x + } // y + + m_max = 0; + double sum = 0.0f, sum2 = 0.0f; + for (uint32_t i = 0; i < 65536; i++) + { + if (hist[i]) + { + m_max = basisu::maximum(m_max, (double)i); + double v = (double)i * (double)hist[i]; + sum += v; + sum2 += (double)i * v; + } + } + + double total_values = (double)width * (double)height; + if (avg_comp_error) + total_values *= (double)clamp(total_chans, 1, 4); + + const float max_val = 65535.0f; + m_mean = (float)clamp(sum / total_values, 0.0f, max_val); + m_mean_squared = (float)clamp(sum2 / total_values, 0.0f, max_val * max_val); + m_rms = (float)sqrt(m_mean_squared); + m_psnr = m_rms ? (float)clamp(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f; + } + + // Alt. variant, same as calc_half(), for validation. 
+ void image_metrics::calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error) + { + assert(total_chans); + assert((first_chan < 4U) && (first_chan + total_chans <= 4U)); + + const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); + const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); + + m_has_neg = false; + m_hf_mag_overflow = false; + m_any_abnormal = false; + + double sum = 0.0f, sum2 = 0.0f; + m_max = 0; + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const vec4F& ca = a(x, y), & cb = b(x, y); + + for (uint32_t i = 0; i < 4; i++) + { + if ((ca[i] < 0.0f) || (cb[i] < 0.0f)) + m_has_neg = true; + + if ((fabs(ca[i]) > basist::MAX_HALF_FLOAT) || (fabs(cb[i]) > basist::MAX_HALF_FLOAT)) + m_hf_mag_overflow = true; + + if (std::isnan(ca[i]) || std::isnan(cb[i]) || std::isinf(ca[i]) || std::isinf(cb[i])) + m_any_abnormal = true; + } + + int cah[4] = { basist::float_to_half(ca[0]), basist::float_to_half(ca[1]), basist::float_to_half(ca[2]), basist::float_to_half(ca[3]) }; + int cbh[4] = { basist::float_to_half(cb[0]), basist::float_to_half(cb[1]), basist::float_to_half(cb[2]), basist::float_to_half(cb[3]) }; + + for (uint32_t c = 0; c < total_chans; c++) + { + int diff = iabs(cah[first_chan + c] - cbh[first_chan + c]); + if (diff) + m_max = std::max(m_max, (double)diff); + + sum += diff; + sum2 += squarei(cah[first_chan + c] - cbh[first_chan + c]); + } + + } // x + } // y + + double total_values = (double)width * (double)height; + if (avg_comp_error) + total_values *= (double)clamp(total_chans, 1, 4); + + const float max_val = 65535.0f; + m_mean = (float)clamp(sum / total_values, 0.0f, max_val); + m_mean_squared = (float)clamp(sum2 / total_values, 0.0f, max_val * max_val); + m_rms = (float)sqrt(m_mean_squared); + m_psnr = m_rms ? 
(float)clamp(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f; + } + + void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma) + { + assert((first_chan < 4U) && (first_chan + total_chans <= 4U)); + + const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); + const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); + + double hist[256]; + clear_obj(hist); + + m_has_neg = false; + m_any_abnormal = false; + m_hf_mag_overflow = false; + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const color_rgba &ca = a(x, y), &cb = b(x, y); + + if (total_chans) + { + for (uint32_t c = 0; c < total_chans; c++) + hist[iabs(ca[first_chan + c] - cb[first_chan + c])]++; + } + else + { + if (use_601_luma) + hist[iabs(ca.get_601_luma() - cb.get_601_luma())]++; + else + hist[iabs(ca.get_709_luma() - cb.get_709_luma())]++; + } + } + } + + m_max = 0; + double sum = 0.0f, sum2 = 0.0f; + for (uint32_t i = 0; i < 256; i++) + { + if (hist[i]) + { + m_max = basisu::maximum(m_max, (double)i); + double v = i * hist[i]; + sum += v; + sum2 += i * v; + } + } + + double total_values = (double)width * (double)height; + if (avg_comp_error) + total_values *= (double)clamp(total_chans, 1, 4); + + m_mean = (float)clamp(sum / total_values, 0.0f, 255.0); + m_mean_squared = (float)clamp(sum2 / total_values, 0.0f, 255.0f * 255.0f); + m_rms = (float)sqrt(m_mean_squared); + m_psnr = m_rms ? 
(float)clamp(log10(255.0 / m_rms) * 20.0f, 0.0f, 100.0f) : 100.0f; + } + + void print_image_metrics(const image& a, const image& b) + { + image_metrics im; + im.calc(a, b, 0, 3); + im.print("RGB "); + + im.calc(a, b, 0, 4); + im.print("RGBA "); + + im.calc(a, b, 0, 1); + im.print("R "); + + im.calc(a, b, 1, 1); + im.print("G "); + + im.calc(a, b, 2, 1); + im.print("B "); + + im.calc(a, b, 3, 1); + im.print("A "); + + im.calc(a, b, 0, 0); + im.print("Y 709 "); + + im.calc(a, b, 0, 0, true, true); + im.print("Y 601 "); + } + + void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed) + { + rand r(seed); + + uint8_t *pDst = static_cast(pBuf); + + while (size >= sizeof(uint32_t)) + { + *(uint32_t *)pDst = r.urand32(); + pDst += sizeof(uint32_t); + size -= sizeof(uint32_t); + } + + while (size) + { + *pDst++ = r.byte(); + size--; + } + } + + uint32_t hash_hsieh(const uint8_t *pBuf, size_t len) + { + if (!pBuf || !len) + return 0; + + uint32_t h = static_cast(len); + + const uint32_t bytes_left = len & 3; + len >>= 2; + + while (len--) + { + const uint16_t *pWords = reinterpret_cast(pBuf); + + h += pWords[0]; + + const uint32_t t = (pWords[1] << 11) ^ h; + h = (h << 16) ^ t; + + pBuf += sizeof(uint32_t); + + h += h >> 11; + } + + switch (bytes_left) + { + case 1: + h += *reinterpret_cast(pBuf); + h ^= h << 10; + h += h >> 1; + break; + case 2: + h += *reinterpret_cast(pBuf); + h ^= h << 11; + h += h >> 17; + break; + case 3: + h += *reinterpret_cast(pBuf); + h ^= h << 16; + h ^= (static_cast(pBuf[sizeof(uint16_t)])) << 18; + h += h >> 11; + break; + default: + break; + } + + h ^= h << 3; + h += h >> 5; + h ^= h << 4; + h += h >> 17; + h ^= h << 25; + h += h >> 6; + + return h; + } + + job_pool::job_pool(uint32_t num_threads) : + m_num_active_jobs(0) + { + m_kill_flag.store(false); + m_num_active_workers.store(0); + + assert(num_threads >= 1U); + + debug_printf("job_pool::job_pool: %u total threads\n", num_threads); + + if (num_threads > 1) + { + 
m_threads.resize(num_threads - 1); + + for (int i = 0; i < ((int)num_threads - 1); i++) + m_threads[i] = std::thread([this, i] { job_thread(i); }); + } + } + + job_pool::~job_pool() + { + debug_printf("job_pool::~job_pool\n"); + + // Notify all workers that they need to die right now. + { + std::lock_guard lk(m_mutex); + m_kill_flag.store(true); + } + + m_has_work.notify_all(); + +#ifdef __EMSCRIPTEN__ + for ( ; ; ) + { + if (m_num_active_workers.load() <= 0) + break; + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } + + // At this point all worker threads should be exiting or exited. + // We could call detach(), but this seems to just call join() anyway. +#endif + + // Wait for all worker threads to exit. + for (uint32_t i = 0; i < m_threads.size(); i++) + m_threads[i].join(); + } + + void job_pool::add_job(const std::function& job) + { + std::unique_lock lock(m_mutex); + + m_queue.emplace_back(job); + + const size_t queue_size = m_queue.size(); + + lock.unlock(); + + if (queue_size > 1) + m_has_work.notify_one(); + } + + void job_pool::add_job(std::function&& job) + { + std::unique_lock lock(m_mutex); + + m_queue.emplace_back(std::move(job)); + + const size_t queue_size = m_queue.size(); + + lock.unlock(); + + if (queue_size > 1) + { + m_has_work.notify_one(); + } + } + + void job_pool::wait_for_all() + { + std::unique_lock lock(m_mutex); + + // Drain the job queue on the calling thread. + while (!m_queue.empty()) + { + std::function job(m_queue.back()); + m_queue.pop_back(); + + lock.unlock(); + + job(); + + lock.lock(); + } + + // The queue is empty, now wait for all active jobs to finish up. 
+#ifndef __EMSCRIPTEN__ + m_no_more_jobs.wait(lock, [this]{ return !m_num_active_jobs; } ); +#else + // Avoid infinite blocking + for (; ; ) + { + if (m_no_more_jobs.wait_for(lock, std::chrono::milliseconds(50), [this] { return !m_num_active_jobs; })) + { + break; + } + } +#endif + } + + void job_pool::job_thread(uint32_t index) + { + BASISU_NOTE_UNUSED(index); + //debug_printf("job_pool::job_thread: starting %u\n", index); + + m_num_active_workers.fetch_add(1); + + while (!m_kill_flag) + { + std::unique_lock lock(m_mutex); + + // Wait for any jobs to be issued. +#if 0 + m_has_work.wait(lock, [this] { return m_kill_flag || m_queue.size(); } ); +#else + // For more safety vs. buggy RTL's. Worse case we stall for a second vs. locking up forever if something goes wrong. + m_has_work.wait_for(lock, std::chrono::milliseconds(1000), [this] { + return m_kill_flag || !m_queue.empty(); + }); +#endif + + // Check to see if we're supposed to exit. + if (m_kill_flag) + break; + + if (m_queue.empty()) + continue; + + // Get the job and execute it. + std::function job(m_queue.back()); + m_queue.pop_back(); + + ++m_num_active_jobs; + + lock.unlock(); + + job(); + + lock.lock(); + + --m_num_active_jobs; + + // Now check if there are no more jobs remaining. 
+ const bool all_done = m_queue.empty() && !m_num_active_jobs; + + lock.unlock(); + + if (all_done) + m_no_more_jobs.notify_all(); + } + + m_num_active_workers.fetch_add(-1); + + //debug_printf("job_pool::job_thread: exiting\n"); + } + + // .TGA image loading + #pragma pack(push) + #pragma pack(1) + struct tga_header + { + uint8_t m_id_len; + uint8_t m_cmap; + uint8_t m_type; + packed_uint<2> m_cmap_first; + packed_uint<2> m_cmap_len; + uint8_t m_cmap_bpp; + packed_uint<2> m_x_org; + packed_uint<2> m_y_org; + packed_uint<2> m_width; + packed_uint<2> m_height; + uint8_t m_depth; + uint8_t m_desc; + }; + #pragma pack(pop) + + const uint32_t MAX_TGA_IMAGE_SIZE = 16384; + + enum tga_image_type + { + cITPalettized = 1, + cITRGB = 2, + cITGrayscale = 3 + }; + + uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans) + { + width = 0; + height = 0; + n_chans = 0; + + if (buf_size <= sizeof(tga_header)) + return nullptr; + + const tga_header &hdr = *reinterpret_cast(pBuf); + + if ((!hdr.m_width) || (!hdr.m_height) || (hdr.m_width > MAX_TGA_IMAGE_SIZE) || (hdr.m_height > MAX_TGA_IMAGE_SIZE)) + return nullptr; + + if (hdr.m_desc >> 6) + return nullptr; + + // Simple validation + if ((hdr.m_cmap != 0) && (hdr.m_cmap != 1)) + return nullptr; + + if (hdr.m_cmap) + { + if ((hdr.m_cmap_bpp == 0) || (hdr.m_cmap_bpp > 32)) + return nullptr; + + // Nobody implements CMapFirst correctly, so we're not supporting it. Never seen it used, either. 
+ if (hdr.m_cmap_first != 0) + return nullptr; + } + + const bool x_flipped = (hdr.m_desc & 0x10) != 0; + const bool y_flipped = (hdr.m_desc & 0x20) == 0; + + bool rle_flag = false; + int file_image_type = hdr.m_type; + if (file_image_type > 8) + { + file_image_type -= 8; + rle_flag = true; + } + + const tga_image_type image_type = static_cast(file_image_type); + + switch (file_image_type) + { + case cITRGB: + if (hdr.m_depth == 8) + return nullptr; + break; + case cITPalettized: + if ((hdr.m_depth != 8) || (hdr.m_cmap != 1) || (hdr.m_cmap_len == 0)) + return nullptr; + break; + case cITGrayscale: + if ((hdr.m_cmap != 0) || (hdr.m_cmap_len != 0)) + return nullptr; + if ((hdr.m_depth != 8) && (hdr.m_depth != 16)) + return nullptr; + break; + default: + return nullptr; + } + + uint32_t tga_bytes_per_pixel = 0; + + switch (hdr.m_depth) + { + case 32: + tga_bytes_per_pixel = 4; + n_chans = 4; + break; + case 24: + tga_bytes_per_pixel = 3; + n_chans = 3; + break; + case 16: + case 15: + tga_bytes_per_pixel = 2; + // For compatibility with stb_image_write.h + n_chans = ((file_image_type == cITGrayscale) && (hdr.m_depth == 16)) ? 4 : 3; + break; + case 8: + tga_bytes_per_pixel = 1; + // For palettized RGBA support, which both FreeImage and stb_image support. + n_chans = ((file_image_type == cITPalettized) && (hdr.m_cmap_bpp == 32)) ? 
4 : 3; + break; + default: + return nullptr; + } + + //const uint32_t bytes_per_line = hdr.m_width * tga_bytes_per_pixel; + + const uint8_t *pSrc = pBuf + sizeof(tga_header); + uint32_t bytes_remaining = buf_size - sizeof(tga_header); + + if (hdr.m_id_len) + { + if (bytes_remaining < hdr.m_id_len) + return nullptr; + pSrc += hdr.m_id_len; + bytes_remaining += hdr.m_id_len; + } + + color_rgba pal[256]; + for (uint32_t i = 0; i < 256; i++) + pal[i].set(0, 0, 0, 255); + + if ((hdr.m_cmap) && (hdr.m_cmap_len)) + { + if (image_type == cITPalettized) + { + // Note I cannot find any files using 32bpp palettes in the wild (never seen any in ~30 years). + if ( ((hdr.m_cmap_bpp != 32) && (hdr.m_cmap_bpp != 24) && (hdr.m_cmap_bpp != 15) && (hdr.m_cmap_bpp != 16)) || (hdr.m_cmap_len > 256) ) + return nullptr; + + if (hdr.m_cmap_bpp == 32) + { + const uint32_t pal_size = hdr.m_cmap_len * 4; + if (bytes_remaining < pal_size) + return nullptr; + + for (uint32_t i = 0; i < hdr.m_cmap_len; i++) + { + pal[i].r = pSrc[i * 4 + 2]; + pal[i].g = pSrc[i * 4 + 1]; + pal[i].b = pSrc[i * 4 + 0]; + pal[i].a = pSrc[i * 4 + 3]; + } + + bytes_remaining -= pal_size; + pSrc += pal_size; + } + else if (hdr.m_cmap_bpp == 24) + { + const uint32_t pal_size = hdr.m_cmap_len * 3; + if (bytes_remaining < pal_size) + return nullptr; + + for (uint32_t i = 0; i < hdr.m_cmap_len; i++) + { + pal[i].r = pSrc[i * 3 + 2]; + pal[i].g = pSrc[i * 3 + 1]; + pal[i].b = pSrc[i * 3 + 0]; + pal[i].a = 255; + } + + bytes_remaining -= pal_size; + pSrc += pal_size; + } + else + { + const uint32_t pal_size = hdr.m_cmap_len * 2; + if (bytes_remaining < pal_size) + return nullptr; + + for (uint32_t i = 0; i < hdr.m_cmap_len; i++) + { + const uint32_t v = pSrc[i * 2 + 0] | (pSrc[i * 2 + 1] << 8); + + pal[i].r = (((v >> 10) & 31) * 255 + 15) / 31; + pal[i].g = (((v >> 5) & 31) * 255 + 15) / 31; + pal[i].b = ((v & 31) * 255 + 15) / 31; + pal[i].a = 255; + } + + bytes_remaining -= pal_size; + pSrc += pal_size; + } + } + else + { 
+ const uint32_t bytes_to_skip = (hdr.m_cmap_bpp >> 3) * hdr.m_cmap_len; + if (bytes_remaining < bytes_to_skip) + return nullptr; + pSrc += bytes_to_skip; + bytes_remaining += bytes_to_skip; + } + } + + width = hdr.m_width; + height = hdr.m_height; + + const uint32_t source_pitch = width * tga_bytes_per_pixel; + const uint32_t dest_pitch = width * n_chans; + + uint8_t *pImage = (uint8_t *)malloc(dest_pitch * height); + if (!pImage) + return nullptr; + + std::vector input_line_buf; + if (rle_flag) + input_line_buf.resize(source_pitch); + + int run_type = 0, run_remaining = 0; + uint8_t run_pixel[4]; + memset(run_pixel, 0, sizeof(run_pixel)); + + for (int y = 0; y < height; y++) + { + const uint8_t *pLine_data; + + if (rle_flag) + { + int pixels_remaining = width; + uint8_t *pDst = &input_line_buf[0]; + + do + { + if (!run_remaining) + { + if (bytes_remaining < 1) + { + free(pImage); + return nullptr; + } + + int v = *pSrc++; + bytes_remaining--; + + run_type = v & 0x80; + run_remaining = (v & 0x7F) + 1; + + if (run_type) + { + if (bytes_remaining < tga_bytes_per_pixel) + { + free(pImage); + return nullptr; + } + + memcpy(run_pixel, pSrc, tga_bytes_per_pixel); + pSrc += tga_bytes_per_pixel; + bytes_remaining -= tga_bytes_per_pixel; + } + } + + const uint32_t n = basisu::minimum(pixels_remaining, run_remaining); + pixels_remaining -= n; + run_remaining -= n; + + if (run_type) + { + for (uint32_t i = 0; i < n; i++) + for (uint32_t j = 0; j < tga_bytes_per_pixel; j++) + *pDst++ = run_pixel[j]; + } + else + { + const uint32_t bytes_wanted = n * tga_bytes_per_pixel; + + if (bytes_remaining < bytes_wanted) + { + free(pImage); + return nullptr; + } + + memcpy(pDst, pSrc, bytes_wanted); + pDst += bytes_wanted; + + pSrc += bytes_wanted; + bytes_remaining -= bytes_wanted; + } + + } while (pixels_remaining); + + assert((pDst - &input_line_buf[0]) == (int)(width * tga_bytes_per_pixel)); + + pLine_data = &input_line_buf[0]; + } + else + { + if (bytes_remaining < source_pitch) + { 
+ free(pImage); + return nullptr; + } + + pLine_data = pSrc; + bytes_remaining -= source_pitch; + pSrc += source_pitch; + } + + // Convert to 24bpp RGB or 32bpp RGBA. + uint8_t *pDst = pImage + (y_flipped ? (height - 1 - y) : y) * dest_pitch + (x_flipped ? (width - 1) * n_chans : 0); + const int dst_stride = x_flipped ? -((int)n_chans) : n_chans; + + switch (hdr.m_depth) + { + case 32: + assert(tga_bytes_per_pixel == 4 && n_chans == 4); + for (int i = 0; i < width; i++, pLine_data += 4, pDst += dst_stride) + { + pDst[0] = pLine_data[2]; + pDst[1] = pLine_data[1]; + pDst[2] = pLine_data[0]; + pDst[3] = pLine_data[3]; + } + break; + case 24: + assert(tga_bytes_per_pixel == 3 && n_chans == 3); + for (int i = 0; i < width; i++, pLine_data += 3, pDst += dst_stride) + { + pDst[0] = pLine_data[2]; + pDst[1] = pLine_data[1]; + pDst[2] = pLine_data[0]; + } + break; + case 16: + case 15: + if (image_type == cITRGB) + { + assert(tga_bytes_per_pixel == 2 && n_chans == 3); + for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride) + { + const uint32_t v = pLine_data[0] | (pLine_data[1] << 8); + pDst[0] = (((v >> 10) & 31) * 255 + 15) / 31; + pDst[1] = (((v >> 5) & 31) * 255 + 15) / 31; + pDst[2] = ((v & 31) * 255 + 15) / 31; + } + } + else + { + assert(image_type == cITGrayscale && tga_bytes_per_pixel == 2 && n_chans == 4); + for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride) + { + pDst[0] = pLine_data[0]; + pDst[1] = pLine_data[0]; + pDst[2] = pLine_data[0]; + pDst[3] = pLine_data[1]; + } + } + break; + case 8: + assert(tga_bytes_per_pixel == 1); + if (image_type == cITPalettized) + { + if (hdr.m_cmap_bpp == 32) + { + assert(n_chans == 4); + for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) + { + const uint32_t c = *pLine_data; + pDst[0] = pal[c].r; + pDst[1] = pal[c].g; + pDst[2] = pal[c].b; + pDst[3] = pal[c].a; + } + } + else + { + assert(n_chans == 3); + for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) + { + 
const uint32_t c = *pLine_data; + pDst[0] = pal[c].r; + pDst[1] = pal[c].g; + pDst[2] = pal[c].b; + } + } + } + else + { + assert(n_chans == 3); + for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) + { + const uint8_t c = *pLine_data; + pDst[0] = c; + pDst[1] = c; + pDst[2] = c; + } + } + break; + default: + assert(0); + break; + } + } // y + + return pImage; + } + + uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans) + { + width = height = n_chans = 0; + + uint8_vec filedata; + if (!read_file_to_vec(pFilename, filedata)) + return nullptr; + + if (!filedata.size() || (filedata.size() > UINT32_MAX)) + return nullptr; + + return read_tga(&filedata[0], (uint32_t)filedata.size(), width, height, n_chans); + } + + static inline void hdr_convert(const color_rgba& rgbe, vec4F& c) + { + if (rgbe[3] != 0) + { + float scale = ldexp(1.0f, rgbe[3] - 128 - 8); + c.set((float)rgbe[0] * scale, (float)rgbe[1] * scale, (float)rgbe[2] * scale, 1.0f); + } + else + { + c.set(0.0f, 0.0f, 0.0f, 1.0f); + } + } + + bool string_begins_with(const std::string& str, const char* pPhrase) + { + const size_t str_len = str.size(); + + const size_t phrase_len = strlen(pPhrase); + assert(phrase_len); + + if (str_len >= phrase_len) + { +#ifdef _MSC_VER + if (_strnicmp(pPhrase, str.c_str(), phrase_len) == 0) +#else + if (strncasecmp(pPhrase, str.c_str(), phrase_len) == 0) +#endif + return true; + } + + return false; + } + + // Radiance RGBE (.HDR) image reading. + // This code tries to preserve the original logic in Radiance's ray/src/common/color.c code: + // https://www.radiance-online.org/cgi-bin/viewcvs.cgi/ray/src/common/color.c?revision=2.26&view=markup&sortby=log + // Also see: https://flipcode.com/archives/HDR_Image_Reader.shtml. + // https://github.com/LuminanceHDR/LuminanceHDR/blob/master/src/Libpfs/io/rgbereader.cpp. 
+ // https://radsite.lbl.gov/radiance/refer/filefmts.pdf + // Buggy readers: + // stb_image.h: appears to be a clone of rgbe.c, but with goto's (doesn't support old format files, doesn't support mixture of RLE/non-RLE scanlines) + // http://www.graphics.cornell.edu/~bjw/rgbe.html - rgbe.c/h + // http://www.graphics.cornell.edu/online/formats/rgbe/ - rgbe.c/.h - buggy + bool read_rgbe(const uint8_vec &filedata, imagef& img, rgbe_header_info& hdr_info) + { + hdr_info.clear(); + + const uint32_t MAX_SUPPORTED_DIM = 65536; + + if (filedata.size() < 4) + return false; + + // stb_image.h checks for the string "#?RADIANCE" or "#?RGBE" in the header. + // The original Radiance header code doesn't care about the specific string. + // opencv's reader only checks for "#?", so that's what we're going to do. + if ((filedata[0] != '#') || (filedata[1] != '?')) + return false; + + //uint32_t width = 0, height = 0; + bool is_rgbe = false; + size_t cur_ofs = 0; + + // Parse the lines until we encounter a blank line. + std::string cur_line; + for (; ; ) + { + if (cur_ofs >= filedata.size()) + return false; + + const uint32_t HEADER_TOO_BIG_SIZE = 4096; + if (cur_ofs >= HEADER_TOO_BIG_SIZE) + { + // Header seems too large - something is likely wrong. Return failure. 
+ return false; + } + + uint8_t c = filedata[cur_ofs++]; + + if (c == '\n') + { + if (!cur_line.size()) + break; + + if ((cur_line[0] == '#') && (!string_begins_with(cur_line, "#?")) && (!hdr_info.m_program.size())) + { + cur_line.erase(0, 1); + while (cur_line.size() && (cur_line[0] == ' ')) + cur_line.erase(0, 1); + + hdr_info.m_program = cur_line; + } + else if (string_begins_with(cur_line, "EXPOSURE=") && (cur_line.size() > 9)) + { + hdr_info.m_exposure = atof(cur_line.c_str() + 9); + hdr_info.m_has_exposure = true; + } + else if (string_begins_with(cur_line, "GAMMA=") && (cur_line.size() > 6)) + { + hdr_info.m_exposure = atof(cur_line.c_str() + 6); + hdr_info.m_has_gamma = true; + } + else if (cur_line == "FORMAT=32-bit_rle_rgbe") + { + is_rgbe = true; + } + + cur_line.resize(0); + } + else + cur_line.push_back((char)c); + } + + if (!is_rgbe) + return false; + + // Assume and require the final line to have the image's dimensions. We're not supporting flipping. + for (; ; ) + { + if (cur_ofs >= filedata.size()) + return false; + uint8_t c = filedata[cur_ofs++]; + if (c == '\n') + break; + cur_line.push_back((char)c); + } + + int comp[2] = { 1, 0 }; // y, x (major, minor) + int dir[2] = { -1, 1 }; // -1, 1, (major, minor), for y -1=up + uint32_t major_dim = 0, minor_dim = 0; + + // Parse the dimension string, normally it'll be "-Y # +X #" (major, minor), rarely it differs + for (uint32_t d = 0; d < 2; d++) // 0=major, 1=minor + { + const bool is_neg_x = (strncmp(&cur_line[0], "-X ", 3) == 0); + const bool is_pos_x = (strncmp(&cur_line[0], "+X ", 3) == 0); + const bool is_x = is_neg_x || is_pos_x; + + const bool is_neg_y = (strncmp(&cur_line[0], "-Y ", 3) == 0); + const bool is_pos_y = (strncmp(&cur_line[0], "+Y ", 3) == 0); + const bool is_y = is_neg_y || is_pos_y; + + if (cur_line.size() < 3) + return false; + + if (!is_x && !is_y) + return false; + + comp[d] = is_x ? 0 : 1; + dir[d] = (is_neg_x || is_neg_y) ? -1 : 1; + + uint32_t& dim = d ? 
minor_dim : major_dim; + + cur_line.erase(0, 3); + + while (cur_line.size()) + { + char c = cur_line[0]; + if (c != ' ') + break; + cur_line.erase(0, 1); + } + + bool has_digits = false; + while (cur_line.size()) + { + char c = cur_line[0]; + cur_line.erase(0, 1); + + if (c == ' ') + break; + + if ((c < '0') || (c > '9')) + return false; + + const uint32_t prev_dim = dim; + dim = dim * 10 + (c - '0'); + if (dim < prev_dim) + return false; + + has_digits = true; + } + if (!has_digits) + return false; + + if ((dim < 1) || (dim > MAX_SUPPORTED_DIM)) + return false; + } + + // temp image: width=minor, height=major + img.resize(minor_dim, major_dim); + + std::vector temp_scanline(minor_dim); + + // Read the scanlines. + for (uint32_t y = 0; y < major_dim; y++) + { + vec4F* pDst = &img(0, y); + + if ((filedata.size() - cur_ofs) < 4) + return false; + + // Determine if the line uses the new or old format. See the logic in color.c. + bool old_decrunch = false; + if ((minor_dim < 8) || (minor_dim > 0x7FFF)) + { + // Line is too short or long; must be old format. + old_decrunch = true; + } + else if (filedata[cur_ofs] != 2) + { + // R is not 2, must be old format + old_decrunch = true; + } + else + { + // c[0]/red is 2.Check GB and E for validity. + color_rgba c; + memcpy(&c, &filedata[cur_ofs], 4); + + if ((c[1] != 2) || (c[2] & 0x80)) + { + // G isn't 2, or the high bit of B is set which is impossible (image's > 0x7FFF pixels can't get here). Use old format. + old_decrunch = true; + } + else + { + // Check B and E. If this isn't the minor_dim in network order, something is wrong. The pixel would also be denormalized, and invalid. 
+ uint32_t w = (c[2] << 8) | c[3]; + if (w != minor_dim) + return false; + + cur_ofs += 4; + } + } + + if (old_decrunch) + { + uint32_t rshift = 0, x = 0; + + while (x < minor_dim) + { + if ((filedata.size() - cur_ofs) < 4) + return false; + + color_rgba c; + memcpy(&c, &filedata[cur_ofs], 4); + cur_ofs += 4; + + if ((c[0] == 1) && (c[1] == 1) && (c[2] == 1)) + { + // We'll allow RLE matches to cross scanlines, but not on the very first pixel. + if ((!x) && (!y)) + return false; + + const uint32_t run_len = c[3] << rshift; + const vec4F run_color(pDst[-1]); + + if ((x + run_len) > minor_dim) + return false; + + for (uint32_t i = 0; i < run_len; i++) + *pDst++ = run_color; + + rshift += 8; + x += run_len; + } + else + { + rshift = 0; + + hdr_convert(c, *pDst); + pDst++; + x++; + } + } + continue; + } + + // New format + for (uint32_t s = 0; s < 4; s++) + { + uint32_t x_ofs = 0; + while (x_ofs < minor_dim) + { + uint32_t num_remaining = minor_dim - x_ofs; + + if (cur_ofs >= filedata.size()) + return false; + + uint8_t count = filedata[cur_ofs++]; + if (count > 128) + { + count -= 128; + if (count > num_remaining) + return false; + + if (cur_ofs >= filedata.size()) + return false; + const uint8_t val = filedata[cur_ofs++]; + + for (uint32_t i = 0; i < count; i++) + temp_scanline[x_ofs + i][s] = val; + + x_ofs += count; + } + else + { + if ((!count) || (count > num_remaining)) + return false; + + for (uint32_t i = 0; i < count; i++) + { + if (cur_ofs >= filedata.size()) + return false; + const uint8_t val = filedata[cur_ofs++]; + + temp_scanline[x_ofs + i][s] = val; + } + + x_ofs += count; + } + } // while (x_ofs < minor_dim) + } // c + + // Convert all the RGBE pixels to float now + for (uint32_t x = 0; x < minor_dim; x++, pDst++) + hdr_convert(temp_scanline[x], *pDst); + + assert((pDst - &img(0, y)) == (int)minor_dim); + + } // y + + // at here: + // img(width,height)=image pixels as read from file, x=minor axis, y=major axis + // width=minor axis dimension + // 
height=major axis dimension + // in file, pixels are emitted in minor order, them major (so major=scanlines in the file) + + imagef final_img; + if (comp[0] == 0) // if major axis is X + final_img.resize(major_dim, minor_dim); + else // major axis is Y, minor is X + final_img.resize(minor_dim, major_dim); + + // TODO: optimize the identity case + for (uint32_t major_iter = 0; major_iter < major_dim; major_iter++) + { + for (uint32_t minor_iter = 0; minor_iter < minor_dim; minor_iter++) + { + const vec4F& p = img(minor_iter, major_iter); + + uint32_t dst_x = 0, dst_y = 0; + + // is the minor dim output x? + if (comp[1] == 0) + { + // minor axis is x, major is y + + // is minor axis (which is output x) flipped? + if (dir[1] < 0) + dst_x = minor_dim - 1 - minor_iter; + else + dst_x = minor_iter; + + // is major axis (which is output y) flipped? -1=down in raster order, 1=up + if (dir[0] < 0) + dst_y = major_iter; + else + dst_y = major_dim - 1 - major_iter; + } + else + { + // minor axis is output y, major is output x + + // is minor axis (which is output y) flipped? + if (dir[1] < 0) + dst_y = minor_iter; + else + dst_y = minor_dim - 1 - minor_iter; + + // is major axis (which is output x) flipped? 
+ if (dir[0] < 0) + dst_x = major_dim - 1 - major_iter; + else + dst_x = major_iter; + } + + final_img(dst_x, dst_y) = p; + } + } + + final_img.swap(img); + + return true; + } + + bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info) + { + uint8_vec filedata; + if (!read_file_to_vec(pFilename, filedata)) + return false; + return read_rgbe(filedata, img, hdr_info); + } + + static uint8_vec& append_string(uint8_vec& buf, const char* pStr) + { + const size_t str_len = strlen(pStr); + if (!str_len) + return buf; + + const size_t ofs = buf.size(); + buf.resize(ofs + str_len); + memcpy(&buf[ofs], pStr, str_len); + + return buf; + } + + static uint8_vec& append_string(uint8_vec& buf, const std::string& str) + { + if (!str.size()) + return buf; + return append_string(buf, str.c_str()); + } + + static inline void float2rgbe(color_rgba &rgbe, const vec4F &c) + { + const float red = c[0], green = c[1], blue = c[2]; + assert(red >= 0.0f && green >= 0.0f && blue >= 0.0f); + + const float max_v = basisu::maximumf(basisu::maximumf(red, green), blue); + + if (max_v < 1e-32f) + rgbe.clear(); + else + { + int e; + const float scale = frexp(max_v, &e) * 256.0f / max_v; + rgbe[0] = (uint8_t)(clamp((int)(red * scale), 0, 255)); + rgbe[1] = (uint8_t)(clamp((int)(green * scale), 0, 255)); + rgbe[2] = (uint8_t)(clamp((int)(blue * scale), 0, 255)); + rgbe[3] = (uint8_t)(e + 128); + } + } + + const bool RGBE_FORCE_RAW = false; + const bool RGBE_FORCE_OLD_CRUNCH = false; // note must readers (particularly stb_image.h's) don't properly support this, when they should + + bool write_rgbe(uint8_vec &file_data, imagef& img, rgbe_header_info& hdr_info) + { + if (!img.get_width() || !img.get_height()) + return false; + + const uint32_t width = img.get_width(), height = img.get_height(); + + file_data.resize(0); + file_data.reserve(1024 + img.get_width() * img.get_height() * 4); + + append_string(file_data, "#?RADIANCE\n"); + + if (hdr_info.m_has_exposure) + 
append_string(file_data, string_format("EXPOSURE=%g\n", hdr_info.m_exposure)); + + if (hdr_info.m_has_gamma) + append_string(file_data, string_format("GAMMA=%g\n", hdr_info.m_gamma)); + + append_string(file_data, "FORMAT=32-bit_rle_rgbe\n\n"); + append_string(file_data, string_format("-Y %u +X %u\n", height, width)); + + if (((width < 8) || (width > 0x7FFF)) || (RGBE_FORCE_RAW)) + { + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + color_rgba rgbe; + float2rgbe(rgbe, img(x, y)); + append_vector(file_data, (const uint8_t *)&rgbe, sizeof(rgbe)); + } + } + } + else if (RGBE_FORCE_OLD_CRUNCH) + { + for (uint32_t y = 0; y < height; y++) + { + int prev_r = -1, prev_g = -1, prev_b = -1, prev_e = -1; + uint32_t cur_run_len = 0; + + for (uint32_t x = 0; x < width; x++) + { + color_rgba rgbe; + float2rgbe(rgbe, img(x, y)); + + if ((rgbe[0] == prev_r) && (rgbe[1] == prev_g) && (rgbe[2] == prev_b) && (rgbe[3] == prev_e)) + { + if (++cur_run_len == 255) + { + // this ensures rshift stays 0, it's lame but this path is only for testing readers + color_rgba f(1, 1, 1, cur_run_len - 1); + append_vector(file_data, (const uint8_t*)&f, sizeof(f)); + append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe)); + cur_run_len = 0; + } + } + else + { + if (cur_run_len > 0) + { + color_rgba f(1, 1, 1, cur_run_len); + append_vector(file_data, (const uint8_t*)&f, sizeof(f)); + + cur_run_len = 0; + } + + append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe)); + + prev_r = rgbe[0]; + prev_g = rgbe[1]; + prev_b = rgbe[2]; + prev_e = rgbe[3]; + } + } // x + + if (cur_run_len > 0) + { + color_rgba f(1, 1, 1, cur_run_len); + append_vector(file_data, (const uint8_t*)&f, sizeof(f)); + } + } // y + } + else + { + uint8_vec temp[4]; + for (uint32_t c = 0; c < 4; c++) + temp[c].resize(width); + + for (uint32_t y = 0; y < height; y++) + { + color_rgba rgbe(2, 2, width >> 8, width & 0xFF); + append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe)); + + 
for (uint32_t x = 0; x < width; x++) + { + float2rgbe(rgbe, img(x, y)); + + for (uint32_t c = 0; c < 4; c++) + temp[c][x] = rgbe[c]; + } + + for (uint32_t c = 0; c < 4; c++) + { + int raw_ofs = -1; + + uint32_t x = 0; + while (x < width) + { + const uint32_t num_bytes_remaining = width - x; + const uint32_t max_run_len = basisu::minimum(num_bytes_remaining, 127); + const uint8_t cur_byte = temp[c][x]; + + uint32_t run_len = 1; + while (run_len < max_run_len) + { + if (temp[c][x + run_len] != cur_byte) + break; + run_len++; + } + + const uint32_t cost_to_keep_raw = ((raw_ofs != -1) ? 0 : 1) + run_len; // 0 or 1 bytes to start a raw run, then the repeated bytes issued as raw + const uint32_t cost_to_take_run = 2 + 1; // 2 bytes to issue the RLE, then 1 bytes to start whatever follows it (raw or RLE) + + if ((run_len >= 3) && (cost_to_take_run < cost_to_keep_raw)) + { + file_data.push_back((uint8_t)(128 + run_len)); + file_data.push_back(cur_byte); + + x += run_len; + raw_ofs = -1; + } + else + { + if (raw_ofs < 0) + { + raw_ofs = (int)file_data.size(); + file_data.push_back(0); + } + + if (++file_data[raw_ofs] == 128) + raw_ofs = -1; + + file_data.push_back(cur_byte); + + x++; + } + } // x + + } // c + } // y + } + + return true; + } + + bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info) + { + uint8_vec file_data; + if (!write_rgbe(file_data, img, hdr_info)) + return false; + return write_vec_to_file(pFilename, file_data); + } + + bool read_exr(const char* pFilename, imagef& img, int& n_chans) + { + n_chans = 0; + + int width = 0, height = 0; + float* out_rgba = nullptr; + const char* err = nullptr; + + int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err, &n_chans); + if (status != 0) + { + error_printf("Failed loading .EXR image \"%s\"! (TinyEXR error: %s)\n", pFilename, err ? 
err : "?"); + FreeEXRErrorMessage(err); + free(out_rgba); + return false; + } + + const uint32_t MAX_SUPPORTED_DIM = 65536; + if ((width < 1) || (height < 1) || (width > (int)MAX_SUPPORTED_DIM) || (height > (int)MAX_SUPPORTED_DIM)) + { + error_printf("Invalid dimensions of .EXR image \"%s\"!\n", pFilename); + free(out_rgba); + return false; + } + + img.resize(width, height); + + if (n_chans == 1) + { + const float* pSrc = out_rgba; + vec4F* pDst = img.get_ptr(); + + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + (*pDst)[0] = pSrc[0]; + (*pDst)[1] = pSrc[1]; + (*pDst)[2] = pSrc[2]; + (*pDst)[3] = 1.0f; + + pSrc += 4; + ++pDst; + } + } + } + else + { + memcpy((void *)img.get_ptr(), out_rgba, static_cast(sizeof(float) * 4 * img.get_total_pixels())); + } + + free(out_rgba); + return true; + } + + bool read_exr(const void* pMem, size_t mem_size, imagef& img) + { + float* out_rgba = nullptr; + int width = 0, height = 0; + const char* pErr = nullptr; + int res = LoadEXRFromMemory(&out_rgba, &width, &height, (const uint8_t*)pMem, mem_size, &pErr); + if (res < 0) + { + error_printf("Failed loading .EXR image from memory! (TinyEXR error: %s)\n", pErr ? 
pErr : "?"); + FreeEXRErrorMessage(pErr); + free(out_rgba); + return false; + } + + img.resize(width, height); + memcpy((void *)img.get_ptr(), out_rgba, width * height * sizeof(float) * 4); + free(out_rgba); + + return true; + } + + bool write_exr(const char* pFilename, const imagef& img, uint32_t n_chans, uint32_t flags) + { + assert((n_chans == 1) || (n_chans == 3) || (n_chans == 4)); + + const bool linear_hint = (flags & WRITE_EXR_LINEAR_HINT) != 0, + store_float = (flags & WRITE_EXR_STORE_FLOATS) != 0, + no_compression = (flags & WRITE_EXR_NO_COMPRESSION) != 0; + + const uint32_t width = img.get_width(), height = img.get_height(); + assert(width && height); + + if (!width || !height) + return false; + + float_vec layers[4]; + float* image_ptrs[4]; + for (uint32_t c = 0; c < n_chans; c++) + { + layers[c].resize(width * height); + image_ptrs[c] = layers[c].get_ptr(); + } + + // ABGR + int chan_order[4] = { 3, 2, 1, 0 }; + + if (n_chans == 1) + { + // Y + chan_order[0] = 0; + } + else if (n_chans == 3) + { + // BGR + chan_order[0] = 2; + chan_order[1] = 1; + chan_order[2] = 0; + } + else if (n_chans != 4) + { + assert(0); + return false; + } + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const vec4F& p = img(x, y); + + for (uint32_t c = 0; c < n_chans; c++) + layers[c][x + y * width] = p[chan_order[c]]; + } // x + } // y + + EXRHeader header; + InitEXRHeader(&header); + + EXRImage image; + InitEXRImage(&image); + + image.num_channels = n_chans; + image.images = (unsigned char**)image_ptrs; + image.width = width; + image.height = height; + + header.num_channels = n_chans; + + header.channels = (EXRChannelInfo*)calloc(header.num_channels, sizeof(EXRChannelInfo)); + + // Must be (A)BGR order, since most of EXR viewers expect this channel order. 
+ for (uint32_t i = 0; i < n_chans; i++) + { + char c = 'Y'; + if (n_chans == 3) + c = "BGR"[i]; + else if (n_chans == 4) + c = "ABGR"[i]; + + header.channels[i].name[0] = c; + header.channels[i].name[1] = '\0'; + + header.channels[i].p_linear = linear_hint; + } + + header.pixel_types = (int*)calloc(header.num_channels, sizeof(int)); + header.requested_pixel_types = (int*)calloc(header.num_channels, sizeof(int)); + + if (!no_compression) + header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP; + + for (int i = 0; i < header.num_channels; i++) + { + // pixel type of input image + header.pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; + + // pixel type of output image to be stored in .EXR + header.requested_pixel_types[i] = store_float ? TINYEXR_PIXELTYPE_FLOAT : TINYEXR_PIXELTYPE_HALF; + } + + const char* pErr_msg = nullptr; + + int ret = SaveEXRImageToFile(&image, &header, pFilename, &pErr_msg); + if (ret != TINYEXR_SUCCESS) + { + error_printf("Save EXR err: %s\n", pErr_msg); + FreeEXRErrorMessage(pErr_msg); + } + + free(header.channels); + free(header.pixel_types); + free(header.requested_pixel_types); + + return (ret == TINYEXR_SUCCESS); + } + + void image::debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t scale_x, uint32_t scale_y, const color_rgba& fg, const color_rgba* pBG, bool alpha_only, const char* pFmt, ...) + { + char buf[2048]; + + va_list args; + va_start(args, pFmt); +#ifdef _WIN32 + vsprintf_s(buf, sizeof(buf), pFmt, args); +#else + vsnprintf(buf, sizeof(buf), pFmt, args); +#endif + va_end(args); + + const char* p = buf; + + const uint32_t orig_x_ofs = x_ofs; + + while (*p) + { + uint8_t c = *p++; + if ((c < 32) || (c > 127)) + c = '.'; + + const uint8_t* pGlpyh = &g_debug_font8x8_basic[c - 32][0]; + + for (uint32_t y = 0; y < 8; y++) + { + uint32_t row_bits = pGlpyh[y]; + for (uint32_t x = 0; x < 8; x++) + { + const uint32_t q = row_bits & (1 << x); + + const color_rgba* pColor = q ? 
&fg : pBG; + if (!pColor) + continue; + + if (alpha_only) + fill_box_alpha(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor); + else + fill_box(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor); + } + } + + x_ofs += 8 * scale_x; + if ((x_ofs + 8 * scale_x) > m_width) + { + x_ofs = orig_x_ofs; + y_ofs += 8 * scale_y; + } + } + } + + // Very basic global Reinhard tone mapping, output converted to sRGB with no dithering, alpha is carried through unchanged. + // Only used for debugging/development. + void tonemap_image_reinhard(image &ldr_img, const imagef &hdr_img, float exposure, bool add_noise, bool per_component, bool luma_scaling) + { + uint32_t width = hdr_img.get_width(), height = hdr_img.get_height(); + + ldr_img.resize(width, height); + + rand r; + r.seed(128); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + vec4F c(hdr_img(x, y)); + + if (per_component) + { + for (uint32_t t = 0; t < 3; t++) + { + if (c[t] <= 0.0f) + { + c[t] = 0.0f; + } + else + { + c[t] *= exposure; + c[t] = c[t] / (1.0f + c[t]); + } + } + } + else + { + c[0] *= exposure; + c[1] *= exposure; + c[2] *= exposure; + + const float L = 0.2126f * c[0] + 0.7152f * c[1] + 0.0722f * c[2]; + + float Lmapped = 0.0f; + if (L > 0.0f) + { + //Lmapped = L / (1.0f + L); + //Lmapped /= L; + + Lmapped = 1.0f / (1.0f + L); + } + + c[0] = c[0] * Lmapped; + c[1] = c[1] * Lmapped; + c[2] = c[2] * Lmapped; + + if (luma_scaling) + { + // Keeps the ratio of r/g/b intact + float m = maximum(c[0], c[1], c[2]); + if (m > 1.0f) + { + c /= m; + } + } + } + + c.clamp(0.0f, 1.0f); + + c[3] = c[3] * 255.0f; + + color_rgba& o = ldr_img(x, y); + + if (add_noise) + { + c[0] = linear_to_srgb(c[0]) * 255.0f; + c[1] = linear_to_srgb(c[1]) * 255.0f; + c[2] = linear_to_srgb(c[2]) * 255.0f; + + const float NOISE_AMP = .5f; + c[0] += r.frand(-NOISE_AMP, NOISE_AMP); + c[1] += r.frand(-NOISE_AMP, NOISE_AMP); + c[2] += r.frand(-NOISE_AMP, NOISE_AMP); + + 
c.clamp(0.0f, 255.0f); + + o[0] = (uint8_t)fast_roundf_int(c[0]); + o[1] = (uint8_t)fast_roundf_int(c[1]); + o[2] = (uint8_t)fast_roundf_int(c[2]); + o[3] = (uint8_t)fast_roundf_int(c[3]); + } + else + { + o[0] = g_fast_linear_to_srgb.convert(c[0]); + o[1] = g_fast_linear_to_srgb.convert(c[1]); + o[2] = g_fast_linear_to_srgb.convert(c[2]); + o[3] = (uint8_t)fast_roundf_int(c[3]); + } + } + } + } + + bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img) + { + const uint32_t width = hdr_test_img.get_width(); + const uint32_t height = hdr_test_img.get_height(); + + uint16_vec orig_half_img(width * 3 * height); + uint16_vec half_img(width * 3 * height); + + int max_shift = 32; + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const vec4F& p = hdr_test_img(x, y); + + for (uint32_t i = 0; i < 3; i++) + { + if (p[i] < 0.0f) + return false; + if (p[i] > basist::MAX_HALF_FLOAT) + return false; + + uint32_t h = basist::float_to_half(p[i]); + //uint32_t orig_h = h; + + orig_half_img[(x + y * width) * 3 + i] = (uint16_t)h; + + // Rotate sign bit into LSB + //h = rot_left16((uint16_t)h, 1); + //assert(rot_right16((uint16_t)h, 1) == orig_h); + h <<= 1; + + half_img[(x + y * width) * 3 + i] = (uint16_t)h; + + // Determine # of leading zero bits, ignoring the sign bit + if (h) + { + int lz = clz(h) - 16; + assert(lz >= 0 && lz <= 16); + + assert((h << lz) <= 0xFFFF); + + max_shift = basisu::minimum(max_shift, lz); + } + } // i + } // x + } // y + + //printf("tonemap_image_compressive: Max leading zeros: %i\n", max_shift); + + uint32_t high_hist[256]; + clear_obj(high_hist); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + for (uint32_t i = 0; i < 3; i++) + { + uint16_t& hf = half_img[(x + y * width) * 3 + i]; + + assert(((uint32_t)hf << max_shift) <= 65535); + + hf <<= max_shift; + + uint32_t h = (uint8_t)(hf >> 8); + high_hist[h]++; + } + } // x + } // y + + uint32_t 
total_vals_used = 0; + int remap_old_to_new[256]; + for (uint32_t i = 0; i < 256; i++) + remap_old_to_new[i] = -1; + + for (uint32_t i = 0; i < 256; i++) + { + if (high_hist[i] != 0) + { + remap_old_to_new[i] = total_vals_used; + total_vals_used++; + } + } + + assert(total_vals_used >= 1); + + //printf("tonemap_image_compressive: Total used high byte values: %u, unused: %u\n", total_vals_used, 256 - total_vals_used); + + bool val_used[256]; + clear_obj(val_used); + + int remap_new_to_old[256]; + for (uint32_t i = 0; i < 256; i++) + remap_new_to_old[i] = -1; + BASISU_NOTE_UNUSED(remap_new_to_old); + + int prev_c = -1; + BASISU_NOTE_UNUSED(prev_c); + for (uint32_t i = 0; i < 256; i++) + { + if (remap_old_to_new[i] >= 0) + { + int c; + if (total_vals_used <= 1) + c = remap_old_to_new[i]; + else + { + c = (remap_old_to_new[i] * 255 + ((total_vals_used - 1) / 2)) / (total_vals_used - 1); + + assert(c > prev_c); + } + + assert(!val_used[c]); + + remap_new_to_old[c] = i; + + remap_old_to_new[i] = c; + prev_c = c; + + //printf("%u ", c); + + val_used[c] = true; + } + } // i + //printf("\n"); + + dst_img.resize(width, height); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + for (uint32_t c = 0; c < 3; c++) + { + uint16_t& v16 = half_img[(x + y * width) * 3 + c]; + + uint32_t hb = v16 >> 8; + //uint32_t lb = v16 & 0xFF; + + assert(remap_old_to_new[hb] != -1); + assert(remap_old_to_new[hb] <= 255); + assert(remap_new_to_old[remap_old_to_new[hb]] == (int)hb); + + hb = remap_old_to_new[hb]; + + //v16 = (uint16_t)((hb << 8) | lb); + + dst_img(x, y)[c] = (uint8_t)hb; + } + } // x + } // y + + return true; + } + + bool tonemap_image_compressive2(image& dst_img, const imagef& hdr_test_img) + { + const uint32_t width = hdr_test_img.get_width(); + const uint32_t height = hdr_test_img.get_height(); + + dst_img.resize(width, height); + dst_img.set_all(color_rgba(0, 0, 0, 255)); + + basisu::vector half_img(width * 3 * height); + + uint32_t 
low_h = UINT32_MAX, high_h = 0; + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const vec4F& p = hdr_test_img(x, y); + + for (uint32_t i = 0; i < 3; i++) + { + float f = p[i]; + + if (std::isnan(f) || std::isinf(f)) + f = 0.0f; + else if (f < 0.0f) + f = 0.0f; + else if (f > basist::MAX_HALF_FLOAT) + f = basist::MAX_HALF_FLOAT; + + uint32_t h = basist::float_to_half(f); + + low_h = minimum(low_h, h); + high_h = maximum(high_h, h); + + half_img[(x + y * width) * 3 + i] = (basist::half_float)h; + + } // i + } // x + } // y + + if (low_h == high_h) + return false; + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + for (uint32_t i = 0; i < 3; i++) + { + basist::half_float h = half_img[(x + y * width) * 3 + i]; + + float f = (float)(h - low_h) / (float)(high_h - low_h); + + int iv = basisu::clamp((int)std::round(f * 255.0f), 0, 255); + + dst_img(x, y)[i] = (uint8_t)iv; + + } // i + } // x + } // y + + return true; + } + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_enc.h b/thirdparty/basisu/encoder/basisu_enc.h new file mode 100644 index 000000000..a565803e0 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_enc.h @@ -0,0 +1,4319 @@ +// basisu_enc.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "../transcoder/basisu.h" +#include "../transcoder/basisu_transcoder_internal.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#if !defined(_WIN32) || defined(__MINGW32__) +#include +#endif + +// This module is really just a huge grab bag of classes and helper functions needed by the encoder. + +// If BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE is 1, quality in perceptual mode will be slightly greater, but at a large increase in encoding CPU time. +#define BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE (0) + +#if BASISU_SUPPORT_SSE +// Declared in basisu_kernels_imp.h, but we can't include that here otherwise it would lead to circular type errors. +extern void update_covar_matrix_16x16_sse41(uint32_t num_vecs, const void* pWeighted_vecs, const void* pOrigin, const uint32_t *pVec_indices, void* pMatrix16x16); +#endif + +namespace basisu +{ + extern uint8_t g_hamming_dist[256]; + extern const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8]; + + // true if basisu_encoder_init() has been called and returned. + extern bool g_library_initialized; + + // Encoder library initialization. + // This function MUST be called before encoding anything! + // Returns false if library initialization fails. + bool basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false); + void basisu_encoder_deinit(); + + // basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=1 + extern void detect_sse41(); + +#if BASISU_SUPPORT_SSE + extern bool g_cpu_supports_sse41; +#else + const bool g_cpu_supports_sse41 = false; +#endif + + void error_vprintf(const char* pFmt, va_list args); + void error_printf(const char *pFmt, ...); + + template + inline void fmt_error_printf(const char* pFmt, Args&&... args) + { + std::string res; + if (!fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward(args))... 
})) + return; + error_printf("%s", res.c_str()); + } + + void platform_sleep(uint32_t ms); + + // Helpers + + inline uint8_t clamp255(int32_t i) + { + return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); + } + + inline int left_shift32(int val, int shift) + { + assert((shift >= 0) && (shift < 32)); + return static_cast(static_cast(val) << shift); + } + + inline uint32_t left_shift32(uint32_t val, int shift) + { + assert((shift >= 0) && (shift < 32)); + return val << shift; + } + + inline int32_t clampi(int32_t value, int32_t low, int32_t high) + { + if (value < low) + value = low; + else if (value > high) + value = high; + return value; + } + + inline uint8_t mul_8(uint32_t v, uint32_t a) + { + v = v * a + 128; + return (uint8_t)((v + (v >> 8)) >> 8); + } + + inline int fast_roundf_int(float x) + { + return (x >= 0.0f) ? (int)(x + 0.5f) : (int)(x - 0.5f); + } + + inline int fast_floorf_int(float x) + { + int xi = (int)x; // Truncate towards zero + return ((x < 0.0f) && (x != (float)xi)) ? 
(xi - 1) : xi; + } + + inline uint64_t read_bits(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 64); + uint64_t bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_read = minimum(codesize - total_bits, 8 - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; + byte_bits &= ((1 << bits_to_read) - 1); + + bits |= ((uint64_t)(byte_bits) << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; + } + + inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 32); + uint32_t bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_read = minimum(codesize - total_bits, 8 - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; + byte_bits &= ((1 << bits_to_read) - 1); + + bits |= (byte_bits << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; + } + + // Open interval + inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } + inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } + + // Closed interval + inline int bounds_check_incl(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; } + inline uint32_t bounds_check_incl(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; } + + inline uint32_t clz(uint32_t x) + { + if (!x) + return 32; + + uint32_t n = 0; + while ((x & 0x80000000) == 0) + { + x <<= 1u; + n++; + } + + return n; + } + + bool string_begins_with(const std::string& str, const char* pPhrase); + + // Case sensitive, returns -1 if can't find + 
inline int string_find_first(const std::string& str, const char* pPhrase) + { + size_t res = str.find(pPhrase, 0); + if (res == std::string::npos) + return -1; + return (int)res; + } + + // Hashing + + inline uint32_t bitmix32c(uint32_t v) + { + v = (v + 0x7ed55d16) + (v << 12); + v = (v ^ 0xc761c23c) ^ (v >> 19); + v = (v + 0x165667b1) + (v << 5); + v = (v + 0xd3a2646c) ^ (v << 9); + v = (v + 0xfd7046c5) + (v << 3); + v = (v ^ 0xb55a4f09) ^ (v >> 16); + return v; + } + + inline uint32_t bitmix32(uint32_t v) + { + v -= (v << 6); + v ^= (v >> 17); + v -= (v << 9); + v ^= (v << 4); + v -= (v << 3); + v ^= (v << 10); + v ^= (v >> 15); + return v; + } + + inline uint32_t wang_hash(uint32_t seed) + { + seed = (seed ^ 61) ^ (seed >> 16); + seed *= 9; + seed = seed ^ (seed >> 4); + seed *= 0x27d4eb2d; + seed = seed ^ (seed >> 15); + return seed; + } + + uint32_t hash_hsieh(const uint8_t* pBuf, size_t len); + + template + struct bit_hasher + { + inline std::size_t operator()(const Key& k) const + { + return hash_hsieh(reinterpret_cast(&k), sizeof(k)); + } + }; + + struct string_hasher + { + inline std::size_t operator()(const std::string& k) const + { + size_t l = k.size(); + if (!l) + return 0; + return hash_hsieh(reinterpret_cast(k.c_str()), l); + } + }; + + class running_stat + { + public: + running_stat() { clear(); } + + void clear() + { + m_n = 0; + m_total = 0; + m_old_m = 0; + m_new_m = 0; + m_old_s = 0; + m_new_s = 0; + m_min = 0; + m_max = 0; + } + + void push(double x) + { + m_n++; + m_total += x; + if (m_n == 1) + { + m_old_m = m_new_m = x; + m_old_s = 0.0; + m_min = x; + m_max = x; + } + else + { + // See Knuth TAOCP vol 2, 3rd edition, page 232 + m_new_m = m_old_m + (x - m_old_m) / m_n; + m_new_s = m_old_s + (x - m_old_m) * (x - m_new_m); + m_old_m = m_new_m; + m_old_s = m_new_s; + m_min = basisu::minimum(x, m_min); + m_max = basisu::maximum(x, m_max); + } + } + + uint32_t get_num() const + { + return m_n; + } + + double get_total() const + { + return 
m_total; + } + + double get_mean() const + { + return (m_n > 0) ? m_new_m : 0.0; + } + + // Returns sample variance + double get_variance() const + { + return ((m_n > 1) ? m_new_s / (m_n - 1) : 0.0); + } + + double get_std_dev() const + { + return sqrt(get_variance()); + } + + double get_min() const + { + return m_min; + } + + double get_max() const + { + return m_max; + } + + private: + uint32_t m_n; + double m_total, m_old_m, m_new_m, m_old_s, m_new_s, m_min, m_max; + }; + + // Linear algebra + + template + class vec + { + protected: + T m_v[N]; + + public: + enum { num_elements = N }; + typedef T scalar_type; + + inline vec() { } + inline vec(eZero) { set_zero(); } + + explicit inline vec(T val) { set(val); } + inline vec(T v0, T v1) { set(v0, v1); } + inline vec(T v0, T v1, T v2) { set(v0, v1, v2); } + inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); } + inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; } + template inline vec(const vec &other) { set(other); } + + inline const T& operator[](uint32_t i) const { assert(i < N); return m_v[i]; } + inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; } + + inline T getX() const { return m_v[0]; } + inline T getY() const { static_assert(N >= 2, "N too small"); return m_v[1]; } + inline T getZ() const { static_assert(N >= 3, "N too small"); return m_v[2]; } + inline T getW() const { static_assert(N >= 4, "N too small"); return m_v[3]; } + + inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false; return true; } + inline bool operator!=(const vec& rhs) const { return !(*this == rhs); } + inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; } + + inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; } + inline void clear() { set_zero(); } + + template + inline 
vec &set(const vec &other) + { + uint32_t i; + if ((const void *)(&other) == (const void *)(this)) + return *this; + const uint32_t m = minimum(OtherN, N); + for (i = 0; i < m; i++) + m_v[i] = static_cast(other[i]); + for (; i < N; i++) + m_v[i] = 0; + return *this; + } + + inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; } + inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; } + inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; } + + inline vec &set(T v0, T v1) + { + m_v[0] = v0; + if (N >= 2) + { + m_v[1] = v1; + clear_elements(2, N); + } + return *this; + } + + inline vec &set(T v0, T v1, T v2) + { + m_v[0] = v0; + if (N >= 2) + { + m_v[1] = v1; + if (N >= 3) + { + m_v[2] = v2; + clear_elements(3, N); + } + } + return *this; + } + + inline vec &set(T v0, T v1, T v2, T v3) + { + m_v[0] = v0; + if (N >= 2) + { + m_v[1] = v1; + if (N >= 3) + { + m_v[2] = v2; + + if (N >= 4) + { + m_v[3] = v3; + clear_elements(5, N); + } + } + } + return *this; + } + + inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; } + template inline vec &operator=(const vec &rhs) { set(rhs); return *this; } + + inline const T *get_ptr() const { return reinterpret_cast(&m_v[0]); } + inline T *get_ptr() { return reinterpret_cast(&m_v[0]); } + + inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; } + inline vec operator+ () const { return *this; } + inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; } + inline vec &operator-= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; } + inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; } + inline vec 
&operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; } + inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; } + inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; } + + friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; } + friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; } + friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; } + friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; } + friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; } + friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; } + + static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; } + + inline T dot(const vec &rhs) const { return dot_product(*this, rhs); } + + inline T norm() const { return dot_product(*this, *this); } + inline T length() const { return sqrt(norm()); } + + inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; } + inline double squared_distance_d(const vec& other) const { double d2 = 0; for (uint32_t i = 0; i < N; i++) { double d = (double)m_v[i] - (double)other.m_v[i]; d2 += d * d; } return d2; } + + inline T distance(const vec &other) const { return 
static_cast(sqrt(squared_distance(other))); } + inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); } + + inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; } + + inline vec get_normalized() const { vec res(*this); res.normalize_in_place(); return res; } + + inline vec &clamp(T l, T h) + { + for (uint32_t i = 0; i < N; i++) + m_v[i] = basisu::clamp(m_v[i], l, h); + return *this; + } + + static vec component_mul(const vec& a, const vec& b) + { + vec res; + for (uint32_t i = 0; i < N; i++) + res[i] = a[i] * b[i]; + return res; + } + + static vec component_min(const vec& a, const vec& b) + { + vec res; + for (uint32_t i = 0; i < N; i++) + res[i] = minimum(a[i], b[i]); + return res; + } + + static vec component_max(const vec& a, const vec& b) + { + vec res; + for (uint32_t i = 0; i < N; i++) + res[i] = maximum(a[i], b[i]); + return res; + } + + static vec lerp(const vec& a, const vec& b, float s) + { + vec res; + for (uint32_t i = 0; i < N; i++) + res[i] = basisu::lerp(a[i], b[i], s); + return res; + } + }; + + typedef vec<4, double> vec4D; + typedef vec<3, double> vec3D; + typedef vec<2, double> vec2D; + typedef vec<1, double> vec1D; + + typedef vec<6, float> vec6F; + typedef vec<5, float> vec5F; + typedef vec<4, float> vec4F; + typedef vec<3, float> vec3F; + typedef vec<2, float> vec2F; + typedef vec<1, float> vec1F; + + typedef vec<16, float> vec16F; + + template struct bitwise_copyable< vec > { enum { cFlag = true }; }; + template struct bitwise_movable< vec > { enum { cFlag = true }; }; + + template + class matrix + { + public: + typedef vec col_vec; + typedef vec row_vec; + + typedef T scalar_type; + + enum { rows = Rows, cols = Cols }; + + protected: + row_vec m_r[Rows]; + + public: + inline matrix() {} + inline matrix(eZero) { set_zero(); } + inline matrix(const matrix &other) { for (uint32_t i = 0; i < Rows; i++) m_r[i] = other.m_r[i]; } + inline matrix 
&operator=(const matrix &rhs) { if (this != &rhs) for (uint32_t i = 0; i < Rows; i++) m_r[i] = rhs.m_r[i]; return *this; } + + inline T operator()(uint32_t r, uint32_t c) const { assert((r < Rows) && (c < Cols)); return m_r[r][c]; } + inline T &operator()(uint32_t r, uint32_t c) { assert((r < Rows) && (c < Cols)); return m_r[r][c]; } + + inline const row_vec &operator[](uint32_t r) const { assert(r < Rows); return m_r[r]; } + inline row_vec &operator[](uint32_t r) { assert(r < Rows); return m_r[r]; } + + inline matrix &set_zero() + { + for (uint32_t i = 0; i < Rows; i++) + m_r[i].set_zero(); + return *this; + } + + inline matrix &set_identity() + { + for (uint32_t i = 0; i < Rows; i++) + { + m_r[i].set_zero(); + if (i < Cols) + m_r[i][i] = 1.0f; + } + return *this; + } + }; + + template struct bitwise_copyable< matrix > { enum { cFlag = true }; }; + template struct bitwise_movable< matrix > { enum { cFlag = true }; }; + + template + inline VectorType compute_pca_from_covar(matrix &cmatrix) + { + VectorType axis; + if (N == 1) + axis.set(1.0f); + else + { + for (uint32_t i = 0; i < N; i++) + axis[i] = lerp(.75f, 1.25f, i * (1.0f / maximum(N - 1, 1))); + } + + VectorType prev_axis(axis); + + // Power iterations + for (uint32_t power_iter = 0; power_iter < 8; power_iter++) + { + VectorType trial_axis; + double max_sum = 0; + + for (uint32_t i = 0; i < N; i++) + { + double sum = 0; + for (uint32_t j = 0; j < N; j++) + sum += cmatrix[i][j] * axis[j]; + + trial_axis[i] = static_cast(sum); + + max_sum = maximum(fabs(sum), max_sum); + } + + if (max_sum != 0.0f) + trial_axis *= static_cast(1.0f / max_sum); + + VectorType delta_axis(prev_axis - trial_axis); + + prev_axis = axis; + axis = trial_axis; + + if (delta_axis.norm() < .0024f) + break; + } + + return axis.normalize_in_place(); + } + + template inline void indirect_sort(uint32_t num_indices, uint32_t* pIndices, const T* pKeys) + { + for (uint32_t i = 0; i < num_indices; i++) + pIndices[i] = i; + + std::sort( + 
pIndices, + pIndices + num_indices, + [pKeys](uint32_t a, uint32_t b) { return pKeys[a] < pKeys[b]; } + ); + } + + // 1-4 byte direct Radix sort. + template + T* radix_sort(uint32_t num_vals, T* pBuf0, T* pBuf1, uint32_t key_ofs, uint32_t key_size) + { + assert(key_ofs < sizeof(T)); + assert((key_size >= 1) && (key_size <= 4)); + + uint32_t hist[256 * 4]; + + memset(hist, 0, sizeof(hist[0]) * 256 * key_size); + +#define BASISU_GET_KEY(p) (*(uint32_t *)((uint8_t *)(p) + key_ofs)) + + if (key_size == 4) + { + T* p = pBuf0; + T* q = pBuf0 + num_vals; + for (; p != q; p++) + { + const uint32_t key = BASISU_GET_KEY(p); + + hist[key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + hist[768 + ((key >> 24) & 0xFF)]++; + } + } + else if (key_size == 3) + { + T* p = pBuf0; + T* q = pBuf0 + num_vals; + for (; p != q; p++) + { + const uint32_t key = BASISU_GET_KEY(p); + + hist[key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + } + } + else if (key_size == 2) + { + T* p = pBuf0; + T* q = pBuf0 + (num_vals >> 1) * 2; + + for (; p != q; p += 2) + { + const uint32_t key0 = BASISU_GET_KEY(p); + const uint32_t key1 = BASISU_GET_KEY(p + 1); + + hist[key0 & 0xFF]++; + hist[256 + ((key0 >> 8) & 0xFF)]++; + + hist[key1 & 0xFF]++; + hist[256 + ((key1 >> 8) & 0xFF)]++; + } + + if (num_vals & 1) + { + const uint32_t key = BASISU_GET_KEY(p); + + hist[key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + } + } + else + { + assert(key_size == 1); + if (key_size != 1) + return NULL; + + T* p = pBuf0; + T* q = pBuf0 + (num_vals >> 1) * 2; + + for (; p != q; p += 2) + { + const uint32_t key0 = BASISU_GET_KEY(p); + const uint32_t key1 = BASISU_GET_KEY(p + 1); + + hist[key0 & 0xFF]++; + hist[key1 & 0xFF]++; + } + + if (num_vals & 1) + { + const uint32_t key = BASISU_GET_KEY(p); + hist[key & 0xFF]++; + } + } + + T* pCur = pBuf0; + T* pNew = pBuf1; + + for (uint32_t pass = 0; pass < key_size; pass++) + { + const 
uint32_t* pHist = &hist[pass << 8]; + + uint32_t offsets[256]; + + uint32_t cur_ofs = 0; + for (uint32_t i = 0; i < 256; i += 2) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + + offsets[i + 1] = cur_ofs; + cur_ofs += pHist[i + 1]; + } + + const uint32_t pass_shift = pass << 3; + + T* p = pCur; + T* q = pCur + (num_vals >> 1) * 2; + + for (; p != q; p += 2) + { + uint32_t c0 = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF; + uint32_t c1 = (BASISU_GET_KEY(p + 1) >> pass_shift) & 0xFF; + + if (c0 == c1) + { + uint32_t dst_offset0 = offsets[c0]; + + offsets[c0] = dst_offset0 + 2; + + pNew[dst_offset0] = p[0]; + pNew[dst_offset0 + 1] = p[1]; + } + else + { + uint32_t dst_offset0 = offsets[c0]++; + uint32_t dst_offset1 = offsets[c1]++; + + pNew[dst_offset0] = p[0]; + pNew[dst_offset1] = p[1]; + } + } + + if (num_vals & 1) + { + uint32_t c = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF; + + uint32_t dst_offset = offsets[c]; + offsets[c] = dst_offset + 1; + + pNew[dst_offset] = *p; + } + + T* t = pCur; + pCur = pNew; + pNew = t; + } + + return pCur; + } + +#undef BASISU_GET_KEY + + // Very simple job pool with no dependencies. + class job_pool + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(job_pool); + + public: + // num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc. 
+ job_pool(uint32_t num_threads); + ~job_pool(); + + void add_job(const std::function& job); + void add_job(std::function&& job); + + void wait_for_all(); + + size_t get_total_threads() const { return 1 + m_threads.size(); } + + private: + std::vector m_threads; + std::vector > m_queue; + + std::mutex m_mutex; + std::condition_variable m_has_work; + std::condition_variable m_no_more_jobs; + + uint32_t m_num_active_jobs; + + std::atomic m_kill_flag; + + std::atomic m_num_active_workers; + + void job_thread(uint32_t index); + }; + + // Simple 64-bit color class + + class color_rgba_i16 + { + public: + union + { + int16_t m_comps[4]; + + struct + { + int16_t r; + int16_t g; + int16_t b; + int16_t a; + }; + }; + + inline color_rgba_i16() + { + static_assert(sizeof(*this) == sizeof(int16_t)*4, "sizeof(*this) == sizeof(int16_t)*4"); + } + + inline color_rgba_i16(int sr, int sg, int sb, int sa) + { + set(sr, sg, sb, sa); + } + + inline color_rgba_i16 &set(int sr, int sg, int sb, int sa) + { + m_comps[0] = (int16_t)clamp(sr, INT16_MIN, INT16_MAX); + m_comps[1] = (int16_t)clamp(sg, INT16_MIN, INT16_MAX); + m_comps[2] = (int16_t)clamp(sb, INT16_MIN, INT16_MAX); + m_comps[3] = (int16_t)clamp(sa, INT16_MIN, INT16_MAX); + return *this; + } + }; + + class color_rgba + { + public: + union + { + uint8_t m_comps[4]; + + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + }; + + inline color_rgba() + { + static_assert(sizeof(*this) == 4, "sizeof(*this) != 4"); + static_assert(sizeof(*this) == sizeof(basist::color32), "sizeof(*this) != sizeof(basist::color32)"); + } + + // Not too hot about this idea. 
+ inline color_rgba(const basist::color32& other) : + r(other.r), + g(other.g), + b(other.b), + a(other.a) + { + } + + color_rgba& operator= (const basist::color32& rhs) + { + r = rhs.r; + g = rhs.g; + b = rhs.b; + a = rhs.a; + return *this; + } + + inline color_rgba(int y) + { + set(y); + } + + inline color_rgba(int y, int na) + { + set(y, na); + } + + inline color_rgba(int sr, int sg, int sb, int sa) + { + set(sr, sg, sb, sa); + } + + inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa) + { + set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa); + } + + inline color_rgba& set_noclamp_y(int y) + { + m_comps[0] = (uint8_t)y; + m_comps[1] = (uint8_t)y; + m_comps[2] = (uint8_t)y; + m_comps[3] = (uint8_t)255; + return *this; + } + + inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa) + { + m_comps[0] = (uint8_t)sr; + m_comps[1] = (uint8_t)sg; + m_comps[2] = (uint8_t)sb; + m_comps[3] = (uint8_t)sa; + return *this; + } + + inline color_rgba &set(int y) + { + m_comps[0] = static_cast(clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = 255; + return *this; + } + + inline color_rgba &set(int y, int na) + { + m_comps[0] = static_cast(clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = static_cast(clamp(na, 0, 255)); + return *this; + } + + inline color_rgba &set(int sr, int sg, int sb, int sa) + { + m_comps[0] = static_cast(clamp(sr, 0, 255)); + m_comps[1] = static_cast(clamp(sg, 0, 255)); + m_comps[2] = static_cast(clamp(sb, 0, 255)); + m_comps[3] = static_cast(clamp(sa, 0, 255)); + return *this; + } + + inline color_rgba &set_rgb(int sr, int sg, int sb) + { + m_comps[0] = static_cast(clamp(sr, 0, 255)); + m_comps[1] = static_cast(clamp(sg, 0, 255)); + m_comps[2] = static_cast(clamp(sb, 0, 255)); + return *this; + } + + inline color_rgba &set_rgb(const color_rgba &other) + { + r = other.r; + g = other.g; + b = other.b; + return *this; + } + + inline const uint8_t 
&operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; } + inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; } + + inline void clear() + { + m_comps[0] = 0; + m_comps[1] = 0; + m_comps[2] = 0; + m_comps[3] = 0; + } + + inline bool operator== (const color_rgba &rhs) const + { + if (m_comps[0] != rhs.m_comps[0]) return false; + if (m_comps[1] != rhs.m_comps[1]) return false; + if (m_comps[2] != rhs.m_comps[2]) return false; + if (m_comps[3] != rhs.m_comps[3]) return false; + return true; + } + + inline bool operator!= (const color_rgba &rhs) const + { + return !(*this == rhs); + } + + inline bool operator<(const color_rgba &rhs) const + { + for (int i = 0; i < 4; i++) + { + if (m_comps[i] < rhs.m_comps[i]) + return true; + else if (m_comps[i] != rhs.m_comps[i]) + return false; + } + return false; + } + + inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; } + inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } + inline int get_luma(bool luma_601) const { return luma_601 ? 
get_601_luma() : get_709_luma(); } + + inline uint32_t get_bgra_uint32() const { return b | (g << 8) | (r << 16) | (a << 24); } + inline uint32_t get_rgba_uint32() const { return r | (g << 8) | (b << 16) | (a << 24); } + + inline basist::color32 get_color32() const + { + return basist::color32(r, g, b, a); + } + + static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); } + static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); } + }; + + typedef basisu::vector color_rgba_vec; + + const color_rgba g_black_color(0, 0, 0, 255); + const color_rgba g_black_trans_color(0, 0, 0, 0); + const color_rgba g_white_color(255, 255, 255, 255); + + inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1) + { + int dr = r0 - r1, dg = g0 - g1, db = b0 - b1; + return dr * dr + dg * dg + db * db; + } + + inline int color_distance(int r0, int g0, int b0, int a0, int r1, int g1, int b1, int a1) + { + int dr = r0 - r1, dg = g0 - g1, db = b0 - b1, da = a0 - a1; + return dr * dr + dg * dg + db * db + da * da; + } + + inline int color_distance(const color_rgba &c0, const color_rgba &c1, bool alpha) + { + if (alpha) + return color_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a); + else + return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b); + } + + // TODO: Allow user to control channel weightings. 
+ inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha) + { + if (perceptual) + { +#if BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE + const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f; + const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f; + + const float cr1 = e1.r - l1; + const float cr2 = e2.r - l2; + + const float cb1 = e1.b - l1; + const float cb2 = e2.b - l2; + + const float dl = l1 - l2; + const float dcr = cr1 - cr2; + const float dcb = cb1 - cb2; + + uint32_t d = static_cast(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb); + + if (alpha) + { + int da = static_cast(e1.a) - static_cast(e2.a); + d += static_cast(128.0f*da*da); + } + + return d; +#elif 1 + int dr = e1.r - e2.r; + int dg = e1.g - e2.g; + int db = e1.b - e2.b; + +#if 0 + int delta_l = dr * 27 + dg * 92 + db * 9; + int delta_cr = dr * 128 - delta_l; + int delta_cb = db * 128 - delta_l; + + uint32_t id = ((uint32_t)(delta_l * delta_l) >> 7U) + + ((((uint32_t)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) + + ((((uint32_t)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U); +#else + int64_t delta_l = dr * 27 + dg * 92 + db * 9; + int64_t delta_cr = dr * 128 - delta_l; + int64_t delta_cb = db * 128 - delta_l; + + uint32_t id = ((uint32_t)((delta_l * delta_l) >> 7U)) + + ((((uint32_t)((delta_cr * delta_cr) >> 7U)) * 26U) >> 7U) + + ((((uint32_t)((delta_cb * delta_cb) >> 7U)) * 3U) >> 7U); +#endif + + if (alpha) + { + int da = (e1.a - e2.a) << 7; + // This shouldn't overflow if da is 255 or -255: 29.99 bits after squaring. 
+ id += ((uint32_t)(da * da) >> 7U); + } + + return id; +#else + int dr = e1.r - e2.r; + int dg = e1.g - e2.g; + int db = e1.b - e2.b; + + int64_t delta_l = dr * 27 + dg * 92 + db * 9; + int64_t delta_cr = dr * 128 - delta_l; + int64_t delta_cb = db * 128 - delta_l; + + int64_t id = ((delta_l * delta_l) * 128) + + ((delta_cr * delta_cr) * 26) + + ((delta_cb * delta_cb) * 3); + + if (alpha) + { + int64_t da = (e1.a - e2.a); + id += (da * da) * 128; + } + + int d = (id + 8192) >> 14; + + return d; +#endif + } + else + return color_distance(e1, e2, alpha); + } + + static inline uint32_t color_distance_la(const color_rgba& a, const color_rgba& b) + { + const int dl = a.r - b.r; + const int da = a.a - b.a; + return dl * dl + da * da; + } + + // String helpers + + inline int string_find_right(const std::string& filename, char c) + { + size_t result = filename.find_last_of(c); + return (result == std::string::npos) ? -1 : (int)result; + } + + inline std::string string_get_extension(const std::string &filename) + { + int sep = -1; +#ifdef _WIN32 + sep = string_find_right(filename, '\\'); +#endif + if (sep < 0) + sep = string_find_right(filename, '/'); + + int dot = string_find_right(filename, '.'); + if (dot <= sep) + return ""; + + std::string result(filename); + result.erase(0, dot + 1); + + return result; + } + + inline bool string_remove_extension(std::string &filename) + { + int sep = -1; +#ifdef _WIN32 + sep = string_find_right(filename, '\\'); +#endif + if (sep < 0) + sep = string_find_right(filename, '/'); + + int dot = string_find_right(filename, '.'); + if ((dot < sep) || (dot < 0)) + return false; + + filename.resize(dot); + + return true; + } + + inline std::string string_tolower(const std::string& s) + { + std::string result(s); + for (size_t i = 0; i < result.size(); i++) + { + result[i] = (char)tolower((uint8_t)(result[i])); + } + return result; + } + + inline char *strcpy_safe(char *pDst, size_t dst_len, const char *pSrc) + { + assert(pDst && pSrc && 
dst_len); + if (!dst_len) + return pDst; + + const size_t src_len = strlen(pSrc); + const size_t src_len_plus_terminator = src_len + 1; + + if (src_len_plus_terminator <= dst_len) + memcpy(pDst, pSrc, src_len_plus_terminator); + else + { + if (dst_len > 1) + memcpy(pDst, pSrc, dst_len - 1); + pDst[dst_len - 1] = '\0'; + } + + return pDst; + } + + inline bool string_ends_with(const std::string& s, char c) + { + return (s.size() != 0) && (s.back() == c); + } + + inline bool string_split_path(const char *p, std::string *pDrive, std::string *pDir, std::string *pFilename, std::string *pExt) + { +#ifdef _MSC_VER + char drive_buf[_MAX_DRIVE] = { 0 }; + char dir_buf[_MAX_DIR] = { 0 }; + char fname_buf[_MAX_FNAME] = { 0 }; + char ext_buf[_MAX_EXT] = { 0 }; + + errno_t error = _splitpath_s(p, + pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, + pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, + pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0, + pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0); + if (error != 0) + return false; + + if (pDrive) *pDrive = drive_buf; + if (pDir) *pDir = dir_buf; + if (pFilename) *pFilename = fname_buf; + if (pExt) *pExt = ext_buf; + return true; +#else + char dirtmp[1024], nametmp[1024]; + strcpy_safe(dirtmp, sizeof(dirtmp), p); + strcpy_safe(nametmp, sizeof(nametmp), p); + + if (pDrive) + pDrive->resize(0); + + const char *pDirName = dirname(dirtmp); + const char* pBaseName = basename(nametmp); + if ((!pDirName) || (!pBaseName)) + return false; + + if (pDir) + { + *pDir = pDirName; + if ((pDir->size()) && (pDir->back() != '/')) + *pDir += "/"; + } + + if (pFilename) + { + *pFilename = pBaseName; + string_remove_extension(*pFilename); + } + + if (pExt) + { + *pExt = pBaseName; + *pExt = string_get_extension(*pExt); + if (pExt->size()) + *pExt = "." 
+ *pExt; + } + + return true; +#endif + } + + inline bool is_path_separator(char c) + { +#ifdef _WIN32 + return (c == '/') || (c == '\\'); +#else + return (c == '/'); +#endif + } + + inline bool is_drive_separator(char c) + { +#ifdef _WIN32 + return (c == ':'); +#else + (void)c; + return false; +#endif + } + + inline void string_combine_path(std::string &dst, const char *p, const char *q) + { + std::string temp(p); + if (temp.size() && !is_path_separator(q[0])) + { + if (!is_path_separator(temp.back())) + temp.append(1, BASISU_PATH_SEPERATOR_CHAR); + } + temp += q; + dst.swap(temp); + } + + inline void string_combine_path(std::string &dst, const char *p, const char *q, const char *r) + { + string_combine_path(dst, p, q); + string_combine_path(dst, dst.c_str(), r); + } + + inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt) + { + string_combine_path(dst, p, q, r); + if ((!string_ends_with(dst, '.')) && (pExt[0]) && (pExt[0] != '.')) + dst.append(1, '.'); + dst.append(pExt); + } + + inline bool string_get_pathname(const char *p, std::string &path) + { + std::string temp_drive, temp_path; + if (!string_split_path(p, &temp_drive, &temp_path, NULL, NULL)) + return false; + string_combine_path(path, temp_drive.c_str(), temp_path.c_str()); + return true; + } + + inline bool string_get_filename(const char *p, std::string &filename) + { + std::string temp_ext; + if (!string_split_path(p, nullptr, nullptr, &filename, &temp_ext)) + return false; + filename += temp_ext; + return true; + } + + class rand + { + std::mt19937 m_mt; + + public: + rand() { } + + rand(uint32_t s) { seed(s); } + void seed(uint32_t s) { m_mt.seed(s); } + + // between [l,h] + int irand(int l, int h) { std::uniform_int_distribution d(l, h); return d(m_mt); } + + uint32_t urand32() { return static_cast(irand(INT32_MIN, INT32_MAX)); } + + bool bit() { return irand(0, 1) == 1; } + + uint8_t byte() { return static_cast(urand32()); } + 
+ // between [l,h) + float frand(float l, float h) { std::uniform_real_distribution d(l, h); return d(m_mt); } + + float gaussian(float mean, float stddev) { std::normal_distribution d(mean, stddev); return d(m_mt); } + }; + + class priority_queue + { + public: + priority_queue() : + m_size(0) + { + } + + void clear() + { + m_heap.clear(); + m_size = 0; + } + + void init(uint32_t max_entries, uint32_t first_index, float first_priority) + { + m_heap.resize(max_entries + 1); + m_heap[1].m_index = first_index; + m_heap[1].m_priority = first_priority; + m_size = 1; + } + + inline uint32_t size() const { return m_size; } + + inline uint32_t get_top_index() const { return m_heap[1].m_index; } + inline float get_top_priority() const { return m_heap[1].m_priority; } + + inline void delete_top() + { + assert(m_size > 0); + m_heap[1] = m_heap[m_size]; + m_size--; + if (m_size) + down_heap(1); + } + + inline void add_heap(uint32_t index, float priority) + { + m_size++; + + uint32_t k = m_size; + + if (m_size >= m_heap.size()) + m_heap.resize(m_size + 1); + + for (;;) + { + uint32_t parent_index = k >> 1; + if ((!parent_index) || (m_heap[parent_index].m_priority > priority)) + break; + m_heap[k] = m_heap[parent_index]; + k = parent_index; + } + + m_heap[k].m_index = index; + m_heap[k].m_priority = priority; + } + + private: + struct entry + { + uint32_t m_index; + float m_priority; + }; + + basisu::vector m_heap; + uint32_t m_size; + + // Push down entry at index + inline void down_heap(uint32_t heap_index) + { + uint32_t orig_index = m_heap[heap_index].m_index; + const float orig_priority = m_heap[heap_index].m_priority; + + uint32_t child_index; + while ((child_index = (heap_index << 1)) <= m_size) + { + if ((child_index < m_size) && (m_heap[child_index].m_priority < m_heap[child_index + 1].m_priority)) ++child_index; + if (orig_priority > m_heap[child_index].m_priority) + break; + m_heap[heap_index] = m_heap[child_index]; + heap_index = child_index; + } + + 
m_heap[heap_index].m_index = orig_index; + m_heap[heap_index].m_priority = orig_priority; + } + }; + + // Tree structured vector quantization (TSVQ) + + template + class tree_vector_quant + { + public: + typedef TrainingVectorType training_vec_type; + typedef std::pair training_vec_with_weight; + typedef basisu::vector< training_vec_with_weight > array_of_weighted_training_vecs; + + tree_vector_quant() : + m_next_codebook_index(0) + { + } + + void clear() + { + clear_vector(m_training_vecs); + clear_vector(m_nodes); + m_next_codebook_index = 0; + } + + void add_training_vec(const TrainingVectorType &v, uint64_t weight) { m_training_vecs.push_back(std::make_pair(v, weight)); } + + size_t get_total_training_vecs() const { return m_training_vecs.size(); } + const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; } + array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; } + + void retrieve(basisu::vector< basisu::vector > &codebook) const + { + for (uint32_t i = 0; i < m_nodes.size(); i++) + { + const tsvq_node &n = m_nodes[i]; + if (!n.is_leaf()) + continue; + + codebook.resize(codebook.size() + 1); + codebook.back() = n.m_training_vecs; + } + } + + void retrieve(basisu::vector &codebook) const + { + for (uint32_t i = 0; i < m_nodes.size(); i++) + { + const tsvq_node &n = m_nodes[i]; + if (!n.is_leaf()) + continue; + + codebook.resize(codebook.size() + 1); + codebook.back() = n.m_origin; + } + } + + void retrieve(uint32_t max_clusters, basisu::vector &codebook) const + { + uint_vec node_stack; + node_stack.reserve(512); + + codebook.resize(0); + codebook.reserve(max_clusters); + + uint32_t node_index = 0; + + while (true) + { + const tsvq_node& cur = m_nodes[node_index]; + + if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters)) + { + codebook.resize(codebook.size() + 1); + codebook.back() = cur.m_training_vecs; + + if (node_stack.empty()) + break; + + node_index = node_stack.back(); + 
node_stack.pop_back(); + continue; + } + + node_stack.push_back(cur.m_right_index); + node_index = cur.m_left_index; + } + } + + bool generate(uint32_t max_size) + { + if (!m_training_vecs.size()) + return false; + + m_next_codebook_index = 0; + + clear_vector(m_nodes); + m_nodes.reserve(max_size * 2 + 1); + + m_nodes.push_back(prepare_root()); + + priority_queue var_heap; + var_heap.init(max_size, 0, m_nodes[0].m_var); + + basisu::vector l_children, r_children; + + // Now split the worst nodes + l_children.reserve(m_training_vecs.size() + 1); + r_children.reserve(m_training_vecs.size() + 1); + + uint32_t total_leaf_nodes = 1; + + //interval_timer tm; + //tm.start(); + + while ((var_heap.size()) && (total_leaf_nodes < max_size)) + { + const uint32_t node_index = var_heap.get_top_index(); + const tsvq_node &node = m_nodes[node_index]; + + assert(node.m_var == var_heap.get_top_priority()); + assert(node.is_leaf()); + + var_heap.delete_top(); + + if (node.m_training_vecs.size() > 1) + { + if (split_node(node_index, var_heap, l_children, r_children)) + { + // This removes one leaf node (making an internal node) and replaces it with two new leaves, so +1 total. 
+ total_leaf_nodes += 1; + } + } + } + + //debug_printf("tree_vector_quant::generate %u: %3.3f secs\n", TrainingVectorType::num_elements, tm.get_elapsed_secs()); + + return true; + } + + private: + class tsvq_node + { + public: + inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1), m_codebook_index(-1) { } + + // vecs is erased + inline void set(const TrainingVectorType &org, uint64_t weight, float var, basisu::vector &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); } + + inline bool is_leaf() const { return m_left_index < 0; } + + float m_var; + uint64_t m_weight; + TrainingVectorType m_origin; + int32_t m_left_index, m_right_index; + basisu::vector m_training_vecs; + int m_codebook_index; + }; + + typedef basisu::vector tsvq_node_vec; + tsvq_node_vec m_nodes; + + array_of_weighted_training_vecs m_training_vecs; + + uint32_t m_next_codebook_index; + + tsvq_node prepare_root() const + { + double ttsum = 0.0f; + + // Prepare root node containing all training vectors + tsvq_node root; + root.m_training_vecs.reserve(m_training_vecs.size()); + + for (uint32_t i = 0; i < m_training_vecs.size(); i++) + { + const TrainingVectorType &v = m_training_vecs[i].first; + const uint64_t weight = m_training_vecs[i].second; + + root.m_training_vecs.push_back(i); + + root.m_origin += (v * static_cast(weight)); + root.m_weight += weight; + + ttsum += v.dot(v) * weight; + } + + root.m_var = static_cast(ttsum - (root.m_origin.dot(root.m_origin) / root.m_weight)); + + root.m_origin *= (1.0f / root.m_weight); + + return root; + } + + bool split_node(uint32_t node_index, priority_queue &var_heap, basisu::vector &l_children, basisu::vector &r_children) + { + TrainingVectorType l_child_org, r_child_org; + uint64_t l_weight = 0, r_weight = 0; + float l_var = 0.0f, r_var = 0.0f; + + // Compute initial left/right child origins + if (!prep_split(m_nodes[node_index], l_child_org, r_child_org)) + return false; + + // Use 
k-means iterations to refine these children vectors + if (!refine_split(m_nodes[node_index], l_child_org, l_weight, l_var, l_children, r_child_org, r_weight, r_var, r_children)) + return false; + + // Create children + const uint32_t l_child_index = (uint32_t)m_nodes.size(), r_child_index = (uint32_t)m_nodes.size() + 1; + + m_nodes[node_index].m_left_index = l_child_index; + m_nodes[node_index].m_right_index = r_child_index; + + m_nodes[node_index].m_codebook_index = m_next_codebook_index; + m_next_codebook_index++; + + m_nodes.resize(m_nodes.size() + 2); + + tsvq_node &l_child = m_nodes[l_child_index], &r_child = m_nodes[r_child_index]; + + l_child.set(l_child_org, l_weight, l_var, l_children); + r_child.set(r_child_org, r_weight, r_var, r_children); + + if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1)) + { + TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first); + + for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++) + { + if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first)) + { + l_child.m_var = 1e-4f; + break; + } + } + } + + if ((r_child.m_var <= 0.0f) && (r_child.m_training_vecs.size() > 1)) + { + TrainingVectorType v(m_training_vecs[r_child.m_training_vecs[0]].first); + + for (uint32_t i = 1; i < r_child.m_training_vecs.size(); i++) + { + if (!(v == m_training_vecs[r_child.m_training_vecs[i]].first)) + { + r_child.m_var = 1e-4f; + break; + } + } + } + + if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1)) + var_heap.add_heap(l_child_index, l_child.m_var); + + if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1)) + var_heap.add_heap(r_child_index, r_child.m_var); + + return true; + } + + TrainingVectorType compute_split_axis(const tsvq_node &node) const + { + const uint32_t N = TrainingVectorType::num_elements; + + matrix cmatrix; + + if ((N != 16) || (!g_cpu_supports_sse41)) + { + cmatrix.set_zero(); + + // Compute covariance matrix from weighted input vectors + for 
(uint32_t i = 0; i < node.m_training_vecs.size(); i++) + { + const TrainingVectorType v(m_training_vecs[node.m_training_vecs[i]].first - node.m_origin); + const TrainingVectorType w(static_cast(m_training_vecs[node.m_training_vecs[i]].second) * v); + + for (uint32_t x = 0; x < N; x++) + for (uint32_t y = x; y < N; y++) + cmatrix[x][y] = cmatrix[x][y] + v[x] * w[y]; + } + } + else + { +#if BASISU_SUPPORT_SSE + // Specialize the case with 16x16 matrices, which are quite expensive without SIMD. + // This SSE function takes pointers to void types, so do some sanity checks. + assert(sizeof(TrainingVectorType) == sizeof(float) * 16); + assert(sizeof(training_vec_with_weight) == sizeof(std::pair)); + update_covar_matrix_16x16_sse41(node.m_training_vecs.size_u32(), m_training_vecs.data(), &node.m_origin, node.m_training_vecs.data(), &cmatrix); +#endif + } + + const float renorm_scale = 1.0f / node.m_weight; + + for (uint32_t x = 0; x < N; x++) + for (uint32_t y = x; y < N; y++) + cmatrix[x][y] *= renorm_scale; + + // Diagonal flip + for (uint32_t x = 0; x < (N - 1); x++) + for (uint32_t y = x + 1; y < N; y++) + cmatrix[y][x] = cmatrix[x][y]; + + return compute_pca_from_covar(cmatrix); + } + + bool prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const + { + //const uint32_t N = TrainingVectorType::num_elements; + + if (2 == node.m_training_vecs.size()) + { + l_child_result = m_training_vecs[node.m_training_vecs[0]].first; + r_child_result = m_training_vecs[node.m_training_vecs[1]].first; + return true; + } + + TrainingVectorType axis(compute_split_axis(node)), l_child(0.0f), r_child(0.0f); + double l_weight = 0.0f, r_weight = 0.0f; + + // Compute initial left/right children + for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) + { + const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second; + + const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first; + + double t = (v - 
node.m_origin).dot(axis); + if (t >= 0.0f) + { + r_child += v * weight; + r_weight += weight; + } + else + { + l_child += v * weight; + l_weight += weight; + } + } + + if ((l_weight > 0.0f) && (r_weight > 0.0f)) + { + l_child_result = l_child * static_cast(1.0f / l_weight); + r_child_result = r_child * static_cast(1.0f / r_weight); + } + else + { + TrainingVectorType l(1e+20f); + TrainingVectorType h(-1e+20f); + for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) + { + const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; + + l = TrainingVectorType::component_min(l, v); + h = TrainingVectorType::component_max(h, v); + } + + TrainingVectorType r(h - l); + + float largest_axis_v = 0.0f; + int largest_axis_index = -1; + for (uint32_t i = 0; i < TrainingVectorType::num_elements; i++) + { + if (r[i] > largest_axis_v) + { + largest_axis_v = r[i]; + largest_axis_index = i; + } + } + + if (largest_axis_index < 0) + return false; + + basisu::vector keys(node.m_training_vecs.size()); + for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) + keys[i] = m_training_vecs[node.m_training_vecs[i]].first[largest_axis_index]; + + uint_vec indices(node.m_training_vecs.size()); + indirect_sort((uint32_t)node.m_training_vecs.size(), &indices[0], &keys[0]); + + l_child.set_zero(); + l_weight = 0; + + r_child.set_zero(); + r_weight = 0; + + const uint32_t half_index = (uint32_t)node.m_training_vecs.size() / 2; + for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) + { + const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second; + + const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; + + if (i < half_index) + { + l_child += v * weight; + l_weight += weight; + } + else + { + r_child += v * weight; + r_weight += weight; + } + } + + if ((l_weight > 0.0f) && (r_weight > 0.0f)) + { + l_child_result = l_child * static_cast(1.0f / l_weight); + r_child_result = r_child * static_cast(1.0f / r_weight); + } + else + 
{ + l_child_result = l; + r_child_result = h; + } + } + + return true; + } + + bool refine_split(const tsvq_node &node, + TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, basisu::vector &l_children, + TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, basisu::vector &r_children) const + { + l_children.reserve(node.m_training_vecs.size()); + r_children.reserve(node.m_training_vecs.size()); + + float prev_total_variance = 1e+10f; + + // Refine left/right children locations using k-means iterations + const uint32_t cMaxIters = 6; + for (uint32_t iter = 0; iter < cMaxIters; iter++) + { + l_children.resize(0); + r_children.resize(0); + + TrainingVectorType new_l_child(cZero), new_r_child(cZero); + + double l_ttsum = 0.0f, r_ttsum = 0.0f; + + l_weight = 0; + r_weight = 0; + + for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) + { + const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first; + const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second; + + double left_dist2 = l_child.squared_distance_d(v), right_dist2 = r_child.squared_distance_d(v); + + if (left_dist2 >= right_dist2) + { + new_r_child += (v * static_cast(weight)); + r_weight += weight; + + r_ttsum += weight * v.dot(v); + r_children.push_back(node.m_training_vecs[i]); + } + else + { + new_l_child += (v * static_cast(weight)); + l_weight += weight; + + l_ttsum += weight * v.dot(v); + l_children.push_back(node.m_training_vecs[i]); + } + } + + // Node is unsplittable using the above algorithm - try something else to split it up. 
+ if ((!l_weight) || (!r_weight)) + { + l_children.resize(0); + new_l_child.set(0.0f); + l_ttsum = 0.0f; + l_weight = 0; + + r_children.resize(0); + new_r_child.set(0.0f); + r_ttsum = 0.0f; + r_weight = 0; + + TrainingVectorType firstVec; + for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) + { + const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; + const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second; + + if ((!i) || (v == firstVec)) + { + firstVec = v; + + new_r_child += (v * static_cast(weight)); + r_weight += weight; + + r_ttsum += weight * v.dot(v); + r_children.push_back(node.m_training_vecs[i]); + } + else + { + new_l_child += (v * static_cast(weight)); + l_weight += weight; + + l_ttsum += weight * v.dot(v); + l_children.push_back(node.m_training_vecs[i]); + } + } + + if ((!l_weight) || (!r_weight)) + return false; + } + + l_var = static_cast(l_ttsum - (new_l_child.dot(new_l_child) / l_weight)); + r_var = static_cast(r_ttsum - (new_r_child.dot(new_r_child) / r_weight)); + + new_l_child *= (1.0f / l_weight); + new_r_child *= (1.0f / r_weight); + + l_child = new_l_child; + r_child = new_r_child; + + float total_var = l_var + r_var; + const float cGiveupVariance = .00001f; + if (total_var < cGiveupVariance) + break; + + // Check to see if the variance has settled + const float cVarianceDeltaThresh = .00125f; + if (((prev_total_variance - total_var) / total_var) < cVarianceDeltaThresh) + break; + + prev_total_variance = total_var; + } + + return true; + } + }; + + struct weighted_block_group + { + uint64_t m_total_weight; + uint_vec m_indices; + }; + + template + bool generate_hierarchical_codebook_threaded_internal(Quantizer& q, + uint32_t max_codebook_size, uint32_t max_parent_codebook_size, + basisu::vector& codebook, + basisu::vector& parent_codebook, + uint32_t max_threads, bool limit_clusterizers, job_pool *pJob_pool) + { + codebook.resize(0); + parent_codebook.resize(0); + + if ((max_threads <= 1) || 
(q.get_training_vecs().size() < 256) || (max_codebook_size < max_threads * 16)) + { + if (!q.generate(max_codebook_size)) + return false; + + q.retrieve(codebook); + + if (max_parent_codebook_size) + q.retrieve(max_parent_codebook_size, parent_codebook); + + return true; + } + + const uint32_t cMaxThreads = 16; + if (max_threads > cMaxThreads) + max_threads = cMaxThreads; + + if (!q.generate(max_threads)) + return false; + + basisu::vector initial_codebook; + + q.retrieve(initial_codebook); + + if (initial_codebook.size() < max_threads) + { + codebook = initial_codebook; + + if (max_parent_codebook_size) + q.retrieve(max_parent_codebook_size, parent_codebook); + + return true; + } + + Quantizer quantizers[cMaxThreads]; + + bool success_flags[cMaxThreads]; + clear_obj(success_flags); + + basisu::vector local_clusters[cMaxThreads]; + basisu::vector local_parent_clusters[cMaxThreads]; + + for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++) + { + pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, &q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] { + + Quantizer& lq = quantizers[thread_iter]; + uint_vec& cluster_indices = initial_codebook[thread_iter]; + + uint_vec local_to_global(cluster_indices.size()); + + for (uint32_t i = 0; i < cluster_indices.size(); i++) + { + const uint32_t global_training_vec_index = cluster_indices[i]; + local_to_global[i] = global_training_vec_index; + + lq.add_training_vec(q.get_training_vecs()[global_training_vec_index].first, q.get_training_vecs()[global_training_vec_index].second); + } + + const uint32_t max_clusters = limit_clusterizers ? 
((max_codebook_size + max_threads - 1) / max_threads) : (uint32_t)lq.get_total_training_vecs(); + + success_flags[thread_iter] = lq.generate(max_clusters); + + if (success_flags[thread_iter]) + { + lq.retrieve(local_clusters[thread_iter]); + + for (uint32_t i = 0; i < local_clusters[thread_iter].size(); i++) + { + for (uint32_t j = 0; j < local_clusters[thread_iter][i].size(); j++) + local_clusters[thread_iter][i][j] = local_to_global[local_clusters[thread_iter][i][j]]; + } + + if (max_parent_codebook_size) + { + lq.retrieve((max_parent_codebook_size + max_threads - 1) / max_threads, local_parent_clusters[thread_iter]); + + for (uint32_t i = 0; i < local_parent_clusters[thread_iter].size(); i++) + { + for (uint32_t j = 0; j < local_parent_clusters[thread_iter][i].size(); j++) + local_parent_clusters[thread_iter][i][j] = local_to_global[local_parent_clusters[thread_iter][i][j]]; + } + } + } + + } ); + + } // thread_iter + + pJob_pool->wait_for_all(); + + uint32_t total_clusters = 0, total_parent_clusters = 0; + + for (int thread_iter = 0; thread_iter < (int)max_threads; thread_iter++) + { + if (!success_flags[thread_iter]) + return false; + total_clusters += (uint32_t)local_clusters[thread_iter].size(); + total_parent_clusters += (uint32_t)local_parent_clusters[thread_iter].size(); + } + + codebook.reserve(total_clusters); + parent_codebook.reserve(total_parent_clusters); + + for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++) + { + for (uint32_t j = 0; j < local_clusters[thread_iter].size(); j++) + { + codebook.resize(codebook.size() + 1); + codebook.back().swap(local_clusters[thread_iter][j]); + } + + for (uint32_t j = 0; j < local_parent_clusters[thread_iter].size(); j++) + { + parent_codebook.resize(parent_codebook.size() + 1); + parent_codebook.back().swap(local_parent_clusters[thread_iter][j]); + } + } + + return true; + } + + template + bool generate_hierarchical_codebook_threaded(Quantizer& q, + uint32_t max_codebook_size, uint32_t 
max_parent_codebook_size, + basisu::vector& codebook, + basisu::vector& parent_codebook, + uint32_t max_threads, job_pool *pJob_pool, + bool even_odd_input_pairs_equal) + { + typedef bit_hasher training_vec_bit_hasher; + + // rg 6/24/2025 - Cross platform determinism +#if 0 + typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group, + training_vec_bit_hasher> group_hash; +#else + typedef std::map< typename Quantizer::training_vec_type, weighted_block_group > group_hash; +#endif + + //interval_timer tm; + //tm.start(); + + group_hash unique_vecs; + + // rg 6/24/2025 - Cross platform determinism +#if 0 + unique_vecs.reserve(20000); +#endif + + weighted_block_group g; + + if (even_odd_input_pairs_equal) + { + g.m_indices.resize(2); + + assert(q.get_training_vecs().size() >= 2 && (q.get_training_vecs().size() & 1) == 0); + + for (uint32_t i = 0; i < q.get_training_vecs().size(); i += 2) + { + assert(q.get_training_vecs()[i].first == q.get_training_vecs()[i + 1].first); + + g.m_total_weight = q.get_training_vecs()[i].second + q.get_training_vecs()[i + 1].second; + g.m_indices[0] = i; + g.m_indices[1] = i + 1; + + auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g)); + + if (!ins_res.second) + { + (ins_res.first)->second.m_total_weight += g.m_total_weight; + (ins_res.first)->second.m_indices.push_back(i); + (ins_res.first)->second.m_indices.push_back(i + 1); + } + } + } + else + { + g.m_indices.resize(1); + + for (uint32_t i = 0; i < q.get_training_vecs().size(); i++) + { + g.m_total_weight = q.get_training_vecs()[i].second; + g.m_indices[0] = i; + + auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g)); + + if (!ins_res.second) + { + (ins_res.first)->second.m_total_weight += g.m_total_weight; + (ins_res.first)->second.m_indices.push_back(i); + } + } + } + + //debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors, %3.3f secs\n", 
q.get_total_training_vecs(), (uint32_t)unique_vecs.size(), tm.get_elapsed_secs()); + debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size()); + + Quantizer group_quant; + typedef typename group_hash::const_iterator group_hash_const_iter; + basisu::vector unique_vec_iters; + unique_vec_iters.reserve(unique_vecs.size()); + + for (auto iter = unique_vecs.begin(); iter != unique_vecs.end(); ++iter) + { + group_quant.add_training_vec(iter->first, iter->second.m_total_weight); + unique_vec_iters.push_back(iter); + } + + bool limit_clusterizers = true; + if (unique_vecs.size() <= max_codebook_size) + limit_clusterizers = false; + + debug_printf("Limit clusterizers: %u\n", limit_clusterizers); + + basisu::vector group_codebook, group_parent_codebook; + bool status = generate_hierarchical_codebook_threaded_internal(group_quant, + max_codebook_size, max_parent_codebook_size, + group_codebook, + group_parent_codebook, + (unique_vecs.size() < 65536*4) ? 
1 : max_threads, limit_clusterizers, pJob_pool); + + if (!status) + return false; + + codebook.resize(0); + for (uint32_t i = 0; i < group_codebook.size(); i++) + { + codebook.resize(codebook.size() + 1); + + for (uint32_t j = 0; j < group_codebook[i].size(); j++) + { + const uint32_t group_index = group_codebook[i][j]; + + typename group_hash::const_iterator group_iter = unique_vec_iters[group_index]; + const uint_vec& training_vec_indices = group_iter->second.m_indices; + + append_vector(codebook.back(), training_vec_indices); + } + } + + parent_codebook.resize(0); + for (uint32_t i = 0; i < group_parent_codebook.size(); i++) + { + parent_codebook.resize(parent_codebook.size() + 1); + + for (uint32_t j = 0; j < group_parent_codebook[i].size(); j++) + { + const uint32_t group_index = group_parent_codebook[i][j]; + + typename group_hash::const_iterator group_iter = unique_vec_iters[group_index]; + const uint_vec& training_vec_indices = group_iter->second.m_indices; + + append_vector(parent_codebook.back(), training_vec_indices); + } + } + + return true; + } + + // Canonical Huffman coding + + class histogram + { + basisu::vector m_hist; + + public: + histogram(uint32_t size = 0) { init(size); } + + void clear() + { + clear_vector(m_hist); + } + + void init(uint32_t size) + { + m_hist.resize(0); + m_hist.resize(size); + } + + inline uint32_t size() const { return static_cast(m_hist.size()); } + + inline const uint32_t &operator[] (uint32_t index) const + { + return m_hist[index]; + } + + inline uint32_t &operator[] (uint32_t index) + { + return m_hist[index]; + } + + inline void inc(uint32_t index) + { + m_hist[index]++; + } + + uint64_t get_total() const + { + uint64_t total = 0; + for (uint32_t i = 0; i < m_hist.size(); ++i) + total += m_hist[i]; + return total; + } + + double get_entropy() const + { + double total = static_cast(get_total()); + if (total == 0.0f) + return 0.0f; + + const double inv_total = 1.0f / total; + const double neg_inv_log2 = -1.0f / 
log(2.0f); + + double e = 0.0f; + for (uint32_t i = 0; i < m_hist.size(); i++) + if (m_hist[i]) + e += log(m_hist[i] * inv_total) * neg_inv_log2 * static_cast(m_hist[i]); + + return e; + } + }; + + struct sym_freq + { + uint32_t m_key; + uint16_t m_sym_index; + }; + + sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1); + void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms); + void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size); + + class huffman_encoding_table + { + public: + huffman_encoding_table() + { + } + + void clear() + { + clear_vector(m_codes); + clear_vector(m_code_sizes); + } + + bool init(const histogram &h, uint32_t max_code_size = cHuffmanMaxSupportedCodeSize) + { + return init(h.size(), &h[0], max_code_size); + } + + bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size); + bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size); + + inline const uint16_vec &get_codes() const { return m_codes; } + inline const uint8_vec &get_code_sizes() const { return m_code_sizes; } + + uint32_t get_total_used_codes() const + { + for (int i = static_cast(m_code_sizes.size()) - 1; i >= 0; i--) + if (m_code_sizes[i]) + return i + 1; + return 0; + } + + private: + uint16_vec m_codes; + uint8_vec m_code_sizes; + }; + + class bitwise_coder + { + public: + bitwise_coder() : + m_bit_buffer(0), + m_bit_buffer_size(0), + m_total_bits(0) + { + } + + bitwise_coder(const bitwise_coder& other) : + m_bytes(other.m_bytes), + m_bit_buffer(other.m_bit_buffer), + m_bit_buffer_size(other.m_bit_buffer_size), + m_total_bits(other.m_total_bits) + { + } + + bitwise_coder(bitwise_coder&& other) : + m_bytes(std::move(other.m_bytes)), + m_bit_buffer(other.m_bit_buffer), + m_bit_buffer_size(other.m_bit_buffer_size), + m_total_bits(other.m_total_bits) + { + } + + bitwise_coder& operator= (const bitwise_coder& rhs) + { + if (this 
== &rhs) + return *this; + + m_bytes = rhs.m_bytes; + m_bit_buffer = rhs.m_bit_buffer; + m_bit_buffer_size = rhs.m_bit_buffer_size; + m_total_bits = rhs.m_total_bits; + + return *this; + } + + bitwise_coder& operator= (bitwise_coder&& rhs) + { + if (this == &rhs) + return *this; + + m_bytes = std::move(rhs.m_bytes); + m_bit_buffer = rhs.m_bit_buffer; + m_bit_buffer_size = rhs.m_bit_buffer_size; + m_total_bits = rhs.m_total_bits; + + return *this; + } + + inline void clear() + { + clear_vector(m_bytes); + m_bit_buffer = 0; + m_bit_buffer_size = 0; + m_total_bits = 0; + } + + inline void restart() + { + m_bytes.resize(0); + m_bit_buffer = 0; + m_bit_buffer_size = 0; + m_total_bits = 0; + } + + inline const uint8_vec &get_bytes() const { return m_bytes; } + inline uint8_vec& get_bytes() { return m_bytes; } + + inline void reserve(uint32_t size) { m_bytes.reserve(size); } + + inline uint64_t get_total_bits() const { return m_total_bits; } + inline uint32_t get_total_bits_u32() const { assert(m_total_bits <= UINT32_MAX); return static_cast(m_total_bits); } + inline void clear_total_bits() { m_total_bits = 0; } + + inline void init(uint32_t reserve_size = 1024) + { + m_bytes.reserve(reserve_size); + m_bytes.resize(0); + + m_bit_buffer = 0; + m_bit_buffer_size = 0; + m_total_bits = 0; + } + + inline uint32_t flush() + { + if (m_bit_buffer_size) + { + m_total_bits += 8 - (m_bit_buffer_size & 7); + append_byte(static_cast(m_bit_buffer)); + + m_bit_buffer = 0; + m_bit_buffer_size = 0; + + return 8; + } + + return 0; + } + + inline uint32_t put_bits(uint32_t bits, uint32_t num_bits) + { + assert(num_bits <= 32); + assert(bits < (1ULL << num_bits)); + + if (!num_bits) + return 0; + + m_total_bits += num_bits; + + uint64_t v = (static_cast(bits) << m_bit_buffer_size) | m_bit_buffer; + m_bit_buffer_size += num_bits; + + while (m_bit_buffer_size >= 8) + { + append_byte(static_cast(v)); + v >>= 8; + m_bit_buffer_size -= 8; + } + + m_bit_buffer = static_cast(v); + return num_bits; 
+ } + + inline uint32_t put_code(uint32_t sym, const huffman_encoding_table &tab) + { + uint32_t code = tab.get_codes()[sym]; + uint32_t code_size = tab.get_code_sizes()[sym]; + assert(code_size >= 1); + put_bits(code, code_size); + return code_size; + } + + inline uint32_t put_truncated_binary(uint32_t v, uint32_t n) + { + assert((n >= 2) && (v < n)); + + uint32_t k = floor_log2i(n); + uint32_t u = (1 << (k + 1)) - n; + + if (v < u) + return put_bits(v, k); + + uint32_t x = v + u; + assert((x >> 1) >= u); + + put_bits(x >> 1, k); + put_bits(x & 1, 1); + return k + 1; + } + + inline uint32_t put_rice(uint32_t v, uint32_t m) + { + assert(m); + + const uint64_t start_bits = m_total_bits; + + uint32_t q = v >> m, r = v & ((1 << m) - 1); + + // rice coding sanity check + assert(q <= 64); + + for (; q > 16; q -= 16) + put_bits(0xFFFF, 16); + + put_bits((1 << q) - 1, q); + put_bits(r << 1, m + 1); + + return (uint32_t)(m_total_bits - start_bits); + } + + inline uint32_t put_vlc(uint32_t v, uint32_t chunk_bits) + { + assert(chunk_bits); + + const uint32_t chunk_size = 1 << chunk_bits; + const uint32_t chunk_mask = chunk_size - 1; + + uint32_t total_bits = 0; + + for ( ; ; ) + { + uint32_t next_v = v >> chunk_bits; + + total_bits += put_bits((v & chunk_mask) | (next_v ? 
chunk_size : 0), chunk_bits + 1); + if (!next_v) + break; + + v = next_v; + } + + return total_bits; + } + + uint32_t emit_huffman_table(const huffman_encoding_table &tab); + + void append(const bitwise_coder& other) + { + for (uint32_t i = 0; i < other.m_bytes.size(); i++) + put_bits(other.m_bytes[i], 8); + + if (other.m_bit_buffer_size) + put_bits(other.m_bit_buffer, other.m_bit_buffer_size); + } + + private: + uint8_vec m_bytes; + uint32_t m_bit_buffer, m_bit_buffer_size; + uint64_t m_total_bits; + + inline void append_byte(uint8_t c) + { + //m_bytes.resize(m_bytes.size() + 1); + //m_bytes.back() = c; + + m_bytes.push_back(c); + } + + static void end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len); + static void end_zero_run(uint16_vec &syms, uint32_t &run_size); + }; + + class huff2D + { + public: + huff2D() { } + huff2D(uint32_t bits_per_sym, uint32_t total_syms_per_group) { init(bits_per_sym, total_syms_per_group); } + + inline const histogram &get_histogram() const { return m_histogram; } + inline const huffman_encoding_table &get_encoding_table() const { return m_encoding_table; } + + inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group) + { + assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1); + + m_bits_per_sym = bits_per_sym; + m_total_syms_per_group = total_syms_per_group; + m_cur_sym_bits = 0; + m_cur_num_syms = 0; + m_decode_syms_remaining = 0; + m_next_decoder_group_index = 0; + + m_histogram.init(1 << (bits_per_sym * total_syms_per_group)); + } + + inline void clear() + { + m_group_bits.clear(); + + m_cur_sym_bits = 0; + m_cur_num_syms = 0; + m_decode_syms_remaining = 0; + m_next_decoder_group_index = 0; + } + + inline void emit(uint32_t sym) + { + m_cur_sym_bits |= (sym << (m_cur_num_syms * m_bits_per_sym)); + m_cur_num_syms++; + + if (m_cur_num_syms == m_total_syms_per_group) + flush(); + } + + inline void flush() + { + if (m_cur_num_syms) + { + 
m_group_bits.push_back(m_cur_sym_bits); + m_histogram.inc(m_cur_sym_bits); + + m_cur_sym_bits = 0; + m_cur_num_syms = 0; + } + } + + inline bool start_encoding(uint32_t code_size_limit = 16) + { + flush(); + + if (!m_encoding_table.init(m_histogram, code_size_limit)) + return false; + + m_decode_syms_remaining = 0; + m_next_decoder_group_index = 0; + + return true; + } + + inline uint32_t emit_next_sym(bitwise_coder &c) + { + uint32_t bits = 0; + + if (!m_decode_syms_remaining) + { + bits = c.put_code(m_group_bits[m_next_decoder_group_index++], m_encoding_table); + m_decode_syms_remaining = m_total_syms_per_group; + } + + m_decode_syms_remaining--; + return bits; + } + + inline void emit_flush() + { + m_decode_syms_remaining = 0; + } + + private: + uint_vec m_group_bits; + huffman_encoding_table m_encoding_table; + histogram m_histogram; + uint32_t m_bits_per_sym, m_total_syms_per_group, m_cur_sym_bits, m_cur_num_syms, m_next_decoder_group_index, m_decode_syms_remaining; + }; + + bool huffman_test(int rand_seed); + + // VQ index reordering + + class palette_index_reorderer + { + public: + palette_index_reorderer() + { + } + + void clear() + { + clear_vector(m_hist); + clear_vector(m_total_count_to_picked); + clear_vector(m_entries_picked); + clear_vector(m_entries_to_do); + clear_vector(m_remap_table); + } + + // returns [0,1] distance of entry i to entry j + typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx); + + void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); + + // Table remaps old to new symbol indices + inline const uint_vec &get_remap_table() const { return m_remap_table; } + + private: + uint_vec m_hist, m_total_count_to_picked, m_entries_picked, m_entries_to_do, m_remap_table; + + inline uint32_t get_hist(int i, int j, int n) const { return (i > j) ? 
m_hist[j * n + i] : m_hist[i * n + j]; } + inline void inc_hist(int i, int j, int n) { if ((i != j) && (i < j) && (i != -1) && (j != -1)) { assert(((uint32_t)i < (uint32_t)n) && ((uint32_t)j < (uint32_t)n)); m_hist[i * n + j]++; } } + + void prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices); + void find_initial(uint32_t num_syms); + void find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); + float pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); + }; + + // Simple 32-bit 2D image class + + class image + { + public: + image() : + m_width(0), m_height(0), m_pitch(0) + { + } + + image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + m_width(0), m_height(0), m_pitch(0) + { + resize(w, h, p); + } + + image(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) : + m_width(0), m_height(0), m_pitch(0) + { + init(pImage, width, height, comps); + } + + image(const image &other) : + m_width(0), m_height(0), m_pitch(0) + { + *this = other; + } + + image(image&& other) : + m_width(other.m_width), m_height(other.m_height), m_pitch(other.m_pitch), + m_pixels(std::move(other.m_pixels)) + { + other.m_width = 0; + other.m_height = 0; + other.m_pitch = 0; + } + + image& operator= (image&& rhs) + { + if (this != &rhs) + { + m_width = rhs.m_width; + m_height = rhs.m_height; + m_pitch = rhs.m_pitch; + m_pixels = std::move(rhs.m_pixels); + + rhs.m_width = 0; + rhs.m_height = 0; + rhs.m_pitch = 0; + } + return *this; + } + + image &swap(image &other) + { + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pitch, other.m_pitch); + m_pixels.swap(other.m_pixels); + return *this; + } + + image &operator= (const image &rhs) + { + if (this != &rhs) + { + m_width = rhs.m_width; + m_height = rhs.m_height; + m_pitch = rhs.m_pitch; + m_pixels = rhs.m_pixels; + } + return 
*this; + } + + image &clear() + { + m_width = 0; + m_height = 0; + m_pitch = 0; + clear_vector(m_pixels); + return *this; + } + + image& match_dimensions(const image& other) + { + resize(other.get_width(), other.get_height()); + return *this; + } + + image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color) + { + return crop(w, h, p, background); + } + + image &set_all(const color_rgba &c) + { + for (uint32_t i = 0; i < m_pixels.size(); i++) + m_pixels[i] = c; + return *this; + } + + void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) + { + assert(comps >= 1 && comps <= 4); + + resize(width, height); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const uint8_t *pSrc = &pImage[(x + y * width) * comps]; + color_rgba &dst = (*this)(x, y); + + if (comps == 1) + { + dst.r = pSrc[0]; + dst.g = pSrc[0]; + dst.b = pSrc[0]; + dst.a = 255; + } + else if (comps == 2) + { + dst.r = pSrc[0]; + dst.g = pSrc[0]; + dst.b = pSrc[0]; + dst.a = pSrc[1]; + } + else + { + dst.r = pSrc[0]; + dst.g = pSrc[1]; + dst.b = pSrc[2]; + if (comps == 4) + dst.a = pSrc[3]; + else + dst.a = 255; + } + } + } + } + + image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c) + { + for (uint32_t iy = 0; iy < h; iy++) + for (uint32_t ix = 0; ix < w; ix++) + set_clipped(x + ix, y + iy, c); + return *this; + } + + image& fill_box_alpha(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba& c) + { + for (uint32_t iy = 0; iy < h; iy++) + for (uint32_t ix = 0; ix < w; ix++) + set_clipped_alpha(x + ix, y + iy, c); + return *this; + } + + image &crop_dup_borders(uint32_t w, uint32_t h) + { + const uint32_t orig_w = m_width, orig_h = m_height; + + crop(w, h); + + if (orig_w && orig_h) + { + if (m_width > orig_w) + { + for (uint32_t x = orig_w; x < m_width; x++) + for (uint32_t y = 0; y < m_height; y++) + set_clipped(x, y, get_clamped(minimum(x, 
orig_w - 1U), minimum(y, orig_h - 1U))); + } + + if (m_height > orig_h) + { + for (uint32_t y = orig_h; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); + } + } + return *this; + } + + // pPixels MUST have been allocated using malloc() (basisu::vector will eventually use free() on the pointer). + image& grant_ownership(color_rgba* pPixels, uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) + { + if (p == UINT32_MAX) + p = w; + + clear(); + + if ((!p) || (!w) || (!h)) + return *this; + + m_pixels.grant_ownership(pPixels, p * h, p * h); + + m_width = w; + m_height = h; + m_pitch = p; + + return *this; + } + + image &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba &background = g_black_color, bool init_image = true) + { + if (p == UINT32_MAX) + p = w; + + if ((w == m_width) && (m_height == h) && (m_pitch == p)) + return *this; + + if ((!w) || (!h) || (!p)) + { + clear(); + return *this; + } + + color_rgba_vec cur_state; + cur_state.swap(m_pixels); + + m_pixels.resize(p * h); + + if (init_image) + { + if (m_width || m_height) + { + for (uint32_t y = 0; y < h; y++) + { + for (uint32_t x = 0; x < w; x++) + { + if ((x < m_width) && (y < m_height)) + m_pixels[x + y * p] = cur_state[x + y * m_pitch]; + else + m_pixels[x + y * p] = background; + } + } + } + else + { + m_pixels.set_all(background); + } + } + + m_width = w; + m_height = h; + m_pitch = p; + + return *this; + } + + inline const color_rgba &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } + inline color_rgba &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } + + inline const color_rgba &get_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline color_rgba &get_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width 
- 1), clamp(y, 0, m_height - 1)); } + + inline const color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline image &set_clipped(int x, int y, const color_rgba &c) + { + if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) + (*this)(x, y) = c; + return *this; + } + + inline image& set_clipped_alpha(int x, int y, const color_rgba& c) + { + if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) + (*this)(x, y).m_comps[3] = c.m_comps[3]; + return *this; + } + + // Very straightforward blit with full clipping. Not fast, but it works. 
+ image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) + { + for (int y = 0; y < src_h; y++) + { + const int sy = src_y + y; + if (sy < 0) + continue; + else if (sy >= (int)src.get_height()) + break; + + for (int x = 0; x < src_w; x++) + { + const int sx = src_x + x; + if (sx < 0) + continue; + else if (sx >= (int)src.get_width()) + break; + + set_clipped(dst_x + x, dst_y + y, src(sx, sy)); + } + } + + return *this; + } + + const image &extract_block_clamped(color_rgba *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const + { + if (((src_x + w) > m_width) || ((src_y + h) > m_height)) + { + // Slower clamping case + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = get_clamped(src_x + x, src_y + y); + } + else + { + const color_rgba* pSrc = &m_pixels[src_x + src_y * m_pitch]; + + for (uint32_t y = 0; y < h; y++) + { + memcpy(pDst, pSrc, w * sizeof(color_rgba)); + pSrc += m_pitch; + pDst += w; + } + } + + return *this; + } + + image &set_block_clipped(const color_rgba *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + set_clipped(dst_x + x, dst_y + y, *pSrc++); + return *this; + } + + inline bool is_valid() const { return m_width > 0; } + + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + inline uint32_t get_pitch() const { return m_pitch; } + inline uint32_t get_total_pixels() const { return m_width * m_height; } + + inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } + inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } + inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } + + inline const color_rgba_vec &get_pixels() const { return m_pixels; } + inline color_rgba_vec &get_pixels() { return 
m_pixels; } + + inline const color_rgba *get_ptr() const { return &m_pixels[0]; } + inline color_rgba *get_ptr() { return &m_pixels[0]; } + + bool has_alpha(uint32_t channel = 3) const + { + for (uint32_t y = 0; y < m_height; ++y) + for (uint32_t x = 0; x < m_width; ++x) + if ((*this)(x, y)[channel] < 255) + return true; + + return false; + } + + image &set_alpha(uint8_t a) + { + for (uint32_t y = 0; y < m_height; ++y) + for (uint32_t x = 0; x < m_width; ++x) + (*this)(x, y).a = a; + return *this; + } + + image &flip_y() + { + for (uint32_t y = 0; y < m_height / 2; ++y) + for (uint32_t x = 0; x < m_width; ++x) + std::swap((*this)(x, y), (*this)(x, m_height - 1 - y)); + return *this; + } + + // TODO: There are many ways to do this, not sure this is the best way. + image &renormalize_normal_map() + { + for (uint32_t y = 0; y < m_height; y++) + { + for (uint32_t x = 0; x < m_width; x++) + { + color_rgba &c = (*this)(x, y); + if ((c.r == 128) && (c.g == 128) && (c.b == 128)) + continue; + + vec3F v(c.r, c.g, c.b); + v = (v * (2.0f / 255.0f)) - vec3F(1.0f); + v.clamp(-1.0f, 1.0f); + + float length = v.length(); + const float cValidThresh = .077f; + if (length < cValidThresh) + { + c.set(128, 128, 128, c.a); + } + else if (fabs(length - 1.0f) > cValidThresh) + { + if (length) + v /= length; + + for (uint32_t i = 0; i < 3; i++) + c[i] = static_cast(clamp(floor((v[i] + 1.0f) * 255.0f * .5f + .5f), 0.0f, 255.0f)); + + if ((c.g == 128) && (c.r == 128)) + { + if (c.b < 128) + c.b = 0; + else + c.b = 255; + } + } + } + } + return *this; + } + + void swap_rb() + { + for (auto& v : m_pixels) + std::swap(v.r, v.b); + } + + void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...); + + vec4F get_filtered_vec4F(float x, float y) const + { + x -= .5f; + y -= .5f; + + int ix = (int)floorf(x); + int iy = (int)floorf(y); + float wx = x - ix; + float wy = y - iy; + + color_rgba 
a(get_clamped(ix, iy)); + color_rgba b(get_clamped(ix + 1, iy)); + color_rgba c(get_clamped(ix, iy + 1)); + color_rgba d(get_clamped(ix + 1, iy + 1)); + + vec4F result; + + for (uint32_t i = 0; i < 4; i++) + { + const float top = lerp((float)a[i], (float)b[i], wx); + const float bot = lerp((float)c[i], (float)d[i], wx); + const float m = lerp((float)top, (float)bot, wy); + + result[i] = m; + } + + return result; + } + + // (x,y) - Continuous coordinates, where pixel centers are at (.5,.5), valid image coords are [0,width] and [0,height]. Clamp addressing. + color_rgba get_filtered(float x, float y) const + { + const vec4F fresult(get_filtered_vec4F(x, y)); + + color_rgba result; + + for (uint32_t i = 0; i < 4; i++) + result[i] = (uint8_t)clamp((int)(fresult[i] + .5f), 0, 255); + + return result; + } + + private: + uint32_t m_width, m_height, m_pitch; // all in pixels + color_rgba_vec m_pixels; + }; + + // Float images + + typedef basisu::vector vec4F_vec; + + class imagef + { + public: + imagef() : + m_width(0), m_height(0), m_pitch(0) + { + } + + imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + m_width(0), m_height(0), m_pitch(0) + { + resize(w, h, p); + } + + imagef(const imagef &other) : + m_width(0), m_height(0), m_pitch(0) + { + *this = other; + } + + imagef(imagef&& other) : + m_width(other.m_width), m_height(other.m_height), m_pitch(other.m_pitch), + m_pixels(std::move(other.m_pixels)) + { + other.m_width = 0; + other.m_height = 0; + other.m_pitch = 0; + } + + imagef& operator= (imagef&& rhs) + { + if (this != &rhs) + { + m_width = rhs.m_width; + m_height = rhs.m_height; + m_pitch = rhs.m_pitch; + m_pixels = std::move(rhs.m_pixels); + + rhs.m_width = 0; + rhs.m_height = 0; + rhs.m_pitch = 0; + } + return *this; + } + + imagef &swap(imagef &other) + { + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pitch, other.m_pitch); + m_pixels.swap(other.m_pixels); + return *this; + } + + imagef &operator= (const 
imagef &rhs) + { + if (this != &rhs) + { + m_width = rhs.m_width; + m_height = rhs.m_height; + m_pitch = rhs.m_pitch; + m_pixels = rhs.m_pixels; + } + return *this; + } + + imagef &clear() + { + m_width = 0; + m_height = 0; + m_pitch = 0; + clear_vector(m_pixels); + return *this; + } + + imagef &set(const image &src, const vec4F &scale = vec4F(1), const vec4F &bias = vec4F(0)) + { + const uint32_t width = src.get_width(); + const uint32_t height = src.get_height(); + + resize(width, height); + + for (int y = 0; y < (int)height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const color_rgba &src_pixel = src(x, y); + (*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]); + } + } + + return *this; + } + + imagef& match_dimensions(const imagef& other) + { + resize(other.get_width(), other.get_height()); + return *this; + } + + imagef &resize(const imagef &other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1)) + { + return resize(other.get_width(), other.get_height(), p, background); + } + + imagef &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1)) + { + return crop(w, h, p, background); + } + + imagef &set_all(const vec4F &c) + { + for (uint32_t i = 0; i < m_pixels.size(); i++) + m_pixels[i] = c; + return *this; + } + + imagef &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F &c) + { + for (uint32_t iy = 0; iy < h; iy++) + for (uint32_t ix = 0; ix < w; ix++) + set_clipped(x + ix, y + iy, c); + return *this; + } + + imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1)) + { + if (p == UINT32_MAX) + p = w; + + if ((w == m_width) && (m_height == h) && (m_pitch == p)) + return *this; + + if ((!w) || (!h) || (!p)) + { + clear(); + return *this; + } + + vec4F_vec cur_state; + cur_state.swap(m_pixels); + + 
m_pixels.resize(p * h); + + for (uint32_t y = 0; y < h; y++) + { + for (uint32_t x = 0; x < w; x++) + { + if ((x < m_width) && (y < m_height)) + m_pixels[x + y * p] = cur_state[x + y * m_pitch]; + else + m_pixels[x + y * p] = background; + } + } + + m_width = w; + m_height = h; + m_pitch = p; + + return *this; + } + + imagef& crop_dup_borders(uint32_t w, uint32_t h) + { + const uint32_t orig_w = m_width, orig_h = m_height; + + crop(w, h); + + if (orig_w && orig_h) + { + if (m_width > orig_w) + { + for (uint32_t x = orig_w; x < m_width; x++) + for (uint32_t y = 0; y < m_height; y++) + set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); + } + + if (m_height > orig_h) + { + for (uint32_t y = orig_h; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); + } + } + return *this; + } + + inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } + inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } + + inline const vec4F &get_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline vec4F &get_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + inline const vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? 
posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline imagef &set_clipped(int x, int y, const vec4F &c) + { + if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) + (*this)(x, y) = c; + return *this; + } + + // Very straightforward blit with full clipping. Not fast, but it works. + imagef &blit(const imagef &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) + { + for (int y = 0; y < src_h; y++) + { + const int sy = src_y + y; + if (sy < 0) + continue; + else if (sy >= (int)src.get_height()) + break; + + for (int x = 0; x < src_w; x++) + { + const int sx = src_x + x; + if (sx < 0) + continue; + else if (sx >= (int)src.get_width()) + break; + + set_clipped(dst_x + x, dst_y + y, src(sx, sy)); + } + } + + return *this; + } + + const imagef &extract_block_clamped(vec4F *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = get_clamped(src_x + x, src_y + y); + return *this; + } + + imagef &set_block_clipped(const vec4F *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + set_clipped(dst_x + x, dst_y + y, *pSrc++); + return *this; + } + + inline bool is_valid() const { return m_width > 0; } + + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + inline uint32_t get_pitch() const { return m_pitch; } + inline uint64_t get_total_pixels() const { return (uint64_t)m_width * m_height; } + + inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } + inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } + inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } + + inline const vec4F_vec &get_pixels() const { return m_pixels; } + inline 
vec4F_vec &get_pixels() { return m_pixels; } + + inline const vec4F *get_ptr() const { return &m_pixels[0]; } + inline vec4F *get_ptr() { return &m_pixels[0]; } + + bool clean_astc_hdr_pixels(float highest_mag) + { + bool status = true; + bool nan_msg = false; + bool inf_msg = false; + bool neg_zero_msg = false; + bool neg_msg = false; + bool clamp_msg = false; + + for (uint32_t iy = 0; iy < m_height; iy++) + { + for (uint32_t ix = 0; ix < m_width; ix++) + { + vec4F& c = (*this)(ix, iy); + + for (uint32_t s = 0; s < 4; s++) + { + float &p = c[s]; + union { float f; uint32_t u; } x; x.f = p; + + if ((std::isnan(p)) || (std::isinf(p)) || (x.u == 0x80000000)) + { + if (std::isnan(p)) + { + if (!nan_msg) + { + fprintf(stderr, "One or more input pixels was NaN, setting to 0.\n"); + nan_msg = true; + } + } + + if (std::isinf(p)) + { + if (!inf_msg) + { + fprintf(stderr, "One or more input pixels was INF, setting to 0.\n"); + inf_msg = true; + } + } + + if (x.u == 0x80000000) + { + if (!neg_zero_msg) + { + fprintf(stderr, "One or more input pixels was -0, setting them to 0.\n"); + neg_zero_msg = true; + } + } + + p = 0.0f; + status = false; + } + else + { + //const float o = p; + if (p < 0.0f) + { + p = 0.0f; + + if (!neg_msg) + { + fprintf(stderr, "One or more input pixels was negative -- setting these pixel components to 0 because ASTC HDR doesn't support signed values.\n"); + neg_msg = true; + } + + status = false; + } + + if (p > highest_mag) + { + p = highest_mag; + + if (!clamp_msg) + { + fprintf(stderr, "One or more input pixels had to be clamped to %f.\n", highest_mag); + clamp_msg = true; + } + + status = false; + } + } + } + } + } + + return status; + } + + imagef& flip_y() + { + for (uint32_t y = 0; y < m_height / 2; ++y) + for (uint32_t x = 0; x < m_width; ++x) + std::swap((*this)(x, y), (*this)(x, m_height - 1 - y)); + + return *this; + } + + bool has_alpha(uint32_t channel = 3) const + { + for (uint32_t y = 0; y < m_height; ++y) + for (uint32_t x = 0; x < 
m_width; ++x) + if ((*this)(x, y)[channel] != 1.0f) + return true; + + return false; + } + + vec4F get_filtered_vec4F(float x, float y) const + { + x -= .5f; + y -= .5f; + + int ix = (int)floorf(x); + int iy = (int)floorf(y); + float wx = x - ix; + float wy = y - iy; + + vec4F a(get_clamped(ix, iy)); + vec4F b(get_clamped(ix + 1, iy)); + vec4F c(get_clamped(ix, iy + 1)); + vec4F d(get_clamped(ix + 1, iy + 1)); + + vec4F result; + + for (uint32_t i = 0; i < 4; i++) + { + const float top = lerp((float)a[i], (float)b[i], wx); + const float bot = lerp((float)c[i], (float)d[i], wx); + const float m = lerp((float)top, (float)bot, wy); + + result[i] = m; + } + + return result; + } + + private: + uint32_t m_width, m_height, m_pitch; // all in pixels + vec4F_vec m_pixels; + }; + + // REC 709 coefficients + const float REC_709_R = 0.212656f, REC_709_G = 0.715158f, REC_709_B = 0.072186f; + + inline float get_luminance(const vec4F &c) + { + return c[0] * REC_709_R + c[1] * REC_709_G + c[2] * REC_709_B; + } + + float linear_to_srgb(float l); + float srgb_to_linear(float s); + + class fast_linear_to_srgb + { + public: + fast_linear_to_srgb() + { + init(); + } + + void init() + { + for (int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i) + { + float l = (float)i * (1.0f / (LINEAR_TO_SRGB_TABLE_SIZE - 1)); + m_linear_to_srgb_table[i] = (uint8_t)basisu::fast_floorf_int(255.0f * basisu::linear_to_srgb(l)); + } + + float srgb_to_linear[256]; + for (int i = 0; i < 256; i++) + srgb_to_linear[i] = basisu::srgb_to_linear((float)i / 255.0f); + + for (int i = 0; i < 256; i++) + m_srgb_to_linear_thresh[i] = (srgb_to_linear[i] + srgb_to_linear[basisu::minimum(i + 1, 255)]) * .5f; + } + + inline uint8_t convert(float l) const + { + assert((l >= 0.0f) && (l <= 1.0f)); + int j = basisu::fast_roundf_int((LINEAR_TO_SRGB_TABLE_SIZE - 1) * l); + + assert((j >= 0) && (j < LINEAR_TO_SRGB_TABLE_SIZE)); + int b = m_linear_to_srgb_table[j]; + + b += (l > m_srgb_to_linear_thresh[b]); + + return (uint8_t)b; + 
} + + private: + static constexpr int LINEAR_TO_SRGB_TABLE_SIZE = 2048; + uint8_t m_linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE]; + + float m_srgb_to_linear_thresh[256]; + }; + + extern fast_linear_to_srgb g_fast_linear_to_srgb; + + // Image metrics + + class image_metrics + { + public: + // TODO: Add ssim + double m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim; + bool m_has_neg, m_hf_mag_overflow, m_any_abnormal; + + image_metrics() + { + clear(); + } + + void clear() + { + m_max = 0; + m_mean = 0; + m_mean_squared = 0; + m_rms = 0; + m_psnr = 0; + m_ssim = 0; + m_has_neg = false; + m_hf_mag_overflow = false; + m_any_abnormal = false; + } + + void print(const char *pPrefix = nullptr) { printf("%sMax: %3.3f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); } + void print_hp(const char* pPrefix = nullptr) { printf("%sMax: %3.6f Mean: %3.6f RMS: %3.6f PSNR: %2.6f dB, Any Neg: %u, Half float overflow: %u, Any NaN/Inf: %u\n", pPrefix ? 
pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal); } + + void calc(const imagef& a, const imagef& b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool log = false); + void calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error); + void calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error); + void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false); + }; + + void print_image_metrics(const image& a, const image& b); + + // Image saving/loading/resampling + + bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr); + bool load_png(const char* pFilename, image& img); + inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); } + + bool load_tga(const char* pFilename, image& img); + inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); } + + bool load_qoi(const char* pFilename, image& img); + + bool load_jpg(const char *pFilename, image& img); + bool load_jpg(const uint8_t* pBuf, size_t buf_size, image& img); + inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); } + + // Currently loads .PNG, .TGA, or .JPG + bool load_image(const char* pFilename, image& img); + inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); } + + bool is_image_filename_hdr(const char* pFilename); + + // Supports .HDR and most (but not all) .EXR's (see TinyEXR). 
+ bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear = true, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f); + + inline bool load_image_hdr(const std::string& filename, imagef& img, bool ldr_srgb_to_linear = true, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f) + { + return load_image_hdr(filename.c_str(), img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias); + } + + enum class hdr_image_type + { + cHITRGBAHalfFloat = 0, + cHITRGBAFloat = 1, + cHITPNGImage = 2, + cHITEXRImage = 3, + cHITHDRImage = 4, + cHITJPGImage = 5 + }; + + bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f); + + uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans); + uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans); + + struct rgbe_header_info + { + std::string m_program; + + // Note no validation is done, either gamma or exposure may be 0. + double m_gamma; + bool m_has_gamma; + + double m_exposure; // watts/steradian/m^2. + bool m_has_exposure; + + void clear() + { + m_program.clear(); + m_gamma = 1.0f; + m_has_gamma = false; + m_exposure = 1.0f; + m_has_exposure = false; + } + }; + + bool read_rgbe(const uint8_vec& filedata, imagef& img, rgbe_header_info& hdr_info); + bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info &hdr_info); + + bool write_rgbe(uint8_vec& file_data, imagef& img, rgbe_header_info& hdr_info); + bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info); + + bool read_exr(const char* pFilename, imagef& img, int& n_chans); + bool read_exr(const void* pMem, size_t mem_size, imagef& img); + + enum + { + WRITE_EXR_LINEAR_HINT = 1, // hint for lossy comp. 
methods: exr_perceptual_treatment_t, logarithmic or linear, defaults to logarithmic + WRITE_EXR_STORE_FLOATS = 2, // use 32-bit floats, otherwise it uses half floats + WRITE_EXR_NO_COMPRESSION = 4 // no compression, otherwise it uses ZIP compression (16 scanlines per block) + }; + + // Supports 1 (Y), 3 (RGB), or 4 (RGBA) channel images. + bool write_exr(const char* pFilename, const imagef& img, uint32_t n_chans, uint32_t flags); + + enum + { + cImageSaveGrayscale = 1, + cImageSaveIgnoreAlpha = 2 + }; + + bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0); + inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); } + + bool read_file_to_vec(const char* pFilename, uint8_vec& data); + bool read_file_to_data(const char* pFilename, void *pData, size_t len); + + bool write_data_to_file(const char* pFilename, const void* pData, size_t len); + + inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? 
write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); } + + bool image_resample(const image &src, image &dst, bool srgb = false, + const char *pFilter = "lanczos4", float filter_scale = 1.0f, + bool wrapping = false, + uint32_t first_comp = 0, uint32_t num_comps = 4); + + bool image_resample(const imagef& src, imagef& dst, + const char* pFilter = "lanczos4", float filter_scale = 1.0f, + bool wrapping = false, + uint32_t first_comp = 0, uint32_t num_comps = 4); + + // Timing + + typedef uint64_t timer_ticks; + + class interval_timer + { + public: + interval_timer(); + + void start(); + void stop(); + + double get_elapsed_secs() const; + inline double get_elapsed_ms() const { return 1000.0f* get_elapsed_secs(); } + + static void init(); + static inline timer_ticks get_ticks_per_sec() { return g_freq; } + static timer_ticks get_ticks(); + static double ticks_to_secs(timer_ticks ticks); + static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; } + + private: + static timer_ticks g_init_ticks, g_freq; + static double g_timer_freq; + + timer_ticks m_start_time, m_stop_time; + + bool m_started, m_stopped; + }; + + inline double get_interval_timer() { return interval_timer::ticks_to_secs(interval_timer::get_ticks()); } + + inline FILE *fopen_safe(const char *pFilename, const char *pMode) + { +#ifdef _WIN32 + FILE *pFile = nullptr; + fopen_s(&pFile, pFilename, pMode); + return pFile; +#else + return fopen(pFilename, pMode); +#endif + } + + void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1); + + const uint32_t cPixelBlockWidth = 4; + const uint32_t cPixelBlockHeight = 4; + const uint32_t cPixelBlockTotalPixels = cPixelBlockWidth * cPixelBlockHeight; + + struct pixel_block + { + color_rgba m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x] + + inline const color_rgba& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); 
return m_pixels[y][x]; } + inline color_rgba& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } + + inline const color_rgba* get_ptr() const { return &m_pixels[0][0]; } + inline color_rgba* get_ptr() { return &m_pixels[0][0]; } + + inline void clear() { clear_obj(*this); } + + inline bool operator== (const pixel_block& rhs) const + { + return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0; + } + }; + typedef basisu::vector pixel_block_vec; + + struct pixel_block_hdr + { + vec4F m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x] + + inline const vec4F& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } + inline vec4F& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } + + inline const vec4F* get_ptr() const { return &m_pixels[0][0]; } + inline vec4F* get_ptr() { return &m_pixels[0][0]; } + + inline void clear() { clear_obj(*this); } + + inline bool operator== (const pixel_block& rhs) const + { + return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0; + } + }; + typedef basisu::vector pixel_block_hdr_vec; + + void tonemap_image_reinhard(image& ldr_img, const imagef& hdr_img, float exposure, bool add_noise = false, bool per_component = true, bool luma_scaling = false); + bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img); + bool tonemap_image_compressive2(image& dst_img, const imagef& hdr_test_img); + + // Intersection + enum eClear { cClear = 0 }; + enum eInitExpand { cInitExpand = 0 }; + enum eIdentity { cIdentity = 0 }; + + template + class ray + { + public: + typedef vector_type vector_t; + typedef typename vector_type::scalar_type scalar_type; + + inline ray() { } + inline ray(eClear) { clear(); } + inline ray(const vector_type& origin, const vector_type& direction) : m_origin(origin), 
m_direction(direction) { } + + inline void clear() + { + m_origin.clear(); + m_direction.clear(); + } + + inline const vector_type& get_origin(void) const { return m_origin; } + inline void set_origin(const vector_type& origin) { m_origin = origin; } + + inline const vector_type& get_direction(void) const { return m_direction; } + inline void set_direction(const vector_type& direction) { m_direction = direction; } + + inline void set_endpoints(const vector_type& start, const vector_type& end) + { + m_origin = start; + + m_direction = end - start; + m_direction.normalize_in_place(); + } + + inline vector_type eval(scalar_type t) const + { + return m_origin + m_direction * t; + } + + private: + vector_type m_origin; + vector_type m_direction; + }; + + typedef ray ray2F; + typedef ray ray3F; + + template + class vec_interval + { + public: + enum { N = T::num_elements }; + typedef typename T::scalar_type scalar_type; + + inline vec_interval(const T& v) { m_bounds[0] = v; m_bounds[1] = v; } + inline vec_interval(const T& low, const T& high) { m_bounds[0] = low; m_bounds[1] = high; } + + inline vec_interval() { } + inline vec_interval(eClear) { clear(); } + inline vec_interval(eInitExpand) { init_expand(); } + + inline void clear() { m_bounds[0].clear(); m_bounds[1].clear(); } + + inline void init_expand() + { + m_bounds[0].set(1e+30f, 1e+30f, 1e+30f); + m_bounds[1].set(-1e+30f, -1e+30f, -1e+30f); + } + + inline vec_interval expand(const T& p) + { + for (uint32_t c = 0; c < N; c++) + { + if (p[c] < m_bounds[0][c]) + m_bounds[0][c] = p[c]; + + if (p[c] > m_bounds[1][c]) + m_bounds[1][c] = p[c]; + } + + return *this; + } + + inline const T& operator[] (uint32_t i) const { assert(i < 2); return m_bounds[i]; } + inline T& operator[] (uint32_t i) { assert(i < 2); return m_bounds[i]; } + + const T& get_low() const { return m_bounds[0]; } + T& get_low() { return m_bounds[0]; } + + const T& get_high() const { return m_bounds[1]; } + T& get_high() { return m_bounds[1]; } + + 
scalar_type get_dim(uint32_t axis) const { return m_bounds[1][axis] - m_bounds[0][axis]; } + + bool contains(const T& p) const + { + const T& low = get_low(), high = get_high(); + + for (uint32_t i = 0; i < N; i++) + { + if (p[i] < low[i]) + return false; + + if (p[i] > high[i]) + return false; + } + return true; + } + + private: + T m_bounds[2]; + }; + + typedef vec_interval vec_interval1F; + typedef vec_interval vec_interval2F; + typedef vec_interval vec_interval3F; + typedef vec_interval vec_interval4F; + + typedef vec_interval1F aabb1F; + typedef vec_interval2F aabb2F; + typedef vec_interval3F aabb3F; + + namespace intersection + { + enum result + { + cBackfacing = -1, + cFailure = 0, + cSuccess, + cParallel, + cInside, + }; + + // Returns cInside, cSuccess, or cFailure. + // Algorithm: Graphics Gems 1 + template + result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) + { + enum + { + cNumDim = vector_type::num_elements, + cRight = 0, + cLeft = 1, + cMiddle = 2 + }; + + bool inside = true; + int quadrant[cNumDim]; + scalar_type candidate_plane[cNumDim]; + + for (int i = 0; i < cNumDim; i++) + { + if (ray.get_origin()[i] < box[0][i]) + { + quadrant[i] = cLeft; + candidate_plane[i] = box[0][i]; + inside = false; + } + else if (ray.get_origin()[i] > box[1][i]) + { + quadrant[i] = cRight; + candidate_plane[i] = box[1][i]; + inside = false; + } + else + { + quadrant[i] = cMiddle; + } + } + + if (inside) + { + coord = ray.get_origin(); + t = 0.0f; + return cInside; + } + + scalar_type max_t[cNumDim]; + for (int i = 0; i < cNumDim; i++) + { + if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f)) + max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i]; + else + max_t[i] = -1.0f; + } + + int which_plane = 0; + for (int i = 1; i < cNumDim; i++) + if (max_t[which_plane] < max_t[i]) + which_plane = i; + + if (max_t[which_plane] < 0.0f) + return cFailure; + + for (int i = 0; i < cNumDim; i++) + { 
+ if (i != which_plane) + { + coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i]; + + if ((coord[i] < box[0][i]) || (coord[i] > box[1][i])) + return cFailure; + } + else + { + coord[i] = candidate_plane[i]; + } + + assert(coord[i] >= box[0][i] && coord[i] <= box[1][i]); + } + + t = max_t[which_plane]; + return cSuccess; + } + + template + result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) + { + if (!box.contains(ray.get_origin())) + { + started_within = false; + return ray_aabb(coord, t, ray, box); + } + + started_within = true; + + typename vector_type::T diag_dist = box.diagonal_length() * 1.5f; + ray_type outside_ray(ray.eval(diag_dist), -ray.get_direction()); + + result res(ray_aabb(coord, t, outside_ray, box)); + if (res != cSuccess) + return res; + + t = basisu::maximum(0.0f, diag_dist - t); + return cSuccess; + } + + } // intersect + + // This float->half conversion matches how "F32TO16" works on Intel GPU's. + // Input cannot be negative, Inf or Nan. + inline basist::half_float float_to_half_non_neg_no_nan_inf(float val) + { + union { float f; int32_t i; uint32_t u; } fi = { val }; + const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF; + int e = 0, m = 0; + + assert(((fi.i >> 31) == 0) && (flt_e != 0xFF)); + + // not zero or denormal + if (flt_e != 0) + { + int new_exp = flt_e - 127; + if (new_exp > 15) + e = 31; + else if (new_exp < -14) + m = lrintf((1 << 24) * fabsf(fi.f)); + else + { + e = new_exp + 15; + m = lrintf(flt_m * (1.0f / ((float)(1 << 13)))); + } + } + + assert((0 <= m) && (m <= 1024)); + if (m == 1024) + { + e++; + m = 0; + } + + assert((e >= 0) && (e <= 31)); + assert((m >= 0) && (m <= 1023)); + + basist::half_float result = (basist::half_float)((e << 10) | m); + return result; + } + + union fu32 + { + uint32_t u; + float f; + }; + + // Supports positive and denormals only. No NaN or Inf. 
+ BASISU_FORCE_INLINE float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h) + { + assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h)); + + // add 112 to the exponent (112+half float's exp bias of 15=float32's bias of 127) + static const fu32 K = { 0x77800000 }; + + fu32 o; + o.u = h << 13; + o.f *= K.f; + + return o.f; + } + + // Positive, negative, or denormals. No NaN or Inf. Clamped to MAX_HALF_FLOAT. + inline basist::half_float fast_float_to_half_trunc_no_nan_or_inf(float f) + { + assert(!isnan(f) && !isinf(f)); + + // Sutract 112 from the exponent, to change the bias from 127 to 15. + static const fu32 g_f_to_h{ 0x7800000 }; + + fu32 fu; + + fu.f = minimum((float)basist::MAX_HALF_FLOAT, fabsf(f)) * g_f_to_h.f; + + return (basist::half_float)(((fu.u >> (23 - 10)) & 0x7FFF) | ((f < 0.0f) ? 0x8000 : 0)); + } + + inline basist::half_float fast_float_to_half_trunc_no_clamp_neg_nan_or_inf(float f) + { + assert(!isnan(f) && !isinf(f)); + assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT)); + + // Sutract 112 from the exponent, to change the bias from 127 to 15. + static const fu32 g_f_to_h{ 0x7800000 }; + + fu32 fu; + + fu.f = f * g_f_to_h.f; + + return (basist::half_float)((fu.u >> (23 - 10)) & 0x7FFF); + } + + inline basist::half_float fast_float_to_half_no_clamp_neg_nan_or_inf(float f) + { + assert(!isnan(f) && !isinf(f)); + assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT)); + + // Sutract 112 from the exponent, to change the bias from 127 to 15. 
+ static const fu32 g_f_to_h{ 0x7800000 }; + + fu32 fu; + + fu.f = f * g_f_to_h.f; + + uint32_t h = (basist::half_float)((fu.u >> (23 - 10)) & 0x7FFF); + + // round to even or nearest + uint32_t mant = fu.u & 8191; // examine lowest 13 bits + uint32_t inc = (mant > 4096) | ((mant == 4096) & (h & 1)); + h += inc; + + if (h > basist::MAX_HALF_FLOAT_AS_INT_BITS) + h = basist::MAX_HALF_FLOAT_AS_INT_BITS; + + return (basist::half_float)h; + } + +} // namespace basisu + +#include "basisu_math.h" diff --git a/thirdparty/basisu/encoder/basisu_etc.cpp b/thirdparty/basisu/encoder/basisu_etc.cpp new file mode 100644 index 000000000..ba1c14231 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_etc.cpp @@ -0,0 +1,1610 @@ +// basis_etc.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_etc.h" + +#if BASISU_SUPPORT_SSE +#define CPPSPMD_NAME(a) a##_sse41 +#include "basisu_kernels_declares.h" +#endif + +#define BASISU_DEBUG_ETC_ENCODER 0 +#define BASISU_DEBUG_ETC_ENCODER_DEEPER 0 + +namespace basisu +{ + const int8_t g_etc2_eac_tables[16][8] = + { + { -3, -6, -9, -15, 2, 5, 8, 14 }, { -3, -7, -10, -13, 2, 6, 9, 12 }, { -2, -5, -8, -13, 1, 4, 7, 12 }, { -2, -4, -6, -13, 1, 3, 5, 12 }, + { -3, -6, -8, -12, 2, 5, 7, 11 }, { -3, -7, -9, -11, 2, 6, 8, 10 }, { -4, -7, -8, -11, 3, 6, 7, 10 }, { -3, -5, -8, -11, 2, 4, 7, 10 }, + { -2, -6, -8, -10, 1, 5, 7, 9 }, { -2, -5, -8, -10, 1, 4, 7, 9 }, { -2, -4, -8, -10, 1, 3, 7, 9 }, { -2, -5, -7, -10, 1, 4, 6, 9 }, + { -3, -4, -7, -10, 2, 3, 6, 9 }, { -1, -2, -3, -10, 0, 1, 2, 9 }, { -4, -6, -8, -9, 3, 5, 7, 8 }, { -3, -5, -7, -9, 2, 4, 6, 8 } + }; + + const int8_t g_etc2_eac_tables8[16][8] = + { + { -24, -48, -72, -120, 16, 40, 64, 112 }, { -24,-56,-80,-104,16,48,72,96 }, { -16,-40,-64,-104,8,32,56,96 }, { -16,-32,-48,-104,8,24,40,96 }, + { -24,-48,-64,-96,16,40,56,88 }, { -24,-56,-72,-88,16,48,64,80 }, { -32,-56,-64,-88,24,48,56,80 }, { -24,-40,-64,-88,16,32,56,80 }, + { -16,-48,-64,-80,8,40,56,72 }, { -16,-40,-64,-80,8,32,56,72 }, { -16,-32,-64,-80,8,24,56,72 }, { -16,-40,-56,-80,8,32,48,72 }, + { -24,-32,-56,-80,16,24,48,72 }, { -8,-16,-24,-80,0,8,16,72 }, { -32,-48,-64,-72,24,40,56,64 }, { -24,-40,-56,-72,16,32,48,64 } + }; + + // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. + static uint16_t g_etc1_inverse_lookup[2 * 8 * 4][256]; // [ diff/inten_table/selector][desired_color ] + + // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color. 
+ // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8) + static const uint16_t g_etc1_color8_to_etc_block_config_0_255[2][33] = + { + { 0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF }, + { 0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF }, + }; + + // Really only [254][11]. + static const uint16_t g_etc1_color8_to_etc_block_config_1_to_254[254][12] = + { + { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E, 0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, { + 0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306, 0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112, + 0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707, 0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B, + 0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605, 0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF + }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214, 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A, + 
0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, { 0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, + 0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D, 0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805, + 0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F, 0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, { + 0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521, 0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523, + 0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, 0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, + 0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, { 0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, + 0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, 0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, + 0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917, 0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E, + 0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, { 
0x0410, 0x0901, 0x0633, 0x0725, 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139, + 0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, { 0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A, + 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437, 0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500, + 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, { 0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, + 0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, 0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, { + 0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F, 0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, + 0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, { 0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, + 0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434, 0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01, + 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21, 0x0539, 0x160B, 0x121F, 0xFFFF }, { 0x0532, 0x0702, 0x0D13, 
0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, + 0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E, 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, + 0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, { 0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, { + 0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, 0x1019, 0x0B2B, 0x013D, 0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33, + 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B, 0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, { + 0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103, 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, + 0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536, 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A, + 0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, 0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, { + 0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, { 0x0820, + 0x1111, 0x1319, 0x1809, 
0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031, 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, { + 0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, 0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, + 0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D, 0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, { 0x1221, 0x0B39, 0x1029, + 0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832, 0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D, + 0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133, 0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF + }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, { 0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331, + 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, 0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513, + 0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, { + 0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 
0xFFFF }, { 0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905, + 0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, 0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D, + 0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621, 0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18, + 0x092C, 0x1623, 0x1527, 0xFFFF }, { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919, 0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625, + 0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F, 0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936, + 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A, 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, { + 0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913, 0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, { + 0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20, 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C, + 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 0x1D05, 0xFFFF }, { 0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 
0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06, + 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, { 0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26, + 0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18, 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03, + 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, { 0x1B21, 0x1929, 0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23, + 0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B, + 0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E, 0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18, + 0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01, 0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16, + 0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B, 0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01, + 0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34, 0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11, 0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 
0xFFFF }, + }; + + static uint32_t etc1_decode_value(uint32_t diff, uint32_t inten, uint32_t selector, uint32_t packed_c) + { + const uint32_t limit = diff ? 32 : 16; + BASISU_NOTE_UNUSED(limit); + assert((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit)); + int c; + if (diff) + c = (packed_c >> 2) | (packed_c << 3); + else + c = packed_c | (packed_c << 4); + c += g_etc1_inten_tables[inten][selector]; + c = clamp(c, 0, 255); + return c; + } + + void pack_etc1_solid_color_init() + { + for (uint32_t diff = 0; diff < 2; diff++) + { + const uint32_t limit = diff ? 32 : 16; + + for (uint32_t inten = 0; inten < 8; inten++) + { + for (uint32_t selector = 0; selector < 4; selector++) + { + const uint32_t inverse_table_index = diff + (inten << 1) + (selector << 4); + for (uint32_t color = 0; color < 256; color++) + { + uint32_t best_error = UINT32_MAX, best_packed_c = 0; + for (uint32_t packed_c = 0; packed_c < limit; packed_c++) + { + int v = etc1_decode_value(diff, inten, selector, packed_c); + uint32_t err = (uint32_t)labs(v - static_cast(color)); + if (err < best_error) + { + best_error = err; + best_packed_c = packed_c; + if (!best_error) + break; + } + } + assert(best_error <= 255); + g_etc1_inverse_lookup[inverse_table_index][color] = static_cast(best_packed_c | (best_error << 8)); + } + } + } + } + +#if 0 + for (uint32_t y = 0; y < 64; y++) + { + printf("{"); + for (uint32_t x = 0; x < 256; x++) + { + printf("0x%X", g_etc1_inverse_lookup[y][x]); + if (x != 255) + printf(","); + if (((x & 63) == 63) && (x != 255)) + printf("\n"); + } + printf("},\n"); + } +#endif + } + + // Packs solid color blocks efficiently using a set of small precomputed tables. + // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time. 
+ uint64_t pack_etc1_block_solid_color(etc_block& block, const uint8_t* pColor) + { + assert(g_etc1_inverse_lookup[0][255]); + + static uint32_t s_next_comp[4] = { 1, 2, 0, 1 }; + + uint32_t best_error = UINT32_MAX, best_i = 0; + int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + + // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. + for (uint32_t i = 0; i < 3; i++) + { + const uint32_t c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + + const int delta_range = 1; + for (int delta = -delta_range; delta <= delta_range; delta++) + { + const int c_plus_delta = clamp(pColor[i] + delta, 0, 255); + + const uint16_t* pTable; + if (!c_plus_delta) + pTable = g_etc1_color8_to_etc_block_config_0_255[0]; + else if (c_plus_delta == 255) + pTable = g_etc1_color8_to_etc_block_config_0_255[1]; + else + pTable = g_etc1_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + + do + { + const uint32_t x = *pTable++; + +#ifdef _DEBUG + const uint32_t diff = x & 1; + const uint32_t inten = (x >> 1) & 7; + const uint32_t selector = (x >> 4) & 3; + const uint32_t p0 = (x >> 8) & 255; + assert(etc1_decode_value(diff, inten, selector, p0) == (uint32_t)c_plus_delta); +#endif + + const uint16_t* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; + uint16_t p1 = pInverse_table[c1]; + uint16_t p2 = pInverse_table[c2]; + const uint32_t trial_error = square(c_plus_delta - pColor[i]) + square(p1 >> 8) + square(p2 >> 8); + if (trial_error < best_error) + { + best_error = trial_error; + best_x = x; + best_packed_c1 = p1 & 0xFF; + best_packed_c2 = p2 & 0xFF; + best_i = i; + if (!best_error) + goto found_perfect_match; + } + } while (*pTable != 0xFFFF); + } + } + found_perfect_match: + + const uint32_t diff = best_x & 1; + const uint32_t inten = (best_x >> 1) & 7; + + block.m_bytes[3] = static_cast(((inten | (inten << 3)) << 2) | (diff << 1)); + + const uint32_t etc1_selector 
= g_selector_index_to_etc1[(best_x >> 4) & 3]; + *reinterpret_cast(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0; + *reinterpret_cast(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0; + + const uint32_t best_packed_c0 = (best_x >> 8) & 255; + if (diff) + { + block.m_bytes[best_i] = static_cast(best_packed_c0 << 3); + block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 << 3); + block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 << 3); + } + else + { + block.m_bytes[best_i] = static_cast(best_packed_c0 | (best_packed_c0 << 4)); + block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 | (best_packed_c1 << 4)); + block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 | (best_packed_c2 << 4)); + } + + return best_error; + } + + const uint32_t BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE = 165; + + static const struct { uint8_t m_v[4]; } g_cluster_fit_order_tab[BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE] = + { + { { 0, 0, 0, 8 } },{ { 0, 5, 2, 1 } },{ { 0, 6, 1, 1 } },{ { 0, 7, 0, 1 } },{ { 0, 7, 1, 0 } }, + { { 0, 0, 8, 0 } },{ { 0, 0, 3, 5 } },{ { 0, 1, 7, 0 } },{ { 0, 0, 4, 4 } },{ { 0, 0, 2, 6 } }, + { { 0, 0, 7, 1 } },{ { 0, 0, 1, 7 } },{ { 0, 0, 5, 3 } },{ { 1, 6, 0, 1 } },{ { 0, 0, 6, 2 } }, + { { 0, 2, 6, 0 } },{ { 2, 4, 2, 0 } },{ { 0, 3, 5, 0 } },{ { 3, 3, 1, 1 } },{ { 4, 2, 0, 2 } }, + { { 1, 5, 2, 0 } },{ { 0, 5, 3, 0 } },{ { 0, 6, 2, 0 } },{ { 2, 4, 1, 1 } },{ { 5, 1, 0, 2 } }, + { { 6, 1, 1, 0 } },{ { 3, 3, 0, 2 } },{ { 6, 0, 0, 2 } },{ { 0, 8, 0, 0 } },{ { 6, 1, 0, 1 } }, + { { 0, 1, 6, 1 } },{ { 1, 6, 1, 0 } },{ { 4, 1, 3, 0 } },{ { 0, 2, 5, 1 } },{ { 5, 0, 3, 0 } }, + { { 5, 3, 0, 0 } },{ { 0, 1, 5, 2 } },{ { 0, 3, 4, 1 } },{ { 2, 5, 1, 0 } },{ { 1, 7, 0, 0 } }, + { { 0, 1, 4, 3 } },{ { 6, 0, 2, 0 } },{ { 0, 4, 4, 0 } },{ { 2, 6, 0, 0 } },{ { 0, 2, 4, 2 } }, + { { 0, 5, 1, 2 } },{ { 0, 6, 0, 2 } },{ { 3, 5, 0, 0 } },{ { 0, 4, 3, 1 } },{ { 3, 4, 1, 0 } }, + { { 4, 3, 1, 0 } },{ { 1, 5, 0, 2 } },{ { 
0, 3, 3, 2 } },{ { 1, 4, 1, 2 } },{ { 0, 4, 2, 2 } }, + { { 2, 3, 3, 0 } },{ { 4, 4, 0, 0 } },{ { 1, 2, 4, 1 } },{ { 0, 5, 0, 3 } },{ { 0, 1, 3, 4 } }, + { { 1, 5, 1, 1 } },{ { 1, 4, 2, 1 } },{ { 1, 3, 2, 2 } },{ { 5, 2, 1, 0 } },{ { 1, 3, 3, 1 } }, + { { 0, 1, 2, 5 } },{ { 1, 1, 5, 1 } },{ { 0, 3, 2, 3 } },{ { 2, 5, 0, 1 } },{ { 3, 2, 2, 1 } }, + { { 2, 3, 0, 3 } },{ { 1, 4, 3, 0 } },{ { 2, 2, 1, 3 } },{ { 6, 2, 0, 0 } },{ { 1, 0, 6, 1 } }, + { { 3, 3, 2, 0 } },{ { 7, 1, 0, 0 } },{ { 3, 1, 4, 0 } },{ { 0, 2, 3, 3 } },{ { 0, 4, 1, 3 } }, + { { 0, 4, 0, 4 } },{ { 0, 1, 0, 7 } },{ { 2, 0, 5, 1 } },{ { 2, 0, 4, 2 } },{ { 3, 0, 2, 3 } }, + { { 2, 2, 4, 0 } },{ { 2, 2, 3, 1 } },{ { 4, 0, 3, 1 } },{ { 3, 2, 3, 0 } },{ { 2, 3, 2, 1 } }, + { { 1, 3, 4, 0 } },{ { 7, 0, 1, 0 } },{ { 3, 0, 4, 1 } },{ { 1, 0, 5, 2 } },{ { 8, 0, 0, 0 } }, + { { 3, 0, 1, 4 } },{ { 4, 1, 1, 2 } },{ { 4, 0, 2, 2 } },{ { 1, 2, 5, 0 } },{ { 4, 2, 1, 1 } }, + { { 3, 4, 0, 1 } },{ { 2, 0, 3, 3 } },{ { 5, 0, 1, 2 } },{ { 5, 0, 0, 3 } },{ { 2, 4, 0, 2 } }, + { { 2, 1, 4, 1 } },{ { 4, 0, 1, 3 } },{ { 2, 1, 5, 0 } },{ { 4, 2, 2, 0 } },{ { 4, 0, 4, 0 } }, + { { 1, 0, 4, 3 } },{ { 1, 4, 0, 3 } },{ { 3, 0, 3, 2 } },{ { 4, 3, 0, 1 } },{ { 0, 1, 1, 6 } }, + { { 1, 3, 1, 3 } },{ { 0, 2, 2, 4 } },{ { 2, 0, 2, 4 } },{ { 5, 1, 1, 1 } },{ { 3, 0, 5, 0 } }, + { { 2, 3, 1, 2 } },{ { 3, 0, 0, 5 } },{ { 0, 3, 1, 4 } },{ { 5, 0, 2, 1 } },{ { 2, 1, 3, 2 } }, + { { 2, 0, 6, 0 } },{ { 3, 1, 3, 1 } },{ { 5, 1, 2, 0 } },{ { 1, 0, 3, 4 } },{ { 1, 1, 6, 0 } }, + { { 4, 0, 0, 4 } },{ { 2, 0, 1, 5 } },{ { 0, 3, 0, 5 } },{ { 1, 3, 0, 4 } },{ { 4, 1, 2, 1 } }, + { { 1, 2, 3, 2 } },{ { 3, 1, 0, 4 } },{ { 5, 2, 0, 1 } },{ { 1, 2, 2, 3 } },{ { 3, 2, 1, 2 } }, + { { 2, 2, 2, 2 } },{ { 6, 0, 1, 1 } },{ { 1, 2, 1, 4 } },{ { 1, 1, 4, 2 } },{ { 3, 2, 0, 3 } }, + { { 1, 2, 0, 5 } },{ { 1, 0, 7, 0 } },{ { 3, 1, 2, 2 } },{ { 1, 0, 2, 5 } },{ { 2, 0, 0, 6 } }, + { { 2, 1, 1, 4 } },{ { 2, 2, 0, 4 } },{ { 1, 1, 3, 3 } },{ { 7, 0, 0, 1 } },{ { 
1, 0, 0, 7 } }, + { { 2, 1, 2, 3 } },{ { 4, 1, 0, 3 } },{ { 3, 1, 1, 3 } },{ { 1, 1, 2, 4 } },{ { 2, 1, 0, 5 } }, + { { 1, 0, 1, 6 } },{ { 0, 2, 1, 5 } },{ { 0, 2, 0, 6 } },{ { 1, 1, 1, 5 } },{ { 1, 1, 0, 6 } } + }; + + const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = + { + { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, + { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } + }; + + const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; + const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; + + // [flip][subblock][pixel_index] + const etc_coord2 g_etc1_pixel_coords[2][2][8] = + { + { + { + { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, + { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } + }, + { + { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, + { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } + } + }, + { + { + { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } + }, + { + { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } + }, + } + }; + + // [flip][subblock][pixel_index] + const uint32_t g_etc1_pixel_indices[2][2][8] = + { + { + { + 0 + 4 * 0, 0 + 4 * 1, 0 + 4 * 2, 0 + 4 * 3, + 1 + 4 * 0, 1 + 4 * 1, 1 + 4 * 2, 1 + 4 * 3 + }, + { + 2 + 4 * 0, 2 + 4 * 1, 2 + 4 * 2, 2 + 4 * 3, + 3 + 4 * 0, 3 + 4 * 1, 3 + 4 * 2, 3 + 4 * 3 + } + }, + { + { + 0 + 4 * 0, 1 + 4 * 0, 2 + 4 * 0, 3 + 4 * 0, + 0 + 4 * 1, 1 + 4 * 1, 2 + 4 * 1, 3 + 4 * 1 + }, + { + 0 + 4 * 2, 1 + 4 * 2, 2 + 4 * 2, 3 + 4 * 2, + 0 + 4 * 3, 1 + 4 * 3, 2 + 4 * 3, 3 + 4 * 3 + }, + } + }; + + uint16_t etc_block::pack_color5(const color_rgba& color, bool scaled, uint32_t bias) + { + return pack_color5(color.r, color.g, color.b, scaled, bias); + } + + uint16_t etc_block::pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias) + { + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 31U + bias) / 255U; + b = (b * 31U + 
bias) / 255U; + } + + r = minimum(r, 31U); + g = minimum(g, 31U); + b = minimum(b, 31U); + + return static_cast(b | (g << 5U) | (r << 10U)); + } + + color_rgba etc_block::unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha) + { + uint32_t b = packed_color5 & 31U; + uint32_t g = (packed_color5 >> 5U) & 31U; + uint32_t r = (packed_color5 >> 10U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + return color_rgba(cNoClamp, r, g, b, minimum(alpha, 255U)); + } + + void etc_block::unpack_color5(color_rgba& result, uint16_t packed_color5, bool scaled) + { + result = unpack_color5(packed_color5, scaled, 255); + } + + void etc_block::unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, bool scaled) + { + color_rgba c(unpack_color5(packed_color5, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + bool etc_block::unpack_color5(color_rgba& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) + { + color_rgba_i16 dc(unpack_delta3(packed_delta3)); + + int b = (packed_color5 & 31U) + dc.b; + int g = ((packed_color5 >> 5U) & 31U) + dc.g; + int r = ((packed_color5 >> 10U) & 31U) + dc.r; + + bool success = true; + if (static_cast(r | g | b) > 31U) + { + success = false; + r = clamp(r, 0, 31); + g = clamp(g, 0, 31); + b = clamp(b, 0, 31); + } + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + result.set_noclamp_rgba(r, g, b, minimum(alpha, 255U)); + return success; + } + + bool etc_block::unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) + { + color_rgba result; + const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); + r = result.r; + g = result.g; + b = result.b; + return success; + } + + uint16_t etc_block::pack_delta3(const color_rgba_i16& color) + { + return 
pack_delta3(color.r, color.g, color.b); + } + + uint16_t etc_block::pack_delta3(int r, int g, int b) + { + assert((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); + assert((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); + assert((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); + if (r < 0) r += 8; + if (g < 0) g += 8; + if (b < 0) b += 8; + return static_cast(b | (g << 3) | (r << 6)); + } + + color_rgba_i16 etc_block::unpack_delta3(uint16_t packed_delta3) + { + int r = (packed_delta3 >> 6) & 7; + int g = (packed_delta3 >> 3) & 7; + int b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + return color_rgba_i16(r, g, b, 255); + } + + void etc_block::unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3) + { + r = (packed_delta3 >> 6) & 7; + g = (packed_delta3 >> 3) & 7; + b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + } + + uint16_t etc_block::pack_color4(const color_rgba& color, bool scaled, uint32_t bias) + { + return pack_color4(color.r, color.g, color.b, scaled, bias); + } + + uint16_t etc_block::pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias) + { + if (scaled) + { + r = (r * 15U + bias) / 255U; + g = (g * 15U + bias) / 255U; + b = (b * 15U + bias) / 255U; + } + + r = minimum(r, 15U); + g = minimum(g, 15U); + b = minimum(b, 15U); + + return static_cast(b | (g << 4U) | (r << 8U)); + } + + color_rgba etc_block::unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha) + { + uint32_t b = packed_color4 & 15U; + uint32_t g = (packed_color4 >> 4U) & 15U; + uint32_t r = (packed_color4 >> 8U) & 15U; + + if (scaled) + { + b = (b << 4U) | b; + g = (g << 4U) | g; + r = (r << 4U) | r; + } + + return color_rgba(cNoClamp, r, g, b, minimum(alpha, 255U)); + } + + void etc_block::unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled) + { + color_rgba c(unpack_color4(packed_color4, scaled, 0)); + r 
= c.r; + g = c.g; + b = c.b; + } + + void etc_block::get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint32_t table_idx) + { + assert(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint32_t r, g, b; + unpack_color5(r, g, b, packed_color5, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0, 255); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1, 255); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2, 255); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3, 255); + } + + bool etc_block::get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint16_t packed_delta3, uint32_t table_idx) + { + assert(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint32_t r, g, b; + bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0, 255); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1, 255); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2, 255); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3, 255); + + return success; + } + + void etc_block::get_abs_subblock_colors(color_rgba* pDst, uint16_t packed_color4, uint32_t table_idx) + { + assert(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint32_t r, g, b; + unpack_color4(r, g, b, packed_color4, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = 
pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0, 255); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1, 255); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2, 255); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3, 255); + } + + bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha) + { + const bool diff_flag = block.get_diff_bit(); + const bool flip_flag = block.get_flip_bit(); + const uint32_t table_index0 = block.get_inten_table(0); + const uint32_t table_index1 = block.get_inten_table(1); + + color_rgba subblock_colors0[4]; + color_rgba subblock_colors1[4]; + + if (diff_flag) + { + const uint16_t base_color5 = block.get_base5_color(); + const uint16_t delta_color3 = block.get_delta3_color(); + etc_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); + + if (!etc_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) + return false; + } + else + { + const uint16_t base_color4_0 = block.get_base4_color(0); + etc_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); + + const uint16_t base_color4_1 = block.get_base4_color(1); + etc_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); + } + + if (preserve_alpha) + { + if (flip_flag) + { + for (uint32_t y = 0; y < 2; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); + pDst += 4; + } + + for (uint32_t y = 2; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + 
pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + } + else + { + if (flip_flag) + { + // 0000 + // 0000 + // 1111 + // 1111 + for (uint32_t y = 0; y < 2; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors0[block.get_selector(2, y)]; + pDst[3] = subblock_colors0[block.get_selector(3, y)]; + pDst += 4; + } + + for (uint32_t y = 2; y < 4; y++) + { + pDst[0] = subblock_colors1[block.get_selector(0, y)]; + pDst[1] = subblock_colors1[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + else + { + // 0011 + // 0011 + // 0011 + // 0011 + for (uint32_t y = 0; y < 4; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + } + + return true; + } + + inline int extend_6_to_8(uint32_t n) + { + return (n << 2) | (n >> 4); + } + + inline int extend_7_to_8(uint32_t n) + { + return (n << 1) | (n >> 6); + } + + inline int extend_4_to_8(uint32_t n) + { + return (n << 4) | n; + } + + uint64_t etc_block::evaluate_etc1_error(const color_rgba* pBlock_pixels, bool perceptual, int subblock_index) const + { + color_rgba unpacked_block[16]; + + unpack_etc1(*this, unpacked_block); + + uint64_t total_error = 0; + + if (subblock_index < 0) + { + for (uint32_t i = 0; i < 16; i++) + total_error += color_distance(perceptual, pBlock_pixels[i], 
unpacked_block[i], false); + } + else + { + const bool flip_bit = get_flip_bit(); + + for (uint32_t i = 0; i < 8; i++) + { + const uint32_t idx = g_etc1_pixel_indices[flip_bit][subblock_index][i]; + + total_error += color_distance(perceptual, pBlock_pixels[idx], unpacked_block[idx], false); + } + } + + return total_error; + } + + void etc_block::get_subblock_pixels(color_rgba* pPixels, int subblock_index) const + { + if (subblock_index < 0) + unpack_etc1(*this, pPixels); + else + { + color_rgba unpacked_block[16]; + + unpack_etc1(*this, unpacked_block); + + const bool flip_bit = get_flip_bit(); + + for (uint32_t i = 0; i < 8; i++) + { + const uint32_t idx = g_etc1_pixel_indices[flip_bit][subblock_index][i]; + + pPixels[i] = unpacked_block[idx]; + } + } + } + + bool etc1_optimizer::compute() + { + assert(m_pResult->m_pSelectors); + + if (m_pParams->m_pForce_selectors) + { + assert(m_pParams->m_quality >= cETCQualitySlow); + if (m_pParams->m_quality < cETCQualitySlow) + return false; + } + + const uint32_t n = m_pParams->m_num_src_pixels; + + if (m_pParams->m_cluster_fit) + { + if (m_pParams->m_quality == cETCQualityFast) + compute_internal_cluster_fit(4); + else if (m_pParams->m_quality == cETCQualityMedium) + compute_internal_cluster_fit(16); + else if (m_pParams->m_quality == cETCQualitySlow) + compute_internal_cluster_fit(64); + else + compute_internal_cluster_fit(BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE); + } + else + compute_internal_neighborhood(m_br, m_bg, m_bb); + + if (!m_best_solution.m_valid) + { + m_pResult->m_error = UINT32_MAX; + return false; + } + + //const uint8_t* pSelectors = &m_best_solution.m_selectors[0]; + const uint8_t* pSelectors = m_pParams->m_pForce_selectors ? 
m_pParams->m_pForce_selectors : &m_best_solution.m_selectors[0]; + +#if defined(DEBUG) || defined(_DEBUG) + { + // sanity check the returned error + color_rgba block_colors[4]; + m_best_solution.m_coords.get_block_colors(block_colors); + + const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; + uint64_t actual_error = 0; + + bool perceptual; + if (m_pParams->m_quality >= cETCQualityMedium) + perceptual = m_pParams->m_perceptual; + else + perceptual = (m_pParams->m_quality == cETCQualityFast) ? false : m_pParams->m_perceptual; + + for (uint32_t i = 0; i < n; i++) + actual_error += color_distance(perceptual, pSrc_pixels[i], block_colors[pSelectors[i]], false); + + assert(actual_error == m_best_solution.m_error); + } +#endif + + m_pResult->m_error = m_best_solution.m_error; + + m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; + m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; + + m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; + memcpy(m_pResult->m_pSelectors, pSelectors, n); + m_pResult->m_n = n; + + return true; + } + + void etc1_optimizer::refine_solution(uint32_t max_refinement_trials) + { + // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. + // Now, for each component, attempt to refine the current solution by solving a simple linear equation. 
For example, for 4 colors: + // The goal is: + // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 + // Rearranging this: + // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 + // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 + // So what this means: + // optimal_block_color = avg_input - avg_inten_delta + // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta. + // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. + // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. 
+ + const uint32_t n = m_pParams->m_num_src_pixels; + + for (uint32_t refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) + { + const uint8_t* pSelectors = &m_best_solution.m_selectors[0]; + const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; + + int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; + const color_rgba base_color(m_best_solution.m_coords.get_scaled_color()); + for (uint32_t r = 0; r < n; r++) + { + const uint32_t s = *pSelectors++; + const int yd_temp = pInten_table[s]; + // Compute actual delta being applied to each pixel, taking into account clamping. + delta_sum_r += clamp(base_color.r + yd_temp, 0, 255) - base_color.r; + delta_sum_g += clamp(base_color.g + yd_temp, 0, 255) - base_color.g; + delta_sum_b += clamp(base_color.b + yd_temp, 0, 255) - base_color.b; + } + + if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) + break; + + const float avg_delta_r_f = static_cast(delta_sum_r) / n; + const float avg_delta_g_f = static_cast(delta_sum_g) / n; + const float avg_delta_b_f = static_cast(delta_sum_b) / n; + const int br1 = clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bg1 = clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bb1 = clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Refinement trial %u, avg_delta %f %f %f\n", refinement_trial, avg_delta_r_f, avg_delta_g_f, avg_delta_b_f); +#endif + + if (!evaluate_solution(etc1_solution_coordinates(br1, bg1, bb1, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution)) + break; + + } // refinement_trial + } + + void etc1_optimizer::compute_internal_neighborhood(int scan_r, int scan_g, int scan_b) + { + if (m_best_solution.m_error == 0) + return; + + //const uint32_t n = m_pParams->m_num_src_pixels; + const int scan_delta_size 
= m_pParams->m_scan_delta_size; + + // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. + // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. + for (int zdi = 0; zdi < scan_delta_size; zdi++) + { + const int zd = m_pParams->m_pScan_deltas[zdi]; + const int mbb = scan_b + zd; + if (mbb < 0) continue; else if (mbb > m_limit) break; + + for (int ydi = 0; ydi < scan_delta_size; ydi++) + { + const int yd = m_pParams->m_pScan_deltas[ydi]; + const int mbg = scan_g + yd; + if (mbg < 0) continue; else if (mbg > m_limit) break; + + for (int xdi = 0; xdi < scan_delta_size; xdi++) + { + const int xd = m_pParams->m_pScan_deltas[xdi]; + const int mbr = scan_r + xd; + if (mbr < 0) continue; else if (mbr > m_limit) break; + + etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); + + if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) + continue; + + if (m_pParams->m_refinement) + { + refine_solution((m_pParams->m_quality == cETCQualityFast) ? 2 : (((xd | yd | zd) == 0) ? 
4 : 2)); + } + + } // xdi + } // ydi + } // zdi + } + + void etc1_optimizer::compute_internal_cluster_fit(uint32_t total_perms_to_try) + { + if ((!m_best_solution.m_valid) || ((m_br != m_best_solution.m_coords.m_unscaled_color.r) || (m_bg != m_best_solution.m_coords.m_unscaled_color.g) || (m_bb != m_best_solution.m_coords.m_unscaled_color.b))) + { + evaluate_solution(etc1_solution_coordinates(m_br, m_bg, m_bb, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution); + } + + if ((m_best_solution.m_error == 0) || (!m_best_solution.m_valid)) + return; + + for (uint32_t i = 0; i < total_perms_to_try; i++) + { + int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; + + const int *pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; + const color_rgba base_color(m_best_solution.m_coords.get_scaled_color()); + + const uint8_t *pNum_selectors = g_cluster_fit_order_tab[i].m_v; + + for (uint32_t q = 0; q < 4; q++) + { + const int yd_temp = pInten_table[q]; + + delta_sum_r += pNum_selectors[q] * (clamp(base_color.r + yd_temp, 0, 255) - base_color.r); + delta_sum_g += pNum_selectors[q] * (clamp(base_color.g + yd_temp, 0, 255) - base_color.g); + delta_sum_b += pNum_selectors[q] * (clamp(base_color.b + yd_temp, 0, 255) - base_color.b); + } + + if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) + continue; + + const float avg_delta_r_f = static_cast(delta_sum_r) / 8; + const float avg_delta_g_f = static_cast(delta_sum_g) / 8; + const float avg_delta_b_f = static_cast(delta_sum_b) / 8; + + const int br1 = clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bg1 = clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bb1 = clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Second refinement trial %u, avg_delta %f %f %f\n", i, avg_delta_r_f, avg_delta_g_f, 
avg_delta_b_f); +#endif + + evaluate_solution(etc1_solution_coordinates(br1, bg1, bb1, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution); + + if (m_best_solution.m_error == 0) + break; + } + } + + void etc1_optimizer::init(const params& params, results& result) + { + m_pParams = ¶ms; + m_pResult = &result; + + const uint32_t n = m_pParams->m_num_src_pixels; + + m_selectors.resize(n); + m_best_selectors.resize(n); + m_temp_selectors.resize(n); + m_trial_solution.m_selectors.resize(n); + m_best_solution.m_selectors.resize(n); + + m_limit = m_pParams->m_use_color4 ? 15 : 31; + + vec3F avg_color(0.0f); + + m_luma.resize(n); + m_sorted_luma_indices.resize(n); + m_sorted_luma.resize(n); + + int min_r = 255, min_g = 255, min_b = 255; + int max_r = 0, max_g = 0, max_b = 0; + + for (uint32_t i = 0; i < n; i++) + { + const color_rgba& c = m_pParams->m_pSrc_pixels[i]; + + min_r = basisu::minimum(min_r, c.r); + min_g = basisu::minimum(min_g, c.g); + min_b = basisu::minimum(min_b, c.b); + + max_r = basisu::maximum(max_r, c.r); + max_g = basisu::maximum(max_g, c.g); + max_b = basisu::maximum(max_b, c.b); + + const vec3F fc(c.r, c.g, c.b); + + avg_color += fc; + + m_luma[i] = static_cast(c.r + c.g + c.b); + m_sorted_luma_indices[i] = i; + } + avg_color /= static_cast(n); + m_avg_color = avg_color; + m_max_comp_spread = basisu::maximum(basisu::maximum(max_r - min_r, max_g - min_g), max_b - min_b); + + m_br = clamp(static_cast(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); + m_bg = clamp(static_cast(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); + m_bb = clamp(static_cast(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Avg block color: %u %u %u\n", m_br, m_bg, m_bb); +#endif + + if (m_pParams->m_quality == cETCQualityFast) + { + indirect_sort(n, &m_sorted_luma_indices[0], &m_luma[0]); + + m_pSorted_luma = &m_sorted_luma[0]; + m_pSorted_luma_indices = &m_sorted_luma_indices[0]; + + for (uint32_t i 
= 0; i < n; i++) + m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; + } + + m_best_solution.m_coords.clear(); + m_best_solution.m_valid = false; + m_best_solution.m_error = UINT64_MAX; + + clear_obj(m_solutions_tried); + } + + // Return false if we've probably already tried this solution, true if we have definitely not. + bool etc1_optimizer::check_for_redundant_solution(const etc1_solution_coordinates& coords) + { + // Hash first 3 bytes of color (RGB) + uint32_t kh = hash_hsieh((uint8_t*)&coords.m_unscaled_color.r, 3); + + uint32_t h0 = kh & cSolutionsTriedHashMask; + uint32_t h1 = (kh >> cSolutionsTriedHashBits) & cSolutionsTriedHashMask; + + // Simple Bloom filter lookup with k=2 + if ( ((m_solutions_tried[h0 >> 3] & (1 << (h0 & 7))) != 0) && + ((m_solutions_tried[h1 >> 3] & (1 << (h1 & 7))) != 0) ) + return false; + + m_solutions_tried[h0 >> 3] |= (1 << (h0 & 7)); + m_solutions_tried[h1 >> 3] |= (1 << (h1 & 7)); + + return true; + } + + static uint8_t g_eval_dist_tables[8][256] = + { + // 99% threshold + { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, + { 
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, + { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, + { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, + { 
1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,}, + { 1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,}, + { 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,}, + { 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,} + }; + + bool etc1_optimizer::evaluate_solution_slow(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (!check_for_redundant_solution(coords)) + return false; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval solution: %u %u %u\n", coords.m_unscaled_color.r, coords.m_unscaled_color.g, coords.m_unscaled_color.b); +#endif + + trial_solution.m_valid = false; + + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = (int)coords.m_unscaled_color.r - (int)m_pParams->m_base_color5.r; + const int dg = (int)coords.m_unscaled_color.g - (int)m_pParams->m_base_color5.g; + const int db = (int)coords.m_unscaled_color.b - (int)m_pParams->m_base_color5.b; + + if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval failed due to constraint from %u %u %u\n", m_pParams->m_base_color5.r, m_pParams->m_base_color5.g, m_pParams->m_base_color5.b); +#endif + return false; + } + } + + const color_rgba base_color(coords.get_scaled_color()); + + const uint32_t n = m_pParams->m_num_src_pixels; + assert(trial_solution.m_selectors.size() == n); + + trial_solution.m_error = INT64_MAX; + + const uint8_t *pSelectors_to_use = m_pParams->m_pForce_selectors; + + for (uint32_t inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) + { + if (m_pParams->m_quality <= cETCQualityMedium) + { + if 
(!g_eval_dist_tables[inten_table][m_max_comp_spread]) + continue; + } + +#if 0 + if (m_pParams->m_quality <= cETCQualityMedium) + { + // For tables 5-7, if the max component spread falls within certain ranges, skip the inten table. Statistically they are extremely unlikely to result in lower error. + if (inten_table == 7) + { + if (m_max_comp_spread < 42) + continue; + } + else if (inten_table == 6) + { + if ((m_max_comp_spread >= 12) && (m_max_comp_spread <= 31)) + continue; + } + else if (inten_table == 5) + { + if ((m_max_comp_spread >= 13) && (m_max_comp_spread <= 21)) + continue; + } + } +#endif + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + color_rgba block_colors[4]; + for (uint32_t s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 255); + } + + uint64_t total_error = 0; + + const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; + + if (!g_cpu_supports_sse41) + { + for (uint32_t c = 0; c < n; c++) + { + const color_rgba& src_pixel = *pSrc_pixels++; + + uint32_t best_selector_index = 0; + uint32_t best_error = 0; + + if (pSelectors_to_use) + { + best_selector_index = pSelectors_to_use[c]; + best_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[best_selector_index], false); + } + else + { + best_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[0], false); + + uint32_t trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[1], false); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 1; + } + + trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[2], false); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 2; + } + + trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[3], false); + if (trial_error < best_error) + { + best_error = trial_error; + 
best_selector_index = 3; + } + } + + m_temp_selectors[c] = static_cast(best_selector_index); + + total_error += best_error; + if (total_error >= trial_solution.m_error) + break; + } + } + else + { +#if BASISU_SUPPORT_SSE + if (pSelectors_to_use) + { + if (m_pParams->m_perceptual) + perceptual_distance_rgb_4_N_sse41((int64_t*)&total_error, pSelectors_to_use, block_colors, pSrc_pixels, n, trial_solution.m_error); + else + linear_distance_rgb_4_N_sse41((int64_t*)&total_error, pSelectors_to_use, block_colors, pSrc_pixels, n, trial_solution.m_error); + } + else + { + if (m_pParams->m_perceptual) + find_selectors_perceptual_rgb_4_N_sse41((int64_t*)&total_error, &m_temp_selectors[0], block_colors, pSrc_pixels, n, trial_solution.m_error); + else + find_selectors_linear_rgb_4_N_sse41((int64_t*)&total_error, &m_temp_selectors[0], block_colors, pSrc_pixels, n, trial_solution.m_error); + } +#endif + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + trial_solution.m_selectors.swap(m_temp_selectors); + trial_solution.m_valid = true; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); +#endif + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (!check_for_redundant_solution(coords)) + return false; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval solution fast: %u %u %u\n", 
coords.m_unscaled_color.r, coords.m_unscaled_color.g, coords.m_unscaled_color.b); +#endif + + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = (int)coords.m_unscaled_color.r - (int)m_pParams->m_base_color5.r; + const int dg = (int)coords.m_unscaled_color.g - (int)m_pParams->m_base_color5.g; + const int db = (int)coords.m_unscaled_color.b - (int)m_pParams->m_base_color5.b; + + if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { + trial_solution.m_valid = false; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval failed due to constraint from %u %u %u\n", m_pParams->m_base_color5.r, m_pParams->m_base_color5.g, m_pParams->m_base_color5.b); +#endif + return false; + } + } + + const color_rgba base_color(coords.get_scaled_color()); + + const uint32_t n = m_pParams->m_num_src_pixels; + assert(trial_solution.m_selectors.size() == n); + + trial_solution.m_error = UINT64_MAX; + + const bool perceptual = (m_pParams->m_quality == cETCQualityFast) ? false : m_pParams->m_perceptual; + + for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) + { + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + uint32_t block_inten[4]; + color_rgba block_colors[4]; + for (uint32_t s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + color_rgba block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 255); + block_colors[s] = block_color; + block_inten[s] = block_color.r + block_color.g + block_color.b; + } + + // evaluate_solution_fast() enforces/assumes a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors. + // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast. 
+ // 0 1 2 3 + // 01 12 23 + const uint32_t block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; + + uint64_t total_error = 0; + const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; + + if (perceptual) + { + if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) + { + if (block_inten[0] > m_pSorted_luma[n - 1]) + { + const uint32_t min_error = iabs((int)block_inten[0] - (int)m_pSorted_luma[n - 1]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 0, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(true, block_colors[0], pSrc_pixels[c], false); + } + else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) + { + if (m_pSorted_luma[0] > block_inten[3]) + { + const uint32_t min_error = iabs((int)m_pSorted_luma[0] - (int)block_inten[3]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 3, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(true, block_colors[3], pSrc_pixels[c], false); + } + else + { + if (!g_cpu_supports_sse41) + { + uint32_t cur_selector = 0, c; + for (c = 0; c < n; c++) + { + const uint32_t y = m_pSorted_luma[c]; + while ((y * 2) >= block_inten_midpoints[cur_selector]) + if (++cur_selector > 2) + goto done; + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); + total_error += color_distance(true, block_colors[cur_selector], pSrc_pixels[sorted_pixel_index], false); + } + done: + while (c < n) + { + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; + total_error += color_distance(true, block_colors[3], pSrc_pixels[sorted_pixel_index], false); + ++c; + } + } + else + { +#if BASISU_SUPPORT_SSE + uint32_t cur_selector = 0, c; + + for (c = 0; c < n; c++) + { + const uint32_t y = m_pSorted_luma[c]; + 
while ((y * 2) >= block_inten_midpoints[cur_selector]) + { + if (++cur_selector > 2) + goto done3; + } + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); + } + done3: + + while (c < n) + { + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; + ++c; + } + + int64_t block_error; + perceptual_distance_rgb_4_N_sse41(&block_error, &m_temp_selectors[0], block_colors, pSrc_pixels, n, INT64_MAX); + total_error += block_error; +#endif + } + } + } + else + { + if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) + { + if (block_inten[0] > m_pSorted_luma[n - 1]) + { + const uint32_t min_error = iabs((int)block_inten[0] - (int)m_pSorted_luma[n - 1]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 0, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(block_colors[0], pSrc_pixels[c], false); + } + else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) + { + if (m_pSorted_luma[0] > block_inten[3]) + { + const uint32_t min_error = iabs((int)m_pSorted_luma[0] - (int)block_inten[3]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 3, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(block_colors[3], pSrc_pixels[c], false); + } + else + { + uint32_t cur_selector = 0, c; + for (c = 0; c < n; c++) + { + const uint32_t y = m_pSorted_luma[c]; + while ((y * 2) >= block_inten_midpoints[cur_selector]) + if (++cur_selector > 2) + goto done2; + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); + total_error += color_distance(block_colors[cur_selector], pSrc_pixels[sorted_pixel_index], false); + } + done2: + while (c < n) + { + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; 
+ total_error += color_distance(block_colors[3], pSrc_pixels[sorted_pixel_index], false); + ++c; + } + } + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + trial_solution.m_selectors.swap(m_temp_selectors); + trial_solution.m_valid = true; + if (!total_error) + break; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); +#endif + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + uint64_t pack_eac_a8(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask) + { + results.m_selectors.resize(num_pixels); + results.m_selectors_temp.resize(num_pixels); + + uint32_t min_alpha = 255, max_alpha = 0; + for (uint32_t i = 0; i < num_pixels; i++) + { + const uint32_t a = pPixels[i]; + if (a < min_alpha) min_alpha = a; + if (a > max_alpha) max_alpha = a; + } + + if (min_alpha == max_alpha) + { + results.m_base = min_alpha; + results.m_table = 13; + results.m_multiplier = 1; + for (uint32_t i = 0; i < num_pixels; i++) + results.m_selectors[i] = 4; + return 0; + } + + const uint32_t alpha_range = max_alpha - min_alpha; + + uint64_t best_err = UINT64_MAX; + + for (uint32_t table = 0; table < 16; table++) + { + if ((table_mask & (1U << table)) == 0) + continue; + + const float range = (float)(g_etc2_eac_tables[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]); + const int center = (int)roundf(lerp((float)min_alpha, 
(float)max_alpha, (float)(0 - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)); + + const int base_min = clamp255(center - base_search_rad); + const int base_max = clamp255(center + base_search_rad); + + const int mul = (int)roundf(alpha_range / range); + const int mul_low = clamp(mul - mul_search_rad, 1, 15); + const int mul_high = clamp(mul + mul_search_rad, 1, 15); + + for (int base = base_min; base <= base_max; base++) + { + for (int multiplier = mul_low; multiplier <= mul_high; multiplier++) + { + uint64_t total_err = 0; + + for (uint32_t i = 0; i < num_pixels; i++) + { + const int a = pPixels[i]; + + uint32_t best_s_err = UINT32_MAX; + uint32_t best_s = 0; + for (uint32_t s = 0; s < 8; s++) + { + const int v = clamp255((int)multiplier * g_etc2_eac_tables[table][s] + (int)base); + + uint32_t err = iabs(a - v); + if (err < best_s_err) + { + best_s_err = err; + best_s = s; + } + } + + results.m_selectors_temp[i] = static_cast(best_s); + + total_err += best_s_err * best_s_err; + if (total_err >= best_err) + break; + } + + if (total_err < best_err) + { + best_err = total_err; + results.m_base = base; + results.m_multiplier = multiplier; + results.m_table = table; + results.m_selectors.swap(results.m_selectors_temp); + if (!best_err) + return best_err; + } + + } // table + + } // multiplier + + } // base + + return best_err; + } + + void pack_eac_a8(eac_a8_block* pBlock, const uint8_t* pPixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask) + { + pack_eac_a8_results results; + pack_eac_a8(results, pPixels, 16, base_search_rad, mul_search_rad, table_mask); + + pBlock->m_base = results.m_base; + pBlock->m_multiplier = results.m_multiplier; + pBlock->m_table = results.m_table; + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + pBlock->set_selector(x, y, results.m_selectors[x + y * 4]); + } + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_etc.h b/thirdparty/basisu/encoder/basisu_etc.h 
new file mode 100644 index 000000000..5c44bd481 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_etc.h @@ -0,0 +1,1181 @@ +// basis_etc.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "../transcoder/basisu.h" +#include "basisu_enc.h" + +namespace basisu +{ + enum etc_constants + { + cETC1BytesPerBlock = 8U, + + cETC1SelectorBits = 2U, + cETC1SelectorValues = 1U << cETC1SelectorBits, + cETC1SelectorMask = cETC1SelectorValues - 1U, + + cETC1BlockShift = 2U, + cETC1BlockSize = 1U << cETC1BlockShift, + + cETC1LSBSelectorIndicesBitOffset = 0, + cETC1MSBSelectorIndicesBitOffset = 16, + + cETC1FlipBitOffset = 32, + cETC1DiffBitOffset = 33, + + cETC1IntenModifierNumBits = 3, + cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, + cETC1RightIntenModifierTableBitOffset = 34, + cETC1LeftIntenModifierTableBitOffset = 37, + + // Base+Delta encoding (5 bit bases, 3 bit delta) + cETC1BaseColorCompNumBits = 5, + cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, + + cETC1DeltaColorCompNumBits = 3, + cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, + cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, + + cETC1BaseColor5RBitOffset = 59, + cETC1BaseColor5GBitOffset = 51, + cETC1BaseColor5BBitOffset = 43, + + cETC1DeltaColor3RBitOffset = 56, + cETC1DeltaColor3GBitOffset = 48, + cETC1DeltaColor3BBitOffset = 40, + + // Absolute (non-delta) encoding (two 4-bit per 
component bases) + cETC1AbsColorCompNumBits = 4, + cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, + + cETC1AbsColor4R1BitOffset = 60, + cETC1AbsColor4G1BitOffset = 52, + cETC1AbsColor4B1BitOffset = 44, + + cETC1AbsColor4R2BitOffset = 56, + cETC1AbsColor4G2BitOffset = 48, + cETC1AbsColor4B2BitOffset = 40, + + cETC1ColorDeltaMin = -4, + cETC1ColorDeltaMax = 3, + + // Delta3: + // 0 1 2 3 4 5 6 7 + // 000 001 010 011 100 101 110 111 + // 0 1 2 3 -4 -3 -2 -1 + }; + + extern const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues]; + extern const uint8_t g_etc1_to_selector_index[cETC1SelectorValues]; + extern const uint8_t g_selector_index_to_etc1[cETC1SelectorValues]; + + struct etc_coord2 + { + uint8_t m_x, m_y; + }; + extern const etc_coord2 g_etc1_pixel_coords[2][2][8]; // [flipped][subblock][subblock_pixel] + extern const uint32_t g_etc1_pixel_indices[2][2][8]; // [flipped][subblock][subblock_pixel] + + struct etc_block + { + // big endian uint64: + // bit ofs: 56 48 40 32 24 16 8 0 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + union + { + uint64_t m_uint64; + + uint8_t m_bytes[8]; + }; + + inline void clear() + { + assert(sizeof(*this) == 8); + clear_obj(*this); + } + + inline uint64_t get_all_bits() const + { + return read_be64(&m_uint64); + } + + inline uint32_t get_general_bits(uint32_t ofs, uint32_t num) const + { + assert((ofs + num) <= 64U); + assert(num && (num < 32U)); + return (uint32_t)(read_be64(&m_uint64) >> ofs) & ((1UL << num) - 1UL); + } + + inline void set_general_bits(uint32_t ofs, uint32_t num, uint32_t bits) + { + assert((ofs + num) <= 64U); + assert(num && (num < 32U)); + + uint64_t x = read_be64(&m_uint64); + uint64_t msk = ((1ULL << static_cast(num)) - 1ULL) << static_cast(ofs); + x &= ~msk; + x |= (static_cast(bits) << static_cast(ofs)); + write_be64(&m_uint64, x); + } + + inline uint32_t get_byte_bits(uint32_t ofs, uint32_t num) const + { + assert((ofs + num) <= 64U); + assert(num && (num <= 8U)); + assert((ofs 
>> 3) == ((ofs + num - 1) >> 3)); + const uint32_t byte_ofs = 7 - (ofs >> 3); + const uint32_t byte_bit_ofs = ofs & 7; + return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1); + } + + inline void set_byte_bits(uint32_t ofs, uint32_t num, uint32_t bits) + { + assert((ofs + num) <= 64U); + assert(num && (num < 32U)); + assert((ofs >> 3) == ((ofs + num - 1) >> 3)); + assert(bits < (1U << num)); + const uint32_t byte_ofs = 7 - (ofs >> 3); + const uint32_t byte_bit_ofs = ofs & 7; + const uint32_t mask = (1 << num) - 1; + m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs); + m_bytes[byte_ofs] |= (bits << byte_bit_ofs); + } + + // false = left/right subblocks + // true = upper/lower subblocks + inline bool get_flip_bit() const + { + return (m_bytes[3] & 1) != 0; + } + + inline void set_flip_bit(bool flip) + { + m_bytes[3] &= ~1; + m_bytes[3] |= static_cast(flip); + } + + inline bool get_diff_bit() const + { + return (m_bytes[3] & 2) != 0; + } + + inline void set_diff_bit(bool diff) + { + m_bytes[3] &= ~2; + m_bytes[3] |= (static_cast(diff) << 1); + } + + // Returns intensity modifier table (0-7) used by subblock subblock_id. + // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2) + inline uint32_t get_inten_table(uint32_t subblock_id) const + { + assert(subblock_id < 2); + const uint32_t ofs = subblock_id ? 2 : 5; + return (m_bytes[3] >> ofs) & 7; + } + + // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1) + inline void set_inten_table(uint32_t subblock_id, uint32_t t) + { + assert(subblock_id < 2); + assert(t < 8); + const uint32_t ofs = subblock_id ? 
2 : 5; + m_bytes[3] &= ~(7 << ofs); + m_bytes[3] |= (t << ofs); + } + + inline void set_inten_tables_etc1s(uint32_t t) + { + set_inten_table(0, t); + set_inten_table(1, t); + } + + inline bool is_etc1s() const + { + if (get_inten_table(0) != get_inten_table(1)) + return false; + + if (get_diff_bit()) + { + if (get_delta3_color() != 0) + return false; + } + else + { + if (get_base4_color(0) != get_base4_color(1)) + return false; + } + + return true; + } + + // Returned encoded selector value ranges from 0-3 (this is NOT a direct index into g_etc1_inten_tables, see get_selector()) + inline uint32_t get_raw_selector(uint32_t x, uint32_t y) const + { + assert((x | y) < 4); + + const uint32_t bit_index = x * 4 + y; + const uint32_t byte_bit_ofs = bit_index & 7; + const uint8_t *p = &m_bytes[7 - (bit_index >> 3)]; + const uint32_t lsb = (p[0] >> byte_bit_ofs) & 1; + const uint32_t msb = (p[-2] >> byte_bit_ofs) & 1; + const uint32_t val = lsb | (msb << 1); + + return val; + } + + // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. + inline uint32_t get_selector(uint32_t x, uint32_t y) const + { + return g_etc1_to_selector_index[get_raw_selector(x, y)]; + } + + // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables. + inline void set_selector(uint32_t x, uint32_t y, uint32_t val) + { + assert((x | y | val) < 4); + const uint32_t bit_index = x * 4 + y; + + uint8_t *p = &m_bytes[7 - (bit_index >> 3)]; + + const uint32_t byte_bit_ofs = bit_index & 7; + const uint32_t mask = 1 << byte_bit_ofs; + + const uint32_t etc1_val = g_selector_index_to_etc1[val]; + + const uint32_t lsb = etc1_val & 1; + const uint32_t msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + + // Selector "etc1_val" ranges from 0-3 and is a direct (raw) ETC1 selector. 
+ inline void set_raw_selector(uint32_t x, uint32_t y, uint32_t etc1_val) + { + assert((x | y | etc1_val) < 4); + const uint32_t bit_index = x * 4 + y; + + uint8_t* p = &m_bytes[7 - (bit_index >> 3)]; + + const uint32_t byte_bit_ofs = bit_index & 7; + const uint32_t mask = 1 << byte_bit_ofs; + + const uint32_t lsb = etc1_val & 1; + const uint32_t msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + + inline uint32_t get_raw_selector_bits() const + { + return m_bytes[4] | (m_bytes[5] << 8) | (m_bytes[6] << 16) | (m_bytes[7] << 24); + } + + inline void set_raw_selector_bits(uint32_t bits) + { + m_bytes[4] = static_cast(bits); + m_bytes[5] = static_cast(bits >> 8); + m_bytes[6] = static_cast(bits >> 16); + m_bytes[7] = static_cast(bits >> 24); + } + + inline void set_raw_selector_bits(uint8_t byte0, uint8_t byte1, uint8_t byte2, uint8_t byte3) + { + m_bytes[4] = byte0; + m_bytes[5] = byte1; + m_bytes[6] = byte2; + m_bytes[7] = byte3; + } + + inline void set_base4_color(uint32_t idx, uint16_t c) + { + if (idx) + { + set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15); + set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15); + } + else + { + set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15); + set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15); + } + } + + inline uint16_t get_base4_color(uint32_t idx) const + { + uint32_t r, g, b; + if (idx) + { + r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); + } + else + { + r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); + } + return static_cast(b | (g << 4U) | (r << 8U)); + } + + inline void 
set_base5_color(uint16_t c) + { + set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31); + set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31); + set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31); + } + + inline uint16_t get_base5_color() const + { + const uint32_t r = get_byte_bits(cETC1BaseColor5RBitOffset, 5); + const uint32_t g = get_byte_bits(cETC1BaseColor5GBitOffset, 5); + const uint32_t b = get_byte_bits(cETC1BaseColor5BBitOffset, 5); + return static_cast(b | (g << 5U) | (r << 10U)); + } + + void set_delta3_color(uint16_t c) + { + set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7); + set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7); + set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7); + } + + inline uint16_t get_delta3_color() const + { + const uint32_t r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); + const uint32_t g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); + const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); + return static_cast(b | (g << 3U) | (r << 6U)); + } + + uint64_t determine_selectors(const color_rgba* pSource_pixels, bool perceptual, uint32_t begin_subblock = 0, uint32_t end_subblock = 2) + { + uint64_t total_error = 0; + + for (uint32_t subblock = begin_subblock; subblock < end_subblock; subblock++) + { + color_rgba block_colors[4]; + get_block_colors(block_colors, subblock); + + if (get_flip_bit()) + { + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t best_selector = 0; + uint64_t best_error = UINT64_MAX; + + for (uint32_t s = 0; s < 4; s++) + { + uint64_t err = color_distance(perceptual, block_colors[s], pSource_pixels[x + (subblock * 2 + y) * 4], false); + if (err < best_error) + { + best_error = err; + best_selector = s; + } + } + + set_selector(x, subblock * 2 + y, best_selector); + + total_error += best_error; + } + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + uint32_t best_selector = 0; + 
uint64_t best_error = UINT64_MAX; + + for (uint32_t s = 0; s < 4; s++) + { + uint64_t err = color_distance(perceptual, block_colors[s], pSource_pixels[(subblock * 2) + x + y * 4], false); + if (err < best_error) + { + best_error = err; + best_selector = s; + } + } + + set_selector(subblock * 2 + x, y, best_selector); + + total_error += best_error; + } + } + } + } + + return total_error; + } + + color_rgba get_block_color(uint32_t subblock_index, bool scaled) const + { + color_rgba b; + + if (get_diff_bit()) + { + if (subblock_index) + unpack_color5(b, get_base5_color(), get_delta3_color(), scaled); + else + unpack_color5(b, get_base5_color(), scaled); + } + else + { + b = unpack_color4(get_base4_color(subblock_index), scaled); + } + + return b; + } + + uint32_t get_subblock_index(uint32_t x, uint32_t y) const + { + if (get_flip_bit()) + return y >= 2; + else + return x >= 2; + } + + bool get_block_colors(color_rgba* pBlock_colors, uint32_t subblock_index) const + { + color_rgba b; + + if (get_diff_bit()) + { + if (subblock_index) + unpack_color5(b, get_base5_color(), get_delta3_color(), true); + else + unpack_color5(b, get_base5_color(), true); + } + else + { + b = unpack_color4(get_base4_color(subblock_index), true); + } + + const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)]; + + bool dc = false; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0], dc), clamp255(b.g + pInten_table[0], dc), clamp255(b.b + pInten_table[0], dc), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[1], dc), clamp255(b.g + pInten_table[1], dc), clamp255(b.b + pInten_table[1], dc), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2], dc), clamp255(b.g + pInten_table[2], dc), clamp255(b.b + pInten_table[2], dc), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3], dc), clamp255(b.g + pInten_table[3], dc), clamp255(b.b + pInten_table[3], dc), 255); + + return dc; + } + + void get_block_colors_etc1s(color_rgba* pBlock_colors) const + { + 
color_rgba b; + + unpack_color5(b, get_base5_color(), true); + + const int* pInten_table = g_etc1_inten_tables[get_inten_table(0)]; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + + static void get_block_colors_etc1s(color_rgba* pBlock_colors, const color_rgba &base5_color, uint32_t inten_table) + { + color_rgba b; + b.r = (base5_color.r << 3U) | (base5_color.r >> 2U); + b.g = (base5_color.g << 3U) | (base5_color.g >> 2U); + b.b = (base5_color.b << 3U) | (base5_color.b >> 2U); + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + + void get_block_color(color_rgba& color, uint32_t subblock_index, uint32_t selector_index) const + { + color_rgba b; + + if (get_diff_bit()) + { + if (subblock_index) + unpack_color5(b, get_base5_color(), get_delta3_color(), true); + else + unpack_color5(b, get_base5_color(), true); + } + else + { + b = unpack_color4(get_base4_color(subblock_index), true); + } + + const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)]; + + 
color.set(clamp255(b.r + pInten_table[selector_index]), clamp255(b.g + pInten_table[selector_index]), clamp255(b.b + pInten_table[selector_index]), 255); + } + + bool get_block_low_high_colors(color_rgba* pBlock_colors, uint32_t subblock_index) const + { + color_rgba b; + + if (get_diff_bit()) + { + if (subblock_index) + unpack_color5(b, get_base5_color(), get_delta3_color(), true); + else + unpack_color5(b, get_base5_color(), true); + } + else + { + b = unpack_color4(get_base4_color(subblock_index), true); + } + + const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)]; + + bool dc = false; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0], dc), clamp255(b.g + pInten_table[0], dc), clamp255(b.b + pInten_table[0], dc), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[3], dc), clamp255(b.g + pInten_table[3], dc), clamp255(b.b + pInten_table[3], dc), 255); + + return dc; + } + + static void get_block_colors5(color_rgba *pBlock_colors, const color_rgba &base_color5, uint32_t inten_table, bool scaled = false) + { + color_rgba b(base_color5); + + if (!scaled) + { + b.r = (b.r << 3) | (b.r >> 2); + b.g = (b.g << 3) | (b.g >> 2); + b.b = (b.b << 3) | (b.b >> 2); + } + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + + static void get_block_colors4(color_rgba *pBlock_colors, const color_rgba &base_color4, uint32_t inten_table, bool scaled = false) + { + color_rgba b(base_color4); + + if (!scaled) + { + b.r = (b.r 
<< 4) | b.r; + b.g = (b.g << 4) | b.g; + b.b = (b.b << 4) | b.b; + } + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + + uint64_t evaluate_etc1_error(const color_rgba* pBlock_pixels, bool perceptual, int subblock_index = -1) const; + void get_subblock_pixels(color_rgba* pPixels, int subblock_index = -1) const; + + void get_selector_range(uint32_t& low, uint32_t& high) const + { + low = 3; + high = 0; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t s = get_selector(x, y); + low = minimum(low, s); + high = maximum(high, s); + } + } + } + + void set_block_color4(const color_rgba &c0_unscaled, const color_rgba &c1_unscaled) + { + set_diff_bit(false); + + set_base4_color(0, pack_color4(c0_unscaled, false)); + set_base4_color(1, pack_color4(c1_unscaled, false)); + } + + void set_block_color5(const color_rgba &c0_unscaled, const color_rgba &c1_unscaled) + { + set_diff_bit(true); + + set_base5_color(pack_color5(c0_unscaled, false)); + + int dr = c1_unscaled.r - c0_unscaled.r; + int dg = c1_unscaled.g - c0_unscaled.g; + int db = c1_unscaled.b - c0_unscaled.b; + + set_delta3_color(pack_delta3(dr, dg, db)); + } + + void set_block_color5_etc1s(const color_rgba &c_unscaled) + { + set_diff_bit(true); + + set_base5_color(pack_color5(c_unscaled, false)); + set_delta3_color(pack_delta3(0, 0, 0)); + } + + bool set_block_color5_check(const color_rgba &c0_unscaled, const color_rgba &c1_unscaled) + { + 
set_diff_bit(true); + + set_base5_color(pack_color5(c0_unscaled, false)); + + int dr = c1_unscaled.r - c0_unscaled.r; + int dg = c1_unscaled.g - c0_unscaled.g; + int db = c1_unscaled.b - c0_unscaled.b; + + if (((dr < cETC1ColorDeltaMin) || (dr > cETC1ColorDeltaMax)) || + ((dg < cETC1ColorDeltaMin) || (dg > cETC1ColorDeltaMax)) || + ((db < cETC1ColorDeltaMin) || (db > cETC1ColorDeltaMax))) + return false; + + set_delta3_color(pack_delta3(dr, dg, db)); + + return true; + } + + bool set_block_color5_clamp(const color_rgba &c0_unscaled, const color_rgba &c1_unscaled) + { + set_diff_bit(true); + set_base5_color(pack_color5(c0_unscaled, false)); + + int dr = c1_unscaled.r - c0_unscaled.r; + int dg = c1_unscaled.g - c0_unscaled.g; + int db = c1_unscaled.b - c0_unscaled.b; + + dr = clamp(dr, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + dg = clamp(dg, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + db = clamp(db, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + + set_delta3_color(pack_delta3(dr, dg, db)); + + return true; + } + color_rgba get_selector_color(uint32_t x, uint32_t y, uint32_t s) const + { + color_rgba block_colors[4]; + + get_block_colors(block_colors, get_subblock_index(x, y)); + + return block_colors[s]; + } + + // Base color 5 + static uint16_t pack_color5(const color_rgba& color, bool scaled, uint32_t bias = 127U); + static uint16_t pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U); + + static color_rgba unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha = 255U); + static void unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color, bool scaled); + static void unpack_color5(color_rgba& result, uint16_t packed_color5, bool scaled); + + static bool unpack_color5(color_rgba& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha = 255U); + static bool unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha 
= 255U); + + // Delta color 3 + // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) + static uint16_t pack_delta3(const color_rgba_i16& color); + static uint16_t pack_delta3(int r, int g, int b); + + // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) + static color_rgba_i16 unpack_delta3(uint16_t packed_delta3); + static void unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3); + + static bool try_pack_color5_delta3(const color_rgba *pColor5_unscaled) + { + int dr = pColor5_unscaled[1].r - pColor5_unscaled[0].r; + int dg = pColor5_unscaled[1].g - pColor5_unscaled[0].g; + int db = pColor5_unscaled[1].b - pColor5_unscaled[0].b; + + if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) + return false; + + return true; + } + + // Abs color 4 + static uint16_t pack_color4(const color_rgba& color, bool scaled, uint32_t bias = 127U); + static uint16_t pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U); + + static color_rgba unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha = 255U); + static void unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled); + + // subblock colors + static void get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint32_t table_idx); + static bool get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint16_t packed_delta3, uint32_t table_idx); + static void get_abs_subblock_colors(color_rgba* pDst, uint16_t packed_color4, uint32_t table_idx); + + static inline void unscaled_to_scaled_color(color_rgba& dst, const color_rgba& src, bool color4) + { + if (color4) + { + dst.r = src.r | (src.r << 4); + dst.g = src.g | (src.g << 4); + dst.b = src.b | (src.b << 4); + } + else + { + dst.r = (src.r >> 2) | (src.r << 3); + dst.g = (src.g >> 2) | (src.g << 3); + dst.b = (src.b >> 2) | (src.b << 3); + } + dst.a = src.a; + } + + private: + static uint8_t 
clamp255(int x, bool &did_clamp) + { + if (x < 0) + { + did_clamp = true; + return 0; + } + else if (x > 255) + { + did_clamp = true; + return 255; + } + + return static_cast(x); + } + + static uint8_t clamp255(int x) + { + if (x < 0) + return 0; + else if (x > 255) + return 255; + + return static_cast(x); + } + }; + + typedef basisu::vector etc_block_vec; + + // Returns false if the unpack fails (could be bogus data or ETC2) + bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha = false); + + enum basis_etc_quality + { + cETCQualityFast, + cETCQualityMedium, + cETCQualitySlow, + cETCQualityUber, + cETCQualityTotal, + }; + + struct basis_etc1_pack_params + { + basis_etc_quality m_quality; + bool m_perceptual; + bool m_cluster_fit; + bool m_force_etc1s; + bool m_use_color4; + float m_flip_bias; + + inline basis_etc1_pack_params() + { + clear(); + } + + void clear() + { + m_quality = cETCQualitySlow; + m_perceptual = true; + m_cluster_fit = true; + m_force_etc1s = false; + m_use_color4 = true; + m_flip_bias = 0.0f; + } + }; + + struct etc1_solution_coordinates + { + inline etc1_solution_coordinates() : + m_unscaled_color(0, 0, 0, 0), + m_inten_table(0), + m_color4(false) + { + } + + inline etc1_solution_coordinates(uint32_t r, uint32_t g, uint32_t b, uint32_t inten_table, bool color4) : + m_unscaled_color((uint8_t)r, (uint8_t)g, (uint8_t)b, 255), + m_inten_table((uint8_t)inten_table), + m_color4(color4) + { + } + + inline etc1_solution_coordinates(const color_rgba& c, uint32_t inten_table, bool color4) : + m_unscaled_color(c), + m_inten_table(inten_table), + m_color4(color4) + { + } + + inline etc1_solution_coordinates(const etc1_solution_coordinates& other) + { + *this = other; + } + + inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs) + { + m_unscaled_color = rhs.m_unscaled_color; + m_inten_table = rhs.m_inten_table; + m_color4 = rhs.m_color4; + return *this; + } + + inline void clear() + { + 
m_unscaled_color.clear(); + m_inten_table = 0; + m_color4 = false; + } + + inline void init(const color_rgba& c, uint32_t inten_table, bool color4) + { + m_unscaled_color = c; + m_inten_table = inten_table; + m_color4 = color4; + } + + inline color_rgba get_scaled_color() const + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + return color_rgba((uint8_t)br, (uint8_t)bg, (uint8_t)bb, 255); + } + + // returns true if anything was clamped + inline void get_block_colors(color_rgba* pBlock_colors) + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + const int* pInten_table = g_etc1_inten_tables[m_inten_table]; + pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0], 255); + pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1], 255); + pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2], 255); + pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3], 255); + } + + color_rgba m_unscaled_color; + uint32_t m_inten_table; + bool m_color4; + }; + + class etc1_optimizer + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(etc1_optimizer); + + public: + etc1_optimizer() + { + clear(); + } + + void clear() + { + m_pParams = nullptr; + m_pResult = 
nullptr; + m_pSorted_luma = nullptr; + m_pSorted_luma_indices = nullptr; + } + + struct params; + + typedef bool(*evaluate_solution_override_func)(uint64_t &error, const params &p, const color_rgba* pBlock_colors, const uint8_t* pSelectors, const etc1_solution_coordinates& coords); + + struct params : basis_etc1_pack_params + { + params() + { + clear(); + } + + params(const basis_etc1_pack_params& base_params) + { + clear_optimizer_params(); + + *static_cast(this) = base_params; + } + + void clear() + { + clear_optimizer_params(); + } + + void clear_optimizer_params() + { + basis_etc1_pack_params::clear(); + + m_num_src_pixels = 0; + m_pSrc_pixels = 0; + + m_use_color4 = false; + static const int s_default_scan_delta[] = { 0 }; + m_pScan_deltas = s_default_scan_delta; + m_scan_delta_size = 1; + + m_base_color5.clear(); + m_constrain_against_base_color5 = false; + + m_refinement = true; + + m_pForce_selectors = nullptr; + } + + uint32_t m_num_src_pixels; + const color_rgba* m_pSrc_pixels; + + bool m_use_color4; + const int* m_pScan_deltas; + uint32_t m_scan_delta_size; + + color_rgba m_base_color5; + bool m_constrain_against_base_color5; + + bool m_refinement; + + const uint8_t* m_pForce_selectors; + }; + + struct results + { + uint64_t m_error; + color_rgba m_block_color_unscaled; + uint32_t m_block_inten_table; + uint32_t m_n; + uint8_t* m_pSelectors; + bool m_block_color4; + + inline results& operator= (const results& rhs) + { + m_block_color_unscaled = rhs.m_block_color_unscaled; + m_block_color4 = rhs.m_block_color4; + m_block_inten_table = rhs.m_block_inten_table; + m_error = rhs.m_error; + memcpy(m_pSelectors, rhs.m_pSelectors, minimum(rhs.m_n, m_n)); + return *this; + } + }; + + void init(const params& params, results& result); + bool compute(); + + const params* get_params() const { return m_pParams; } + + private: + struct potential_solution + { + potential_solution() : m_coords(), m_error(UINT64_MAX), m_valid(false) + { + } + + etc1_solution_coordinates 
m_coords; + basisu::vector m_selectors; + uint64_t m_error; + bool m_valid; + + void clear() + { + m_coords.clear(); + m_selectors.resize(0); + m_error = UINT64_MAX; + m_valid = false; + } + + bool are_selectors_all_equal() const + { + if (!m_selectors.size()) + return false; + const uint32_t s = m_selectors[0]; + for (uint32_t i = 1; i < m_selectors.size(); i++) + if (m_selectors[i] != s) + return false; + return true; + } + }; + + const params* m_pParams; + results* m_pResult; + + int m_limit; + + vec3F m_avg_color; + int m_br, m_bg, m_bb; + int m_max_comp_spread; + basisu::vector m_luma; + basisu::vector m_sorted_luma; + basisu::vector m_sorted_luma_indices; + const uint32_t* m_pSorted_luma_indices; + uint32_t* m_pSorted_luma; + + basisu::vector m_selectors; + basisu::vector m_best_selectors; + + potential_solution m_best_solution; + potential_solution m_trial_solution; + basisu::vector m_temp_selectors; + + enum { cSolutionsTriedHashBits = 10, cTotalSolutionsTriedHashSize = 1 << cSolutionsTriedHashBits, cSolutionsTriedHashMask = cTotalSolutionsTriedHashSize - 1 }; + uint8_t m_solutions_tried[cTotalSolutionsTriedHashSize / 8]; + + void get_nearby_inten_tables(uint32_t idx, int &first_inten_table, int &last_inten_table) + { + first_inten_table = maximum(idx - 1, 0); + last_inten_table = minimum(cETC1IntenModifierValues, idx + 1); + } + + bool check_for_redundant_solution(const etc1_solution_coordinates& coords); + bool evaluate_solution_slow(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); + bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); + + inline bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (m_pParams->m_quality >= cETCQualityMedium) + return evaluate_solution_slow(coords, trial_solution, pBest_solution); 
+ else + return evaluate_solution_fast(coords, trial_solution, pBest_solution); + } + + void refine_solution(uint32_t max_refinement_trials); + void compute_internal_neighborhood(int scan_r, int scan_g, int scan_b); + void compute_internal_cluster_fit(uint32_t total_perms_to_try); + }; + + struct pack_etc1_block_context + { + etc1_optimizer m_optimizer; + }; + + void pack_etc1_solid_color_init(); + uint64_t pack_etc1_block_solid_color(etc_block& block, const uint8_t* pColor); + + // ETC EAC + extern const int8_t g_etc2_eac_tables[16][8]; + extern const int8_t g_etc2_eac_tables8[16][8]; + + const uint32_t ETC2_EAC_MIN_VALUE_SELECTOR = 3, ETC2_EAC_MAX_VALUE_SELECTOR = 7; + + struct eac_a8_block + { + uint16_t m_base : 8; + uint16_t m_table : 4; + uint16_t m_multiplier : 4; + + uint8_t m_selectors[6]; + + inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const + { + assert((x < 4) && (y < 4)); + return static_cast((selector_bits >> (45 - (y + x * 4) * 3)) & 7); + } + + inline uint64_t get_selector_bits() const + { + uint64_t pixels = ((uint64_t)m_selectors[0] << 40) | ((uint64_t)m_selectors[1] << 32) | ((uint64_t)m_selectors[2] << 24) | ((uint64_t)m_selectors[3] << 16) | ((uint64_t)m_selectors[4] << 8) | m_selectors[5]; + return pixels; + } + + inline void set_selector_bits(uint64_t pixels) + { + m_selectors[0] = (uint8_t)(pixels >> 40); + m_selectors[1] = (uint8_t)(pixels >> 32); + m_selectors[2] = (uint8_t)(pixels >> 24); + m_selectors[3] = (uint8_t)(pixels >> 16); + m_selectors[4] = (uint8_t)(pixels >> 8); + m_selectors[5] = (uint8_t)(pixels); + } + + void set_selector(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < 4) && (y < 4) && (s < 8)); + + const uint32_t ofs = 45 - (y + x * 4) * 3; + + uint64_t pixels = get_selector_bits(); + + pixels &= ~(7ULL << ofs); + pixels |= (static_cast(s) << ofs); + + set_selector_bits(pixels); + } + }; + + struct etc2_rgba_block + { + eac_a8_block m_alpha; + etc_block m_rgb; + }; + + struct 
pack_eac_a8_results + { + uint32_t m_base; + uint32_t m_table; + uint32_t m_multiplier; + uint8_vec m_selectors; + uint8_vec m_selectors_temp; + }; + + uint64_t pack_eac_a8(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask = UINT32_MAX); + void pack_eac_a8(eac_a8_block* pBlock, const uint8_t* pPixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask = UINT32_MAX); + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_frontend.cpp b/thirdparty/basisu/encoder/basisu_frontend.cpp new file mode 100644 index 000000000..99ac2aa9d --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_frontend.cpp @@ -0,0 +1,3385 @@ +// basisu_frontend.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// TODO: +// This code originally supported full ETC1 and ETC1S, so there's some legacy stuff to be cleaned up in here. +// Add endpoint tiling support (where we force adjacent blocks to use the same endpoints during quantization), for a ~10% or more increase in bitrate at same SSIM. The backend already supports this. 
+// +#include "../transcoder/basisu.h" +#include "basisu_frontend.h" +#include "basisu_opencl.h" +#include +#include + +#if BASISU_SUPPORT_SSE +#define CPPSPMD_NAME(a) a##_sse41 +#include "basisu_kernels_declares.h" +#endif + +#define BASISU_FRONTEND_VERIFY(c) do { if (!(c)) handle_verify_failure(__LINE__); } while(0) + +namespace basisu +{ + const uint32_t cMaxCodebookCreationThreads = 8; + + const uint32_t BASISU_MAX_ENDPOINT_REFINEMENT_STEPS = 3; + //const uint32_t BASISU_MAX_SELECTOR_REFINEMENT_STEPS = 3; + + const uint32_t BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE = 16; + const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 = 32; + const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT = 16; + + // TODO - How to handle internal verifies in the basisu lib + static inline void handle_verify_failure(int line) + { + error_printf("basisu_frontend: verify check failed at line %i!\n", line); + abort(); + } + + bool basisu_frontend::init(const params &p) + { + debug_printf("basisu_frontend::init: Multithreaded: %u, Job pool total threads: %u, NumEndpointClusters: %u, NumSelectorClusters: %u, Perceptual: %u, CompressionLevel: %u\n", + p.m_multithreaded, p.m_pJob_pool ? p.m_pJob_pool->get_total_threads() : 0, + p.m_max_endpoint_clusters, p.m_max_selector_clusters, p.m_perceptual, p.m_compression_level); + + if ((p.m_max_endpoint_clusters < 1) || (p.m_max_endpoint_clusters > cMaxEndpointClusters)) + return false; + if ((p.m_max_selector_clusters < 1) || (p.m_max_selector_clusters > cMaxSelectorClusters)) + return false; + + m_source_blocks.resize(0); + append_vector(m_source_blocks, p.m_pSource_blocks, p.m_num_source_blocks); + + m_params = p; + + if (m_params.m_pOpenCL_context) + { + BASISU_ASSUME(sizeof(cl_pixel_block) == sizeof(pixel_block)); + + // Upload the RGBA pixel blocks a single time. 
+ if (!opencl_set_pixel_blocks(m_params.m_pOpenCL_context, m_source_blocks.size(), (cl_pixel_block*)m_source_blocks.data())) + { + // This is not fatal, we just won't use OpenCL. + error_printf("basisu_frontend::init: opencl_set_pixel_blocks() failed\n"); + m_params.m_pOpenCL_context = nullptr; + m_opencl_failed = true; + } + } + + m_encoded_blocks.resize(m_params.m_num_source_blocks); + memset(&m_encoded_blocks[0], 0, m_encoded_blocks.size() * sizeof(m_encoded_blocks[0])); + + m_num_endpoint_codebook_iterations = 1; + m_num_selector_codebook_iterations = 1; + + switch (p.m_compression_level) + { + case 0: + { + m_endpoint_refinement = false; + m_use_hierarchical_endpoint_codebooks = true; + m_use_hierarchical_selector_codebooks = true; + break; + } + case 1: + { + m_endpoint_refinement = true; + m_use_hierarchical_endpoint_codebooks = true; + m_use_hierarchical_selector_codebooks = true; + + break; + } + case 2: + { + m_endpoint_refinement = true; + m_use_hierarchical_endpoint_codebooks = true; + m_use_hierarchical_selector_codebooks = true; + + break; + } + case 3: + { + m_endpoint_refinement = true; + m_use_hierarchical_endpoint_codebooks = false; + m_use_hierarchical_selector_codebooks = false; + break; + } + case 4: + { + m_endpoint_refinement = true; + m_use_hierarchical_endpoint_codebooks = true; + m_use_hierarchical_selector_codebooks = true; + m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS; + m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS; + break; + } + case 5: + { + m_endpoint_refinement = true; + m_use_hierarchical_endpoint_codebooks = false; + m_use_hierarchical_selector_codebooks = false; + m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS; + m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS; + break; + } + case 6: + default: + { + m_endpoint_refinement = true; + m_use_hierarchical_endpoint_codebooks = false; + m_use_hierarchical_selector_codebooks = 
false; + m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS*2; + m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS*2; + break; + } + + } + + if (m_params.m_disable_hierarchical_endpoint_codebooks) + m_use_hierarchical_endpoint_codebooks = false; + + debug_printf("Endpoint refinement: %u, Hierarchical endpoint codebooks: %u, Hierarchical selector codebooks: %u, Endpoint codebook iters: %u, Selector codebook iters: %u\n", + m_endpoint_refinement, m_use_hierarchical_endpoint_codebooks, m_use_hierarchical_selector_codebooks, m_num_endpoint_codebook_iterations, m_num_selector_codebook_iterations); + + return true; + } + + bool basisu_frontend::compress() + { + debug_printf("basisu_frontend::compress\n"); + + m_total_blocks = m_params.m_num_source_blocks; + m_total_pixels = m_total_blocks * cPixelBlockTotalPixels; + + // Encode the initial high quality ETC1S texture + + init_etc1_images(); + + // First quantize the ETC1S endpoints + + if (m_params.m_pGlobal_codebooks) + { + init_global_codebooks(); + } + else + { + init_endpoint_training_vectors(); + + generate_endpoint_clusters(); + + for (uint32_t refine_endpoint_step = 0; refine_endpoint_step < m_num_endpoint_codebook_iterations; refine_endpoint_step++) + { + if (m_params.m_validate) + { + BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); + + BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false)); + } + + if (refine_endpoint_step) + { + introduce_new_endpoint_clusters(); + } + + if (m_params.m_validate) + { + BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false)); + } + + generate_endpoint_codebook(refine_endpoint_step); + + if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization)) + { + char buf[256]; + snprintf(buf, sizeof(buf), "endpoint_cluster_vis_pre_%u.png", refine_endpoint_step); + dump_endpoint_clusterization_visualization(buf, false); + } + + bool early_out = false; + + if (m_endpoint_refinement) + { + 
//dump_endpoint_clusterization_visualization("endpoint_clusters_before_refinement.png"); + + if (!refine_endpoint_clusterization()) + early_out = true; + + if ((m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) && (!refine_endpoint_step) && (m_num_endpoint_codebook_iterations == 1)) + { + eliminate_redundant_or_empty_endpoint_clusters(); + generate_endpoint_codebook(basisu::maximum(1U, refine_endpoint_step)); + } + + if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization)) + { + char buf[256]; + snprintf(buf, sizeof(buf), "endpoint_cluster_vis_post_%u.png", refine_endpoint_step); + + dump_endpoint_clusterization_visualization(buf, false); + snprintf(buf, sizeof(buf), "endpoint_cluster_colors_vis_post_%u.png", refine_endpoint_step); + + dump_endpoint_clusterization_visualization(buf, true); + } + } + + if (m_params.m_validate) + { + BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false)); + } + + eliminate_redundant_or_empty_endpoint_clusters(); + + if (m_params.m_validate) + { + BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false)); + } + + if (m_params.m_debug_stats) + debug_printf("Total endpoint clusters: %u\n", (uint32_t)m_endpoint_clusters.size()); + + if (early_out) + break; + } + + if (m_params.m_validate) + { + BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); + } + + generate_block_endpoint_clusters(); + + create_initial_packed_texture(); + + // Now quantize the ETC1S selectors + + generate_selector_clusters(); + + if (m_use_hierarchical_selector_codebooks) + compute_selector_clusters_within_each_parent_cluster(); + + if (m_params.m_compression_level == 0) + { + create_optimized_selector_codebook(0); + + find_optimal_selector_clusters_for_each_block(); + + introduce_special_selector_clusters(); + } + else + { + const uint32_t num_refine_selector_steps = m_num_selector_codebook_iterations; + for (uint32_t refine_selector_steps = 0; refine_selector_steps < num_refine_selector_steps; refine_selector_steps++) 
+ { + create_optimized_selector_codebook(refine_selector_steps); + + find_optimal_selector_clusters_for_each_block(); + + introduce_special_selector_clusters(); + + if ((m_params.m_compression_level >= 4) || (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)) + { + if (!refine_block_endpoints_given_selectors()) + break; + } + } + } + + optimize_selector_codebook(); + + if (m_params.m_debug_stats) + debug_printf("Total selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size()); + } + + finalize(); + + if (m_params.m_validate) + { + if (!validate_output()) + return false; + } + + debug_printf("basisu_frontend::compress: Done\n"); + + return true; + } + + bool basisu_frontend::init_global_codebooks() + { + const basist::basisu_lowlevel_etc1s_transcoder* pTranscoder = m_params.m_pGlobal_codebooks; + + const basist::basisu_lowlevel_etc1s_transcoder::endpoint_vec& endpoints = pTranscoder->get_endpoints(); + const basist::basisu_lowlevel_etc1s_transcoder::selector_vec& selectors = pTranscoder->get_selectors(); + + m_endpoint_cluster_etc_params.resize(endpoints.size()); + for (uint32_t i = 0; i < endpoints.size(); i++) + { + m_endpoint_cluster_etc_params[i].m_inten_table[0] = endpoints[i].m_inten5; + m_endpoint_cluster_etc_params[i].m_inten_table[1] = endpoints[i].m_inten5; + + m_endpoint_cluster_etc_params[i].m_color_unscaled[0].set(endpoints[i].m_color5.r, endpoints[i].m_color5.g, endpoints[i].m_color5.b, 255); + m_endpoint_cluster_etc_params[i].m_color_used[0] = true; + m_endpoint_cluster_etc_params[i].m_valid = true; + } + + m_optimized_cluster_selectors.resize(selectors.size()); + for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++) + { + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + m_optimized_cluster_selectors[i].set_selector(x, y, selectors[i].get_selector(x, y)); + } + + m_block_endpoint_clusters_indices.resize(m_total_blocks); + + m_orig_encoded_blocks.resize(m_total_blocks); + + 
m_block_selector_cluster_index.resize(m_total_blocks); + +#if 0 + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(m_total_blocks, first_index + N); + + m_params.m_pJob_pool->add_job([this, first_index, last_index] { + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const etc_block& blk = m_etc1_blocks_etc1s[block_index]; + + const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0]; + + etc_block trial_blk; + trial_blk.set_block_color5_etc1s(blk.m_color_unscaled[0]); + trial_blk.set_flip_bit(true); + + uint64_t best_err = UINT64_MAX; + uint32_t best_index = 0; + + for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++) + { + trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits()); + + const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual); + if (cur_err < best_err) + { + best_err = cur_err; + best_index = i; + if (!cur_err) + break; + } + + } // block_index + + m_block_selector_cluster_index[block_index] = best_index; + } + + }); + + } + + m_params.m_pJob_pool->wait_for_all(); + + m_encoded_blocks.resize(m_total_blocks); + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t endpoint_index = m_block_endpoint_clusters_indices[block_index][0]; + const uint32_t selector_index = m_block_selector_cluster_index[block_index]; + + etc_block& blk = m_encoded_blocks[block_index]; + + blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]); + blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]); + blk.set_flip_bit(true); + blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits()); + } +#endif + + // 
HACK HACK + const uint32_t NUM_PASSES = 3; + for (uint32_t pass = 0; pass < NUM_PASSES; pass++) + { + debug_printf("init_global_codebooks: pass %u\n", pass); + + const uint32_t N = 128; + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(m_total_blocks, first_index + N); + + m_params.m_pJob_pool->add_job([this, first_index, last_index, pass] { + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const etc_block& blk = pass ? m_encoded_blocks[block_index] : m_etc1_blocks_etc1s[block_index]; + const uint32_t blk_raw_selector_bits = blk.get_raw_selector_bits(); + + etc_block trial_blk(blk); + trial_blk.set_raw_selector_bits(blk_raw_selector_bits); + trial_blk.set_flip_bit(true); + + uint64_t best_err = UINT64_MAX; + uint32_t best_index = 0; + etc_block best_block(trial_blk); + + for (uint32_t i = 0; i < m_endpoint_cluster_etc_params.size(); i++) + { + if (m_endpoint_cluster_etc_params[i].m_inten_table[0] > blk.get_inten_table(0)) + continue; + + trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[i].m_color_unscaled[0]); + trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[i].m_inten_table[0]); + + const color_rgba* pSource_pixels = get_source_pixel_block(block_index).get_ptr(); + uint64_t cur_err; + if (!pass) + cur_err = trial_blk.determine_selectors(pSource_pixels, m_params.m_perceptual); + else + cur_err = trial_blk.evaluate_etc1_error(pSource_pixels, m_params.m_perceptual); + + if (cur_err < best_err) + { + best_err = cur_err; + best_index = i; + best_block = trial_blk; + + if (!cur_err) + break; + } + } + + m_block_endpoint_clusters_indices[block_index][0] = best_index; + m_block_endpoint_clusters_indices[block_index][1] = best_index; + + m_orig_encoded_blocks[block_index] = best_block; + + } // block_index + + }); + + } + + m_params.m_pJob_pool->wait_for_all(); + + 
m_endpoint_clusters.resize(0); + m_endpoint_clusters.resize(endpoints.size()); + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t endpoint_cluster_index = m_block_endpoint_clusters_indices[block_index][0]; + m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2); + m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2 + 1); + } + + m_block_selector_cluster_index.resize(m_total_blocks); + + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(m_total_blocks, first_index + N); + + m_params.m_pJob_pool->add_job([this, first_index, last_index] { + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0]; + + etc_block trial_blk; + trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_color_unscaled[0]); + trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_inten_table[0]); + trial_blk.set_flip_bit(true); + + uint64_t best_err = UINT64_MAX; + uint32_t best_index = 0; + + for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++) + { + trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits()); + + const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual); + if (cur_err < best_err) + { + best_err = cur_err; + best_index = i; + if (!cur_err) + break; + } + + } // block_index + + m_block_selector_cluster_index[block_index] = best_index; + } + + }); + + } + + m_params.m_pJob_pool->wait_for_all(); + + m_encoded_blocks.resize(m_total_blocks); + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t endpoint_index = 
m_block_endpoint_clusters_indices[block_index][0]; + const uint32_t selector_index = m_block_selector_cluster_index[block_index]; + + etc_block& blk = m_encoded_blocks[block_index]; + + blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]); + blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]); + blk.set_flip_bit(true); + blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits()); + } + + } // pass + + m_selector_cluster_block_indices.resize(selectors.size()); + for (uint32_t block_index = 0; block_index < m_etc1_blocks_etc1s.size(); block_index++) + m_selector_cluster_block_indices[m_block_selector_cluster_index[block_index]].push_back(block_index); + + return true; + } + + void basisu_frontend::introduce_special_selector_clusters() + { + debug_printf("introduce_special_selector_clusters\n"); + + uint32_t total_blocks_relocated = 0; + const uint32_t initial_selector_clusters = m_selector_cluster_block_indices.size_u32(); + + bool_vec block_relocated_flags(m_total_blocks); + + // Make sure the selector codebook always has pure flat blocks for each possible selector, to avoid obvious artifacts. + // optimize_selector_codebook() will clean up any redundant clusters we create here. 
+ for (uint32_t sel = 0; sel < 4; sel++) + { + etc_block blk; + clear_obj(blk); + for (uint32_t j = 0; j < 16; j++) + blk.set_selector(j & 3, j >> 2, sel); + + int k; + for (k = 0; k < (int)m_optimized_cluster_selectors.size(); k++) + if (m_optimized_cluster_selectors[k].get_raw_selector_bits() == blk.get_raw_selector_bits()) + break; + if (k < (int)m_optimized_cluster_selectors.size()) + continue; + + debug_printf("Introducing sel %u\n", sel); + + const uint32_t new_selector_cluster_index = m_optimized_cluster_selectors.size_u32(); + + m_optimized_cluster_selectors.push_back(blk); + + vector_ensure_element_is_valid(m_selector_cluster_block_indices, new_selector_cluster_index); + + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + if (m_orig_encoded_blocks[block_index].get_raw_selector_bits() != blk.get_raw_selector_bits()) + continue; + + // See if using flat selectors actually decreases the block's error. + const uint32_t old_selector_cluster_index = m_block_selector_cluster_index[block_index]; + + etc_block cur_blk; + const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, 0); + cur_blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(endpoint_cluster_index, false)); + cur_blk.set_inten_tables_etc1s(get_endpoint_cluster_inten_table(endpoint_cluster_index, false)); + cur_blk.set_raw_selector_bits(get_selector_cluster_selector_bits(old_selector_cluster_index).get_raw_selector_bits()); + cur_blk.set_flip_bit(true); + + const uint64_t cur_err = cur_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual); + + cur_blk.set_raw_selector_bits(blk.get_raw_selector_bits()); + + const uint64_t new_err = cur_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual); + + if (new_err >= cur_err) + continue; + + // Change the block to use the new cluster + m_block_selector_cluster_index[block_index] = new_selector_cluster_index; + 
+ m_selector_cluster_block_indices[new_selector_cluster_index].push_back(block_index); + + block_relocated_flags[block_index] = true; + +#if 0 + int j = vector_find(m_selector_cluster_block_indices[old_selector_cluster_index], block_index); + if (j >= 0) + m_selector_cluster_block_indices[old_selector_cluster_index].erase(m_selector_cluster_block_indices[old_selector_cluster_index].begin() + j); +#endif + + total_blocks_relocated++; + + m_encoded_blocks[block_index].set_raw_selector_bits(blk.get_raw_selector_bits()); + + } // block_index + + } // sel + + if (total_blocks_relocated) + { + debug_printf("Fixing selector codebook\n"); + + for (int selector_cluster_index = 0; selector_cluster_index < (int)initial_selector_clusters; selector_cluster_index++) + { + uint_vec& block_indices = m_selector_cluster_block_indices[selector_cluster_index]; + + uint32_t dst_ofs = 0; + + for (uint32_t i = 0; i < block_indices.size(); i++) + { + const uint32_t block_index = block_indices[i]; + if (!block_relocated_flags[block_index]) + block_indices[dst_ofs++] = block_index; + } + + block_indices.resize(dst_ofs); + } + } + + debug_printf("Total blocks relocated to new flat selector clusters: %u\n", total_blocks_relocated); + } + + // This method will change the number and ordering of the selector codebook clusters. 
+ void basisu_frontend::optimize_selector_codebook() + { + debug_printf("optimize_selector_codebook\n"); + + const uint32_t orig_total_selector_clusters = m_optimized_cluster_selectors.size_u32(); + + bool_vec selector_cluster_was_used(m_optimized_cluster_selectors.size()); + for (uint32_t i = 0; i < m_total_blocks; i++) + selector_cluster_was_used[m_block_selector_cluster_index[i]] = true; + + int_vec old_to_new(m_optimized_cluster_selectors.size()); + int_vec new_to_old; + uint32_t total_new_entries = 0; + + std::unordered_map selector_hashmap; + + for (int i = 0; i < static_cast(m_optimized_cluster_selectors.size()); i++) + { + if (!selector_cluster_was_used[i]) + { + old_to_new[i] = -1; + continue; + } + + const uint32_t raw_selector_bits = m_optimized_cluster_selectors[i].get_raw_selector_bits(); + + auto find_res = selector_hashmap.insert(std::make_pair(raw_selector_bits, total_new_entries)); + if (!find_res.second) + { + old_to_new[i] = (find_res.first)->second; + continue; + } + + old_to_new[i] = total_new_entries++; + new_to_old.push_back(i); + } + + debug_printf("Original selector clusters: %u, new cluster selectors: %u\n", orig_total_selector_clusters, total_new_entries); + + for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++) + { + BASISU_FRONTEND_VERIFY((old_to_new[m_block_selector_cluster_index[i]] >= 0) && (old_to_new[m_block_selector_cluster_index[i]] < (int)total_new_entries)); + m_block_selector_cluster_index[i] = old_to_new[m_block_selector_cluster_index[i]]; + } + + basisu::vector new_optimized_cluster_selectors(m_optimized_cluster_selectors.size() ? total_new_entries : 0); + basisu::vector new_selector_cluster_indices(m_selector_cluster_block_indices.size() ? 
total_new_entries : 0); + + for (uint32_t i = 0; i < total_new_entries; i++) + { + if (m_optimized_cluster_selectors.size()) + new_optimized_cluster_selectors[i] = m_optimized_cluster_selectors[new_to_old[i]]; + + //if (m_selector_cluster_block_indices.size()) + // new_selector_cluster_indices[i] = m_selector_cluster_block_indices[new_to_old[i]]; + } + + for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++) + { + new_selector_cluster_indices[m_block_selector_cluster_index[i]].push_back(i); + } + + m_optimized_cluster_selectors.swap(new_optimized_cluster_selectors); + m_selector_cluster_block_indices.swap(new_selector_cluster_indices); + + // This isn't strictly necessary - doing it for completeness/future sanity. + if (m_selector_clusters_within_each_parent_cluster.size()) + { + for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++) + for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++) + m_selector_clusters_within_each_parent_cluster[i][j] = old_to_new[m_selector_clusters_within_each_parent_cluster[i][j]]; + } + + debug_printf("optimize_selector_codebook: Before: %u After: %u\n", orig_total_selector_clusters, total_new_entries); + } + + void basisu_frontend::init_etc1_images() + { + debug_printf("basisu_frontend::init_etc1_images\n"); + + interval_timer tm; + tm.start(); + + m_etc1_blocks_etc1s.resize(m_total_blocks); + + bool use_cpu = true; + + if (m_params.m_pOpenCL_context) + { + uint32_t total_perms = 64; + if (m_params.m_compression_level == 0) + total_perms = 4; + else if (m_params.m_compression_level == 1) + total_perms = 16; + else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) + total_perms = OPENCL_ENCODE_ETC1S_MAX_PERMS; + + bool status = opencl_encode_etc1s_blocks(m_params.m_pOpenCL_context, m_etc1_blocks_etc1s.data(), m_params.m_perceptual, total_perms); + if (status) + use_cpu = false; + else + { + error_printf("basisu_frontend::init_etc1_images: 
opencl_encode_etc1s_blocks() failed! Using CPU.\n"); + m_params.m_pOpenCL_context = nullptr; + m_opencl_failed = true; + } + } + + if (use_cpu) + { + const uint32_t N = 4096; + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(m_total_blocks, first_index + N); + + m_params.m_pJob_pool->add_job([this, first_index, last_index] { + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const pixel_block& source_blk = get_source_pixel_block(block_index); + + etc1_optimizer optimizer; + etc1_optimizer::params optimizer_params; + etc1_optimizer::results optimizer_results; + + if (m_params.m_compression_level == 0) + optimizer_params.m_quality = cETCQualityFast; + else if (m_params.m_compression_level == 1) + optimizer_params.m_quality = cETCQualityMedium; + else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) + optimizer_params.m_quality = cETCQualityUber; + + optimizer_params.m_num_src_pixels = 16; + optimizer_params.m_pSrc_pixels = source_blk.get_ptr(); + optimizer_params.m_perceptual = m_params.m_perceptual; + + uint8_t selectors[16]; + optimizer_results.m_pSelectors = selectors; + optimizer_results.m_n = 16; + + optimizer.init(optimizer_params, optimizer_results); + if (!optimizer.compute()) + BASISU_FRONTEND_VERIFY(false); + + etc_block& blk = m_etc1_blocks_etc1s[block_index]; + + memset(&blk, 0, sizeof(blk)); + blk.set_block_color5_etc1s(optimizer_results.m_block_color_unscaled); + blk.set_inten_tables_etc1s(optimizer_results.m_block_inten_table); + blk.set_flip_bit(true); + + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + blk.set_selector(x, y, selectors[x + y * 4]); + } + + }); + + } + + m_params.m_pJob_pool->wait_for_all(); + + } // use_cpu + + debug_printf("init_etc1_images: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); + } + + void 
basisu_frontend::init_endpoint_training_vectors() + { + debug_printf("init_endpoint_training_vectors\n"); + + vec6F_quantizer::array_of_weighted_training_vecs &training_vecs = m_endpoint_clusterizer.get_training_vecs(); + + training_vecs.resize(m_total_blocks * 2); + + const uint32_t N = 16384; + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(m_total_blocks, first_index + N); + + m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] { + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const etc_block &blk = m_etc1_blocks_etc1s[block_index]; + + color_rgba block_colors[2]; + blk.get_block_low_high_colors(block_colors, 0); + + vec6F v; + v[0] = block_colors[0].r * (1.0f / 255.0f); + v[1] = block_colors[0].g * (1.0f / 255.0f); + v[2] = block_colors[0].b * (1.0f / 255.0f); + v[3] = block_colors[1].r * (1.0f / 255.0f); + v[4] = block_colors[1].g * (1.0f / 255.0f); + v[5] = block_colors[1].b * (1.0f / 255.0f); + + training_vecs[block_index * 2 + 0] = std::make_pair(v, 1); + training_vecs[block_index * 2 + 1] = std::make_pair(v, 1); + + } // block_index; + + } ); + + } // block_index_iter + + m_params.m_pJob_pool->wait_for_all(); + } + + void basisu_frontend::generate_endpoint_clusters() + { + debug_printf("Begin endpoint quantization\n"); + + const uint32_t parent_codebook_size = (m_params.m_max_endpoint_clusters >= 256) ? BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE : 0; + uint32_t max_threads = 0; + max_threads = m_params.m_multithreaded ? 
minimum(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0; + if (m_params.m_pJob_pool) + max_threads = minimum((int)m_params.m_pJob_pool->get_total_threads(), max_threads); + + debug_printf("max_threads: %u\n", max_threads); + bool status = generate_hierarchical_codebook_threaded(m_endpoint_clusterizer, + m_params.m_max_endpoint_clusters, m_use_hierarchical_endpoint_codebooks ? parent_codebook_size : 0, + m_endpoint_clusters, + m_endpoint_parent_clusters, + max_threads, m_params.m_pJob_pool, true); + BASISU_FRONTEND_VERIFY(status); + + if (m_use_hierarchical_endpoint_codebooks) + { + if (!m_endpoint_parent_clusters.size()) + { + m_endpoint_parent_clusters.resize(0); + m_endpoint_parent_clusters.resize(1); + for (uint32_t i = 0; i < m_total_blocks; i++) + { + m_endpoint_parent_clusters[0].push_back(i*2); + m_endpoint_parent_clusters[0].push_back(i*2+1); + } + } + + BASISU_ASSUME(BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE <= UINT8_MAX); + + m_block_parent_endpoint_cluster.resize(0); + m_block_parent_endpoint_cluster.resize(m_total_blocks); + vector_set_all(m_block_parent_endpoint_cluster, 0xFF); + for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_endpoint_parent_clusters.size(); parent_cluster_index++) + { + const uint_vec &cluster = m_endpoint_parent_clusters[parent_cluster_index]; + for (uint32_t j = 0; j < cluster.size(); j++) + { + const uint32_t block_index = cluster[j] >> 1; + m_block_parent_endpoint_cluster[block_index] = static_cast(parent_cluster_index); + } + } + + for (uint32_t i = 0; i < m_total_blocks; i++) + { + BASISU_FRONTEND_VERIFY(m_block_parent_endpoint_cluster[i] != 0xFF); + } + + // Ensure that all the blocks within each cluster are all in the same parent cluster, or something is very wrong. 
+ for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++) + { + const uint_vec &cluster = m_endpoint_clusters[cluster_index]; + + uint32_t parent_cluster_index = 0; + for (uint32_t j = 0; j < cluster.size(); j++) + { + const uint32_t block_index = cluster[j] >> 1; + + BASISU_FRONTEND_VERIFY(block_index < m_block_parent_endpoint_cluster.size()); + + if (!j) + { + parent_cluster_index = m_block_parent_endpoint_cluster[block_index]; + } + else + { + BASISU_FRONTEND_VERIFY(m_block_parent_endpoint_cluster[block_index] == parent_cluster_index); + } + } + } + } + + if (m_params.m_debug_stats) + debug_printf("Total endpoint clusters: %u, parent clusters: %u\n", m_endpoint_clusters.size_u32(), m_endpoint_parent_clusters.size_u32()); + } + + // Iterate through each array of endpoint cluster block indices and set the m_block_endpoint_clusters_indices[][] array to indicaste which cluster index each block uses. + void basisu_frontend::generate_block_endpoint_clusters() + { + m_block_endpoint_clusters_indices.resize(m_total_blocks); + + for (int cluster_index = 0; cluster_index < static_cast(m_endpoint_clusters.size()); cluster_index++) + { + const basisu::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + + for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; + const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; + + m_block_endpoint_clusters_indices[block_index][subblock_index] = cluster_index; + + } // cluster_indices_iter + } + + if (m_params.m_validate) + { + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + uint32_t cluster_0 = m_block_endpoint_clusters_indices[block_index][0]; + uint32_t cluster_1 = m_block_endpoint_clusters_indices[block_index][1]; + BASISU_FRONTEND_VERIFY(cluster_0 == cluster_1); + } + } + } + + void 
basisu_frontend::compute_endpoint_clusters_within_each_parent_cluster() + { + generate_block_endpoint_clusters(); + + m_endpoint_clusters_within_each_parent_cluster.resize(0); + m_endpoint_clusters_within_each_parent_cluster.resize(m_endpoint_parent_clusters.size()); + + // Note: It's possible that some blocks got moved into the same cluster, but live in different parent clusters. + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t cluster_index = m_block_endpoint_clusters_indices[block_index][0]; + const uint32_t parent_cluster_index = m_block_parent_endpoint_cluster[block_index]; + + m_endpoint_clusters_within_each_parent_cluster[parent_cluster_index].push_back(cluster_index); + } + + for (uint32_t i = 0; i < m_endpoint_clusters_within_each_parent_cluster.size(); i++) + { + uint_vec &cluster_indices = m_endpoint_clusters_within_each_parent_cluster[i]; + + BASISU_FRONTEND_VERIFY(cluster_indices.size()); + + vector_sort(cluster_indices); + + auto last = std::unique(cluster_indices.begin(), cluster_indices.end()); + cluster_indices.erase(last, cluster_indices.end()); + } + } + + void basisu_frontend::compute_endpoint_subblock_error_vec() + { + m_subblock_endpoint_quant_err_vec.resize(0); + + const uint32_t N = 512; + for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N) + { + const uint32_t first_index = cluster_index_iter; + const uint32_t last_index = minimum(m_endpoint_clusters.size_u32(), cluster_index_iter + N); + + m_params.m_pJob_pool->add_job( [this, first_index, last_index] { + + for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) + { + const basisu::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + + assert(cluster_indices.size()); + + for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + basisu::vector cluster_pixels(8); + + const uint32_t 
block_index = cluster_indices[cluster_indices_iter] >> 1; + const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; + + const bool flipped = true; + + const color_rgba *pSource_block_pixels = get_source_pixel_block(block_index).get_ptr(); + + for (uint32_t pixel_index = 0; pixel_index < 8; pixel_index++) + { + cluster_pixels[pixel_index] = pSource_block_pixels[g_etc1_pixel_indices[flipped][subblock_index][pixel_index]]; + } + + const endpoint_cluster_etc_params &etc_params = m_endpoint_cluster_etc_params[cluster_index]; + + assert(etc_params.m_valid); + + color_rgba block_colors[4]; + etc_block::get_block_colors5(block_colors, etc_params.m_color_unscaled[0], etc_params.m_inten_table[0], true); + + uint64_t total_err = 0; + + for (uint32_t i = 0; i < 8; i++) + { + const color_rgba &c = cluster_pixels[i]; + + uint64_t best_err = UINT64_MAX; + //uint32_t best_index = 0; + + for (uint32_t s = 0; s < 4; s++) + { + uint64_t err = color_distance(m_params.m_perceptual, c, block_colors[s], false); + if (err < best_err) + { + best_err = err; + //best_index = s; + } + } + + total_err += best_err; + } + + subblock_endpoint_quant_err quant_err; + quant_err.m_total_err = total_err; + quant_err.m_cluster_index = cluster_index; + quant_err.m_cluster_subblock_index = cluster_indices_iter; + quant_err.m_block_index = block_index; + quant_err.m_subblock_index = subblock_index; + + { + std::lock_guard lock(m_lock); + + m_subblock_endpoint_quant_err_vec.push_back(quant_err); + } + } + } // cluster_index + + } ); + + } // cluster_index_iter + + m_params.m_pJob_pool->wait_for_all(); + + vector_sort(m_subblock_endpoint_quant_err_vec); + } + + void basisu_frontend::introduce_new_endpoint_clusters() + { + debug_printf("introduce_new_endpoint_clusters\n"); + + generate_block_endpoint_clusters(); + + int num_new_endpoint_clusters = m_params.m_max_endpoint_clusters - m_endpoint_clusters.size_u32(); + if (num_new_endpoint_clusters <= 0) + return; + + 
compute_endpoint_subblock_error_vec(); + + const uint32_t num_orig_endpoint_clusters = m_endpoint_clusters.size_u32(); + + std::unordered_set training_vector_was_relocated; + + uint_vec cluster_sizes(num_orig_endpoint_clusters); + for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++) + cluster_sizes[i] = m_endpoint_clusters[i].size_u32(); + + std::unordered_set ignore_cluster; + + uint32_t total_new_clusters = 0; + + while (num_new_endpoint_clusters) + { + if (m_subblock_endpoint_quant_err_vec.size() == 0) + break; + + subblock_endpoint_quant_err subblock_to_move(m_subblock_endpoint_quant_err_vec.back()); + + m_subblock_endpoint_quant_err_vec.pop_back(); + + if (unordered_set_contains(ignore_cluster, subblock_to_move.m_cluster_index)) + continue; + + uint32_t training_vector_index = subblock_to_move.m_block_index * 2 + subblock_to_move.m_subblock_index; + + if (cluster_sizes[subblock_to_move.m_cluster_index] <= 2) + continue; + + if (unordered_set_contains(training_vector_was_relocated, training_vector_index)) + continue; + + if (unordered_set_contains(training_vector_was_relocated, training_vector_index ^ 1)) + continue; + +#if 0 + const uint32_t block_index = subblock_to_move.m_block_index; + const etc_block& blk = m_etc1_blocks_etc1s[block_index]; + uint32_t ls, hs; + blk.get_selector_range(ls, hs); + if (ls != hs) + continue; +#endif + + //const uint32_t new_endpoint_cluster_index = (uint32_t)m_endpoint_clusters.size(); + + enlarge_vector(m_endpoint_clusters, 1)->push_back(training_vector_index); + enlarge_vector(m_endpoint_cluster_etc_params, 1); + + assert(m_endpoint_clusters.size() == m_endpoint_cluster_etc_params.size()); + + training_vector_was_relocated.insert(training_vector_index); + + m_endpoint_clusters.back().push_back(training_vector_index ^ 1); + training_vector_was_relocated.insert(training_vector_index ^ 1); + + BASISU_FRONTEND_VERIFY(cluster_sizes[subblock_to_move.m_cluster_index] >= 2); + cluster_sizes[subblock_to_move.m_cluster_index] -= 
2; + + ignore_cluster.insert(subblock_to_move.m_cluster_index); + + total_new_clusters++; + + num_new_endpoint_clusters--; + } + + debug_printf("Introduced %i new endpoint clusters\n", total_new_clusters); + + for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++) + { + uint_vec &cluster_indices = m_endpoint_clusters[i]; + + uint_vec new_cluster_indices; + for (uint32_t j = 0; j < cluster_indices.size(); j++) + { + uint32_t training_vector_index = cluster_indices[j]; + + if (!unordered_set_contains(training_vector_was_relocated, training_vector_index)) + new_cluster_indices.push_back(training_vector_index); + } + + if (cluster_indices.size() != new_cluster_indices.size()) + { + BASISU_FRONTEND_VERIFY(new_cluster_indices.size() > 0); + cluster_indices.swap(new_cluster_indices); + } + } + + generate_block_endpoint_clusters(); + } + + struct color_rgba_hasher + { + inline std::size_t operator()(const color_rgba& k) const + { + uint32_t v = *(const uint32_t*)&k; + + //return bitmix32(v); + + //v ^= (v << 10); + //v ^= (v >> 12); + + return v; + } + }; + + // Given each endpoint cluster, gather all the block pixels which are in that cluster and compute optimized ETC1S endpoints for them. + // TODO: Don't optimize endpoint clusters which haven't changed. + // If step>=1, we check to ensure the new endpoint values actually decrease quantization error. 
+ void basisu_frontend::generate_endpoint_codebook(uint32_t step) + { + debug_printf("generate_endpoint_codebook\n"); + + interval_timer tm; + tm.start(); + + m_endpoint_cluster_etc_params.resize(m_endpoint_clusters.size()); + + bool use_cpu = true; + // TODO: Get this working when step>0 + if (m_params.m_pOpenCL_context && !step) + { + const uint32_t total_clusters = (uint32_t)m_endpoint_clusters.size(); + + basisu::vector pixel_clusters(total_clusters); + + std::vector input_pixels; + input_pixels.reserve(m_total_blocks * 16); + + std::vector pixel_weights; + pixel_weights.reserve(m_total_blocks * 16); + + uint_vec cluster_sizes(total_clusters); + + //typedef basisu::hash_map color_hasher_type; + //color_hasher_type color_hasher; + //color_hasher.reserve(2048); + + interval_timer hash_tm; + hash_tm.start(); + + basisu::vector colors, colors2; + colors.reserve(65536); + colors2.reserve(65536); + + for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++) + { + const basisu::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + assert((cluster_indices.size() & 1) == 0); + +#if 0 + uint64_t first_pixel_index = input_pixels.size(); + const uint32_t total_pixels = 16 * (cluster_indices.size() / 2); + + input_pixels.resize(input_pixels.size() + total_pixels); + pixel_weights.resize(pixel_weights.size() + total_pixels); + + uint64_t dst_ofs = first_pixel_index; + + uint64_t total_r = 0, total_g = 0, total_b = 0; + for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; + if (subblock_index) + continue; + + const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; + const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr(); + + for (uint32_t i = 0; i < 16; i++) + { + input_pixels[dst_ofs] = pBlock_pixels[i]; + pixel_weights[dst_ofs] = 1; + dst_ofs++; + + 
total_r += pBlock_pixels[i].r; + total_g += pBlock_pixels[i].g; + total_b += pBlock_pixels[i].b; + } + } + + //printf("%i %f %f %f\n", cluster_index, total_r / (float)total_pixels, total_g / (float)total_pixels, total_b / (float)total_pixels); + + pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index; + pixel_clusters[cluster_index].m_total_pixels = total_pixels; + cluster_sizes[cluster_index] = total_pixels; +#elif 1 + colors.resize(cluster_indices.size() * 8); + colors2.resize(cluster_indices.size() * 8); + uint32_t dst_ofs = 0; + + for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; + if (subblock_index) + continue; + + const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; + const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr(); + + memcpy(colors.data() + dst_ofs, pBlock_pixels, sizeof(color_rgba) * 16); + dst_ofs += 16; + + } // cluster_indices_iter + + uint32_t* pSorted = radix_sort((uint32_t)colors.size(), colors.data(), colors2.data(), 0, 3); + + const uint64_t first_pixel_index = input_pixels.size(); + + uint32_t prev_color = 0, cur_weight = 0; + + for (uint32_t i = 0; i < colors.size(); i++) + { + uint32_t cur_color = pSorted[i]; + if (cur_color == prev_color) + { + if (++cur_weight == 0) + cur_weight--; + } + else + { + if (cur_weight) + { + input_pixels.push_back(*(const color_rgba*)&prev_color); + pixel_weights.push_back(cur_weight); + } + + prev_color = cur_color; + cur_weight = 1; + } + } + + if (cur_weight) + { + input_pixels.push_back(*(const color_rgba*)&prev_color); + pixel_weights.push_back(cur_weight); + } + + uint32_t total_unique_pixels = (uint32_t)(input_pixels.size() - first_pixel_index); + + pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index; + pixel_clusters[cluster_index].m_total_pixels = total_unique_pixels; + + 
cluster_sizes[cluster_index] = total_unique_pixels; +#else + color_hasher.reset(); + + for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; + if (subblock_index) + continue; + + const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; + const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr(); + + uint32_t *pPrev_weight = nullptr; + color_rgba prev_color; + + { + color_rgba cur_color = pBlock_pixels[0]; + auto res = color_hasher.insert(cur_color, 0); + + uint32_t& weight = (res.first)->second; + if (weight != UINT32_MAX) + weight++; + + prev_color = cur_color; + pPrev_weight = &(res.first)->second; + } + + for (uint32_t i = 1; i < 16; i++) + { + color_rgba cur_color = pBlock_pixels[i]; + + if (cur_color == prev_color) + { + if (*pPrev_weight != UINT32_MAX) + *pPrev_weight = *pPrev_weight + 1; + } + else + { + auto res = color_hasher.insert(cur_color, 0); + + uint32_t& weight = (res.first)->second; + if (weight != UINT32_MAX) + weight++; + + prev_color = cur_color; + pPrev_weight = &(res.first)->second; + } + } + + } // cluster_indices_iter + + const uint64_t first_pixel_index = input_pixels.size(); + uint32_t total_unique_pixels = color_hasher.size(); + + pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index; + pixel_clusters[cluster_index].m_total_pixels = total_unique_pixels; + + input_pixels.resize(first_pixel_index + total_unique_pixels); + pixel_weights.resize(first_pixel_index + total_unique_pixels); + + uint32_t j = 0; + + for (auto it = color_hasher.begin(); it != color_hasher.end(); ++it, ++j) + { + input_pixels[first_pixel_index + j] = it->first; + pixel_weights[first_pixel_index + j] = it->second; + } + + cluster_sizes[cluster_index] = total_unique_pixels; +#endif + + } // cluster_index + + debug_printf("Total hash time: %3.3f secs\n", hash_tm.get_elapsed_secs()); 
+ + debug_printf("Total unique colors: %llu\n", input_pixels.size()); + + uint_vec sorted_cluster_indices_new_to_old(total_clusters); + indirect_sort(total_clusters, sorted_cluster_indices_new_to_old.data(), cluster_sizes.data()); + //for (uint32_t i = 0; i < total_clusters; i++) + // sorted_cluster_indices_new_to_old[i] = i; + + uint_vec sorted_cluster_indices_old_to_new(total_clusters); + for (uint32_t i = 0; i < total_clusters; i++) + sorted_cluster_indices_old_to_new[sorted_cluster_indices_new_to_old[i]] = i; + + basisu::vector sorted_pixel_clusters(total_clusters); + for (uint32_t i = 0; i < total_clusters; i++) + sorted_pixel_clusters[i] = pixel_clusters[sorted_cluster_indices_new_to_old[i]]; + + uint32_t total_perms = 64; + if (m_params.m_compression_level <= 1) + total_perms = 16; + else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) + total_perms = OPENCL_ENCODE_ETC1S_MAX_PERMS; + + basisu::vector output_blocks(total_clusters); + + if (opencl_encode_etc1s_pixel_clusters( + m_params.m_pOpenCL_context, + output_blocks.data(), + total_clusters, + sorted_pixel_clusters.data(), + input_pixels.size(), + input_pixels.data(), + pixel_weights.data(), + m_params.m_perceptual, total_perms)) + { + for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++) + { + const uint32_t new_cluster_index = sorted_cluster_indices_old_to_new[old_cluster_index]; + + const etc_block& blk = output_blocks[new_cluster_index]; + + endpoint_cluster_etc_params& prev_etc_params = m_endpoint_cluster_etc_params[old_cluster_index]; + + prev_etc_params.m_valid = true; + etc_block::unpack_color5(prev_etc_params.m_color_unscaled[0], blk.get_base5_color(), false); + prev_etc_params.m_inten_table[0] = blk.get_inten_table(0); + prev_etc_params.m_color_error[0] = 0; // dummy value - we don't actually use this + } + + use_cpu = false; + } + else + { + error_printf("basisu_frontend::generate_endpoint_codebook: 
opencl_encode_etc1s_pixel_clusters() failed! Using CPU.\n"); + m_params.m_pOpenCL_context = nullptr; + m_opencl_failed = true; + } + + } // if (opencl_is_available() && m_params.m_use_opencl) + + if (use_cpu) + { + const uint32_t N = 128; + for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N) + { + const uint32_t first_index = cluster_index_iter; + const uint32_t last_index = minimum((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N); + + m_params.m_pJob_pool->add_job([this, first_index, last_index, step] { + + for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) + { + const basisu::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + + BASISU_FRONTEND_VERIFY(cluster_indices.size()); + + const uint32_t total_pixels = (uint32_t)cluster_indices.size() * 8; + + basisu::vector cluster_pixels(total_pixels); + + for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; + const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; + + const bool flipped = true; + + const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr(); + + for (uint32_t pixel_index = 0; pixel_index < 8; pixel_index++) + { + const color_rgba& c = pBlock_pixels[g_etc1_pixel_indices[flipped][subblock_index][pixel_index]]; + cluster_pixels[cluster_indices_iter * 8 + pixel_index] = c; + } + } + + endpoint_cluster_etc_params new_subblock_params; + + { + etc1_optimizer optimizer; + etc1_solution_coordinates solutions[2]; + + etc1_optimizer::params cluster_optimizer_params; + cluster_optimizer_params.m_num_src_pixels = total_pixels; + cluster_optimizer_params.m_pSrc_pixels = &cluster_pixels[0]; + + cluster_optimizer_params.m_use_color4 = false; + cluster_optimizer_params.m_perceptual = m_params.m_perceptual; + + if 
(m_params.m_compression_level <= 1) + cluster_optimizer_params.m_quality = cETCQualityMedium; + else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) + cluster_optimizer_params.m_quality = cETCQualityUber; + + etc1_optimizer::results cluster_optimizer_results; + + basisu::vector cluster_selectors(total_pixels); + cluster_optimizer_results.m_n = total_pixels; + cluster_optimizer_results.m_pSelectors = &cluster_selectors[0]; + + optimizer.init(cluster_optimizer_params, cluster_optimizer_results); + + if (!optimizer.compute()) + BASISU_FRONTEND_VERIFY(false); + + new_subblock_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled; + new_subblock_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table; + new_subblock_params.m_color_error[0] = cluster_optimizer_results.m_error; + } + + endpoint_cluster_etc_params& prev_etc_params = m_endpoint_cluster_etc_params[cluster_index]; + + bool use_new_subblock_params = false; + if ((!step) || (!prev_etc_params.m_valid)) + use_new_subblock_params = true; + else + { + assert(prev_etc_params.m_valid); + + uint64_t total_prev_err = 0; + + { + color_rgba block_colors[4]; + + etc_block::get_block_colors5(block_colors, prev_etc_params.m_color_unscaled[0], prev_etc_params.m_inten_table[0], false); + + uint64_t total_err = 0; + + for (uint32_t i = 0; i < total_pixels; i++) + { + const color_rgba& c = cluster_pixels[i]; + + uint64_t best_err = UINT64_MAX; + //uint32_t best_index = 0; + + for (uint32_t s = 0; s < 4; s++) + { + uint64_t err = color_distance(m_params.m_perceptual, c, block_colors[s], false); + if (err < best_err) + { + best_err = err; + //best_index = s; + } + } + + total_err += best_err; + } + + total_prev_err += total_err; + } + + // See if we should update this cluster's endpoints (if the error has actually fallen) + if (total_prev_err > new_subblock_params.m_color_error[0]) + { + use_new_subblock_params = true; + } + } + + if (use_new_subblock_params) + { + 
new_subblock_params.m_valid = true; + + prev_etc_params = new_subblock_params; + } + + } // cluster_index + + }); + + } // cluster_index_iter + + m_params.m_pJob_pool->wait_for_all(); + } + + debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); + } + + bool basisu_frontend::check_etc1s_constraints() const + { + basisu::vector block_clusters(m_total_blocks); + + for (int cluster_index = 0; cluster_index < static_cast(m_endpoint_clusters.size()); cluster_index++) + { + const basisu::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + + for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; + const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; + + block_clusters[block_index][subblock_index] = cluster_index; + + } // cluster_indices_iter + } + + for (uint32_t i = 0; i < m_total_blocks; i++) + { + if (block_clusters[i][0] != block_clusters[i][1]) + return false; + } + + return true; + } + + // For each block, determine which ETC1S endpoint cluster can encode that block with lowest error. + // This reassigns blocks to different endpoint clusters. + uint32_t basisu_frontend::refine_endpoint_clusterization() + { + debug_printf("refine_endpoint_clusterization\n"); + + if (m_use_hierarchical_endpoint_codebooks) + compute_endpoint_clusters_within_each_parent_cluster(); + + // Note: It's possible that an endpoint cluster may live in more than one parent cluster after the first refinement step. 
+ + basisu::vector block_clusters(m_total_blocks); + + for (int cluster_index = 0; cluster_index < static_cast(m_endpoint_clusters.size()); cluster_index++) + { + const basisu::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + + for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; + const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; + + block_clusters[block_index][subblock_index] = cluster_index; + + } // cluster_indices_iter + } + + //---------------------------------------------------------- + + // Create a new endpoint clusterization + + interval_timer tm; + tm.start(); + + uint_vec best_cluster_indices(m_total_blocks); + + bool use_cpu = true; + // TODO: Support non-hierarchical endpoint codebooks here + if (m_params.m_pOpenCL_context && m_use_hierarchical_endpoint_codebooks) + { + // For the OpenCL kernel, we order the parent endpoint clusters by smallest to largest for efficiency. + // We also prepare an array of block info structs that point into this new parent endpoint cluster array. 
+ const uint32_t total_parent_clusters = (uint32_t)m_endpoint_clusters_within_each_parent_cluster.size(); + + basisu::vector cl_block_info_structs(m_total_blocks); + + // the size of each parent cluster, in total clusters + uint_vec parent_cluster_sizes(total_parent_clusters); + for (uint32_t i = 0; i < total_parent_clusters; i++) + parent_cluster_sizes[i] = (uint32_t)m_endpoint_clusters_within_each_parent_cluster[i].size(); + + uint_vec first_parent_cluster_ofs(total_parent_clusters); + uint32_t cur_ofs = 0; + for (uint32_t i = 0; i < total_parent_clusters; i++) + { + first_parent_cluster_ofs[i] = cur_ofs; + + cur_ofs += parent_cluster_sizes[i]; + } + + // Note: total_actual_endpoint_clusters is not necessarly equal to m_endpoint_clusters.size(), because clusters may live in multiple parent clusters after the first refinement step. + BASISU_FRONTEND_VERIFY(cur_ofs >= m_endpoint_clusters.size()); + const uint32_t total_actual_endpoint_clusters = cur_ofs; + basisu::vector cl_endpoint_cluster_structs(total_actual_endpoint_clusters); + + for (uint32_t i = 0; i < total_parent_clusters; i++) + { + const uint32_t dst_ofs = first_parent_cluster_ofs[i]; + + const uint32_t parent_cluster_size = parent_cluster_sizes[i]; + + assert(m_endpoint_clusters_within_each_parent_cluster[i].size() == parent_cluster_size); + + for (uint32_t j = 0; j < parent_cluster_size; j++) + { + const uint32_t endpoint_cluster_index = m_endpoint_clusters_within_each_parent_cluster[i][j]; + + color_rgba cluster_etc_base_color(m_endpoint_cluster_etc_params[endpoint_cluster_index].m_color_unscaled[0]); + uint32_t cluster_etc_inten = m_endpoint_cluster_etc_params[endpoint_cluster_index].m_inten_table[0]; + + cl_endpoint_cluster_structs[dst_ofs + j].m_unscaled_color = cluster_etc_base_color; + cl_endpoint_cluster_structs[dst_ofs + j].m_etc_inten = (uint8_t)cluster_etc_inten; + cl_endpoint_cluster_structs[dst_ofs + j].m_cluster_index = (uint16_t)endpoint_cluster_index; + } + } + + for (uint32_t 
block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster[block_index]; + + cl_block_info_structs[block_index].m_num_clusters = (uint16_t)(parent_cluster_sizes[block_parent_endpoint_cluster_index]); + cl_block_info_structs[block_index].m_first_cluster_ofs = (uint16_t)(first_parent_cluster_ofs[block_parent_endpoint_cluster_index]); + + const uint32_t block_cluster_index = block_clusters[block_index][0]; + cl_block_info_structs[block_index].m_cur_cluster_index = (uint16_t)block_cluster_index; + cl_block_info_structs[block_index].m_cur_cluster_etc_inten = (uint8_t)m_endpoint_cluster_etc_params[block_cluster_index].m_inten_table[0]; + } + + uint_vec block_cluster_indices(m_total_blocks); + for (uint32_t i = 0; i < m_total_blocks; i++) + block_cluster_indices[i] = block_clusters[i][0]; + + uint_vec sorted_block_indices(m_total_blocks); + indirect_sort(m_total_blocks, sorted_block_indices.data(), block_cluster_indices.data()); + + bool status = opencl_refine_endpoint_clusterization( + m_params.m_pOpenCL_context, + cl_block_info_structs.data(), + total_actual_endpoint_clusters, + cl_endpoint_cluster_structs.data(), + sorted_block_indices.data(), + best_cluster_indices.data(), + m_params.m_perceptual); + + if (status) + { + use_cpu = false; + } + else + { + error_printf("basisu_frontend::refine_endpoint_clusterization: opencl_refine_endpoint_clusterization() failed! 
Using CPU.\n"); + m_params.m_pOpenCL_context = nullptr; + m_opencl_failed = true; + } + } + + if (use_cpu) + { + const uint32_t N = 1024; + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(m_total_blocks, first_index + N); + + m_params.m_pJob_pool->add_job([this, first_index, last_index, &best_cluster_indices, &block_clusters] { + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const uint32_t cluster_index = block_clusters[block_index][0]; + BASISU_FRONTEND_VERIFY(cluster_index == block_clusters[block_index][1]); + + const color_rgba* pSubblock_pixels = get_source_pixel_block(block_index).get_ptr(); + const uint32_t num_subblock_pixels = 16; + + uint64_t best_cluster_err = INT64_MAX; + uint32_t best_cluster_index = 0; + + const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster.size() ? m_block_parent_endpoint_cluster[block_index] : 0; + const uint_vec* pCluster_indices = m_endpoint_clusters_within_each_parent_cluster.size() ? &m_endpoint_clusters_within_each_parent_cluster[block_parent_endpoint_cluster_index] : nullptr; + + const uint32_t total_clusters = m_use_hierarchical_endpoint_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_endpoint_clusters.size(); + + for (uint32_t i = 0; i < total_clusters; i++) + { + const uint32_t cluster_iter = m_use_hierarchical_endpoint_codebooks ? 
(*pCluster_indices)[i] : i; + + color_rgba cluster_etc_base_color(m_endpoint_cluster_etc_params[cluster_iter].m_color_unscaled[0]); + uint32_t cluster_etc_inten = m_endpoint_cluster_etc_params[cluster_iter].m_inten_table[0]; + + uint64_t total_err = 0; + + const uint32_t low_selector = 0;//subblock_etc_params_vec[j].m_low_selectors[0]; + const uint32_t high_selector = 3;//subblock_etc_params_vec[j].m_high_selectors[0]; + color_rgba subblock_colors[4]; + // Can't assign it here - may result in too much error when selector quant occurs + if (cluster_etc_inten > m_endpoint_cluster_etc_params[cluster_index].m_inten_table[0]) + { + total_err = INT64_MAX; + goto skip_cluster; + } + + etc_block::get_block_colors5(subblock_colors, cluster_etc_base_color, cluster_etc_inten); + +#if 0 + for (uint32_t p = 0; p < num_subblock_pixels; p++) + { + uint64_t best_err = UINT64_MAX; + + for (uint32_t r = low_selector; r <= high_selector; r++) + { + uint64_t err = color_distance(m_params.m_perceptual, pSubblock_pixels[p], subblock_colors[r], false); + best_err = minimum(best_err, err); + if (!best_err) + break; + } + + total_err += best_err; + if (total_err > best_cluster_err) + break; + } // p +#else + if (m_params.m_perceptual) + { + if (!g_cpu_supports_sse41) + { + for (uint32_t p = 0; p < num_subblock_pixels; p++) + { + uint64_t best_err = UINT64_MAX; + + for (uint32_t r = low_selector; r <= high_selector; r++) + { + uint64_t err = color_distance(true, pSubblock_pixels[p], subblock_colors[r], false); + best_err = minimum(best_err, err); + if (!best_err) + break; + } + + total_err += best_err; + if (total_err > best_cluster_err) + break; + } // p + } + else + { +#if BASISU_SUPPORT_SSE + find_lowest_error_perceptual_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err); +#endif + } + } + else + { + if (!g_cpu_supports_sse41) + { + for (uint32_t p = 0; p < num_subblock_pixels; p++) + { + uint64_t best_err = UINT64_MAX; + + for 
(uint32_t r = low_selector; r <= high_selector; r++) + { + uint64_t err = color_distance(false, pSubblock_pixels[p], subblock_colors[r], false); + best_err = minimum(best_err, err); + if (!best_err) + break; + } + + total_err += best_err; + if (total_err > best_cluster_err) + break; + } // p + } + else + { +#if BASISU_SUPPORT_SSE + find_lowest_error_linear_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err); +#endif + } + } +#endif + + skip_cluster: + if ((total_err < best_cluster_err) || + ((cluster_iter == cluster_index) && (total_err == best_cluster_err))) + { + best_cluster_err = total_err; + best_cluster_index = cluster_iter; + + if (!best_cluster_err) + break; + } + } // j + + best_cluster_indices[block_index] = best_cluster_index; + + } // block_index + + }); + + } // block_index_iter + + m_params.m_pJob_pool->wait_for_all(); + + } // use_cpu + + debug_printf("refine_endpoint_clusterization time: %3.3f secs\n", tm.get_elapsed_secs()); + + basisu::vector > optimized_endpoint_clusters(m_endpoint_clusters.size()); + uint32_t total_subblocks_reassigned = 0; + + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t training_vector_index = block_index * 2 + 0; + + const uint32_t orig_cluster_index = block_clusters[block_index][0]; + const uint32_t best_cluster_index = best_cluster_indices[block_index]; + + optimized_endpoint_clusters[best_cluster_index].push_back(training_vector_index); + optimized_endpoint_clusters[best_cluster_index].push_back(training_vector_index + 1); + + if (best_cluster_index != orig_cluster_index) + { + total_subblocks_reassigned++; + } + } + + debug_printf("total_subblocks_reassigned: %u\n", total_subblocks_reassigned); + + m_endpoint_clusters = optimized_endpoint_clusters; + + return total_subblocks_reassigned; + } + + void basisu_frontend::eliminate_redundant_or_empty_endpoint_clusters() + { + 
debug_printf("eliminate_redundant_or_empty_endpoint_clusters\n"); + + // Step 1: Sort endpoint clusters by the base colors/intens + + uint_vec sorted_endpoint_cluster_indices(m_endpoint_clusters.size()); + for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) + sorted_endpoint_cluster_indices[i] = i; + + indirect_sort((uint32_t)m_endpoint_clusters.size(), &sorted_endpoint_cluster_indices[0], &m_endpoint_cluster_etc_params[0]); + + basisu::vector > new_endpoint_clusters(m_endpoint_clusters.size()); + basisu::vector new_subblock_etc_params(m_endpoint_clusters.size()); + + for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) + { + uint32_t j = sorted_endpoint_cluster_indices[i]; + new_endpoint_clusters[i] = m_endpoint_clusters[j]; + new_subblock_etc_params[i] = m_endpoint_cluster_etc_params[j]; + } + + new_endpoint_clusters.swap(m_endpoint_clusters); + new_subblock_etc_params.swap(m_endpoint_cluster_etc_params); + + // Step 2: Eliminate redundant endpoint clusters, or empty endpoint clusters + + new_endpoint_clusters.resize(0); + new_subblock_etc_params.resize(0); + + for (int i = 0; i < (int)m_endpoint_clusters.size(); ) + { + if (!m_endpoint_clusters[i].size()) + { + i++; + continue; + } + + int j; + for (j = i + 1; j < (int)m_endpoint_clusters.size(); j++) + { + if (!(m_endpoint_cluster_etc_params[i] == m_endpoint_cluster_etc_params[j])) + break; + } + + new_endpoint_clusters.push_back(m_endpoint_clusters[i]); + new_subblock_etc_params.push_back(m_endpoint_cluster_etc_params[i]); + + for (int k = i + 1; k < j; k++) + { + append_vector(new_endpoint_clusters.back(), m_endpoint_clusters[k]); + } + + i = j; + } + + if (m_endpoint_clusters.size() != new_endpoint_clusters.size()) + { + if (m_params.m_debug_stats) + debug_printf("Eliminated %u redundant or empty clusters\n", (uint32_t)(m_endpoint_clusters.size() - new_endpoint_clusters.size())); + + m_endpoint_clusters.swap(new_endpoint_clusters); + + m_endpoint_cluster_etc_params.swap(new_subblock_etc_params); + 
} + } + + void basisu_frontend::create_initial_packed_texture() + { + debug_printf("create_initial_packed_texture\n"); + + interval_timer tm; + tm.start(); + + bool use_cpu = true; + + if ((m_params.m_pOpenCL_context) && (opencl_is_available())) + { + basisu::vector block_etc5_color_intens(m_total_blocks); + + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0]; + + const color_rgba& color_unscaled = m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0]; + uint32_t inten = m_endpoint_cluster_etc_params[cluster0].m_inten_table[0]; + + block_etc5_color_intens[block_index].set(color_unscaled.r, color_unscaled.g, color_unscaled.b, inten); + } + + bool status = opencl_determine_selectors(m_params.m_pOpenCL_context, block_etc5_color_intens.data(), + m_encoded_blocks.data(), + m_params.m_perceptual); + if (!status) + { + error_printf("basisu_frontend::create_initial_packed_texture: opencl_determine_selectors() failed! 
Using CPU.\n"); + m_params.m_pOpenCL_context = nullptr; + m_opencl_failed = true; + } + else + { + use_cpu = false; + } + } + + if (use_cpu) + { + const uint32_t N = 4096; + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(m_total_blocks, first_index + N); + + m_params.m_pJob_pool->add_job([this, first_index, last_index] { + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0]; + uint32_t cluster1 = m_block_endpoint_clusters_indices[block_index][1]; + BASISU_FRONTEND_VERIFY(cluster0 == cluster1); + + const color_rgba* pSource_pixels = get_source_pixel_block(block_index).get_ptr(); + + etc_block& blk = m_encoded_blocks[block_index]; + + color_rgba unscaled[2] = { m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0], m_endpoint_cluster_etc_params[cluster1].m_color_unscaled[0] }; + uint32_t inten[2] = { m_endpoint_cluster_etc_params[cluster0].m_inten_table[0], m_endpoint_cluster_etc_params[cluster1].m_inten_table[0] }; + + blk.set_block_color5(unscaled[0], unscaled[1]); + blk.set_flip_bit(true); + + blk.set_inten_table(0, inten[0]); + blk.set_inten_table(1, inten[1]); + + blk.determine_selectors(pSource_pixels, m_params.m_perceptual); + + } // block_index + + }); + + } // block_index_iter + + m_params.m_pJob_pool->wait_for_all(); + + } // use_cpu + + m_orig_encoded_blocks = m_encoded_blocks; + + debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); + } + + void basisu_frontend::compute_selector_clusters_within_each_parent_cluster() + { + uint_vec block_selector_cluster_indices(m_total_blocks); + + for (int cluster_index = 0; cluster_index < static_cast(m_selector_cluster_block_indices.size()); cluster_index++) + { + const basisu::vector& cluster_indices = m_selector_cluster_block_indices[cluster_index]; + + 
for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + const uint32_t block_index = cluster_indices[cluster_indices_iter]; + + block_selector_cluster_indices[block_index] = cluster_index; + + } // cluster_indices_iter + + } // cluster_index + + m_selector_clusters_within_each_parent_cluster.resize(0); + m_selector_clusters_within_each_parent_cluster.resize(m_selector_parent_cluster_block_indices.size()); + + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t cluster_index = block_selector_cluster_indices[block_index]; + const uint32_t parent_cluster_index = m_block_parent_selector_cluster[block_index]; + + m_selector_clusters_within_each_parent_cluster[parent_cluster_index].push_back(cluster_index); + } + + for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++) + { + uint_vec &cluster_indices = m_selector_clusters_within_each_parent_cluster[i]; + + BASISU_FRONTEND_VERIFY(cluster_indices.size()); + + vector_sort(cluster_indices); + + auto last = std::unique(cluster_indices.begin(), cluster_indices.end()); + cluster_indices.erase(last, cluster_indices.end()); + } + } + + void basisu_frontend::generate_selector_clusters() + { + debug_printf("generate_selector_clusters\n"); + + typedef tree_vector_quant vec16F_clusterizer; + + vec16F_clusterizer::array_of_weighted_training_vecs training_vecs(m_total_blocks); + + const uint32_t N = 4096; + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(m_total_blocks, first_index + N); + + m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] { + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const etc_block &blk = m_encoded_blocks[block_index]; + + vec16F v; + for (uint32_t y = 0; y < 4; y++) + for 
(uint32_t x = 0; x < 4; x++) + v[x + y * 4] = static_cast(blk.get_selector(x, y)); + + const uint32_t subblock_index = (blk.get_inten_table(0) > blk.get_inten_table(1)) ? 0 : 1; + + color_rgba block_colors[2]; + blk.get_block_low_high_colors(block_colors, subblock_index); + + const uint32_t dist = color_distance(m_params.m_perceptual, block_colors[0], block_colors[1], false); + + const uint32_t cColorDistToWeight = 300; + const uint32_t cMaxWeight = 4096; + uint32_t weight = clamp(dist / cColorDistToWeight, 1, cMaxWeight); + + training_vecs[block_index].first = v; + training_vecs[block_index].second = weight; + + } // block_index + + } ); + + } // block_index_iter + + m_params.m_pJob_pool->wait_for_all(); + + vec16F_clusterizer selector_clusterizer; + for (uint32_t i = 0; i < m_total_blocks; i++) + selector_clusterizer.add_training_vec(training_vecs[i].first, training_vecs[i].second); + + const int selector_parent_codebook_size = (m_params.m_compression_level <= 1) ? BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 : BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT; + const uint32_t parent_codebook_size = (m_params.m_max_selector_clusters >= 256) ? selector_parent_codebook_size : 0; + debug_printf("Using selector parent codebook size %u\n", parent_codebook_size); + + uint32_t max_threads = 0; + max_threads = m_params.m_multithreaded ? minimum(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0; + if (m_params.m_pJob_pool) + max_threads = minimum((int)m_params.m_pJob_pool->get_total_threads(), max_threads); + + bool status = generate_hierarchical_codebook_threaded(selector_clusterizer, + m_params.m_max_selector_clusters, m_use_hierarchical_selector_codebooks ? 
parent_codebook_size : 0, + m_selector_cluster_block_indices, + m_selector_parent_cluster_block_indices, + max_threads, m_params.m_pJob_pool, false); + BASISU_FRONTEND_VERIFY(status); + + if (m_use_hierarchical_selector_codebooks) + { + if (!m_selector_parent_cluster_block_indices.size()) + { + m_selector_parent_cluster_block_indices.resize(0); + m_selector_parent_cluster_block_indices.resize(1); + for (uint32_t i = 0; i < m_total_blocks; i++) + m_selector_parent_cluster_block_indices[0].push_back(i); + } + + BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 <= UINT8_MAX); + BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT <= UINT8_MAX); + + m_block_parent_selector_cluster.resize(0); + m_block_parent_selector_cluster.resize(m_total_blocks); + vector_set_all(m_block_parent_selector_cluster, 0xFF); + + for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_selector_parent_cluster_block_indices.size(); parent_cluster_index++) + { + const uint_vec &cluster = m_selector_parent_cluster_block_indices[parent_cluster_index]; + for (uint32_t j = 0; j < cluster.size(); j++) + m_block_parent_selector_cluster[cluster[j]] = static_cast(parent_cluster_index); + } + for (uint32_t i = 0; i < m_total_blocks; i++) + { + BASISU_FRONTEND_VERIFY(m_block_parent_selector_cluster[i] != 0xFF); + } + + // Ensure that all the blocks within each cluster are all in the same parent cluster, or something is very wrong. 
+ for (uint32_t cluster_index = 0; cluster_index < m_selector_cluster_block_indices.size(); cluster_index++) + { + const uint_vec &cluster = m_selector_cluster_block_indices[cluster_index]; + + uint32_t parent_cluster_index = 0; + for (uint32_t j = 0; j < cluster.size(); j++) + { + const uint32_t block_index = cluster[j]; + if (!j) + { + parent_cluster_index = m_block_parent_selector_cluster[block_index]; + } + else + { + BASISU_FRONTEND_VERIFY(m_block_parent_selector_cluster[block_index] == parent_cluster_index); + } + } + } + } + + debug_printf("Total selector clusters: %u, total parent selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size(), (uint32_t)m_selector_parent_cluster_block_indices.size()); + } + + void basisu_frontend::create_optimized_selector_codebook(uint32_t iter) + { + debug_printf("create_optimized_selector_codebook\n"); + + interval_timer tm; + tm.start(); + + const uint32_t total_selector_clusters = (uint32_t)m_selector_cluster_block_indices.size(); + + debug_printf("Total selector clusters (from m_selector_cluster_block_indices.size()): %u\n", (uint32_t)m_selector_cluster_block_indices.size()); + + m_optimized_cluster_selectors.resize(total_selector_clusters); + + // For each selector codebook entry, and for each of the 4x4 selectors, determine which selector minimizes the error across all the blocks that use that quantized selector. 
+ const uint32_t N = 256; + for (uint32_t cluster_index_iter = 0; cluster_index_iter < total_selector_clusters; cluster_index_iter += N) + { + const uint32_t first_index = cluster_index_iter; + const uint32_t last_index = minimum((uint32_t)total_selector_clusters, cluster_index_iter + N); + + m_params.m_pJob_pool->add_job([this, first_index, last_index] { + + for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) + { + const basisu::vector& cluster_block_indices = m_selector_cluster_block_indices[cluster_index]; + + if (!cluster_block_indices.size()) + continue; + + uint64_t overall_best_err = 0; + (void)overall_best_err; + + uint64_t total_err[4][4][4]; + clear_obj(total_err); + + for (uint32_t cluster_block_index = 0; cluster_block_index < cluster_block_indices.size(); cluster_block_index++) + { + const uint32_t block_index = cluster_block_indices[cluster_block_index]; + + const etc_block& blk = m_encoded_blocks[block_index]; + + color_rgba blk_colors[4]; + blk.get_block_colors(blk_colors, 0); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const color_rgba& orig_color = get_source_pixel_block(block_index)(x, y); + + if (m_params.m_perceptual) + { + for (uint32_t s = 0; s < 4; s++) + total_err[y][x][s] += color_distance(true, blk_colors[s], orig_color, false); + } + else + { + for (uint32_t s = 0; s < 4; s++) + total_err[y][x][s] += color_distance(false, blk_colors[s], orig_color, false); + } + } // x + } // y + + } // cluster_block_index + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint64_t best_err = total_err[y][x][0]; + uint8_t best_sel = 0; + + for (uint32_t s = 1; s < 4; s++) + { + if (total_err[y][x][s] < best_err) + { + best_err = total_err[y][x][s]; + best_sel = (uint8_t)s; + } + } + + m_optimized_cluster_selectors[cluster_index].set_selector(x, y, best_sel); + + overall_best_err += best_err; + } // x + } // y + + } // cluster_index + + }); + + } // 
cluster_index_iter + + m_params.m_pJob_pool->wait_for_all(); + + debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); + + if (m_params.m_debug_images) + { + uint32_t max_selector_cluster_size = 0; + + for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++) + max_selector_cluster_size = maximum(max_selector_cluster_size, (uint32_t)m_selector_cluster_block_indices[i].size()); + + if ((max_selector_cluster_size * 5) < 32768) + { + const uint32_t x_spacer_len = 16; + image selector_cluster_vis(x_spacer_len + max_selector_cluster_size * 5, (uint32_t)m_selector_cluster_block_indices.size() * 5); + + for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_block_indices.size(); selector_cluster_index++) + { + const basisu::vector &cluster_block_indices = m_selector_cluster_block_indices[selector_cluster_index]; + + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + selector_cluster_vis.set_clipped(x_spacer_len + x - 12, selector_cluster_index * 5 + y, color_rgba((m_optimized_cluster_selectors[selector_cluster_index].get_selector(x, y) * 255) / 3)); + + for (uint32_t i = 0; i < cluster_block_indices.size(); i++) + { + uint32_t block_index = cluster_block_indices[i]; + + const etc_block &blk = m_orig_encoded_blocks[block_index]; + + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + selector_cluster_vis.set_clipped(x_spacer_len + x + 5 * i, selector_cluster_index * 5 + y, color_rgba((blk.get_selector(x, y) * 255) / 3)); + } + } + + char buf[256]; + snprintf(buf, sizeof(buf), "selector_cluster_vis_%u.png", iter); + save_png(buf, selector_cluster_vis); + } + } + } + + // For each block: Determine which quantized selectors best encode that block, given its quantized endpoints. + // Note that this method may leave some empty clusters (i.e. arrays with no block indices), including at the end. 
+ void basisu_frontend::find_optimal_selector_clusters_for_each_block() + { + debug_printf("find_optimal_selector_clusters_for_each_block\n"); + + interval_timer tm; + tm.start(); + + if (m_params.m_validate) + { + // Sanity checks + BASISU_FRONTEND_VERIFY(m_selector_cluster_block_indices.size() == m_optimized_cluster_selectors.size()); + for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++) + { + for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++) + { + BASISU_FRONTEND_VERIFY(m_selector_clusters_within_each_parent_cluster[i][j] < m_optimized_cluster_selectors.size()); + } + } + } + + m_block_selector_cluster_index.resize(m_total_blocks); + + if (m_params.m_compression_level == 0) + { + // Just leave the blocks in their original selector clusters. + for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_block_indices.size(); selector_cluster_index++) + { + for (uint32_t j = 0; j < m_selector_cluster_block_indices[selector_cluster_index].size(); j++) + { + const uint32_t block_index = m_selector_cluster_block_indices[selector_cluster_index][j]; + + m_block_selector_cluster_index[block_index] = selector_cluster_index; + + etc_block& blk = m_encoded_blocks[block_index]; + blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_cluster_index].get_raw_selector_bits()); + } + } + + debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); + + return; + } + + bool use_cpu = true; + + if ((m_params.m_pOpenCL_context) && m_use_hierarchical_selector_codebooks) + { + const uint32_t num_parent_clusters = m_selector_clusters_within_each_parent_cluster.size_u32(); + + basisu::vector selector_structs; + selector_structs.reserve(m_optimized_cluster_selectors.size()); + + uint_vec parent_selector_cluster_offsets(num_parent_clusters); + + uint_vec selector_cluster_indices; + selector_cluster_indices.reserve(m_optimized_cluster_selectors.size()); + + uint32_t cur_ofs = 
0; + for (uint32_t parent_index = 0; parent_index < num_parent_clusters; parent_index++) + { + parent_selector_cluster_offsets[parent_index] = cur_ofs; + + for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[parent_index].size(); j++) + { + const uint32_t selector_cluster_index = m_selector_clusters_within_each_parent_cluster[parent_index][j]; + + uint32_t sel_bits = 0; + for (uint32_t p = 0; p < 16; p++) + sel_bits |= (m_optimized_cluster_selectors[selector_cluster_index].get_selector(p & 3, p >> 2) << (p * 2)); + + selector_structs.enlarge(1)->m_packed_selectors = sel_bits; + + selector_cluster_indices.push_back(selector_cluster_index); + } + + cur_ofs += m_selector_clusters_within_each_parent_cluster[parent_index].size_u32(); + } + + const uint32_t total_input_selectors = cur_ofs; + + basisu::vector block_structs(m_total_blocks); + for (uint32_t i = 0; i < m_total_blocks; i++) + { + const uint32_t parent_selector_cluster = m_block_parent_selector_cluster[i]; + + const etc_block& blk = m_encoded_blocks[i]; + blk.unpack_color5(block_structs[i].m_etc_color5_inten, blk.get_base5_color(), false); + + block_structs[i].m_etc_color5_inten.a = (uint8_t)blk.get_inten_table(0); + block_structs[i].m_first_selector = parent_selector_cluster_offsets[parent_selector_cluster]; + block_structs[i].m_num_selectors = m_selector_clusters_within_each_parent_cluster[parent_selector_cluster].size_u32(); + } + + uint_vec output_selector_cluster_indices(m_total_blocks); + + bool status = opencl_find_optimal_selector_clusters_for_each_block( + m_params.m_pOpenCL_context, + block_structs.data(), + total_input_selectors, + selector_structs.data(), + selector_cluster_indices.data(), + output_selector_cluster_indices.data(), + m_params.m_perceptual); + + if (!status) + { + error_printf("basisu_frontend::find_optimal_selector_clusters_for_each_block: opencl_find_optimal_selector_clusters_for_each_block() failed! 
Using CPU.\n"); + m_params.m_pOpenCL_context = nullptr; + m_opencl_failed = true; + } + else + { + for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++) + { + m_selector_cluster_block_indices[i].resize(0); + m_selector_cluster_block_indices[i].reserve(128); + } + + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + etc_block& blk = m_encoded_blocks[block_index]; + + uint32_t best_cluster_index = output_selector_cluster_indices[block_index]; + + blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits()); + + m_block_selector_cluster_index[block_index] = best_cluster_index; + + vector_ensure_element_is_valid(m_selector_cluster_block_indices, best_cluster_index); + m_selector_cluster_block_indices[best_cluster_index].push_back(block_index); + } + + use_cpu = false; + } + } + + if (use_cpu) + { + basisu::vector unpacked_optimized_cluster_selectors(16 * m_optimized_cluster_selectors.size()); + for (uint32_t cluster_index = 0; cluster_index < m_optimized_cluster_selectors.size(); cluster_index++) + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + unpacked_optimized_cluster_selectors[cluster_index * 16 + y * 4 + x] = (uint8_t)m_optimized_cluster_selectors[cluster_index].get_selector(x, y); + } + } + } + + const uint32_t N = 2048; + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(m_total_blocks, first_index + N); + + m_params.m_pJob_pool->add_job( [this, first_index, last_index, &unpacked_optimized_cluster_selectors] { + + int prev_best_cluster_index = 0; + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const pixel_block& block = get_source_pixel_block(block_index); + + etc_block& blk = m_encoded_blocks[block_index]; + + if ((block_index > first_index) && (block == 
get_source_pixel_block(block_index - 1))) + { + blk.set_raw_selector_bits(m_optimized_cluster_selectors[prev_best_cluster_index].get_raw_selector_bits()); + + m_block_selector_cluster_index[block_index] = prev_best_cluster_index; + + continue; + } + + const color_rgba* pBlock_pixels = block.get_ptr(); + + color_rgba trial_block_colors[4]; + blk.get_block_colors_etc1s(trial_block_colors); + + // precompute errors for the i-th block pixel and selector sel: [sel][i] + uint32_t trial_errors[4][16]; + + if (m_params.m_perceptual) + { + for (uint32_t sel = 0; sel < 4; ++sel) + for (uint32_t i = 0; i < 16; ++i) + trial_errors[sel][i] = color_distance(true, pBlock_pixels[i], trial_block_colors[sel], false); + } + else + { + for (uint32_t sel = 0; sel < 4; ++sel) + for (uint32_t i = 0; i < 16; ++i) + trial_errors[sel][i] = color_distance(false, pBlock_pixels[i], trial_block_colors[sel], false); + } + + // Compute the minimum possible errors (given any selectors) for pixels 0-15 + uint64_t min_possible_error_0_15 = 0; + for (uint32_t i = 0; i < 16; i++) + min_possible_error_0_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]); + + // Compute the minimum possible errors (given any selectors) for pixels 4-15 + uint64_t min_possible_error_4_15 = 0; + for (uint32_t i = 4; i < 16; i++) + min_possible_error_4_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]); + + // Compute the minimum possible errors (given any selectors) for pixels 8-15 + uint64_t min_possible_error_8_15 = 0; + for (uint32_t i = 8; i < 16; i++) + min_possible_error_8_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]); + + // Compute the minimum possible errors (given any selectors) for pixels 12-15 + uint64_t min_possible_error_12_15 = 0; + for (uint32_t i = 12; i < 16; i++) + min_possible_error_12_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], 
trial_errors[2][i], trial_errors[3][i]); + + uint64_t best_cluster_err = INT64_MAX; + uint32_t best_cluster_index = 0; + + const uint32_t parent_selector_cluster = m_block_parent_selector_cluster.size() ? m_block_parent_selector_cluster[block_index] : 0; + const uint_vec *pCluster_indices = m_selector_clusters_within_each_parent_cluster.size() ? &m_selector_clusters_within_each_parent_cluster[parent_selector_cluster] : nullptr; + + const uint32_t total_clusters = m_use_hierarchical_selector_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_selector_cluster_block_indices.size(); + + #if 0 + for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++) + { + const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter; + + const etc_block& cluster_blk = m_optimized_cluster_selectors[cluster_index]; + + uint64_t trial_err = 0; + for (int y = 0; y < 4; y++) + { + for (int x = 0; x < 4; x++) + { + const uint32_t sel = cluster_blk.get_selector(x, y); + + trial_err += color_distance(m_params.m_perceptual, trial_block_colors[sel], pBlock_pixels[x + y * 4], false); + if (trial_err > best_cluster_err) + goto early_out; + } + } + + if (trial_err < best_cluster_err) + { + best_cluster_err = trial_err; + best_cluster_index = cluster_index; + if (!best_cluster_err) + break; + } + + early_out: + ; + } + #else + for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++) + { + const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? 
(*pCluster_indices)[cluster_iter] : cluster_iter; + + const uint8_t* pSels = &unpacked_optimized_cluster_selectors[cluster_index * 16]; + + uint64_t trial_err = (uint64_t)trial_errors[pSels[0]][0] + trial_errors[pSels[1]][1] + trial_errors[pSels[2]][2] + trial_errors[pSels[3]][3]; + if ((trial_err + min_possible_error_4_15) >= best_cluster_err) + continue; + + trial_err += (uint64_t)trial_errors[pSels[4]][4] + trial_errors[pSels[5]][5] + trial_errors[pSels[6]][6] + trial_errors[pSels[7]][7]; + if ((trial_err + min_possible_error_8_15) >= best_cluster_err) + continue; + + trial_err += (uint64_t)trial_errors[pSels[8]][8] + trial_errors[pSels[9]][9] + trial_errors[pSels[10]][10] + trial_errors[pSels[11]][11]; + if ((trial_err + min_possible_error_12_15) >= best_cluster_err) + continue; + + trial_err += (uint64_t)trial_errors[pSels[12]][12] + trial_errors[pSels[13]][13] + trial_errors[pSels[14]][14] + trial_errors[pSels[15]][15]; + + if (trial_err < best_cluster_err) + { + best_cluster_err = trial_err; + best_cluster_index = cluster_index; + if (best_cluster_err == min_possible_error_0_15) + break; + } + + } // cluster_iter + #endif + + blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits()); + + m_block_selector_cluster_index[block_index] = best_cluster_index; + + prev_best_cluster_index = best_cluster_index; + + } // block_index + + } ); + + } // block_index_iter + + m_params.m_pJob_pool->wait_for_all(); + + for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++) + { + m_selector_cluster_block_indices[i].resize(0); + m_selector_cluster_block_indices[i].reserve(128); + } + + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t best_cluster_index = m_block_selector_cluster_index[block_index]; + + vector_ensure_element_is_valid(m_selector_cluster_block_indices, best_cluster_index); + m_selector_cluster_block_indices[best_cluster_index].push_back(block_index); + } + + 
} // if (use_cpu) + + debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); + } + + // TODO: Remove old ETC1 specific stuff, and thread this. + uint32_t basisu_frontend::refine_block_endpoints_given_selectors() + { + debug_printf("refine_block_endpoints_given_selectors\n"); + + for (int block_index = 0; block_index < static_cast(m_total_blocks); block_index++) + { + //uint32_t selector_cluster = m_block_selector_cluster_index(block_x, block_y); + vec2U &endpoint_clusters = m_block_endpoint_clusters_indices[block_index]; + + m_endpoint_cluster_etc_params[endpoint_clusters[0]].m_subblocks.push_back(block_index * 2); + + m_endpoint_cluster_etc_params[endpoint_clusters[1]].m_subblocks.push_back(block_index * 2 + 1); + } + + uint32_t total_subblocks_refined = 0; + uint32_t total_subblocks_examined = 0; + + for (uint32_t endpoint_cluster_index = 0; endpoint_cluster_index < m_endpoint_cluster_etc_params.size(); endpoint_cluster_index++) + { + endpoint_cluster_etc_params &subblock_params = m_endpoint_cluster_etc_params[endpoint_cluster_index]; + + const uint_vec &subblocks = subblock_params.m_subblocks; + //uint32_t total_pixels = subblock.m_subblocks.size() * 8; + + basisu::vector subblock_colors[2]; // [use_individual_mode] + uint8_vec subblock_selectors[2]; + + uint64_t cur_subblock_err[2] = { 0, 0 }; + + for (uint32_t subblock_iter = 0; subblock_iter < subblocks.size(); subblock_iter++) + { + uint32_t training_vector_index = subblocks[subblock_iter]; + + uint32_t block_index = training_vector_index >> 1; + uint32_t subblock_index = training_vector_index & 1; + const bool is_flipped = true; + + const etc_block &blk = m_encoded_blocks[block_index]; + + const bool use_individual_mode = !blk.get_diff_bit(); + + const color_rgba *pSource_block_pixels = get_source_pixel_block(block_index).get_ptr(); + + color_rgba unpacked_block_pixels[16]; + unpack_etc1(blk, unpacked_block_pixels); + + for (uint32_t i = 0; i < 8; i++) + { + const uint32_t pixel_index = 
g_etc1_pixel_indices[is_flipped][subblock_index][i]; + const etc_coord2 &coords = g_etc1_pixel_coords[is_flipped][subblock_index][i]; + + subblock_colors[use_individual_mode].push_back(pSource_block_pixels[pixel_index]); + + cur_subblock_err[use_individual_mode] += color_distance(m_params.m_perceptual, pSource_block_pixels[pixel_index], unpacked_block_pixels[pixel_index], false); + + subblock_selectors[use_individual_mode].push_back(static_cast(blk.get_selector(coords.m_x, coords.m_y))); + } + } // subblock_iter + + etc1_optimizer::results cluster_optimizer_results[2]; + bool results_valid[2] = { false, false }; + + clear_obj(cluster_optimizer_results); + + basisu::vector cluster_selectors[2]; + + for (uint32_t use_individual_mode = 0; use_individual_mode < 2; use_individual_mode++) + { + const uint32_t total_pixels = (uint32_t)subblock_colors[use_individual_mode].size(); + + if (!total_pixels) + continue; + + total_subblocks_examined += total_pixels / 8; + + etc1_optimizer optimizer; + etc1_solution_coordinates solutions[2]; + + etc1_optimizer::params cluster_optimizer_params; + cluster_optimizer_params.m_num_src_pixels = total_pixels; + cluster_optimizer_params.m_pSrc_pixels = &subblock_colors[use_individual_mode][0]; + + cluster_optimizer_params.m_use_color4 = use_individual_mode != 0; + cluster_optimizer_params.m_perceptual = m_params.m_perceptual; + + cluster_optimizer_params.m_pForce_selectors = &subblock_selectors[use_individual_mode][0]; + cluster_optimizer_params.m_quality = cETCQualityUber; + + cluster_selectors[use_individual_mode].resize(total_pixels); + + cluster_optimizer_results[use_individual_mode].m_n = total_pixels; + cluster_optimizer_results[use_individual_mode].m_pSelectors = &cluster_selectors[use_individual_mode][0]; + + optimizer.init(cluster_optimizer_params, cluster_optimizer_results[use_individual_mode]); + + if (!optimizer.compute()) + continue; + + if (cluster_optimizer_results[use_individual_mode].m_error < 
cur_subblock_err[use_individual_mode]) + results_valid[use_individual_mode] = true; + + } // use_individual_mode + + for (uint32_t use_individual_mode = 0; use_individual_mode < 2; use_individual_mode++) + { + if (!results_valid[use_individual_mode]) + continue; + + uint32_t num_passes = use_individual_mode ? 1 : 2; + + bool all_passed5 = true; + + for (uint32_t pass = 0; pass < num_passes; pass++) + { + for (uint32_t subblock_iter = 0; subblock_iter < subblocks.size(); subblock_iter++) + { + const uint32_t training_vector_index = subblocks[subblock_iter]; + + const uint32_t block_index = training_vector_index >> 1; + const uint32_t subblock_index = training_vector_index & 1; + //const bool is_flipped = true; + + etc_block &blk = m_encoded_blocks[block_index]; + + if (!blk.get_diff_bit() != static_cast(use_individual_mode != 0)) + continue; + + if (use_individual_mode) + { + blk.set_base4_color(subblock_index, etc_block::pack_color4(cluster_optimizer_results[1].m_block_color_unscaled, false)); + blk.set_inten_table(subblock_index, cluster_optimizer_results[1].m_block_inten_table); + + subblock_params.m_color_error[1] = cluster_optimizer_results[1].m_error; + subblock_params.m_inten_table[1] = cluster_optimizer_results[1].m_block_inten_table; + subblock_params.m_color_unscaled[1] = cluster_optimizer_results[1].m_block_color_unscaled; + + total_subblocks_refined++; + } + else + { + const uint16_t base_color5 = blk.get_base5_color(); + const uint16_t delta_color3 = blk.get_delta3_color(); + + uint32_t r[2], g[2], b[2]; + etc_block::unpack_color5(r[0], g[0], b[0], base_color5, false); + bool success = etc_block::unpack_color5(r[1], g[1], b[1], base_color5, delta_color3, false); + assert(success); + BASISU_NOTE_UNUSED(success); + + r[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.r; + g[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.g; + b[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.b; + + 
color_rgba colors[2] = { color_rgba(r[0], g[0], b[0], 255), color_rgba(r[1], g[1], b[1], 255) }; + + if (!etc_block::try_pack_color5_delta3(colors)) + { + all_passed5 = false; + break; + } + + if ((pass == 1) && (all_passed5)) + { + blk.set_block_color5(colors[0], colors[1]); + blk.set_inten_table(subblock_index, cluster_optimizer_results[0].m_block_inten_table); + + subblock_params.m_color_error[0] = cluster_optimizer_results[0].m_error; + subblock_params.m_inten_table[0] = cluster_optimizer_results[0].m_block_inten_table; + subblock_params.m_color_unscaled[0] = cluster_optimizer_results[0].m_block_color_unscaled; + + total_subblocks_refined++; + } + } + + } // subblock_iter + + } // pass + + } // use_individual_mode + + } // endpoint_cluster_index + + if (m_params.m_debug_stats) + debug_printf("Total subblock endpoints refined: %u (%3.1f%%)\n", total_subblocks_refined, total_subblocks_refined * 100.0f / total_subblocks_examined); + + return total_subblocks_refined; + } + + void basisu_frontend::dump_endpoint_clusterization_visualization(const char *pFilename, bool vis_endpoint_colors) + { + debug_printf("dump_endpoint_clusterization_visualization\n"); + + uint32_t max_endpoint_cluster_size = 0; + + basisu::vector cluster_sizes(m_endpoint_clusters.size()); + basisu::vector sorted_cluster_indices(m_endpoint_clusters.size()); + for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) + { + max_endpoint_cluster_size = maximum(max_endpoint_cluster_size, (uint32_t)m_endpoint_clusters[i].size()); + cluster_sizes[i] = (uint32_t)m_endpoint_clusters[i].size(); + } + + if (!max_endpoint_cluster_size) + return; + + for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) + sorted_cluster_indices[i] = i; + + //indexed_heap_sort(endpoint_clusters.size(), cluster_sizes.get_ptr(), sorted_cluster_indices.get_ptr()); + + image endpoint_cluster_vis(12 + minimum(max_endpoint_cluster_size, 2048) * 5, (uint32_t)m_endpoint_clusters.size() * 3); + + for (uint32_t unsorted_cluster_iter 
= 0; unsorted_cluster_iter < m_endpoint_clusters.size(); unsorted_cluster_iter++) + { + const uint32_t cluster_iter = sorted_cluster_indices[unsorted_cluster_iter]; + + etc_block blk; + blk.clear(); + blk.set_flip_bit(false); + blk.set_diff_bit(true); + blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[cluster_iter].m_inten_table[0]); + blk.set_base5_color(etc_block::pack_color5(m_endpoint_cluster_etc_params[cluster_iter].m_color_unscaled[0], false)); + + color_rgba blk_colors[4]; + blk.get_block_colors(blk_colors, 0); + for (uint32_t i = 0; i < 4; i++) + endpoint_cluster_vis.fill_box(i * 2, 3 * unsorted_cluster_iter, 2, 2, blk_colors[i]); + + for (uint32_t subblock_iter = 0; subblock_iter < m_endpoint_clusters[cluster_iter].size(); subblock_iter++) + { + uint32_t training_vector_index = m_endpoint_clusters[cluster_iter][subblock_iter]; + + const uint32_t block_index = training_vector_index >> 1; + const uint32_t subblock_index = training_vector_index & 1; + + const etc_block& blk2 = m_etc1_blocks_etc1s[block_index]; + + const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr(); + + color_rgba subblock_pixels[8]; + + if (vis_endpoint_colors) + { + color_rgba colors[2]; + blk2.get_block_low_high_colors(colors, subblock_index); + for (uint32_t i = 0; i < 8; i++) + subblock_pixels[i] = colors[subblock_index]; + } + else + { + for (uint32_t i = 0; i < 8; i++) + subblock_pixels[i] = pBlock_pixels[g_etc1_pixel_indices[blk2.get_flip_bit()][subblock_index][i]]; + } + + endpoint_cluster_vis.set_block_clipped(subblock_pixels, 12 + 5 * subblock_iter, 3 * unsorted_cluster_iter, 4, 2); + } + } + + save_png(pFilename, endpoint_cluster_vis); + debug_printf("Wrote debug visualization file %s\n", pFilename); + } + + void basisu_frontend::finalize() + { + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + for (uint32_t subblock_index = 0; subblock_index < 2; subblock_index++) + { + const uint32_t endpoint_cluster_index = 
get_subblock_endpoint_cluster_index(block_index, subblock_index); + + m_endpoint_cluster_etc_params[endpoint_cluster_index].m_color_used[0] = true; + } + } + } + + // The backend has remapped the block endpoints while optimizing the output symbols for better rate distortion performance, so let's go and reoptimize the endpoint codebook. + // This is currently the only place where the backend actually goes and changes the quantization and calls the frontend to fix things up. + // This is basically a bottom up clusterization stage, where some leaves can be combined. + void basisu_frontend::reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices) + { + debug_printf("reoptimize_remapped_endpoints\n"); + + basisu::vector new_endpoint_cluster_block_indices(m_endpoint_clusters.size()); + for (uint32_t i = 0; i < new_block_endpoints.size(); i++) + new_endpoint_cluster_block_indices[new_block_endpoints[i]].push_back(i); + + basisu::vector cluster_valid(new_endpoint_cluster_block_indices.size()); + basisu::vector cluster_improved(new_endpoint_cluster_block_indices.size()); + + const uint32_t N = 256; + for (uint32_t cluster_index_iter = 0; cluster_index_iter < new_endpoint_cluster_block_indices.size(); cluster_index_iter += N) + { + const uint32_t first_index = cluster_index_iter; + const uint32_t last_index = minimum((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N); + + m_params.m_pJob_pool->add_job( [this, first_index, last_index, &cluster_improved, &cluster_valid, &new_endpoint_cluster_block_indices, &pBlock_selector_indices ] { + + for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) + { + const basisu::vector& cluster_block_indices = new_endpoint_cluster_block_indices[cluster_index]; + + if (!cluster_block_indices.size()) + continue; + + const uint32_t total_pixels = 
(uint32_t)cluster_block_indices.size() * 16; + + basisu::vector cluster_pixels(total_pixels); + uint8_vec force_selectors(total_pixels); + + etc_block blk; + blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(cluster_index, false)); + blk.set_inten_tables_etc1s(get_endpoint_cluster_inten_table(cluster_index, false)); + blk.set_flip_bit(true); + + uint64_t cur_err = 0; + + for (uint32_t cluster_block_indices_iter = 0; cluster_block_indices_iter < cluster_block_indices.size(); cluster_block_indices_iter++) + { + const uint32_t block_index = cluster_block_indices[cluster_block_indices_iter]; + + const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr(); + + memcpy(&cluster_pixels[cluster_block_indices_iter * 16], pBlock_pixels, 16 * sizeof(color_rgba)); + + const uint32_t selector_cluster_index = pBlock_selector_indices ? (*pBlock_selector_indices)[block_index] : get_block_selector_cluster_index(block_index); + + const etc_block &blk_selectors = get_selector_cluster_selector_bits(selector_cluster_index); + + blk.set_raw_selector_bits(blk_selectors.get_raw_selector_bits()); + + cur_err += blk.evaluate_etc1_error(pBlock_pixels, m_params.m_perceptual); + + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + force_selectors[cluster_block_indices_iter * 16 + x + y * 4] = static_cast(blk_selectors.get_selector(x, y)); + } + + endpoint_cluster_etc_params new_endpoint_cluster_etc_params; + + { + etc1_optimizer optimizer; + etc1_solution_coordinates solutions[2]; + + etc1_optimizer::params cluster_optimizer_params; + cluster_optimizer_params.m_num_src_pixels = total_pixels; + cluster_optimizer_params.m_pSrc_pixels = &cluster_pixels[0]; + + cluster_optimizer_params.m_use_color4 = false; + cluster_optimizer_params.m_perceptual = m_params.m_perceptual; + cluster_optimizer_params.m_pForce_selectors = &force_selectors[0]; + + if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) + cluster_optimizer_params.m_quality = 
cETCQualityUber; + else + cluster_optimizer_params.m_quality = cETCQualitySlow; + + etc1_optimizer::results cluster_optimizer_results; + + basisu::vector cluster_selectors(total_pixels); + cluster_optimizer_results.m_n = total_pixels; + cluster_optimizer_results.m_pSelectors = &cluster_selectors[0]; + + optimizer.init(cluster_optimizer_params, cluster_optimizer_results); + + if (!optimizer.compute()) + BASISU_FRONTEND_VERIFY(false); + + new_endpoint_cluster_etc_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled; + new_endpoint_cluster_etc_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table; + new_endpoint_cluster_etc_params.m_color_error[0] = cluster_optimizer_results.m_error; + new_endpoint_cluster_etc_params.m_color_used[0] = true; + new_endpoint_cluster_etc_params.m_valid = true; + } + + if (new_endpoint_cluster_etc_params.m_color_error[0] < cur_err) + { + m_endpoint_cluster_etc_params[cluster_index] = new_endpoint_cluster_etc_params; + + cluster_improved[cluster_index] = true; + } + + cluster_valid[cluster_index] = true; + + } // cluster_index + + } ); + + } // cluster_index_iter + + m_params.m_pJob_pool->wait_for_all(); + + uint32_t total_unused_clusters = 0; + uint32_t total_improved_clusters = 0; + + old_to_new_endpoint_cluster_indices.resize(m_endpoint_clusters.size()); + vector_set_all(old_to_new_endpoint_cluster_indices, -1); + + int total_new_endpoint_clusters = 0; + + for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++) + { + if (!cluster_valid[old_cluster_index]) + total_unused_clusters++; + else + old_to_new_endpoint_cluster_indices[old_cluster_index] = total_new_endpoint_clusters++; + + if (cluster_improved[old_cluster_index]) + total_improved_clusters++; + } + + debug_printf("Total unused clusters: %u\n", total_unused_clusters); + debug_printf("Total improved_clusters: %u\n", total_improved_clusters); + debug_printf("Total endpoint clusters: %u\n", 
total_new_endpoint_clusters); + + if (optimize_final_codebook) + { + cluster_subblock_etc_params_vec new_endpoint_cluster_etc_params(total_new_endpoint_clusters); + + for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++) + { + if (old_to_new_endpoint_cluster_indices[old_cluster_index] >= 0) + new_endpoint_cluster_etc_params[old_to_new_endpoint_cluster_indices[old_cluster_index]] = m_endpoint_cluster_etc_params[old_cluster_index]; + } + + debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 1\n"); + + basisu::vector new_endpoint_clusters(total_new_endpoint_clusters); + + for (uint32_t block_index = 0; block_index < new_block_endpoints.size(); block_index++) + { + const uint32_t old_endpoint_cluster_index = new_block_endpoints[block_index]; + + const int new_endpoint_cluster_index = old_to_new_endpoint_cluster_indices[old_endpoint_cluster_index]; + BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index >= 0); + + BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index < (int)new_endpoint_clusters.size()); + + new_endpoint_clusters[new_endpoint_cluster_index].push_back(block_index * 2 + 0); + new_endpoint_clusters[new_endpoint_cluster_index].push_back(block_index * 2 + 1); + + BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index < (int)new_endpoint_cluster_etc_params.size()); + + new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 0); + new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 1); + + m_block_endpoint_clusters_indices[block_index][0] = new_endpoint_cluster_index; + m_block_endpoint_clusters_indices[block_index][1] = new_endpoint_cluster_index; + } + + debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 2\n"); + + m_endpoint_clusters = new_endpoint_clusters; + m_endpoint_cluster_etc_params = new_endpoint_cluster_etc_params; + + eliminate_redundant_or_empty_endpoint_clusters(); + + 
debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 3\n"); + + for (uint32_t new_cluster_index = 0; new_cluster_index < m_endpoint_clusters.size(); new_cluster_index++) + { + for (uint32_t cluster_block_iter = 0; cluster_block_iter < m_endpoint_clusters[new_cluster_index].size(); cluster_block_iter++) + { + const uint32_t subblock_index = m_endpoint_clusters[new_cluster_index][cluster_block_iter]; + const uint32_t block_index = subblock_index >> 1; + + m_block_endpoint_clusters_indices[block_index][0] = new_cluster_index; + m_block_endpoint_clusters_indices[block_index][1] = new_cluster_index; + + const uint32_t old_cluster_index = new_block_endpoints[block_index]; + + old_to_new_endpoint_cluster_indices[old_cluster_index] = new_cluster_index; + } + } + + debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 4\n"); + + for (uint32_t block_index = 0; block_index < m_encoded_blocks.size(); block_index++) + { + const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, 0); + + m_encoded_blocks[block_index].set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(endpoint_cluster_index, false)); + m_encoded_blocks[block_index].set_inten_tables_etc1s(get_endpoint_cluster_inten_table(endpoint_cluster_index, false)); + } + + debug_printf("Final (post-RDO) endpoint clusters: %u\n", m_endpoint_clusters.size()); + } + + //debug_printf("validate_output: %u\n", validate_output()); + } + + // Endpoint clusterization hierarchy integrity checker. + // Note this doesn't check for empty clusters. 
+ bool basisu_frontend::validate_endpoint_cluster_hierarchy(bool ensure_clusters_have_same_parents) const + { + if (!m_endpoint_parent_clusters.size()) + return true; + + int_vec subblock_parent_indices(m_total_blocks * 2); + subblock_parent_indices.set_all(-1); + + int_vec subblock_cluster_indices(m_total_blocks * 2); + subblock_cluster_indices.set_all(-1); + + for (uint32_t parent_index = 0; parent_index < m_endpoint_parent_clusters.size(); parent_index++) + { + for (uint32_t i = 0; i < m_endpoint_parent_clusters[parent_index].size(); i++) + { + uint32_t subblock_index = m_endpoint_parent_clusters[parent_index][i]; + if (subblock_index >= m_total_blocks * 2) + return false; + + // If the endpoint cluster lives in more than one parent node, that's wrong. + if (subblock_parent_indices[subblock_index] != -1) + return false; + + subblock_parent_indices[subblock_index] = parent_index; + } + } + + // Make sure all endpoint clusters are present in the parent cluster. + for (uint32_t i = 0; i < subblock_parent_indices.size(); i++) + { + if (subblock_parent_indices[i] == -1) + return false; + } + + for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++) + { + int parent_index = 0; + + for (uint32_t i = 0; i < m_endpoint_clusters[cluster_index].size(); i++) + { + uint32_t subblock_index = m_endpoint_clusters[cluster_index][i]; + if (subblock_index >= m_total_blocks * 2) + return false; + + if (subblock_cluster_indices[subblock_index] != -1) + return false; + + subblock_cluster_indices[subblock_index] = cluster_index; + + // There are transformations on the endpoint clusters that can break the strict tree requirement + if (ensure_clusters_have_same_parents) + { + // Make sure all the subblocks are in the same parent cluster + if (!i) + parent_index = subblock_parent_indices[subblock_index]; + else if (subblock_parent_indices[subblock_index] != parent_index) + return false; + } + } + } + + // Make sure all endpoint clusters are present 
in the parent cluster. + for (uint32_t i = 0; i < subblock_cluster_indices.size(); i++) + { + if (subblock_cluster_indices[i] == -1) + return false; + } + + return true; + } + + // This is very slow and only intended for debugging/development. It's enabled using the "-validate_etc1s" command line option. + bool basisu_frontend::validate_output() const + { + debug_printf("validate_output\n"); + + if (!check_etc1s_constraints()) + return false; + + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { +//#define CHECK(x) do { if (!(x)) { DebugBreak(); return false; } } while(0) +#define CHECK(x) BASISU_FRONTEND_VERIFY(x); + + CHECK(get_output_block(block_index).get_flip_bit() == true); + + const bool diff_flag = get_diff_flag(block_index); + CHECK(diff_flag == true); + + etc_block blk; + memset(&blk, 0, sizeof(blk)); + blk.set_flip_bit(true); + blk.set_diff_bit(true); + + const uint32_t endpoint_cluster0_index = get_subblock_endpoint_cluster_index(block_index, 0); + const uint32_t endpoint_cluster1_index = get_subblock_endpoint_cluster_index(block_index, 1); + + // basisu only supports ETC1S, so these must be equal. 
+ CHECK(endpoint_cluster0_index == endpoint_cluster1_index); + + CHECK(blk.set_block_color5_check(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false))); + + CHECK(get_endpoint_cluster_color_is_used(endpoint_cluster0_index, false)); + + blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, false)); + blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, false)); + + const uint32_t selector_cluster_index = get_block_selector_cluster_index(block_index); + CHECK(selector_cluster_index < get_total_selector_clusters()); + + CHECK(vector_find(get_selector_cluster_block_indices(selector_cluster_index), block_index) != -1); + + blk.set_raw_selector_bits(get_selector_cluster_selector_bits(selector_cluster_index).get_raw_selector_bits()); + + const etc_block &rdo_output_block = get_output_block(block_index); + + CHECK(rdo_output_block.get_flip_bit() == blk.get_flip_bit()); + CHECK(rdo_output_block.get_diff_bit() == blk.get_diff_bit()); + CHECK(rdo_output_block.get_inten_table(0) == blk.get_inten_table(0)); + CHECK(rdo_output_block.get_inten_table(1) == blk.get_inten_table(1)); + CHECK(rdo_output_block.get_base5_color() == blk.get_base5_color()); + CHECK(rdo_output_block.get_delta3_color() == blk.get_delta3_color()); + CHECK(rdo_output_block.get_raw_selector_bits() == blk.get_raw_selector_bits()); + +#undef CHECK + } + + return true; + } + + void basisu_frontend::dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks) + { + gpu_image g; + g.init(texture_format::cETC1, num_blocks_x * 4, num_blocks_y * 4); + + for (uint32_t y = 0; y < num_blocks_y; y++) + { + for (uint32_t x = 0; x < num_blocks_x; x++) + { + const uint32_t block_index = first_block + x + y * num_blocks_x; + + etc_block &blk = *(etc_block *)g.get_block_ptr(x, y); + + if (output_blocks) + blk = 
get_output_block(block_index); + else + { + const bool diff_flag = get_diff_flag(block_index); + + blk.set_diff_bit(diff_flag); + blk.set_flip_bit(true); + + const uint32_t endpoint_cluster0_index = get_subblock_endpoint_cluster_index(block_index, 0); + const uint32_t endpoint_cluster1_index = get_subblock_endpoint_cluster_index(block_index, 1); + + if (diff_flag) + blk.set_block_color5(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false)); + else + blk.set_block_color4(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, true), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, true)); + + blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, !diff_flag)); + blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, !diff_flag)); + + const uint32_t selector_cluster_index = get_block_selector_cluster_index(block_index); + blk.set_raw_selector_bits(get_selector_cluster_selector_bits(selector_cluster_index).get_raw_selector_bits()); + } + } + } + + image img; + g.unpack(img); + + save_png(pFilename, img); + } + +} // namespace basisu + diff --git a/thirdparty/basisu/encoder/basisu_frontend.h b/thirdparty/basisu/encoder/basisu_frontend.h new file mode 100644 index 000000000..69fc8d8ec --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_frontend.h @@ -0,0 +1,353 @@ +// basisu_frontend.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "basisu_enc.h" +#include "basisu_etc.h" +#include "basisu_gpu_texture.h" +#include "../transcoder/basisu_file_headers.h" +#include "../transcoder/basisu_transcoder.h" + +namespace basisu +{ + struct opencl_context; + typedef opencl_context* opencl_context_ptr; + + struct vec2U + { + uint32_t m_comps[2]; + + vec2U() { } + vec2U(uint32_t a, uint32_t b) { set(a, b); } + + void set(uint32_t a, uint32_t b) { m_comps[0] = a; m_comps[1] = b; } + + uint32_t operator[] (uint32_t i) const { assert(i < 2); return m_comps[i]; } + uint32_t &operator[] (uint32_t i) { assert(i < 2); return m_comps[i]; } + }; + + const uint32_t BASISU_DEFAULT_COMPRESSION_LEVEL = 2; + const uint32_t BASISU_MAX_COMPRESSION_LEVEL = 6; + + class basisu_frontend + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_frontend); + + public: + + basisu_frontend() : + m_total_blocks(0), + m_total_pixels(0), + m_endpoint_refinement(false), + m_use_hierarchical_endpoint_codebooks(false), + m_use_hierarchical_selector_codebooks(false), + m_num_endpoint_codebook_iterations(0), + m_num_selector_codebook_iterations(0), + m_opencl_failed(false) + { + } + + enum + { + cMaxEndpointClusters = 16128, + + cMaxSelectorClusters = 16128, + }; + + struct params + { + params() : + m_num_source_blocks(0), + m_pSource_blocks(NULL), + m_max_endpoint_clusters(256), + m_max_selector_clusters(256), + m_compression_level(BASISU_DEFAULT_COMPRESSION_LEVEL), + m_perceptual(true), + m_debug_stats(false), + m_debug_images(false), + m_dump_endpoint_clusterization(true), + m_validate(false), + m_multithreaded(false), + m_disable_hierarchical_endpoint_codebooks(false), + m_tex_type(basist::cBASISTexType2D), + m_pOpenCL_context(nullptr), + m_pJob_pool(nullptr) + { + } + + uint32_t m_num_source_blocks; + pixel_block *m_pSource_blocks; + + uint32_t m_max_endpoint_clusters; + uint32_t m_max_selector_clusters; + + 
uint32_t m_compression_level; + + bool m_perceptual; + bool m_debug_stats; + bool m_debug_images; + bool m_dump_endpoint_clusterization; + bool m_validate; + bool m_multithreaded; + bool m_disable_hierarchical_endpoint_codebooks; + + basist::basis_texture_type m_tex_type; + const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; + + opencl_context_ptr m_pOpenCL_context; + + job_pool *m_pJob_pool; + }; + + bool init(const params &p); + + bool compress(); + + const params &get_params() const { return m_params; } + + const pixel_block &get_source_pixel_block(uint32_t i) const { return m_source_blocks[i]; } + + // RDO output blocks + uint32_t get_total_output_blocks() const { return static_cast(m_encoded_blocks.size()); } + + const etc_block &get_output_block(uint32_t block_index) const { return m_encoded_blocks[block_index]; } + const etc_block_vec &get_output_blocks() const { return m_encoded_blocks; } + + // "Best" ETC1S blocks + const etc_block &get_etc1s_block(uint32_t block_index) const { return m_etc1_blocks_etc1s[block_index]; } + + // Per-block flags + bool get_diff_flag(uint32_t block_index) const { return m_encoded_blocks[block_index].get_diff_bit(); } + + // Endpoint clusters + uint32_t get_total_endpoint_clusters() const { return static_cast(m_endpoint_clusters.size()); } + uint32_t get_subblock_endpoint_cluster_index(uint32_t block_index, uint32_t subblock_index) const { return m_block_endpoint_clusters_indices[block_index][subblock_index]; } + + const color_rgba &get_endpoint_cluster_unscaled_color(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_color_unscaled[individual_mode]; } + uint32_t get_endpoint_cluster_inten_table(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_inten_table[individual_mode]; } + + bool get_endpoint_cluster_color_is_used(uint32_t cluster_index, bool individual_mode) const { return 
m_endpoint_cluster_etc_params[cluster_index].m_color_used[individual_mode]; } + + // Selector clusters + uint32_t get_total_selector_clusters() const { return static_cast(m_selector_cluster_block_indices.size()); } + uint32_t get_block_selector_cluster_index(uint32_t block_index) const { return m_block_selector_cluster_index[block_index]; } + const etc_block &get_selector_cluster_selector_bits(uint32_t cluster_index) const { return m_optimized_cluster_selectors[cluster_index]; } + + // Returns block indices using each selector cluster + const uint_vec &get_selector_cluster_block_indices(uint32_t selector_cluster_index) const { return m_selector_cluster_block_indices[selector_cluster_index]; } + + void dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks); + + void reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices = nullptr); + + bool get_opencl_failed() const { return m_opencl_failed; } + + private: + params m_params; + uint32_t m_total_blocks; + uint32_t m_total_pixels; + + bool m_endpoint_refinement; + bool m_use_hierarchical_endpoint_codebooks; + bool m_use_hierarchical_selector_codebooks; + + uint32_t m_num_endpoint_codebook_iterations; + uint32_t m_num_selector_codebook_iterations; + + // Source pixels for each blocks + pixel_block_vec m_source_blocks; + + // The quantized ETC1S texture. 
+ etc_block_vec m_encoded_blocks; + + // Quantized blocks after endpoint quant, but before selector quant + etc_block_vec m_orig_encoded_blocks; + + // Full quality ETC1S texture + etc_block_vec m_etc1_blocks_etc1s; + + typedef vec<6, float> vec6F; + + // Endpoint clusterizer + typedef tree_vector_quant vec6F_quantizer; + vec6F_quantizer m_endpoint_clusterizer; + + // For each endpoint cluster: An array of which subblock indices (block_index*2+subblock) are located in that cluster. + basisu::vector m_endpoint_clusters; + + // Array of subblock indices for each parent endpoint cluster + // Note: Initially, each endpoint cluster will only live in a single parent cluster, in a shallow tree. + // As the endpoint clusters are manipulated this constraint gets broken. + basisu::vector m_endpoint_parent_clusters; + + // Each block's parent endpoint cluster index + uint8_vec m_block_parent_endpoint_cluster; + + // Array of endpoint cluster indices for each parent endpoint cluster + basisu::vector m_endpoint_clusters_within_each_parent_cluster; + + struct endpoint_cluster_etc_params + { + endpoint_cluster_etc_params() + { + clear(); + } + + void clear() + { + clear_obj(m_color_unscaled); + clear_obj(m_inten_table); + clear_obj(m_color_error); + m_subblocks.clear(); + + clear_obj(m_color_used); + m_valid = false; + } + + // TODO: basisu doesn't use individual mode. 
+ color_rgba m_color_unscaled[2]; // [use_individual_mode] + uint32_t m_inten_table[2]; + + uint64_t m_color_error[2]; + + uint_vec m_subblocks; + + bool m_color_used[2]; + + bool m_valid; + + bool operator== (const endpoint_cluster_etc_params &other) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (m_color_unscaled[i] != other.m_color_unscaled[i]) + return false; + } + + if (m_inten_table[0] != other.m_inten_table[0]) + return false; + if (m_inten_table[1] != other.m_inten_table[1]) + return false; + + return true; + } + + bool operator< (const endpoint_cluster_etc_params &other) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (m_color_unscaled[i] < other.m_color_unscaled[i]) + return true; + else if (m_color_unscaled[i] != other.m_color_unscaled[i]) + return false; + } + + if (m_inten_table[0] < other.m_inten_table[0]) + return true; + else if (m_inten_table[0] == other.m_inten_table[0]) + { + if (m_inten_table[1] < other.m_inten_table[1]) + return true; + } + + return false; + } + }; + + typedef basisu::vector cluster_subblock_etc_params_vec; + + // Each endpoint cluster's ETC1S parameters + cluster_subblock_etc_params_vec m_endpoint_cluster_etc_params; + + // The endpoint cluster index used by each ETC1 subblock. + basisu::vector m_block_endpoint_clusters_indices; + + // The block(s) within each selector cluster + // Note: If you add anything here that uses selector cluster indicies, be sure to update optimize_selector_codebook()! + basisu::vector m_selector_cluster_block_indices; + + // The selector bits for each selector cluster. + basisu::vector m_optimized_cluster_selectors; + + // The block(s) within each parent selector cluster. 
+ basisu::vector m_selector_parent_cluster_block_indices; + + // Each block's parent selector cluster + uint8_vec m_block_parent_selector_cluster; + + // Array of selector cluster indices for each parent selector cluster + basisu::vector m_selector_clusters_within_each_parent_cluster; + + // Each block's selector cluster index + basisu::vector m_block_selector_cluster_index; + + struct subblock_endpoint_quant_err + { + uint64_t m_total_err; + uint32_t m_cluster_index; + uint32_t m_cluster_subblock_index; + uint32_t m_block_index; + uint32_t m_subblock_index; + + bool operator< (const subblock_endpoint_quant_err &rhs) const + { + if (m_total_err < rhs.m_total_err) + return true; + else if (m_total_err == rhs.m_total_err) + { + if (m_block_index < rhs.m_block_index) + return true; + else if (m_block_index == rhs.m_block_index) + return m_subblock_index < rhs.m_subblock_index; + } + return false; + } + }; + + // The sorted subblock endpoint quant error for each endpoint cluster + basisu::vector m_subblock_endpoint_quant_err_vec; + + std::mutex m_lock; + + bool m_opencl_failed; + + //----------------------------------------------------------------------------- + + void init_etc1_images(); + bool init_global_codebooks(); + void init_endpoint_training_vectors(); + void dump_endpoint_clusterization_visualization(const char *pFilename, bool vis_endpoint_colors); + void generate_endpoint_clusters(); + void compute_endpoint_subblock_error_vec(); + void introduce_new_endpoint_clusters(); + void generate_endpoint_codebook(uint32_t step); + uint32_t refine_endpoint_clusterization(); + void eliminate_redundant_or_empty_endpoint_clusters(); + void generate_block_endpoint_clusters(); + void compute_endpoint_clusters_within_each_parent_cluster(); + void compute_selector_clusters_within_each_parent_cluster(); + void create_initial_packed_texture(); + void generate_selector_clusters(); + void create_optimized_selector_codebook(uint32_t iter); + void 
find_optimal_selector_clusters_for_each_block(); + uint32_t refine_block_endpoints_given_selectors(); + void finalize(); + bool validate_endpoint_cluster_hierarchy(bool ensure_clusters_have_same_parents) const; + bool validate_output() const; + void introduce_special_selector_clusters(); + void optimize_selector_codebook(); + bool check_etc1s_constraints() const; + }; + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_gpu_texture.cpp b/thirdparty/basisu/encoder/basisu_gpu_texture.cpp new file mode 100644 index 000000000..3fa65b43c --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_gpu_texture.cpp @@ -0,0 +1,2583 @@ +// basisu_gpu_texture.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_gpu_texture.h" +#include "basisu_enc.h" +#include "basisu_pvrtc1_4.h" +#include "3rdparty/android_astc_decomp.h" +#include "basisu_bc7enc.h" +#include "../transcoder/basisu_astc_hdr_core.h" + +#define TINYDDS_IMPLEMENTATION +#include "3rdparty/tinydds.h" + +#define BASISU_USE_GOOGLE_ASTC_DECODER (1) + +namespace basisu +{ + //------------------------------------------------------------------------------------------------ + // ETC2 EAC + + void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels) + { + static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8"); + + const eac_a8_block *pBlock = static_cast(pBlock_bits); + + const int8_t *pTable = g_etc2_eac_tables[pBlock->m_table]; + + const uint64_t selector_bits = pBlock->get_selector_bits(); + + const int32_t base = pBlock->m_base; + const int32_t mul = pBlock->m_multiplier; + + pPixels[0].a = clamp255(base + pTable[pBlock->get_selector(0, 0, selector_bits)] * mul); + pPixels[1].a = clamp255(base + pTable[pBlock->get_selector(1, 0, selector_bits)] * mul); + pPixels[2].a = clamp255(base + pTable[pBlock->get_selector(2, 0, selector_bits)] * mul); + pPixels[3].a = clamp255(base + pTable[pBlock->get_selector(3, 0, selector_bits)] * mul); + + pPixels[4].a = clamp255(base + pTable[pBlock->get_selector(0, 1, selector_bits)] * mul); + pPixels[5].a = clamp255(base + pTable[pBlock->get_selector(1, 1, selector_bits)] * mul); + pPixels[6].a = clamp255(base + pTable[pBlock->get_selector(2, 1, selector_bits)] * mul); + pPixels[7].a = clamp255(base + pTable[pBlock->get_selector(3, 1, selector_bits)] * mul); + + pPixels[8].a = clamp255(base + pTable[pBlock->get_selector(0, 2, selector_bits)] * mul); + pPixels[9].a = clamp255(base + pTable[pBlock->get_selector(1, 2, selector_bits)] * mul); + pPixels[10].a = clamp255(base + pTable[pBlock->get_selector(2, 2, selector_bits)] * mul); + pPixels[11].a = clamp255(base + pTable[pBlock->get_selector(3, 2, selector_bits)] * mul); + + pPixels[12].a = 
clamp255(base + pTable[pBlock->get_selector(0, 3, selector_bits)] * mul); + pPixels[13].a = clamp255(base + pTable[pBlock->get_selector(1, 3, selector_bits)] * mul); + pPixels[14].a = clamp255(base + pTable[pBlock->get_selector(2, 3, selector_bits)] * mul); + pPixels[15].a = clamp255(base + pTable[pBlock->get_selector(3, 3, selector_bits)] * mul); + } + + //------------------------------------------------------------------------------------------------ + // BC1 + struct bc1_block + { + enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 }; + + uint8_t m_low_color[cTotalEndpointBytes]; + uint8_t m_high_color[cTotalEndpointBytes]; + uint8_t m_selectors[cTotalSelectorBytes]; + + inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } + inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } + + static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) + { + r = (c >> 11) & 31; + g = (c >> 5) & 63; + b = c & 31; + + r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + } + + inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * 2)) & 3; } + }; + + // Returns true if the block uses 3 color punchthrough alpha mode. 
+ bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha) + { + static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8"); + + const bc1_block *pBlock = static_cast(pBlock_bits); + + const uint32_t l = pBlock->get_low_color(); + const uint32_t h = pBlock->get_high_color(); + + color_rgba c[4]; + + uint32_t r0, g0, b0, r1, g1, b1; + bc1_block::unpack_color(l, r0, g0, b0); + bc1_block::unpack_color(h, r1, g1, b1); + + c[0].set_noclamp_rgba(r0, g0, b0, 255); + c[1].set_noclamp_rgba(r1, g1, b1, 255); + + bool used_punchthrough = false; + + if (l > h) + { + c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); + c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); + } + else + { + c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); + c[3].set_noclamp_rgba(0, 0, 0, 0); + used_punchthrough = true; + } + + if (set_alpha) + { + for (uint32_t y = 0; y < 4; y++, pPixels += 4) + { + pPixels[0] = c[pBlock->get_selector(0, y)]; + pPixels[1] = c[pBlock->get_selector(1, y)]; + pPixels[2] = c[pBlock->get_selector(2, y)]; + pPixels[3] = c[pBlock->get_selector(3, y)]; + } + } + else + { + for (uint32_t y = 0; y < 4; y++, pPixels += 4) + { + pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); + } + } + + return used_punchthrough; + } + + bool unpack_bc1_nv(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha) + { + static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8"); + + const bc1_block *pBlock = static_cast(pBlock_bits); + + const uint32_t l = pBlock->get_low_color(); + const uint32_t h = pBlock->get_high_color(); + + color_rgba c[4]; + + int r0 = (l >> 11) & 31; + int g0 = (l >> 5) & 63; + int b0 = l & 31; + int r1 = (h >> 11) & 31; + int g1 = (h >> 5) & 63; + int b1 = h & 31; + + c[0].b = 
(uint8_t)((3 * b0 * 22) / 8); + c[0].g = (uint8_t)((g0 << 2) | (g0 >> 4)); + c[0].r = (uint8_t)((3 * r0 * 22) / 8); + c[0].a = 0xFF; + + c[1].r = (uint8_t)((3 * r1 * 22) / 8); + c[1].g = (uint8_t)((g1 << 2) | (g1 >> 4)); + c[1].b = (uint8_t)((3 * b1 * 22) / 8); + c[1].a = 0xFF; + + int gdiff = c[1].g - c[0].g; + + bool used_punchthrough = false; + + if (l > h) + { + c[2].r = (uint8_t)(((2 * r0 + r1) * 22) / 8); + c[2].g = (uint8_t)(((256 * c[0].g + gdiff/4 + 128 + gdiff * 80) / 256)); + c[2].b = (uint8_t)(((2 * b0 + b1) * 22) / 8); + c[2].a = 0xFF; + + c[3].r = (uint8_t)(((2 * r1 + r0) * 22) / 8); + c[3].g = (uint8_t)((256 * c[1].g - gdiff/4 + 128 - gdiff * 80) / 256); + c[3].b = (uint8_t)(((2 * b1 + b0) * 22) / 8); + c[3].a = 0xFF; + } + else + { + c[2].r = (uint8_t)(((r0 + r1) * 33) / 8); + c[2].g = (uint8_t)((256 * c[0].g + gdiff/4 + 128 + gdiff * 128) / 256); + c[2].b = (uint8_t)(((b0 + b1) * 33) / 8); + c[2].a = 0xFF; + + c[3].set_noclamp_rgba(0, 0, 0, 0); + used_punchthrough = true; + } + + if (set_alpha) + { + for (uint32_t y = 0; y < 4; y++, pPixels += 4) + { + pPixels[0] = c[pBlock->get_selector(0, y)]; + pPixels[1] = c[pBlock->get_selector(1, y)]; + pPixels[2] = c[pBlock->get_selector(2, y)]; + pPixels[3] = c[pBlock->get_selector(3, y)]; + } + } + else + { + for (uint32_t y = 0; y < 4; y++, pPixels += 4) + { + pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); + } + } + + return used_punchthrough; + } + + static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; } + static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1 + 1) >> 1; } + + bool unpack_bc1_amd(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha) + { + const bc1_block *pBlock = static_cast(pBlock_bits); + + const uint32_t 
l = pBlock->get_low_color(); + const uint32_t h = pBlock->get_high_color(); + + color_rgba c[4]; + + uint32_t r0, g0, b0, r1, g1, b1; + bc1_block::unpack_color(l, r0, g0, b0); + bc1_block::unpack_color(h, r1, g1, b1); + + c[0].set_noclamp_rgba(r0, g0, b0, 255); + c[1].set_noclamp_rgba(r1, g1, b1, 255); + + bool used_punchthrough = false; + + if (l > h) + { + c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); + c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); + } + else + { + c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); + c[3].set_noclamp_rgba(0, 0, 0, 0); + used_punchthrough = true; + } + + if (set_alpha) + { + for (uint32_t y = 0; y < 4; y++, pPixels += 4) + { + pPixels[0] = c[pBlock->get_selector(0, y)]; + pPixels[1] = c[pBlock->get_selector(1, y)]; + pPixels[2] = c[pBlock->get_selector(2, y)]; + pPixels[3] = c[pBlock->get_selector(3, y)]; + } + } + else + { + for (uint32_t y = 0; y < 4; y++, pPixels += 4) + { + pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); + } + } + + return used_punchthrough; + } + + //------------------------------------------------------------------------------------------------ + // BC3-5 + + struct bc4_block + { + enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 }; + uint8_t m_endpoints[2]; + + uint8_t m_selectors[cTotalSelectorBytes]; + + inline uint32_t get_low_alpha() const { return m_endpoints[0]; } + inline uint32_t get_high_alpha() const { return m_endpoints[1]; } + inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } + + inline uint64_t get_selector_bits() const + { + return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | 
((uint32_t)m_selectors[2] << 16U) | ((uint32_t)m_selectors[3] << 24U))) | + (((uint64_t)m_selectors[4]) << 32U) | + (((uint64_t)m_selectors[5]) << 40U); + } + + inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const + { + assert((x < 4U) && (y < 4U)); + return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1); + } + + static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h) + { + pDst[0] = static_cast(l); + pDst[1] = static_cast(h); + pDst[2] = static_cast((l * 4 + h) / 5); + pDst[3] = static_cast((l * 3 + h * 2) / 5); + pDst[4] = static_cast((l * 2 + h * 3) / 5); + pDst[5] = static_cast((l + h * 4) / 5); + pDst[6] = 0; + pDst[7] = 255; + return 6; + } + + static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h) + { + pDst[0] = static_cast(l); + pDst[1] = static_cast(h); + pDst[2] = static_cast((l * 6 + h) / 7); + pDst[3] = static_cast((l * 5 + h * 2) / 7); + pDst[4] = static_cast((l * 4 + h * 3) / 7); + pDst[5] = static_cast((l * 3 + h * 4) / 7); + pDst[6] = static_cast((l * 2 + h * 5) / 7); + pDst[7] = static_cast((l + h * 6) / 7); + return 8; + } + + static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h) + { + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); + } + }; + + void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride) + { + static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8"); + + const bc4_block *pBlock = static_cast(pBlock_bits); + + uint8_t sel_values[8]; + bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha()); + + const uint64_t selector_bits = pBlock->get_selector_bits(); + + for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U)) + { + pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)]; + pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, selector_bits)]; + pPixels[stride * 2] = 
sel_values[pBlock->get_selector(2, y, selector_bits)]; + pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)]; + } + } + + // Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3. + bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels) + { + bool success = true; + + if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(bc4_block), pPixels, true)) + success = false; + + unpack_bc4(pBlock_bits, &pPixels[0].a, sizeof(color_rgba)); + + return success; + } + + // writes RG + void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels) + { + unpack_bc4(pBlock_bits, &pPixels[0].r, sizeof(color_rgba)); + unpack_bc4((const uint8_t *)pBlock_bits + sizeof(bc4_block), &pPixels[0].g, sizeof(color_rgba)); + } + + //------------------------------------------------------------------------------------------------ + // ATC isn't officially documented, so I'm assuming these references: + // http://www.guildsoftware.com/papers/2012.Converting.DXTC.to.ATC.pdf + // https://github.com/Triang3l/S3TConv/blob/master/s3tconv_atitc.c + // The paper incorrectly says the ATC lerp factors are 1/3 and 2/3, but they are actually 3/8 and 5/8. 
+ void unpack_atc(const void* pBlock_bits, color_rgba* pPixels) + { + const uint8_t* pBytes = static_cast(pBlock_bits); + + const uint16_t color0 = pBytes[0] | (pBytes[1] << 8U); + const uint16_t color1 = pBytes[2] | (pBytes[3] << 8U); + uint32_t sels = pBytes[4] | (pBytes[5] << 8U) | (pBytes[6] << 16U) | (pBytes[7] << 24U); + + const bool mode = (color0 & 0x8000) != 0; + + color_rgba c[4]; + + c[0].set((color0 >> 10) & 31, (color0 >> 5) & 31, color0 & 31, 255); + c[0].r = (c[0].r << 3) | (c[0].r >> 2); + c[0].g = (c[0].g << 3) | (c[0].g >> 2); + c[0].b = (c[0].b << 3) | (c[0].b >> 2); + + c[3].set((color1 >> 11) & 31, (color1 >> 5) & 63, color1 & 31, 255); + c[3].r = (c[3].r << 3) | (c[3].r >> 2); + c[3].g = (c[3].g << 2) | (c[3].g >> 4); + c[3].b = (c[3].b << 3) | (c[3].b >> 2); + + if (mode) + { + c[1].set(basisu::maximum(0, c[0].r - (c[3].r >> 2)), basisu::maximum(0, c[0].g - (c[3].g >> 2)), basisu::maximum(0, c[0].b - (c[3].b >> 2)), 255); + c[2] = c[0]; + c[0].set(0, 0, 0, 255); + } + else + { + c[1].r = (c[0].r * 5 + c[3].r * 3) >> 3; + c[1].g = (c[0].g * 5 + c[3].g * 3) >> 3; + c[1].b = (c[0].b * 5 + c[3].b * 3) >> 3; + + c[2].r = (c[0].r * 3 + c[3].r * 5) >> 3; + c[2].g = (c[0].g * 3 + c[3].g * 5) >> 3; + c[2].b = (c[0].b * 3 + c[3].b * 5) >> 3; + } + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t s = sels & 3; + + pPixels[i] = c[s]; + + sels >>= 2; + } + } + + //------------------------------------------------------------------------------------------------ + // BC7 mode 0-7 decompression. + // Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines. 
+ + static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(pbit < 2); assert(val_bits >= 4 && val_bits <= 8); const uint32_t total_bits = val_bits + 1; val = (val << 1) | pbit; val <<= (8 - total_bits); val |= (val >> total_bits); assert(val <= 255); return val; } + static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(val_bits >= 4 && val_bits <= 8); val <<= (8 - val_bits); val |= (val >> val_bits); assert(val <= 255); return val; } + + static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6; } + static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - basist::g_bc7_weights3[w]) + h * basist::g_bc7_weights3[w] + 32) >> 6; } + static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 16); return (l * (64 - basist::g_bc7_weights4[w]) + h * basist::g_bc7_weights4[w] + 32) >> 6; } + static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits) + { + assert(l <= 255 && h <= 255); + switch (bits) + { + case 2: return bc7_interp2(l, h, w); + case 3: return bc7_interp3(l, h, w); + case 4: return bc7_interp4(l, h, w); + default: + break; + } + return 0; + } + + bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + { + //const uint32_t SUBSETS = 3; + const uint32_t ENDPOINTS = 6; + const uint32_t COMPS = 3; + const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; + const uint32_t PBITS = (mode == 0) ? 
6 : 0; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 4 : 6); + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + + uint32_t pbits[6]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = read_bits32(pBuf, bit_offset, 1); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_third_subset_1[part]) || (i == basist::g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); + + color_rgba block_colors[3][8]; + for (uint32_t s = 0; s < 3; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < 3; c++) + block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][3] = 255; + } + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[basist::g_bc7_partition3[part * 16 + i]][weights[i]]; + + return true; + } + + bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + { + //const uint32_t SUBSETS = 2; + const uint32_t ENDPOINTS = 4; + const uint32_t COMPS = (mode == 7) ? 4 : 3; + const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7); + const uint32_t PBITS = (mode == 1) ? 
2 : 4; + const uint32_t SHARED_PBITS = (mode == 1) ? true : false; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t part = read_bits32(pBuf, bit_offset, 6); + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + + uint32_t pbits[4]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = read_bits32(pBuf, bit_offset, 1); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); + + color_rgba block_colors[2][8]; + for (uint32_t s = 0; s < 2; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < COMPS; c++) + block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3]; + } + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[basist::g_bc7_partition2[part * 16 + i]][weights[i]]; + + return true; + } + + bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + { + const uint32_t ENDPOINTS = 2; + const uint32_t COMPS = 4; + const uint32_t WEIGHT_BITS = 2; + const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7; + const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 
6 : 8; + //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2); + const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0; + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); + + const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; + + uint32_t weights[16], a_weights[16]; + + for (uint32_t i = 0; i < 16; i++) + (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0)); + + for (uint32_t i = 0; i < 16; i++) + (index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 1 : 0)); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? 
A_ENDPOINT_BITS : ENDPOINT_BITS); + + color_rgba block_colors[8]; + for (uint32_t i = 0; i < (1U << weight_bits[0]); i++) + for (uint32_t c = 0; c < 3; c++) + block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]); + + for (uint32_t i = 0; i < (1U << weight_bits[1]); i++) + block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]); + + for (uint32_t i = 0; i < 16; i++) + { + pPixels[i] = block_colors[weights[i]]; + pPixels[i].a = block_colors[a_weights[i]].a; + if (comp_rot >= 1) + std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]); + } + + return true; + } + + struct bc7_mode_6 + { + struct + { + uint64_t m_mode : 7; + uint64_t m_r0 : 7; + uint64_t m_r1 : 7; + uint64_t m_g0 : 7; + uint64_t m_g1 : 7; + uint64_t m_b0 : 7; + uint64_t m_b1 : 7; + uint64_t m_a0 : 7; + uint64_t m_a1 : 7; + uint64_t m_p0 : 1; + } m_lo; + + union + { + struct + { + uint64_t m_p1 : 1; + uint64_t m_s00 : 3; + uint64_t m_s10 : 4; + uint64_t m_s20 : 4; + uint64_t m_s30 : 4; + + uint64_t m_s01 : 4; + uint64_t m_s11 : 4; + uint64_t m_s21 : 4; + uint64_t m_s31 : 4; + + uint64_t m_s02 : 4; + uint64_t m_s12 : 4; + uint64_t m_s22 : 4; + uint64_t m_s32 : 4; + + uint64_t m_s03 : 4; + uint64_t m_s13 : 4; + uint64_t m_s23 : 4; + uint64_t m_s33 : 4; + + } m_hi; + + uint64_t m_hi_bits; + }; + }; + + bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) + { + static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16"); + + const bc7_mode_6 &block = *static_cast(pBlock_bits); + + if (block.m_lo.m_mode != (1 << 6)) + return false; + + const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0); + const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0); + const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0); + const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0); + const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1); 
+ const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1); + const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1); + const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1); + + color_rgba vals[16]; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t w = basist::g_bc7_weights4[i]; + const uint32_t iw = 64 - w; + vals[i].set_noclamp_rgba( + (r0 * iw + r1 * w + 32) >> 6, + (g0 * iw + g1 * w + 32) >> 6, + (b0 * iw + b1 * w + 32) >> 6, + (a0 * iw + a1 * w + 32) >> 6); + } + + pPixels[0] = vals[block.m_hi.m_s00]; + pPixels[1] = vals[block.m_hi.m_s10]; + pPixels[2] = vals[block.m_hi.m_s20]; + pPixels[3] = vals[block.m_hi.m_s30]; + + pPixels[4] = vals[block.m_hi.m_s01]; + pPixels[5] = vals[block.m_hi.m_s11]; + pPixels[6] = vals[block.m_hi.m_s21]; + pPixels[7] = vals[block.m_hi.m_s31]; + + pPixels[8] = vals[block.m_hi.m_s02]; + pPixels[9] = vals[block.m_hi.m_s12]; + pPixels[10] = vals[block.m_hi.m_s22]; + pPixels[11] = vals[block.m_hi.m_s32]; + + pPixels[12] = vals[block.m_hi.m_s03]; + pPixels[13] = vals[block.m_hi.m_s13]; + pPixels[14] = vals[block.m_hi.m_s23]; + pPixels[15] = vals[block.m_hi.m_s33]; + + return true; + } + + bool unpack_bc7(const void *pBlock, color_rgba *pPixels) + { + const uint32_t first_byte = static_cast(pBlock)[0]; + + for (uint32_t mode = 0; mode <= 7; mode++) + { + if (first_byte & (1U << mode)) + { + switch (mode) + { + case 0: + case 2: + return unpack_bc7_mode0_2(mode, pBlock, pPixels); + case 1: + case 3: + case 7: + return unpack_bc7_mode1_3_7(mode, pBlock, pPixels); + case 4: + case 5: + return unpack_bc7_mode4_5(mode, pBlock, pPixels); + case 6: + return unpack_bc7_mode6(pBlock, pPixels); + default: + break; + } + } + } + + return false; + } + + static inline int bc6h_sign_extend(int val, int bits) + { + assert((bits >= 1) && (bits < 32)); + assert((val >= 0) && (val < (1 << bits))); + return (val << (32 - bits)) >> (32 - bits); + } + + static inline int bc6h_apply_delta(int base, 
int delta, int num_bits, int is_signed) + { + int bitmask = ((1 << num_bits) - 1); + int v = (base + delta) & bitmask; + return is_signed ? bc6h_sign_extend(v, num_bits) : v; + } + + static int bc6h_dequantize(int val, int bits, int is_signed) + { + int result; + if (is_signed) + { + if (bits >= 16) + result = val; + else + { + int s_flag = 0; + if (val < 0) + { + s_flag = 1; + val = -val; + } + + if (val == 0) + result = 0; + else if (val >= ((1 << (bits - 1)) - 1)) + result = 0x7FFF; + else + result = ((val << 15) + 0x4000) >> (bits - 1); + + if (s_flag) + result = -result; + } + } + else + { + if (bits >= 15) + result = val; + else if (!val) + result = 0; + else if (val == ((1 << bits) - 1)) + result = 0xFFFF; + else + result = ((val << 16) + 0x8000) >> bits; + } + return result; + } + + static inline int bc6h_interpolate(int a, int b, const uint8_t* pWeights, int index) + { + return (a * (64 - (int)pWeights[index]) + b * (int)pWeights[index] + 32) >> 6; + } + + static inline basist::half_float bc6h_convert_to_half(int val, int is_signed) + { + if (!is_signed) + { + // scale by 31/64 + return (basist::half_float)((val * 31) >> 6); + } + + // scale by 31/32 + val = (val < 0) ? 
-(((-val) * 31) >> 5) : (val * 31) >> 5; + + int s = 0; + if (val < 0) + { + s = 0x8000; + val = -val; + } + + return (basist::half_float)(s | val); + } + + static inline uint32_t bc6h_get_bits(uint32_t num_bits, uint64_t& l, uint64_t& h, uint32_t& total_bits) + { + assert((num_bits) && (num_bits <= 63)); + + uint32_t v = (uint32_t)(l & ((1U << num_bits) - 1U)); + + l >>= num_bits; + l |= (h << (64U - num_bits)); + h >>= num_bits; + + total_bits += num_bits; + assert(total_bits <= 128); + + return v; + } + + static inline uint32_t bc6h_reverse_bits(uint32_t v, uint32_t num_bits) + { + uint32_t res = 0; + for (uint32_t i = 0; i < num_bits; i++) + { + uint32_t bit = (v & (1u << i)) != 0u; + res |= (bit << (num_bits - 1u - i)); + } + return res; + } + + static inline uint64_t bc6h_read_le_qword(const void* p) + { + const uint8_t* pSrc = static_cast(p); + return ((uint64_t)read_le_dword(pSrc)) | (((uint64_t)read_le_dword(pSrc + sizeof(uint32_t))) << 32U); + } + + bool unpack_bc6h(const void* pSrc_block, void* pDst_block, bool is_signed, uint32_t dest_pitch_in_halfs) + { + assert(dest_pitch_in_halfs >= 4 * 3); + + const uint32_t MAX_SUBSETS = 2, MAX_COMPS = 3; + + const uint8_t* pSrc = static_cast(pSrc_block); + basist::half_float* pDst = static_cast(pDst_block); + + uint64_t blo = bc6h_read_le_qword(pSrc), bhi = bc6h_read_le_qword(pSrc + sizeof(uint64_t)); + + // Unpack mode + const int mode = basist::g_bc6h_mode_lookup[blo & 31]; + if (mode < 0) + { + for (int y = 0; y < 4; y++) + { + memset(pDst, 0, sizeof(basist::half_float) * 4); + pDst += dest_pitch_in_halfs; + } + return false; + } + + // Skip mode bits + uint32_t total_bits_read = 0; + bc6h_get_bits((mode < 2) ? 2 : 5, blo, bhi, total_bits_read); + + assert(mode < (int)basist::NUM_BC6H_MODES); + + const uint32_t num_subsets = (mode >= 10) ? 
1 : 2; + const bool is_mode_9_or_10 = (mode == 9) || (mode == 10); + + // Unpack endpoint components + int comps[MAX_SUBSETS][MAX_COMPS][2] = { { { 0 } } }; // [subset][comp][l/h] + int part_index = 0; + + uint32_t layout_index = 0; + while (layout_index < basist::MAX_BC6H_LAYOUT_INDEX) + { + const basist::bc6h_bit_layout& layout = basist::g_bc6h_bit_layouts[mode][layout_index]; + + if (layout.m_comp < 0) + break; + + const int subset = layout.m_index >> 1, lh_index = layout.m_index & 1; + assert((layout.m_comp == 3) || ((subset >= 0) && (subset < (int)MAX_SUBSETS))); + + const int last_bit = layout.m_last_bit, first_bit = layout.m_first_bit; + assert(last_bit >= 0); + + int& res = (layout.m_comp == 3) ? part_index : comps[subset][layout.m_comp][lh_index]; + + if (first_bit < 0) + { + res |= (bc6h_get_bits(1, blo, bhi, total_bits_read) << last_bit); + } + else + { + const int total_bits = iabs(last_bit - first_bit) + 1; + const int bit_shift = basisu::minimum(first_bit, last_bit); + + int b = bc6h_get_bits(total_bits, blo, bhi, total_bits_read); + + if (last_bit < first_bit) + b = bc6h_reverse_bits(b, total_bits); + + res |= (b << bit_shift); + } + + layout_index++; + } + assert(layout_index != basist::MAX_BC6H_LAYOUT_INDEX); + + // Sign extend/dequantize endpoints + const int num_sig_bits = basist::g_bc6h_mode_sig_bits[mode][0]; + if (is_signed) + { + for (uint32_t comp = 0; comp < 3; comp++) + comps[0][comp][0] = bc6h_sign_extend(comps[0][comp][0], num_sig_bits); + } + + if (is_signed || !is_mode_9_or_10) + { + for (uint32_t subset = 0; subset < num_subsets; subset++) + for (uint32_t comp = 0; comp < 3; comp++) + for (uint32_t lh = (subset ? 0 : 1); lh < 2; lh++) + comps[subset][comp][lh] = bc6h_sign_extend(comps[subset][comp][lh], basist::g_bc6h_mode_sig_bits[mode][1 + comp]); + } + + if (!is_mode_9_or_10) + { + for (uint32_t subset = 0; subset < num_subsets; subset++) + for (uint32_t comp = 0; comp < 3; comp++) + for (uint32_t lh = (subset ? 
0 : 1); lh < 2; lh++) + comps[subset][comp][lh] = bc6h_apply_delta(comps[0][comp][0], comps[subset][comp][lh], num_sig_bits, is_signed); + } + + for (uint32_t subset = 0; subset < num_subsets; subset++) + for (uint32_t comp = 0; comp < 3; comp++) + for (uint32_t lh = 0; lh < 2; lh++) + comps[subset][comp][lh] = bc6h_dequantize(comps[subset][comp][lh], num_sig_bits, is_signed); + + // Now unpack weights and output texels + const int weight_bits = (mode >= 10) ? 4 : 3; + const uint8_t* pWeights = (mode >= 10) ? basist::g_bc6h_weight4 : basist::g_bc6h_weight3; + + dest_pitch_in_halfs -= 4 * 3; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + int subset = (num_subsets == 1) ? ((x | y) ? 0 : 0x80) : basist::g_bc6h_2subset_patterns[part_index][y][x]; + const int num_bits = weight_bits + ((subset & 0x80) ? -1 : 0); + + subset &= 1; + + const int weight_index = bc6h_get_bits(num_bits, blo, bhi, total_bits_read); + + pDst[0] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][0][0], comps[subset][0][1], pWeights, weight_index), is_signed); + pDst[1] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][1][0], comps[subset][1][1], pWeights, weight_index), is_signed); + pDst[2] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][2][0], comps[subset][2][1], pWeights, weight_index), is_signed); + + pDst += 3; + } + + pDst += dest_pitch_in_halfs; + } + + assert(total_bits_read == 128); + return true; + } + //------------------------------------------------------------------------------------------------ + // FXT1 (for fun, and because some modern Intel parts support it, and because a subset is like BC1) + + struct fxt1_block + { + union + { + struct + { + uint64_t m_t00 : 2; + uint64_t m_t01 : 2; + uint64_t m_t02 : 2; + uint64_t m_t03 : 2; + uint64_t m_t04 : 2; + uint64_t m_t05 : 2; + uint64_t m_t06 : 2; + uint64_t m_t07 : 2; + uint64_t m_t08 : 2; + uint64_t m_t09 : 2; + uint64_t m_t10 : 2; + uint64_t m_t11 : 2; + uint64_t m_t12 : 2; + 
uint64_t m_t13 : 2; + uint64_t m_t14 : 2; + uint64_t m_t15 : 2; + uint64_t m_t16 : 2; + uint64_t m_t17 : 2; + uint64_t m_t18 : 2; + uint64_t m_t19 : 2; + uint64_t m_t20 : 2; + uint64_t m_t21 : 2; + uint64_t m_t22 : 2; + uint64_t m_t23 : 2; + uint64_t m_t24 : 2; + uint64_t m_t25 : 2; + uint64_t m_t26 : 2; + uint64_t m_t27 : 2; + uint64_t m_t28 : 2; + uint64_t m_t29 : 2; + uint64_t m_t30 : 2; + uint64_t m_t31 : 2; + } m_lo; + uint64_t m_lo_bits; + uint8_t m_sels[8]; + }; + + union + { + struct + { +#ifdef BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING + // This is the format that 3DFX's DECOMP.EXE tool expects, which I'm assuming is what the actual 3DFX hardware wanted. + // Unfortunately, color0/color1 and color2/color3 are flipped relative to the official OpenGL extension and Intel's documentation! + uint64_t m_b1 : 5; + uint64_t m_g1 : 5; + uint64_t m_r1 : 5; + uint64_t m_b0 : 5; + uint64_t m_g0 : 5; + uint64_t m_r0 : 5; + uint64_t m_b3 : 5; + uint64_t m_g3 : 5; + uint64_t m_r3 : 5; + uint64_t m_b2 : 5; + uint64_t m_g2 : 5; + uint64_t m_r2 : 5; +#else + // Intel's encoding, and the encoding in the OpenGL FXT1 spec. + uint64_t m_b0 : 5; + uint64_t m_g0 : 5; + uint64_t m_r0 : 5; + uint64_t m_b1 : 5; + uint64_t m_g1 : 5; + uint64_t m_r1 : 5; + uint64_t m_b2 : 5; + uint64_t m_g2 : 5; + uint64_t m_r2 : 5; + uint64_t m_b3 : 5; + uint64_t m_g3 : 5; + uint64_t m_r3 : 5; +#endif + uint64_t m_alpha : 1; + uint64_t m_glsb : 2; + uint64_t m_mode : 1; + } m_hi; + + uint64_t m_hi_bits; + }; + }; + + static color_rgba expand_565(const color_rgba& c) + { + return color_rgba((c.r << 3) | (c.r >> 2), (c.g << 2) | (c.g >> 4), (c.b << 3) | (c.b >> 2), 255); + } + + // We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment. 
+ bool unpack_fxt1(const void *p, color_rgba *pPixels) + { + const fxt1_block* pBlock = static_cast(p); + + if (pBlock->m_hi.m_mode == 0) + return false; + if (pBlock->m_hi.m_alpha == 1) + return false; + + color_rgba colors[4]; + + colors[0].r = pBlock->m_hi.m_r0; + colors[0].g = (uint8_t)((pBlock->m_hi.m_g0 << 1) | ((pBlock->m_lo.m_t00 >> 1) ^ (pBlock->m_hi.m_glsb & 1))); + colors[0].b = pBlock->m_hi.m_b0; + colors[0].a = 255; + + colors[1].r = pBlock->m_hi.m_r1; + colors[1].g = (uint8_t)((pBlock->m_hi.m_g1 << 1) | (pBlock->m_hi.m_glsb & 1)); + colors[1].b = pBlock->m_hi.m_b1; + colors[1].a = 255; + + colors[2].r = pBlock->m_hi.m_r2; + colors[2].g = (uint8_t)((pBlock->m_hi.m_g2 << 1) | ((pBlock->m_lo.m_t16 >> 1) ^ (pBlock->m_hi.m_glsb >> 1))); + colors[2].b = pBlock->m_hi.m_b2; + colors[2].a = 255; + + colors[3].r = pBlock->m_hi.m_r3; + colors[3].g = (uint8_t)((pBlock->m_hi.m_g3 << 1) | (pBlock->m_hi.m_glsb >> 1)); + colors[3].b = pBlock->m_hi.m_b3; + colors[3].a = 255; + + for (uint32_t i = 0; i < 4; i++) + colors[i] = expand_565(colors[i]); + + color_rgba block0_colors[4]; + block0_colors[0] = colors[0]; + block0_colors[1] = color_rgba((colors[0].r * 2 + colors[1].r + 1) / 3, (colors[0].g * 2 + colors[1].g + 1) / 3, (colors[0].b * 2 + colors[1].b + 1) / 3, 255); + block0_colors[2] = color_rgba((colors[1].r * 2 + colors[0].r + 1) / 3, (colors[1].g * 2 + colors[0].g + 1) / 3, (colors[1].b * 2 + colors[0].b + 1) / 3, 255); + block0_colors[3] = colors[1]; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t sel = (pBlock->m_sels[i >> 2] >> ((i & 3) * 2)) & 3; + + const uint32_t x = i & 3; + const uint32_t y = i >> 2; + pPixels[x + y * 8] = block0_colors[sel]; + } + + color_rgba block1_colors[4]; + block1_colors[0] = colors[2]; + block1_colors[1] = color_rgba((colors[2].r * 2 + colors[3].r + 1) / 3, (colors[2].g * 2 + colors[3].g + 1) / 3, (colors[2].b * 2 + colors[3].b + 1) / 3, 255); + block1_colors[2] = color_rgba((colors[3].r * 2 + colors[2].r + 1) / 3, 
(colors[3].g * 2 + colors[2].g + 1) / 3, (colors[3].b * 2 + colors[2].b + 1) / 3, 255); + block1_colors[3] = colors[3]; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t sel = (pBlock->m_sels[4 + (i >> 2)] >> ((i & 3) * 2)) & 3; + + const uint32_t x = i & 3; + const uint32_t y = i >> 2; + pPixels[4 + x + y * 8] = block1_colors[sel]; + } + + return true; + } + + //------------------------------------------------------------------------------------------------ + // PVRTC2 (non-interpolated, hard_flag=1 modulation=0 subset only!) + + struct pvrtc2_block + { + uint8_t m_modulation[4]; + + union + { + union + { + // Opaque mode: RGB colora=554 and colorb=555 + struct + { + uint32_t m_mod_flag : 1; + uint32_t m_blue_a : 4; + uint32_t m_green_a : 5; + uint32_t m_red_a : 5; + uint32_t m_hard_flag : 1; + uint32_t m_blue_b : 5; + uint32_t m_green_b : 5; + uint32_t m_red_b : 5; + uint32_t m_opaque_flag : 1; + + } m_opaque_color_data; + + // Transparent mode: RGBA colora=4433 and colorb=4443 + struct + { + uint32_t m_mod_flag : 1; + uint32_t m_blue_a : 3; + uint32_t m_green_a : 4; + uint32_t m_red_a : 4; + uint32_t m_alpha_a : 3; + uint32_t m_hard_flag : 1; + uint32_t m_blue_b : 4; + uint32_t m_green_b : 4; + uint32_t m_red_b : 4; + uint32_t m_alpha_b : 3; + uint32_t m_opaque_flag : 1; + + } m_trans_color_data; + }; + + uint32_t m_color_data_bits; + }; + }; + + static color_rgba convert_rgb_555_to_888(const color_rgba& col) + { + return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), 255); + } + + static color_rgba convert_rgba_5554_to_8888(const color_rgba& col) + { + return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), (col[3] << 4) | col[3]); + } + + // PVRTC2 is currently limited to only what our transcoder outputs (non-interpolated, hard_flag=1 modulation=0). In this mode, PVRTC2 looks much like BC1/ATC. 
+ bool unpack_pvrtc2(const void *p, color_rgba *pPixels) + { + const pvrtc2_block* pBlock = static_cast(p); + + if ((!pBlock->m_opaque_color_data.m_hard_flag) || (pBlock->m_opaque_color_data.m_mod_flag)) + { + // This mode isn't supported by the transcoder, so we aren't bothering with it here. + return false; + } + + color_rgba colors[4]; + + if (pBlock->m_opaque_color_data.m_opaque_flag) + { + // colora=554 + color_rgba color_a(pBlock->m_opaque_color_data.m_red_a, pBlock->m_opaque_color_data.m_green_a, (pBlock->m_opaque_color_data.m_blue_a << 1) | (pBlock->m_opaque_color_data.m_blue_a >> 3), 255); + + // colora=555 + color_rgba color_b(pBlock->m_opaque_color_data.m_red_b, pBlock->m_opaque_color_data.m_green_b, pBlock->m_opaque_color_data.m_blue_b, 255); + + colors[0] = convert_rgb_555_to_888(color_a); + colors[3] = convert_rgb_555_to_888(color_b); + + colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, 255); + colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, 255); + } + else + { + // colora=4433 + color_rgba color_a( + (pBlock->m_trans_color_data.m_red_a << 1) | (pBlock->m_trans_color_data.m_red_a >> 3), + (pBlock->m_trans_color_data.m_green_a << 1) | (pBlock->m_trans_color_data.m_green_a >> 3), + (pBlock->m_trans_color_data.m_blue_a << 2) | (pBlock->m_trans_color_data.m_blue_a >> 1), + pBlock->m_trans_color_data.m_alpha_a << 1); + + //colorb=4443 + color_rgba color_b( + (pBlock->m_trans_color_data.m_red_b << 1) | (pBlock->m_trans_color_data.m_red_b >> 3), + (pBlock->m_trans_color_data.m_green_b << 1) | (pBlock->m_trans_color_data.m_green_b >> 3), + (pBlock->m_trans_color_data.m_blue_b << 1) | (pBlock->m_trans_color_data.m_blue_b >> 3), + (pBlock->m_trans_color_data.m_alpha_b << 1) | 1); + + colors[0] = convert_rgba_5554_to_8888(color_a); + colors[3] = convert_rgba_5554_to_8888(color_b); + } + 
+ colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, (colors[0].a * 5 + colors[3].a * 3) / 8); + colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, (colors[0].a * 3 + colors[3].a * 5) / 8); + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t sel = (pBlock->m_modulation[i >> 2] >> ((i & 3) * 2)) & 3; + pPixels[i] = colors[sel]; + } + + return true; + } + + //------------------------------------------------------------------------------------------------ + // ETC2 EAC R11 or RG11 + + struct etc2_eac_r11 + { + uint64_t m_base : 8; + uint64_t m_table : 4; + uint64_t m_mul : 4; + uint64_t m_sels_0 : 8; + uint64_t m_sels_1 : 8; + uint64_t m_sels_2 : 8; + uint64_t m_sels_3 : 8; + uint64_t m_sels_4 : 8; + uint64_t m_sels_5 : 8; + + uint64_t get_sels() const + { + return ((uint64_t)m_sels_0 << 40U) | ((uint64_t)m_sels_1 << 32U) | ((uint64_t)m_sels_2 << 24U) | ((uint64_t)m_sels_3 << 16U) | ((uint64_t)m_sels_4 << 8U) | m_sels_5; + } + + void set_sels(uint64_t v) + { + m_sels_0 = (v >> 40U) & 0xFF; + m_sels_1 = (v >> 32U) & 0xFF; + m_sels_2 = (v >> 24U) & 0xFF; + m_sels_3 = (v >> 16U) & 0xFF; + m_sels_4 = (v >> 8U) & 0xFF; + m_sels_5 = v & 0xFF; + } + }; + + struct etc2_eac_rg11 + { + etc2_eac_r11 m_c[2]; + }; + + void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c) + { + const etc2_eac_r11* pBlock = static_cast(p); + const uint64_t sels = pBlock->get_sels(); + + const int base = (int)pBlock->m_base * 8 + 4; + const int mul = pBlock->m_mul ? 
((int)pBlock->m_mul * 8) : 1; + const int table = (int)pBlock->m_table; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t shift = 45 - ((y + x * 4) * 3); + + const uint32_t sel = (uint32_t)((sels >> shift) & 7); + + int val = base + g_etc2_eac_tables[table][sel] * mul; + val = clamp(val, 0, 2047); + + // Convert to 8-bits with rounding + //pPixels[x + y * 4].m_comps[c] = static_cast((val * 255 + 1024) / 2047); + pPixels[x + y * 4].m_comps[c] = static_cast((val * 255 + 1023) / 2047); + + } // x + } // y + } + + void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels) + { + for (uint32_t c = 0; c < 2; c++) + { + const etc2_eac_r11* pBlock = &static_cast(p)->m_c[c]; + + unpack_etc2_eac_r(pBlock, pPixels, c); + } + } + + //------------------------------------------------------------------------------------------------ + // UASTC + + void unpack_uastc(const void* p, color_rgba* pPixels) + { + basist::unpack_uastc(*static_cast(p), (basist::color32 *)pPixels, false); + } + + // Unpacks to RGBA, R, RG, or A. LDR GPU texture formats only. 
+ bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels) + { + switch (fmt) + { + case texture_format::cBC1: + { + unpack_bc1(pBlock, pPixels, true); + break; + } + case texture_format::cBC1_NV: + { + unpack_bc1_nv(pBlock, pPixels, true); + break; + } + case texture_format::cBC1_AMD: + { + unpack_bc1_amd(pBlock, pPixels, true); + break; + } + case texture_format::cBC3: + { + return unpack_bc3(pBlock, pPixels); + } + case texture_format::cBC4: + { + // Unpack to R + unpack_bc4(pBlock, &pPixels[0].r, sizeof(color_rgba)); + break; + } + case texture_format::cBC5: + { + unpack_bc5(pBlock, pPixels); + break; + } + case texture_format::cBC7: + { + return unpack_bc7(pBlock, pPixels); + } + // Full ETC2 color blocks (planar/T/H modes) is currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color) + case texture_format::cETC2_RGB: + case texture_format::cETC1: + case texture_format::cETC1S: + { + return unpack_etc1(*static_cast(pBlock), pPixels); + } + case texture_format::cETC2_RGBA: + { + if (!unpack_etc1(static_cast(pBlock)[1], pPixels)) + return false; + unpack_etc2_eac(pBlock, pPixels); + break; + } + case texture_format::cETC2_ALPHA: + { + // Unpack to A + unpack_etc2_eac(pBlock, pPixels); + break; + } + case texture_format::cBC6HSigned: + case texture_format::cBC6HUnsigned: + case texture_format::cASTC_HDR_4x4: + case texture_format::cUASTC_HDR_4x4: + case texture_format::cASTC_HDR_6x6: + { + // Can't unpack HDR blocks in unpack_block() because it returns 32bpp pixel data. 
+ assert(0); + return false; + } + case texture_format::cASTC_LDR_4x4: + { + const bool astc_srgb = false; + bool status = basisu_astc::astc::decompress_ldr(reinterpret_cast(pPixels), static_cast(pBlock), astc_srgb, 4, 4); + assert(status); + + if (!status) + return false; + + break; + } + case texture_format::cATC_RGB: + { + unpack_atc(pBlock, pPixels); + break; + } + case texture_format::cATC_RGBA_INTERPOLATED_ALPHA: + { + unpack_atc(static_cast(pBlock) + 8, pPixels); + unpack_bc4(pBlock, &pPixels[0].a, sizeof(color_rgba)); + break; + } + case texture_format::cFXT1_RGB: + { + unpack_fxt1(pBlock, pPixels); + break; + } + case texture_format::cPVRTC2_4_RGBA: + { + unpack_pvrtc2(pBlock, pPixels); + break; + } + case texture_format::cETC2_R11_EAC: + { + unpack_etc2_eac_r(static_cast(pBlock), pPixels, 0); + break; + } + case texture_format::cETC2_RG11_EAC: + { + unpack_etc2_eac_rg(pBlock, pPixels); + break; + } + case texture_format::cUASTC4x4: + { + unpack_uastc(pBlock, pPixels); + break; + } + default: + { + assert(0); + // TODO + return false; + } + } + return true; + } + + bool unpack_block_hdr(texture_format fmt, const void* pBlock, vec4F* pPixels) + { + switch (fmt) + { + case texture_format::cASTC_HDR_6x6: + { +#if BASISU_USE_GOOGLE_ASTC_DECODER + bool status = basisu_astc::astc::decompress_hdr(&pPixels[0][0], (uint8_t*)pBlock, 6, 6); + assert(status); + if (!status) + return false; +#else + // Use our decoder + basist::half_float half_block[6 * 6][4]; + + astc_helpers::log_astc_block log_blk; + if (!astc_helpers::unpack_block(pBlock, log_blk, 6, 6)) + return false; + if (!astc_helpers::decode_block(log_blk, half_block, 6, 6, astc_helpers::cDecodeModeHDR16)) + return false; + + for (uint32_t p = 0; p < (6 * 6); p++) + { + pPixels[p][0] = basist::half_to_float(half_block[p][0]); + pPixels[p][1] = basist::half_to_float(half_block[p][1]); + pPixels[p][2] = basist::half_to_float(half_block[p][2]); + pPixels[p][3] = basist::half_to_float(half_block[p][3]); + } 
+#endif + return true; + } + case texture_format::cASTC_HDR_4x4: + case texture_format::cUASTC_HDR_4x4: + { +#if BASISU_USE_GOOGLE_ASTC_DECODER + // Use Google's decoder + bool status = basisu_astc::astc::decompress_hdr(&pPixels[0][0], (uint8_t*)pBlock, 4, 4); + assert(status); + if (!status) + return false; +#else + // Use our decoder + basist::half_float half_block[16][4]; + + astc_helpers::log_astc_block log_blk; + if (!astc_helpers::unpack_block(pBlock, log_blk, 4, 4)) + return false; + if (!astc_helpers::decode_block(log_blk, half_block, 4, 4, astc_helpers::cDecodeModeHDR16)) + return false; + + for (uint32_t p = 0; p < 16; p++) + { + pPixels[p][0] = basist::half_to_float(half_block[p][0]); + pPixels[p][1] = basist::half_to_float(half_block[p][1]); + pPixels[p][2] = basist::half_to_float(half_block[p][2]); + pPixels[p][3] = basist::half_to_float(half_block[p][3]); + } + + //memset(pPixels, 0, sizeof(vec4F) * 16); +#endif + return true; + } + case texture_format::cBC6HSigned: + case texture_format::cBC6HUnsigned: + { + basist::half_float half_block[16][3]; + + unpack_bc6h(pBlock, half_block, fmt == texture_format::cBC6HSigned); + + for (uint32_t p = 0; p < 16; p++) + { + pPixels[p][0] = basist::half_to_float(half_block[p][0]); + pPixels[p][1] = basist::half_to_float(half_block[p][1]); + pPixels[p][2] = basist::half_to_float(half_block[p][2]); + pPixels[p][3] = 1.0f; + } + + return true; + } + default: + { + break; + } + } + + assert(0); + return false; + } + + bool gpu_image::unpack(image& img) const + { + img.resize(get_pixel_width(), get_pixel_height()); + img.set_all(g_black_color); + + if (!img.get_width() || !img.get_height()) + return true; + + if ((m_fmt == texture_format::cPVRTC1_4_RGB) || (m_fmt == texture_format::cPVRTC1_4_RGBA)) + { + pvrtc4_image pi(m_width, m_height); + + if (get_total_blocks() != pi.get_total_blocks()) + return false; + + memcpy(&pi.get_blocks()[0], get_ptr(), get_size_in_bytes()); + + pi.deswizzle(); + + 
pi.unpack_all_pixels(img); + + return true; + } + + assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize)); + color_rgba pixels[cMaxBlockSize * cMaxBlockSize]; + for (uint32_t i = 0; i < cMaxBlockSize * cMaxBlockSize; i++) + pixels[i] = g_black_color; + + bool success = true; + + for (uint32_t by = 0; by < m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + const void* pBlock = get_block_ptr(bx, by); + + if (!unpack_block(m_fmt, pBlock, pixels)) + success = false; + + img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height); + } // bx + } // by + + return success; + } + + bool gpu_image::unpack_hdr(imagef& img) const + { + if ((m_fmt != texture_format::cASTC_HDR_4x4) && (m_fmt != texture_format::cUASTC_HDR_4x4) && (m_fmt != texture_format::cASTC_HDR_6x6) && + (m_fmt != texture_format::cBC6HUnsigned) && (m_fmt != texture_format::cBC6HSigned)) + { + // Can't call on LDR images, at least currently. (Could unpack the LDR data and convert to float.) 
+ assert(0); + return false; + } + + img.resize(get_pixel_width(), get_pixel_height()); + img.set_all(vec4F(0.0f)); + + if (!img.get_width() || !img.get_height()) + return true; + + assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize)); + vec4F pixels[cMaxBlockSize * cMaxBlockSize]; + clear_obj(pixels); + + bool success = true; + + for (uint32_t by = 0; by < m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + const void* pBlock = get_block_ptr(bx, by); + + if (!unpack_block_hdr(m_fmt, pBlock, pixels)) + success = false; + + img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height); + } // bx + } // by + + return success; + } + + // KTX1 texture file writing + static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; + + // KTX/GL enums + enum + { + KTX_ENDIAN = 0x04030201, + KTX_OPPOSITE_ENDIAN = 0x01020304, + KTX_ETC1_RGB8_OES = 0x8D64, + KTX_RED = 0x1903, + KTX_RG = 0x8227, + KTX_RGB = 0x1907, + KTX_RGBA = 0x1908, + + KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0, + KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3, + KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB, + KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD, + KTX_COMPRESSED_RGB8_ETC2 = 0x9274, + KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278, + KTX_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C, + KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D, + KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT = 0x8E8E, + KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F, + KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00, + KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02, + + KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0, + KTX_COMPRESSED_RGBA_ASTC_5x4_KHR = 0x93B1, + KTX_COMPRESSED_RGBA_ASTC_5x5_KHR = 0x93B2, + KTX_COMPRESSED_RGBA_ASTC_6x5_KHR = 0x93B3, + KTX_COMPRESSED_RGBA_ASTC_6x6_KHR = 0x93B4, + KTX_COMPRESSED_RGBA_ASTC_8x5_KHR = 0x93B5, + KTX_COMPRESSED_RGBA_ASTC_8x6_KHR = 0x93B6, + KTX_COMPRESSED_RGBA_ASTC_8x8_KHR = 0x93B7, + 
KTX_COMPRESSED_RGBA_ASTC_10x5_KHR = 0x93B8, + KTX_COMPRESSED_RGBA_ASTC_10x6_KHR = 0x93B9, + KTX_COMPRESSED_RGBA_ASTC_10x8_KHR = 0x93BA, + KTX_COMPRESSED_RGBA_ASTC_10x10_KHR = 0x93BB, + KTX_COMPRESSED_RGBA_ASTC_12x10_KHR = 0x93BC, + KTX_COMPRESSED_RGBA_ASTC_12x12_KHR = 0x93BD, + + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR = 0x93D0, + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR = 0x93D1, + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR = 0x93D2, + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR = 0x93D3, + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR = 0x93D4, + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR = 0x93D5, + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR = 0x93D6, + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR = 0x93D7, + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR = 0x93D8, + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR = 0x93D9, + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR = 0x93DA, + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR = 0x93DB, + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR = 0x93DC, + KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR = 0x93DD, + + KTX_COMPRESSED_RGBA_UASTC_4x4_KHR = 0x94CC, // TODO - Use proper value! 
+ + KTX_ATC_RGB_AMD = 0x8C92, + KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD = 0x87EE, + + KTX_COMPRESSED_RGB_FXT1_3DFX = 0x86B0, + KTX_COMPRESSED_RGBA_FXT1_3DFX = 0x86B1, + KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG = 0x9138, + KTX_COMPRESSED_R11_EAC = 0x9270, + KTX_COMPRESSED_RG11_EAC = 0x9272 + }; + + struct ktx_header + { + uint8_t m_identifier[12]; + packed_uint<4> m_endianness; + packed_uint<4> m_glType; + packed_uint<4> m_glTypeSize; + packed_uint<4> m_glFormat; + packed_uint<4> m_glInternalFormat; + packed_uint<4> m_glBaseInternalFormat; + packed_uint<4> m_pixelWidth; + packed_uint<4> m_pixelHeight; + packed_uint<4> m_pixelDepth; + packed_uint<4> m_numberOfArrayElements; + packed_uint<4> m_numberOfFaces; + packed_uint<4> m_numberOfMipmapLevels; + packed_uint<4> m_bytesOfKeyValueData; + + void clear() { clear_obj(*this); } + }; + + // Input is a texture array of mipmapped gpu_image's: gpu_images[array_index][level_index] + bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector& gpu_images, bool cubemap_flag) + { + if (!gpu_images.size()) + { + assert(0); + return false; + } + + uint32_t width = 0, height = 0, total_levels = 0; + basisu::texture_format fmt = texture_format::cInvalidTextureFormat; + + // Sanity check the input + if (cubemap_flag) + { + if ((gpu_images.size() % 6) != 0) + { + assert(0); + return false; + } + } + + for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++) + { + const gpu_image_vec &levels = gpu_images[array_index]; + + if (!levels.size()) + { + // Empty mip chain + assert(0); + return false; + } + + if (!array_index) + { + width = levels[0].get_pixel_width(); + height = levels[0].get_pixel_height(); + total_levels = (uint32_t)levels.size(); + fmt = levels[0].get_format(); + } + else + { + if ((width != levels[0].get_pixel_width()) || + (height != levels[0].get_pixel_height()) || + (total_levels != levels.size())) + { + // All cubemap/texture array faces must be the same dimension + assert(0); + return 
false; + } + } + + for (uint32_t level_index = 0; level_index < levels.size(); level_index++) + { + if (level_index) + { + if ( (levels[level_index].get_pixel_width() != maximum(1, levels[0].get_pixel_width() >> level_index)) || + (levels[level_index].get_pixel_height() != maximum(1, levels[0].get_pixel_height() >> level_index)) ) + { + // Malformed mipmap chain + assert(0); + return false; + } + } + + if (fmt != levels[level_index].get_format()) + { + // All input textures must use the same GPU format + assert(0); + return false; + } + } + } + + uint32_t internal_fmt = KTX_ETC1_RGB8_OES, base_internal_fmt = KTX_RGB; + + switch (fmt) + { + case texture_format::cBC1: + case texture_format::cBC1_NV: + case texture_format::cBC1_AMD: + { + internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT; + break; + } + case texture_format::cBC3: + { + internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cBC4: + { + internal_fmt = KTX_COMPRESSED_RED_RGTC1_EXT;// KTX_COMPRESSED_LUMINANCE_LATC1_EXT; + base_internal_fmt = KTX_RED; + break; + } + case texture_format::cBC5: + { + internal_fmt = KTX_COMPRESSED_RED_GREEN_RGTC2_EXT; + base_internal_fmt = KTX_RG; + break; + } + case texture_format::cETC1: + case texture_format::cETC1S: + { + internal_fmt = KTX_ETC1_RGB8_OES; + break; + } + case texture_format::cETC2_RGB: + { + internal_fmt = KTX_COMPRESSED_RGB8_ETC2; + break; + } + case texture_format::cETC2_RGBA: + { + internal_fmt = KTX_COMPRESSED_RGBA8_ETC2_EAC; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cBC6HSigned: + { + internal_fmt = KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cBC6HUnsigned: + { + internal_fmt = KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cBC7: + { + internal_fmt = KTX_COMPRESSED_RGBA_BPTC_UNORM; + base_internal_fmt = KTX_RGBA; + break; + } + case 
texture_format::cPVRTC1_4_RGB: + { + internal_fmt = KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG; + break; + } + case texture_format::cPVRTC1_4_RGBA: + { + internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_HDR_6x6: + { + internal_fmt = KTX_COMPRESSED_RGBA_ASTC_6x6_KHR; + // TODO: should we write RGB? We don't support generating HDR 6x6 with alpha. + base_internal_fmt = KTX_RGBA; + break; + } + // We use different enums for HDR vs. LDR ASTC, but internally they are both just ASTC. + case texture_format::cASTC_LDR_4x4: + case texture_format::cASTC_HDR_4x4: + case texture_format::cUASTC_HDR_4x4: // UASTC_HDR is just HDR-only ASTC + { + internal_fmt = KTX_COMPRESSED_RGBA_ASTC_4x4_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cATC_RGB: + { + internal_fmt = KTX_ATC_RGB_AMD; + break; + } + case texture_format::cATC_RGBA_INTERPOLATED_ALPHA: + { + internal_fmt = KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cETC2_R11_EAC: + { + internal_fmt = KTX_COMPRESSED_R11_EAC; + base_internal_fmt = KTX_RED; + break; + } + case texture_format::cETC2_RG11_EAC: + { + internal_fmt = KTX_COMPRESSED_RG11_EAC; + base_internal_fmt = KTX_RG; + break; + } + case texture_format::cUASTC4x4: + { + internal_fmt = KTX_COMPRESSED_RGBA_UASTC_4x4_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cFXT1_RGB: + { + internal_fmt = KTX_COMPRESSED_RGB_FXT1_3DFX; + break; + } + case texture_format::cPVRTC2_4_RGBA: + { + internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG; + base_internal_fmt = KTX_RGBA; + break; + } + default: + { + // TODO + assert(0); + return false; + } + } + + ktx_header header; + header.clear(); + memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id)); + header.m_endianness = KTX_ENDIAN; + + header.m_pixelWidth = width; + header.m_pixelHeight = height; + + header.m_glTypeSize = 1; + + 
header.m_glInternalFormat = internal_fmt; + header.m_glBaseInternalFormat = base_internal_fmt; + + header.m_numberOfArrayElements = (uint32_t)(cubemap_flag ? (gpu_images.size() / 6) : gpu_images.size()); + if (header.m_numberOfArrayElements == 1) + header.m_numberOfArrayElements = 0; + + header.m_numberOfMipmapLevels = total_levels; + header.m_numberOfFaces = cubemap_flag ? 6 : 1; + + append_vector(ktx_data, (uint8_t*)&header, sizeof(header)); + + for (uint32_t level_index = 0; level_index < total_levels; level_index++) + { + uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes(); + + if ((header.m_numberOfFaces == 1) || (header.m_numberOfArrayElements > 1)) + { + img_size = img_size * header.m_numberOfFaces * maximum(1, header.m_numberOfArrayElements); + } + + assert(img_size && ((img_size & 3) == 0)); + + packed_uint<4> packed_img_size(img_size); + append_vector(ktx_data, (uint8_t*)&packed_img_size, sizeof(packed_img_size)); + + uint32_t bytes_written = 0; + (void)bytes_written; + + for (uint32_t array_index = 0; array_index < maximum(1, header.m_numberOfArrayElements); array_index++) + { + for (uint32_t face_index = 0; face_index < header.m_numberOfFaces; face_index++) + { + const gpu_image& img = gpu_images[cubemap_flag ? (array_index * 6 + face_index) : array_index][level_index]; + + append_vector(ktx_data, (uint8_t*)img.get_ptr(), img.get_size_in_bytes()); + + bytes_written += img.get_size_in_bytes(); + } + + } // array_index + + } // level_index + + return true; + } + + bool does_dds_support_format(texture_format fmt) + { + switch (fmt) + { + case texture_format::cBC1_NV: + case texture_format::cBC1_AMD: + case texture_format::cBC1: + case texture_format::cBC3: + case texture_format::cBC4: + case texture_format::cBC5: + case texture_format::cBC6HSigned: + case texture_format::cBC6HUnsigned: + case texture_format::cBC7: + return true; + default: + break; + } + return false; + } + + // Only supports the basic DirectX BC texture formats. 
+ // gpu_images array is: [face/layer][mipmap level] + // For cubemap arrays, # of face/layers must be a multiple of 6. + // Accepts 2D, 2D mipmapped, 2D array, 2D array mipmapped + // and cubemap, cubemap mipmapped, and cubemap array mipmapped. + bool write_dds_file(uint8_vec &dds_data, const basisu::vector& gpu_images, bool cubemap_flag, bool use_srgb_format) + { + if (!gpu_images.size()) + { + assert(0); + return false; + } + + // Sanity check the input + uint32_t slices = 1; + if (cubemap_flag) + { + if ((gpu_images.size() % 6) != 0) + { + assert(0); + return false; + } + slices = gpu_images.size_u32() / 6; + } + else + { + slices = gpu_images.size_u32(); + } + + uint32_t width = 0, height = 0, total_levels = 0; + basisu::texture_format fmt = texture_format::cInvalidTextureFormat; + + // Sanity check the input for consistent # of dimensions and mip levels + for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++) + { + const gpu_image_vec& levels = gpu_images[array_index]; + + if (!levels.size()) + { + // Empty mip chain + assert(0); + return false; + } + + if (!array_index) + { + width = levels[0].get_pixel_width(); + height = levels[0].get_pixel_height(); + total_levels = (uint32_t)levels.size(); + fmt = levels[0].get_format(); + } + else + { + if ((width != levels[0].get_pixel_width()) || + (height != levels[0].get_pixel_height()) || + (total_levels != levels.size())) + { + // All cubemap/texture array faces must be the same dimension + assert(0); + return false; + } + } + + for (uint32_t level_index = 0; level_index < levels.size(); level_index++) + { + if (level_index) + { + if ((levels[level_index].get_pixel_width() != maximum(1, levels[0].get_pixel_width() >> level_index)) || + (levels[level_index].get_pixel_height() != maximum(1, levels[0].get_pixel_height() >> level_index))) + { + // Malformed mipmap chain + assert(0); + return false; + } + } + + if (fmt != levels[level_index].get_format()) + { + // All input textures must use the 
same GPU format + assert(0); + return false; + } + } + } + + // No mipmap levels + if (!total_levels) + { + assert(0); + return false; + } + + // Create the DDS mipmap level data + uint8_vec mipmaps[32]; + + // See https://learn.microsoft.com/en-us/windows/win32/direct3ddds/dds-file-layout-for-cubic-environment-maps + // DDS cubemap organization is cubemap face 0 followed by all mips, then cubemap face 1 followed by all mips, etc. + // Unfortunately tinydds.h's writer doesn't handle this case correctly, so we work around it here. + // This also applies with 2D texture arrays, too. RenderDoc and ddsview (DirectXTex) views each type (cubemap array and 2D texture array) correctly. + // Also see "Using Texture Arrays in Direct3D 10/11": + // https://learn.microsoft.com/en-us/windows/win32/direct3ddds/dx-graphics-dds-pguide + for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++) + { + const gpu_image_vec& levels = gpu_images[array_index]; + + for (uint32_t level_index = 0; level_index < levels.size(); level_index++) + { + append_vector(mipmaps[0], (uint8_t*)levels[level_index].get_ptr(), levels[level_index].get_size_in_bytes()); + + } // level_index + } // array_index + +#if 0 + // This organization, required by tinydds.h's API, is wrong. 
+ { + for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++) + { + const gpu_image_vec& levels = gpu_images[array_index]; + + for (uint32_t level_index = 0; level_index < levels.size(); level_index++) + { + append_vector(mipmaps[level_index], (uint8_t*)levels[level_index].get_ptr(), levels[level_index].get_size_in_bytes()); + + } // level_index + } // array_index + } +#endif + + // Write DDS file using tinydds + TinyDDS_WriteCallbacks cbs; + cbs.error = [](void* user, char const* msg) { BASISU_NOTE_UNUSED(user); fprintf(stderr, "tinydds: %s\n", msg); }; + cbs.alloc = [](void* user, size_t size) -> void* { BASISU_NOTE_UNUSED(user); return malloc(size); }; + cbs.free = [](void* user, void* memory) { BASISU_NOTE_UNUSED(user); free(memory); }; + cbs.write = [](void* user, void const* buffer, size_t byteCount) { BASISU_NOTE_UNUSED(user); uint8_vec* pVec = (uint8_vec*)user; append_vector(*pVec, (const uint8_t*)buffer, byteCount); }; + + uint32_t mipmap_sizes[32]; + const void* mipmap_ptrs[32]; + + clear_obj(mipmap_sizes); + clear_obj(mipmap_ptrs); + + assert(total_levels < 32); + for (uint32_t i = 0; i < total_levels; i++) + { + mipmap_sizes[i] = mipmaps[i].size_in_bytes_u32(); + mipmap_ptrs[i] = mipmaps[i].get_ptr(); + } + + // Select tinydds texture format + uint32_t tinydds_fmt = 0; + + switch (fmt) + { + case texture_format::cBC1_NV: + case texture_format::cBC1_AMD: + case texture_format::cBC1: + tinydds_fmt = use_srgb_format ? TDDS_BC1_RGBA_SRGB_BLOCK : TDDS_BC1_RGBA_UNORM_BLOCK; + break; + case texture_format::cBC3: + tinydds_fmt = use_srgb_format ? 
TDDS_BC3_SRGB_BLOCK : TDDS_BC3_UNORM_BLOCK; + break; + case texture_format::cBC4: + tinydds_fmt = TDDS_BC4_UNORM_BLOCK; + break; + case texture_format::cBC5: + tinydds_fmt = TDDS_BC5_UNORM_BLOCK; + break; + case texture_format::cBC6HSigned: + tinydds_fmt = TDDS_BC6H_SFLOAT_BLOCK; + break; + case texture_format::cBC6HUnsigned: + tinydds_fmt = TDDS_BC6H_UFLOAT_BLOCK; + break; + case texture_format::cBC7: + tinydds_fmt = use_srgb_format ? TDDS_BC7_SRGB_BLOCK : TDDS_BC7_UNORM_BLOCK; + break; + default: + { + fprintf(stderr, "Warning: Unsupported format in write_dds_file().\n"); + return false; + } + } + + // DirectXTex's DDSView doesn't handle odd sizes textures correctly. RenderDoc loads them fine, however. + // Trying to work around this here results in invalid mipmaps. + //width = (width + 3) & ~3; + //height = (height + 3) & ~3; + + bool status = TinyDDS_WriteImage(&cbs, + &dds_data, + width, + height, + 1, + slices, + total_levels, + (TinyDDS_Format)tinydds_fmt, + cubemap_flag, + true, + mipmap_sizes, + mipmap_ptrs); + + if (!status) + { + fprintf(stderr, "write_dds_file: Failed creating DDS file\n"); + return false; + } + + return true; + } + + bool write_dds_file(const char* pFilename, const basisu::vector& gpu_images, bool cubemap_flag, bool use_srgb_format) + { + uint8_vec dds_data; + + if (!write_dds_file(dds_data, gpu_images, cubemap_flag, use_srgb_format)) + return false; + + if (!write_vec_to_file(pFilename, dds_data)) + { + fprintf(stderr, "write_dds_file: Failed writing DDS file data\n"); + return false; + } + + return true; + } + + bool read_uncompressed_dds_file(const char* pFilename, basisu::vector &ldr_mips, basisu::vector& hdr_mips) + { + const uint32_t MAX_IMAGE_DIM = 16384; + + TinyDDS_Callbacks cbs; + + cbs.errorFn = [](void* user, char const* msg) { BASISU_NOTE_UNUSED(user); fprintf(stderr, "tinydds: %s\n", msg); }; + cbs.allocFn = [](void* user, size_t size) -> void* { BASISU_NOTE_UNUSED(user); return malloc(size); }; + cbs.freeFn = [](void* 
user, void* memory) { BASISU_NOTE_UNUSED(user); free(memory); }; + cbs.readFn = [](void* user, void* buffer, size_t byteCount) -> size_t { return (size_t)fread(buffer, 1, byteCount, (FILE*)user); }; + +#ifdef _MSC_VER + cbs.seekFn = [](void* user, int64_t ofs) -> bool { return _fseeki64((FILE*)user, ofs, SEEK_SET) == 0; }; + cbs.tellFn = [](void* user) -> int64_t { return _ftelli64((FILE*)user); }; +#else + cbs.seekFn = [](void* user, int64_t ofs) -> bool { return fseek((FILE*)user, (long)ofs, SEEK_SET) == 0; }; + cbs.tellFn = [](void* user) -> int64_t { return (int64_t)ftell((FILE*)user); }; +#endif + + FILE* pFile = fopen_safe(pFilename, "rb"); + if (!pFile) + { + error_printf("Can't open .DDS file \"%s\"\n", pFilename); + return false; + } + + // These are the formats AMD Compressonator supports in its UI. + enum dds_fmt + { + cRGBA32, + cRGBA_HALF, + cRGBA_FLOAT + }; + + bool status = false; + dds_fmt fmt = cRGBA32; + uint32_t width = 0, height = 0; + bool hdr_flag = false; + TinyDDS_Format tfmt = TDDS_UNDEFINED; + + TinyDDS_ContextHandle ctx = TinyDDS_CreateContext(&cbs, pFile); + if (!ctx) + goto failure; + + status = TinyDDS_ReadHeader(ctx); + if (!status) + { + error_printf("Failed parsing DDS header in file \"%s\"\n", pFilename); + goto failure; + } + + if ((!TinyDDS_Is2D(ctx)) || (TinyDDS_ArraySlices(ctx) > 1) || (TinyDDS_IsCubemap(ctx))) + { + error_printf("Unsupported DDS texture type in file \"%s\"\n", pFilename); + goto failure; + } + + width = TinyDDS_Width(ctx); + height = TinyDDS_Height(ctx); + + if (!width || !height) + { + error_printf("DDS texture dimensions invalid in file \"%s\"\n", pFilename); + goto failure; + } + + if ((width > MAX_IMAGE_DIM) || (height > MAX_IMAGE_DIM)) + { + error_printf("DDS texture dimensions too large in file \"%s\"\n", pFilename); + goto failure; + } + + tfmt = TinyDDS_GetFormat(ctx); + switch (tfmt) + { + case TDDS_R8G8B8A8_SRGB: + case TDDS_R8G8B8A8_UNORM: + case TDDS_B8G8R8A8_SRGB: + case TDDS_B8G8R8A8_UNORM: + fmt 
= cRGBA32; + break; + case TDDS_R16G16B16A16_SFLOAT: + fmt = cRGBA_HALF; + hdr_flag = true; + break; + case TDDS_R32G32B32A32_SFLOAT: + fmt = cRGBA_FLOAT; + hdr_flag = true; + break; + default: + error_printf("File \"%s\" has an unsupported DDS texture format (only supports RGBA/BGRA 32bpp, RGBA HALF float, or RGBA FLOAT)\n", pFilename); + goto failure; + } + + if (hdr_flag) + hdr_mips.resize(TinyDDS_NumberOfMipmaps(ctx)); + else + ldr_mips.resize(TinyDDS_NumberOfMipmaps(ctx)); + + for (uint32_t level = 0; level < TinyDDS_NumberOfMipmaps(ctx); level++) + { + const uint32_t level_width = TinyDDS_MipMapReduce(width, level); + const uint32_t level_height = TinyDDS_MipMapReduce(height, level); + const uint32_t total_level_texels = level_width * level_height; + + const void* pImage = TinyDDS_ImageRawData(ctx, level); + const uint32_t image_size = TinyDDS_ImageSize(ctx, level); + + if (fmt == cRGBA32) + { + ldr_mips[level].resize(level_width, level_height); + + if ((ldr_mips[level].get_total_pixels() * sizeof(uint32_t) != image_size)) + { + assert(0); + goto failure; + } + + memcpy(ldr_mips[level].get_ptr(), pImage, image_size); + + if ((tfmt == TDDS_B8G8R8A8_SRGB) || (tfmt == TDDS_B8G8R8A8_UNORM)) + { + // Swap R and B components. 
+ uint32_t *pTexels = (uint32_t *)ldr_mips[level].get_ptr(); + for (uint32_t i = 0; i < total_level_texels; i++) + { + const uint32_t v = pTexels[i]; + const uint32_t r = (v >> 16) & 0xFF; + const uint32_t b = v & 0xFF; + pTexels[i] = r | (b << 16) | (v & 0xFF00FF00); + } + } + } + else if (fmt == cRGBA_FLOAT) + { + hdr_mips[level].resize(level_width, level_height); + + if ((hdr_mips[level].get_total_pixels() * sizeof(float) * 4 != image_size)) + { + assert(0); + goto failure; + } + + memcpy((void *)hdr_mips[level].get_ptr(), pImage, image_size); + } + else if (fmt == cRGBA_HALF) + { + hdr_mips[level].resize(level_width, level_height); + + if ((hdr_mips[level].get_total_pixels() * sizeof(basist::half_float) * 4 != image_size)) + { + assert(0); + goto failure; + } + + // Unpack half to float. + const basist::half_float* pSrc_comps = static_cast(pImage); + vec4F* pDst_texels = hdr_mips[level].get_ptr(); + + for (uint32_t i = 0; i < total_level_texels; i++) + { + (*pDst_texels)[0] = basist::half_to_float(pSrc_comps[0]); + (*pDst_texels)[1] = basist::half_to_float(pSrc_comps[1]); + (*pDst_texels)[2] = basist::half_to_float(pSrc_comps[2]); + (*pDst_texels)[3] = basist::half_to_float(pSrc_comps[3]); + + pSrc_comps += 4; + pDst_texels++; + } // y + } + } // level + + TinyDDS_DestroyContext(ctx); + fclose(pFile); + + return true; + + failure: + if (ctx) + TinyDDS_DestroyContext(ctx); + + if (pFile) + fclose(pFile); + + return false; + } + + bool write_compressed_texture_file(const char* pFilename, const basisu::vector& g, bool cubemap_flag, bool use_srgb_format) + { + std::string extension(string_tolower(string_get_extension(pFilename))); + + uint8_vec filedata; + if (extension == "ktx") + { + if (!create_ktx_texture_file(filedata, g, cubemap_flag)) + return false; + } + else if (extension == "pvr") + { + // TODO + return false; + } + else if (extension == "dds") + { + if (!write_dds_file(filedata, g, cubemap_flag, use_srgb_format)) + return false; + } + else + { + // 
unsupported texture format + assert(0); + return false; + } + + return basisu::write_vec_to_file(pFilename, filedata); + } + + bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g, bool use_srgb_format) + { + basisu::vector a; + a.push_back(g); + return write_compressed_texture_file(pFilename, a, false, use_srgb_format); + } + + bool write_compressed_texture_file(const char* pFilename, const gpu_image& g, bool use_srgb_format) + { + basisu::vector v; + enlarge_vector(v, 1)->push_back(g); + return write_compressed_texture_file(pFilename, v, false, use_srgb_format); + } + + //const uint32_t OUT_FILE_MAGIC = 'TEXC'; + struct out_file_header + { + packed_uint<4> m_magic; + packed_uint<4> m_pad; + packed_uint<4> m_width; + packed_uint<4> m_height; + }; + + // As no modern tool supports FXT1 format .KTX files, let's write .OUT files and make sure 3DFX's original tools shipped in 1999 can decode our encoded output. + bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi) + { + out_file_header hdr; + //hdr.m_magic = OUT_FILE_MAGIC; + hdr.m_magic.m_bytes[0] = 67; + hdr.m_magic.m_bytes[1] = 88; + hdr.m_magic.m_bytes[2] = 69; + hdr.m_magic.m_bytes[3] = 84; + hdr.m_pad = 0; + hdr.m_width = gi.get_blocks_x() * 8; + hdr.m_height = gi.get_blocks_y() * 4; + + FILE* pFile = nullptr; +#ifdef _WIN32 + fopen_s(&pFile, pFilename, "wb"); +#else + pFile = fopen(pFilename, "wb"); +#endif + if (!pFile) + return false; + + fwrite(&hdr, sizeof(hdr), 1, pFile); + fwrite(gi.get_ptr(), gi.get_size_in_bytes(), 1, pFile); + + return fclose(pFile) != EOF; + } + + // The .astc texture format is readable using ARM's astcenc, AMD Compressonator, and other engines/tools. It oddly doesn't support mipmaps, limiting + // its usefulness/relevance. 
+ // https://github.com/ARM-software/astc-encoder/blob/main/Docs/FileFormat.md + bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y) + { + assert(pBlocks && (block_width >= 4) && (block_height >= 4) && (dim_x > 0) && (dim_y > 0)); + + uint8_vec file_data; + file_data.push_back(0x13); + file_data.push_back(0xAB); + file_data.push_back(0xA1); + file_data.push_back(0x5C); + + file_data.push_back((uint8_t)block_width); + file_data.push_back((uint8_t)block_height); + file_data.push_back(1); + + file_data.push_back((uint8_t)dim_x); + file_data.push_back((uint8_t)(dim_x >> 8)); + file_data.push_back((uint8_t)(dim_x >> 16)); + + file_data.push_back((uint8_t)dim_y); + file_data.push_back((uint8_t)(dim_y >> 8)); + file_data.push_back((uint8_t)(dim_y >> 16)); + + file_data.push_back((uint8_t)1); + file_data.push_back((uint8_t)0); + file_data.push_back((uint8_t)0); + + const uint32_t num_blocks_x = (dim_x + block_width - 1) / block_width; + const uint32_t num_blocks_y = (dim_y + block_height - 1) / block_height; + + const uint32_t total_bytes = num_blocks_x * num_blocks_y * 16; + + const size_t cur_size = file_data.size(); + + file_data.resize(cur_size + total_bytes); + + memcpy(&file_data[cur_size], pBlocks, total_bytes); + + return write_vec_to_file(pFilename, file_data); + } + +} // basisu + diff --git a/thirdparty/basisu/encoder/basisu_gpu_texture.h b/thirdparty/basisu/encoder/basisu_gpu_texture.h new file mode 100644 index 000000000..67c2a2bc5 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_gpu_texture.h @@ -0,0 +1,170 @@ +// basisu_gpu_texture.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "../transcoder/basisu.h" +#include "basisu_etc.h" + +namespace basisu +{ + // GPU texture "image" + class gpu_image + { + public: + enum { cMaxBlockSize = 12 }; + + gpu_image() + { + clear(); + } + + gpu_image(texture_format fmt, uint32_t width, uint32_t height) + { + init(fmt, width, height); + } + + void clear() + { + m_fmt = texture_format::cInvalidTextureFormat; + m_width = 0; + m_height = 0; + m_block_width = 0; + m_block_height = 0; + m_blocks_x = 0; + m_blocks_y = 0; + m_qwords_per_block = 0; + m_blocks.clear(); + } + + inline texture_format get_format() const { return m_fmt; } + inline bool is_hdr() const { return is_hdr_texture_format(m_fmt); } + + // Width/height in pixels + inline uint32_t get_pixel_width() const { return m_width; } + inline uint32_t get_pixel_height() const { return m_height; } + + // Width/height in blocks, row pitch is assumed to be m_blocks_x. 
+ inline uint32_t get_blocks_x() const { return m_blocks_x; } + inline uint32_t get_blocks_y() const { return m_blocks_y; } + + // Size of each block in pixels + inline uint32_t get_block_width() const { return m_block_width; } + inline uint32_t get_block_height() const { return m_block_height; } + + inline uint32_t get_qwords_per_block() const { return m_qwords_per_block; } + inline uint32_t get_total_blocks() const { return m_blocks_x * m_blocks_y; } + inline uint32_t get_bytes_per_block() const { return get_qwords_per_block() * sizeof(uint64_t); } + inline uint32_t get_row_pitch_in_bytes() const { return get_bytes_per_block() * get_blocks_x(); } + + inline const uint64_vec &get_blocks() const { return m_blocks; } + + inline const uint64_t *get_ptr() const { return &m_blocks[0]; } + inline uint64_t *get_ptr() { return &m_blocks[0]; } + + inline uint32_t get_size_in_bytes() const { return get_total_blocks() * get_qwords_per_block() * sizeof(uint64_t); } + + inline const void *get_block_ptr(uint32_t block_x, uint32_t block_y, uint32_t element_index = 0) const + { + assert(block_x < m_blocks_x && block_y < m_blocks_y); + return &m_blocks[(block_x + block_y * m_blocks_x) * m_qwords_per_block + element_index]; + } + + inline void *get_block_ptr(uint32_t block_x, uint32_t block_y, uint32_t element_index = 0) + { + assert(block_x < m_blocks_x && block_y < m_blocks_y && element_index < m_qwords_per_block); + return &m_blocks[(block_x + block_y * m_blocks_x) * m_qwords_per_block + element_index]; + } + + void init(texture_format fmt, uint32_t width, uint32_t height) + { + m_fmt = fmt; + m_width = width; + m_height = height; + m_block_width = basisu::get_block_width(m_fmt); + m_block_height = basisu::get_block_height(m_fmt); + m_blocks_x = (m_width + m_block_width - 1) / m_block_width; + m_blocks_y = (m_height + m_block_height - 1) / m_block_height; + m_qwords_per_block = basisu::get_qwords_per_block(m_fmt); + + m_blocks.resize(0); + m_blocks.resize(m_blocks_x * m_blocks_y 
* m_qwords_per_block); + } + + // Unpacks LDR textures only. + bool unpack(image& img) const; + + // Unpacks HDR textures only. + bool unpack_hdr(imagef& img) const; + + inline void override_dimensions(uint32_t w, uint32_t h) + { + m_width = w; + m_height = h; + } + + private: + texture_format m_fmt; + uint32_t m_width, m_height, m_blocks_x, m_blocks_y, m_block_width, m_block_height, m_qwords_per_block; + uint64_vec m_blocks; + }; + + typedef basisu::vector gpu_image_vec; + + // KTX1 file writing + bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector& gpu_images, bool cubemap_flag); + + bool does_dds_support_format(texture_format fmt); + bool write_dds_file(uint8_vec& dds_data, const basisu::vector& gpu_images, bool cubemap_flag, bool use_srgb_format); + bool write_dds_file(const char* pFilename, const basisu::vector& gpu_images, bool cubemap_flag, bool use_srgb_format); + + // Currently reads 2D 32bpp RGBA, 16-bit HALF RGBA, or 32-bit FLOAT RGBA, with or without mipmaps. No tex arrays or cubemaps, yet. 
+ bool read_uncompressed_dds_file(const char* pFilename, basisu::vector& ldr_mips, basisu::vector& hdr_mips); + + // Supports DDS and KTX + bool write_compressed_texture_file(const char *pFilename, const basisu::vector& g, bool cubemap_flag, bool use_srgb_format); + bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g, bool use_srgb_format); + bool write_compressed_texture_file(const char *pFilename, const gpu_image &g, bool use_srgb_format); + + bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi); + + // GPU texture block unpacking + // For ETC1, use in basisu_etc.h: bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha) + void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels); + bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha); + void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride); + bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels); + void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels); + bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels); + bool unpack_bc7(const void* pBlock_bits, color_rgba* pPixels); // full format + bool unpack_bc6h(const void* pSrc_block, void* pDst_block, bool is_signed, uint32_t dest_pitch_in_halfs = 4 * 3); // full format, outputs HALF values, RGB texels only (not RGBA) + void unpack_atc(const void* pBlock_bits, color_rgba* pPixels); + // We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment. + bool unpack_fxt1(const void* p, color_rgba* pPixels); + // PVRTC2 is currently limited to only what our transcoder outputs (non-interpolated, hard_flag=1 modulation=0). In this mode, PVRTC2 looks much like BC1/ATC. 
+ bool unpack_pvrtc2(const void* p, color_rgba* pPixels); + void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c); + void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels); + + // unpack_block() is primarily intended to unpack texture data created by the transcoder. + // For some texture formats (like ETC2 RGB, PVRTC2, FXT1) it's not yet a complete implementation. + // Unpacks LDR texture formats only. + bool unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels); + + // Unpacks HDR texture formats only. + bool unpack_block_hdr(texture_format fmt, const void* pBlock, vec4F* pPixels); + + bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y); + +} // namespace basisu + diff --git a/thirdparty/basisu/encoder/basisu_kernels_declares.h b/thirdparty/basisu/encoder/basisu_kernels_declares.h new file mode 100644 index 000000000..9b85a594e --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_kernels_declares.h @@ -0,0 +1,27 @@ +// basisu_kernels_declares.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#if BASISU_SUPPORT_SSE +void CPPSPMD_NAME(perceptual_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err); +void CPPSPMD_NAME(linear_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err); + +void CPPSPMD_NAME(find_selectors_perceptual_rgb_4_N)(int64_t* pDistance, uint8_t* pSelectors, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err); +void CPPSPMD_NAME(find_selectors_linear_rgb_4_N)(int64_t* pDistance, uint8_t* pSelectors, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err); + +void CPPSPMD_NAME(find_lowest_error_perceptual_rgb_4_N)(int64_t* pDistance, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error); +void CPPSPMD_NAME(find_lowest_error_linear_rgb_4_N)(int64_t* pDistance, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error); + +void CPPSPMD_NAME(update_covar_matrix_16x16)(uint32_t num_vecs, const void* pWeighted_vecs, const void *pOrigin, const uint32_t* pVec_indices, void *pMatrix16x16); +#endif diff --git a/thirdparty/basisu/encoder/basisu_kernels_imp.h b/thirdparty/basisu/encoder/basisu_kernels_imp.h new file mode 100644 index 000000000..123862b1d --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_kernels_imp.h @@ -0,0 +1,647 @@ +// basisu_kernels_imp.h - Do not directly include +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using namespace CPPSPMD; + +namespace CPPSPMD_NAME(basisu_kernels_namespace) +{ + struct perceptual_distance_rgb_4_N : spmd_kernel + { + void _call(int64_t* pDistance, + const uint8_t* pSelectors, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_err) + { + assert(early_out_err >= 0); + + *pDistance = 0; + + __m128i block_colors[4]; + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + block_colors[i] = load_rgba32(&pBlock_colors[i]); + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + uint32_t i; + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + int s0 = pSelectors[i], s1 = pSelectors[i + 1], s2 = pSelectors[i + 2], s3 = pSelectors[i + 3]; + + vint base_r, base_g, base_b, base_a; + if ((s0 == s1) && (s0 == s2) && (s0 == s3)) + { + store_all(base_r, block_colors_r[s0]); + store_all(base_g, block_colors_g[s0]); + store_all(base_b, block_colors_b[s0]); + } + else + { + __m128i k0 = block_colors[s0], k1 = block_colors[s1], k2 = block_colors[s2], k3 = block_colors[s3]; + transpose4x4(base_r.m_value, base_g.m_value, base_b.m_value, base_a.m_value, k0, k1, k2, k3); + } + + 
vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint delta_l = dr * 27 + dg * 92 + db * 9; + vint delta_cr = dr * 128 - delta_l; + vint delta_cb = db * 128 - delta_l; + + vint id = ((delta_l * delta_l) >> 7) + + ((((delta_cr * delta_cr) >> 7) * 26) >> 7) + + ((((delta_cb * delta_cb) >> 7) * 3) >> 7); + + *pDistance += reduce_add(id); + if (*pDistance >= early_out_err) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int sel = pSelectors[i]; + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int delta_l = dr * 27 + dg * 92 + db * 9; + int delta_cr = dr * 128 - delta_l; + int delta_cb = db * 128 - delta_l; + + int id = ((delta_l * delta_l) >> 7) + + ((((delta_cr * delta_cr) >> 7) * 26) >> 7) + + ((((delta_cb * delta_cb) >> 7) * 3) >> 7); + + *pDistance += id; + if (*pDistance >= early_out_err) + return; + } + } + }; + + struct linear_distance_rgb_4_N : spmd_kernel + { + void _call(int64_t* pDistance, + const uint8_t* pSelectors, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_err) + { + assert(early_out_err >= 0); + + *pDistance = 0; + + __m128i block_colors[4]; + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + block_colors[i] = load_rgba32(&pBlock_colors[i]); + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + uint32_t i; + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + 
int s0 = pSelectors[i], s1 = pSelectors[i + 1], s2 = pSelectors[i + 2], s3 = pSelectors[i + 3]; + + vint base_r, base_g, base_b, base_a; + if ((s0 == s1) && (s0 == s2) && (s0 == s3)) + { + store_all(base_r, block_colors_r[s0]); + store_all(base_g, block_colors_g[s0]); + store_all(base_b, block_colors_b[s0]); + } + else + { + __m128i k0 = block_colors[s0], k1 = block_colors[s1], k2 = block_colors[s2], k3 = block_colors[s3]; + transpose4x4(base_r.m_value, base_g.m_value, base_b.m_value, base_a.m_value, k0, k1, k2, k3); + } + + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint id = dr * dr + dg * dg + db * db; + + *pDistance += reduce_add(id); + if (*pDistance >= early_out_err) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int sel = pSelectors[i]; + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int id = dr * dr + dg * dg + db * db; + + *pDistance += id; + if (*pDistance >= early_out_err) + return; + } + } + }; + + struct find_selectors_perceptual_rgb_4_N : spmd_kernel + { + inline vint compute_dist( + const vint& base_r, const vint& base_g, const vint& base_b, + const vint& r, const vint& g, const vint& b) + { + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint delta_l = dr * 27 + dg * 92 + db * 9; + vint delta_cr = dr * 128 - delta_l; + vint delta_cb = db * 128 - delta_l; + + vint id = VINT_SHIFT_RIGHT(delta_l * delta_l, 7) + + VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cr * delta_cr, 7) * 26, 7) + + VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cb * delta_cb, 7) * 3, 7); + + return id; + } + + void _call(int64_t* pDistance, + uint8_t* pSelectors, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_err) + { + assert(early_out_err >= 0); + + *pDistance = 0; + + vint 
block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + const __m128i shuf = _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 12, 8, 4, 0); + + uint32_t i; + + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b); + vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b); + vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b); + vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b); + + vint min_dist = min(min(min(dist0, dist1), dist2), dist3); + + vint sels = spmd_ternaryi(min_dist == dist0, 0, spmd_ternaryi(min_dist == dist1, 1, spmd_ternaryi(min_dist == dist2, 2, 3))); + + __m128i vsels = shuffle_epi8(sels.m_value, shuf); + storeu_si32((void *)(pSelectors + i), vsels); + + *pDistance += reduce_add(min_dist); + if (*pDistance >= early_out_err) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int best_err = INT_MAX, best_sel = 0; + for (int sel = 0; sel < 4; sel++) + { + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int delta_l = dr * 27 + dg * 92 + db * 9; + int delta_cr = dr * 128 - delta_l; + int delta_cb = db * 128 - delta_l; + + int id = ((delta_l * delta_l) >> 7) + + 
((((delta_cr * delta_cr) >> 7) * 26) >> 7) + + ((((delta_cb * delta_cb) >> 7) * 3) >> 7); + if (id < best_err) + { + best_err = id; + best_sel = sel; + } + } + + pSelectors[i] = (uint8_t)best_sel; + + *pDistance += best_err; + if (*pDistance >= early_out_err) + return; + } + } + }; + + struct find_selectors_linear_rgb_4_N : spmd_kernel + { + inline vint compute_dist( + const vint& base_r, const vint& base_g, const vint& base_b, + const vint& r, const vint& g, const vint& b) + { + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint id = dr * dr + dg * dg + db * db; + return id; + } + + void _call(int64_t* pDistance, + uint8_t* pSelectors, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_err) + { + assert(early_out_err >= 0); + + *pDistance = 0; + + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + const __m128i shuf = _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 12, 8, 4, 0); + + uint32_t i; + + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b); + vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b); + vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b); + vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b); + + vint min_dist = min(min(min(dist0, dist1), dist2), 
dist3); + + vint sels = spmd_ternaryi(min_dist == dist0, 0, spmd_ternaryi(min_dist == dist1, 1, spmd_ternaryi(min_dist == dist2, 2, 3))); + + __m128i vsels = shuffle_epi8(sels.m_value, shuf); + storeu_si32((void *)(pSelectors + i), vsels); + + *pDistance += reduce_add(min_dist); + if (*pDistance >= early_out_err) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int best_err = INT_MAX, best_sel = 0; + for (int sel = 0; sel < 4; sel++) + { + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int id = dr * dr + dg * dg + db * db; + if (id < best_err) + { + best_err = id; + best_sel = sel; + } + } + + pSelectors[i] = (uint8_t)best_sel; + + *pDistance += best_err; + if (*pDistance >= early_out_err) + return; + } + } + }; + + struct find_lowest_error_perceptual_rgb_4_N : spmd_kernel + { + inline vint compute_dist( + const vint& base_r, const vint& base_g, const vint& base_b, + const vint& r, const vint& g, const vint& b) + { + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint delta_l = dr * 27 + dg * 92 + db * 9; + vint delta_cr = dr * 128 - delta_l; + vint delta_cb = db * 128 - delta_l; + + vint id = VINT_SHIFT_RIGHT(delta_l * delta_l, 7) + + VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cr * delta_cr, 7) * 26, 7) + + VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cb * delta_cb, 7) * 3, 7); + + return id; + } + + void _call(int64_t* pDistance, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_error) + { + assert(early_out_error >= 0); + + *pDistance = 0; + + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], 
(int)pBlock_colors[i].b); + } + + uint32_t i; + + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b); + vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b); + vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b); + vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b); + + vint min_dist = min(min(min(dist0, dist1), dist2), dist3); + + *pDistance += reduce_add(min_dist); + if (*pDistance > early_out_error) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int best_err = INT_MAX; + for (int sel = 0; sel < 4; sel++) + { + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int delta_l = dr * 27 + dg * 92 + db * 9; + int delta_cr = dr * 128 - delta_l; + int delta_cb = db * 128 - delta_l; + + int id = ((delta_l * delta_l) >> 7) + + ((((delta_cr * delta_cr) >> 7) * 26) >> 7) + + ((((delta_cb * delta_cb) >> 7) * 3) >> 7); + + if (id < best_err) + { + best_err = id; + } + } + + *pDistance += best_err; + if (*pDistance > early_out_error) + return; + } + } + }; + + struct find_lowest_error_linear_rgb_4_N : spmd_kernel + { + inline vint compute_dist( + const vint& base_r, const vint& base_g, const vint& base_b, + const vint& r, const vint& g, const vint& b) + { + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint id = dr * dr + dg * dg + db * db; + + return id; + } + + void _call(int64_t* pDistance, + const 
color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_error) + { + assert(early_out_error >= 0); + + *pDistance = 0; + + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + uint32_t i; + + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b); + vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b); + vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b); + vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b); + + vint min_dist = min(min(min(dist0, dist1), dist2), dist3); + + *pDistance += reduce_add(min_dist); + if (*pDistance > early_out_error) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int best_err = INT_MAX; + for (int sel = 0; sel < 4; sel++) + { + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int id = dr * dr + dg * dg + db * db; + + if (id < best_err) + { + best_err = id; + } + } + + *pDistance += best_err; + if (*pDistance > early_out_error) + return; + } + } + }; + + struct update_covar_matrix_16x16 : spmd_kernel + { + void _call( + uint32_t num_vecs, const void* pWeighted_vecs_void, const void* pOrigin_void, const uint32_t* pVec_indices, void* 
pMatrix16x16_void) + { + const std::pair* pWeighted_vecs = static_cast< const std::pair *>(pWeighted_vecs_void); + + const float* pOrigin = static_cast(pOrigin_void); + vfloat org0 = loadu_linear_all(pOrigin), org1 = loadu_linear_all(pOrigin + 4), org2 = loadu_linear_all(pOrigin + 8), org3 = loadu_linear_all(pOrigin + 12); + + vfloat mat[16][4]; + vfloat vzero(zero_vfloat()); + + for (uint32_t i = 0; i < 16; i++) + { + store_all(mat[i][0], vzero); + store_all(mat[i][1], vzero); + store_all(mat[i][2], vzero); + store_all(mat[i][3], vzero); + } + + for (uint32_t k = 0; k < num_vecs; k++) + { + const uint32_t vec_index = pVec_indices[k]; + + const float* pW = pWeighted_vecs[vec_index].first.get_ptr(); + vfloat weight((float)pWeighted_vecs[vec_index].second); + + vfloat vec[4] = { loadu_linear_all(pW) - org0, loadu_linear_all(pW + 4) - org1, loadu_linear_all(pW + 8) - org2, loadu_linear_all(pW + 12) - org3 }; + + vfloat wvec0 = vec[0] * weight, wvec1 = vec[1] * weight, wvec2 = vec[2] * weight, wvec3 = vec[3] * weight; + + for (uint32_t j = 0; j < 16; j++) + { + vfloat vx = ((const float*)vec)[j]; + + store_all(mat[j][0], mat[j][0] + vx * wvec0); + store_all(mat[j][1], mat[j][1] + vx * wvec1); + store_all(mat[j][2], mat[j][2] + vx * wvec2); + store_all(mat[j][3], mat[j][3] + vx * wvec3); + + } // j + + } // k + + float* pMatrix = static_cast(pMatrix16x16_void); + + float* pDst = pMatrix; + for (uint32_t i = 0; i < 16; i++) + { + storeu_linear_all(pDst, mat[i][0]); + storeu_linear_all(pDst + 4, mat[i][1]); + storeu_linear_all(pDst + 8, mat[i][2]); + storeu_linear_all(pDst + 12, mat[i][3]); + pDst += 16; + } + } + }; + +} // namespace + +using namespace CPPSPMD_NAME(basisu_kernels_namespace); + +void CPPSPMD_NAME(perceptual_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) +{ + spmd_call< perceptual_distance_rgb_4_N >(pDistance, pSelectors, pBlock_colors, 
pSrc_pixels, n, early_out_err); +} + +void CPPSPMD_NAME(linear_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) +{ + spmd_call< linear_distance_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); +} + +void CPPSPMD_NAME(find_selectors_perceptual_rgb_4_N)(int64_t *pDistance, uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) +{ + spmd_call< find_selectors_perceptual_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); +} + +void CPPSPMD_NAME(find_selectors_linear_rgb_4_N)(int64_t* pDistance, uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) +{ + spmd_call< find_selectors_linear_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); +} + +void CPPSPMD_NAME(find_lowest_error_perceptual_rgb_4_N)(int64_t* pDistance, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error) +{ + spmd_call< find_lowest_error_perceptual_rgb_4_N >(pDistance, pBlock_colors, pSrc_pixels, n, early_out_error); +} + +void CPPSPMD_NAME(find_lowest_error_linear_rgb_4_N)(int64_t* pDistance, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error) +{ + spmd_call< find_lowest_error_linear_rgb_4_N >(pDistance, pBlock_colors, pSrc_pixels, n, early_out_error); +} + +void CPPSPMD_NAME(update_covar_matrix_16x16)(uint32_t num_vecs, const void* pWeighted_vecs, const void* pOrigin, const uint32_t *pVec_indices, void* pMatrix16x16) +{ + spmd_call < update_covar_matrix_16x16 >(num_vecs, pWeighted_vecs, pOrigin, pVec_indices, pMatrix16x16); +} diff --git a/thirdparty/basisu/encoder/basisu_kernels_sse.cpp b/thirdparty/basisu/encoder/basisu_kernels_sse.cpp new file mode 100644 index 
000000000..36a493d7e --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_kernels_sse.cpp @@ -0,0 +1,145 @@ +// basisu_kernels_sse.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_enc.h" + +#if BASISU_SUPPORT_SSE + +#define CPPSPMD_SSE2 0 + +#ifdef _MSC_VER +#include +#endif + +#include "cppspmd_sse.h" + +#include "cppspmd_type_aliases.h" + +using namespace basisu; + +#include "basisu_kernels_declares.h" +#include "basisu_kernels_imp.h" + +namespace basisu +{ + +struct cpu_info +{ + cpu_info() { memset(this, 0, sizeof(*this)); } + + bool m_has_fpu; + bool m_has_mmx; + bool m_has_sse; + bool m_has_sse2; + bool m_has_sse3; + bool m_has_ssse3; + bool m_has_sse41; + bool m_has_sse42; + bool m_has_avx; + bool m_has_avx2; + bool m_has_pclmulqdq; +}; + +static void extract_x86_flags(cpu_info &info, uint32_t ecx, uint32_t edx) +{ + info.m_has_fpu = (edx & (1 << 0)) != 0; + info.m_has_mmx = (edx & (1 << 23)) != 0; + info.m_has_sse = (edx & (1 << 25)) != 0; + info.m_has_sse2 = (edx & (1 << 26)) != 0; + info.m_has_sse3 = (ecx & (1 << 0)) != 0; + info.m_has_ssse3 = (ecx & (1 << 9)) != 0; + info.m_has_sse41 = (ecx & (1 << 19)) != 0; + info.m_has_sse42 = (ecx & (1 << 20)) != 0; + info.m_has_pclmulqdq = (ecx & (1 << 1)) != 0; + info.m_has_avx = (ecx & (1 << 28)) != 0; +} + +static void extract_x86_extended_flags(cpu_info &info, uint32_t ebx) +{ + info.m_has_avx2 = (ebx & (1 << 5)) != 0; +} 
+ +#ifndef _MSC_VER +static void do_cpuid(uint32_t eax, uint32_t ecx, uint32_t* regs) +{ + uint32_t ebx = 0, edx = 0; + +#if defined(__PIC__) && defined(__i386__) + __asm__("movl %%ebx, %%edi;" + "cpuid;" + "xchgl %%ebx, %%edi;" + : "=D"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx)); +#else + __asm__("cpuid;" : "+b"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx)); +#endif + + regs[0] = eax; regs[1] = ebx; regs[2] = ecx; regs[3] = edx; +} +#endif + +static void get_cpuinfo(cpu_info &info) +{ + int regs[4]; + +#ifdef _MSC_VER + __cpuid(regs, 0); +#else + do_cpuid(0, 0, (uint32_t *)regs); +#endif + + const uint32_t max_eax = regs[0]; + + if (max_eax >= 1U) + { +#ifdef _MSC_VER + __cpuid(regs, 1); +#else + do_cpuid(1, 0, (uint32_t*)regs); +#endif + extract_x86_flags(info, regs[2], regs[3]); + } + + if (max_eax >= 7U) + { +#ifdef _MSC_VER + __cpuidex(regs, 7, 0); +#else + do_cpuid(7, 0, (uint32_t*)regs); +#endif + + extract_x86_extended_flags(info, regs[1]); + } +} + +void detect_sse41() +{ + cpu_info info; + get_cpuinfo(info); + + // Check for everything from SSE to SSE 4.1 + g_cpu_supports_sse41 = info.m_has_sse && info.m_has_sse2 && info.m_has_sse3 && info.m_has_ssse3 && info.m_has_sse41; +} + +} // namespace basisu +#else // #if BASISU_SUPPORT_SSE +namespace basisu +{ + +void detect_sse41() +{ +} + +} // namespace basisu +#endif // #if BASISU_SUPPORT_SSE + diff --git a/thirdparty/basisu/encoder/basisu_math.h b/thirdparty/basisu/encoder/basisu_math.h new file mode 100644 index 000000000..3e56747be --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_math.h @@ -0,0 +1,3146 @@ +// File: basisu_math.h +#pragma once + +// TODO: Would prefer this in the basisu namespace, but to avoid collisions with the existing vec/matrix classes I'm placing this in "bu_math". +namespace bu_math +{ + // Cross-platform 1.0f/sqrtf(x) approximation. See https://en.wikipedia.org/wiki/Fast_inverse_square_root#cite_note-37. + // Would prefer using SSE1 etc. 
but that would require implementing multiple versions and platform divergence (needing more testing). + BASISU_FORCE_INLINE float inv_sqrt(float v) + { + union + { + float flt; + uint32_t ui; + } un; + + un.flt = v; + un.ui = 0x5F1FFFF9UL - (un.ui >> 1); + + return 0.703952253f * un.flt * (2.38924456f - v * (un.flt * un.flt)); + } + + inline float smoothstep(float edge0, float edge1, float x) + { + assert(edge1 != edge0); + + // Scale, and clamp x to 0..1 range + x = basisu::saturate((x - edge0) / (edge1 - edge0)); + + return x * x * (3.0f - 2.0f * x); + } + + template + class vec : public basisu::rel_ops > + { + public: + typedef T scalar_type; + enum + { + num_elements = N + }; + + inline vec() + { + } + + inline vec(basisu::eClear) + { + clear(); + } + + inline vec(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = other.m_s[i]; + } + + template + inline vec(const vec& other) + { + set(other); + } + + template + inline vec(const vec& other, T w) + { + *this = other; + m_s[N - 1] = w; + } + + template + inline explicit vec(Args... args) + { + static_assert(sizeof...(args) <= N); + set(args...); + } + + inline void clear() + { + if (N > 4) + memset(m_s, 0, sizeof(m_s)); + else + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = 0; + } + } + + template + inline vec& set(const vec& other) + { + if ((void*)this == (void*)&other) + return *this; + const uint32_t m = basisu::minimum(N, ON); + uint32_t i; + for (i = 0; i < m; i++) + m_s[i] = static_cast(other[i]); + for (; i < N; i++) + m_s[i] = 0; + return *this; + } + + inline vec& set_component(uint32_t index, T val) + { + assert(index < N); + m_s[index] = val; + return *this; + } + + inline vec& set_all(T val) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = val; + return *this; + } + + template + inline vec& set(Args... args) + { + static_assert(sizeof...(args) <= N); + + // Initialize using parameter pack expansion + T values[] = { static_cast(args)... 
}; + + // Special case if setting with a scalar + if (sizeof...(args) == 1) + { + set_all(values[0]); + } + else + { + // Copy the values into the vector + for (std::size_t i = 0; i < sizeof...(args); ++i) + { + m_s[i] = values[i]; + } + + // Zero-initialize the remaining elements (if any) + if (sizeof...(args) < N) + { + std::fill(m_s + sizeof...(args), m_s + N, T{}); + } + } + + return *this; + } + + inline vec& set(const T* pValues) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = pValues[i]; + return *this; + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i) + { + return set(static_cast(other[i])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j) + { + return set(static_cast(other[i]), static_cast(other[j])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j, uint32_t k) + { + return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j, uint32_t k, uint32_t l) + { + return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k]), static_cast(other[l])); + } + + inline vec& operator=(const vec& rhs) + { + if (this != &rhs) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = rhs.m_s[i]; + } + return *this; + } + + template + inline vec& operator=(const vec& other) + { + if ((void*)this == (void*)&other) + return *this; + + uint32_t s = basisu::minimum(N, O); + + uint32_t i; + for (i = 0; i < s; i++) + m_s[i] = static_cast(other[i]); + + for (; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline bool operator==(const vec& rhs) const + { + for (uint32_t i = 0; i < N; i++) + if (!(m_s[i] == rhs.m_s[i])) + return false; + return true; + } + + inline bool operator<(const vec& rhs) const + { + for (uint32_t i = 0; i < N; i++) + { + if (m_s[i] < rhs.m_s[i]) + return true; + else if (!(m_s[i] == rhs.m_s[i])) + return false; + } + + return false; + } + + 
inline T operator[](uint32_t i) const + { + assert(i < N); + return m_s[i]; + } + + inline T& operator[](uint32_t i) + { + assert(i < N); + return m_s[i]; + } + + template + inline uint64_t get_component_bits_as_uint() const + { + static_assert(index < N); + static_assert((sizeof(T) == sizeof(uint16_t)) || (sizeof(T) == sizeof(uint32_t)) || (sizeof(T) == sizeof(uint64_t)), "Unsupported type"); + + if (sizeof(T) == sizeof(uint16_t)) + return *reinterpret_cast(&m_s[index]); + else if (sizeof(T) == sizeof(uint32_t)) + return *reinterpret_cast(&m_s[index]); + else if (sizeof(T) == sizeof(uint64_t)) + return *reinterpret_cast(&m_s[index]); + else + { + assert(0); + return 0; + } + } + + inline T get_x(void) const + { + return m_s[0]; + } + inline T get_y(void) const + { + static_assert(N >= 2); + return m_s[1]; + } + inline T get_z(void) const + { + static_assert(N >= 3); + return m_s[2]; + } + inline T get_w(void) const + { + static_assert(N >= 4); + return m_s[3]; + } + + inline vec get_x_vector() const + { + return broadcast<0>(); + } + inline vec get_y_vector() const + { + return broadcast<1>(); + } + inline vec get_z_vector() const + { + return broadcast<2>(); + } + inline vec get_w_vector() const + { + return broadcast<3>(); + } + + inline T get_component(uint32_t i) const + { + return (*this)[i]; + } + + inline vec& set_x(T v) + { + m_s[0] = v; + return *this; + } + inline vec& set_y(T v) + { + static_assert(N >= 2); + m_s[1] = v; + return *this; + } + inline vec& set_z(T v) + { + static_assert(N >= 3); + m_s[2] = v; + return *this; + } + inline vec& set_w(T v) + { + static_assert(N >= 4); + m_s[3] = v; + return *this; + } + + inline const T* get_ptr() const + { + return reinterpret_cast(&m_s[0]); + } + inline T* get_ptr() + { + return reinterpret_cast(&m_s[0]); + } + + inline vec as_point() const + { + vec result(*this); + result[N - 1] = 1; + return result; + } + + inline vec as_dir() const + { + vec result(*this); + result[N - 1] = 0; + return result; + } + + 
inline vec<2, T> select2(uint32_t i, uint32_t j) const + { + assert((i < N) && (j < N)); + return vec<2, T>(m_s[i], m_s[j]); + } + + inline vec<3, T> select3(uint32_t i, uint32_t j, uint32_t k) const + { + assert((i < N) && (j < N) && (k < N)); + return vec<3, T>(m_s[i], m_s[j], m_s[k]); + } + + inline vec<4, T> select4(uint32_t i, uint32_t j, uint32_t k, uint32_t l) const + { + assert((i < N) && (j < N) && (k < N) && (l < N)); + return vec<4, T>(m_s[i], m_s[j], m_s[k], m_s[l]); + } + + inline bool is_dir() const + { + return m_s[N - 1] == 0; + } + inline bool is_vector() const + { + return is_dir(); + } + inline bool is_point() const + { + return m_s[N - 1] == 1; + } + + inline vec project() const + { + vec result(*this); + if (result[N - 1]) + result /= result[N - 1]; + return result; + } + + inline vec broadcast(unsigned i) const + { + return vec((*this)[i]); + } + + template + inline vec broadcast() const + { + return vec((*this)[i]); + } + + inline vec swizzle(uint32_t i, uint32_t j) const + { + return vec((*this)[i], (*this)[j]); + } + + inline vec swizzle(uint32_t i, uint32_t j, uint32_t k) const + { + return vec((*this)[i], (*this)[j], (*this)[k]); + } + + inline vec swizzle(uint32_t i, uint32_t j, uint32_t k, uint32_t l) const + { + return vec((*this)[i], (*this)[j], (*this)[k], (*this)[l]); + } + + inline vec operator-() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = -m_s[i]; + return result; + } + + inline vec operator+() const + { + return *this; + } + + inline vec& operator+=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] += other.m_s[i]; + return *this; + } + + inline vec& operator-=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] -= other.m_s[i]; + return *this; + } + + inline vec& operator*=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] *= other.m_s[i]; + return *this; + } + + inline vec& operator/=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] 
/= other.m_s[i]; + return *this; + } + + inline vec& operator*=(T s) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] *= s; + return *this; + } + + inline vec& operator/=(T s) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] /= s; + return *this; + } + + friend inline vec operator*(const vec& lhs, T val) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] * val; + return result; + } + + friend inline vec operator*(T val, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = val * rhs.m_s[i]; + return result; + } + + friend inline vec operator/(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] / rhs.m_s[i]; + return result; + } + + friend inline vec operator/(const vec& lhs, T val) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] / val; + return result; + } + + friend inline vec operator+(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] + rhs.m_s[i]; + return result; + } + + friend inline vec operator-(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] - rhs.m_s[i]; + return result; + } + + static inline vec<3, T> cross2(const vec& a, const vec& b) + { + static_assert(N >= 2); + return vec<3, T>(0, 0, a[0] * b[1] - a[1] * b[0]); + } + + inline vec<3, T> cross2(const vec& b) const + { + return cross2(*this, b); + } + + static inline vec<3, T> cross3(const vec& a, const vec& b) + { + static_assert(N >= 3); + return vec<3, T>(a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]); + } + + inline vec<3, T> cross3(const vec& b) const + { + return cross3(*this, b); + } + + static inline vec<3, T> cross(const vec& a, const vec& b) + { + static_assert(N >= 2); + + if (N == 2) + return cross2(a, b); + else + return cross3(a, b); + } + + inline vec<3, T> cross(const 
vec& b) const + { + static_assert(N >= 2); + return cross(*this, b); + } + + inline T dot(const vec& rhs) const + { + return dot(*this, rhs); + } + + inline vec dot_vector(const vec& rhs) const + { + return vec(dot(*this, rhs)); + } + + static inline T dot(const vec& lhs, const vec& rhs) + { + T result = lhs.m_s[0] * rhs.m_s[0]; + for (uint32_t i = 1; i < N; i++) + result += lhs.m_s[i] * rhs.m_s[i]; + return result; + } + + inline T dot2(const vec& rhs) const + { + static_assert(N >= 2); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1]; + } + + inline T dot3(const vec& rhs) const + { + static_assert(N >= 3); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2]; + } + + inline T dot4(const vec& rhs) const + { + static_assert(N >= 4); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2] + m_s[3] * rhs.m_s[3]; + } + + inline T norm(void) const + { + T sum = m_s[0] * m_s[0]; + for (uint32_t i = 1; i < N; i++) + sum += m_s[i] * m_s[i]; + return sum; + } + + inline T length(void) const + { + return sqrt(norm()); + } + + inline T squared_distance(const vec& rhs) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + } + return dist2; + } + + inline T squared_distance(const vec& rhs, T early_out) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + if (dist2 > early_out) + break; + } + return dist2; + } + + inline T distance(const vec& rhs) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + } + return sqrt(dist2); + } + + inline vec inverse() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = m_s[i] ? 
(1.0f / m_s[i]) : 0; + return result; + } + + // returns squared length (norm) + inline double normalize(const vec* pDefaultVec = NULL) + { + double n = m_s[0] * m_s[0]; + for (uint32_t i = 1; i < N; i++) + n += m_s[i] * m_s[i]; + + if (n != 0) + *this *= static_cast(1.0f / sqrt(n)); + else if (pDefaultVec) + *this = *pDefaultVec; + return n; + } + + inline double normalize3(const vec* pDefaultVec = NULL) + { + static_assert(N >= 3); + + double n = m_s[0] * m_s[0] + m_s[1] * m_s[1] + m_s[2] * m_s[2]; + + if (n != 0) + *this *= static_cast((1.0f / sqrt(n))); + else if (pDefaultVec) + *this = *pDefaultVec; + return n; + } + + inline vec& normalize_in_place(const vec* pDefaultVec = NULL) + { + normalize(pDefaultVec); + return *this; + } + + inline vec& normalize3_in_place(const vec* pDefaultVec = NULL) + { + normalize3(pDefaultVec); + return *this; + } + + inline vec get_normalized(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize(pDefaultVec); + return result; + } + + inline vec get_normalized3(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize3(pDefaultVec); + return result; + } + + inline vec& clamp(T l, T h) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = static_cast(basisu::clamp(m_s[i], l, h)); + return *this; + } + + inline vec& saturate() + { + return clamp(0.0f, 1.0f); + } + + inline vec& clamp(const vec& l, const vec& h) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = static_cast(basisu::clamp(m_s[i], l[i], h[i])); + return *this; + } + + inline bool is_within_bounds(const vec& l, const vec& h) const + { + for (uint32_t i = 0; i < N; i++) + if ((m_s[i] < l[i]) || (m_s[i] > h[i])) + return false; + + return true; + } + + inline bool is_within_bounds(T l, T h) const + { + for (uint32_t i = 0; i < N; i++) + if ((m_s[i] < l) || (m_s[i] > h)) + return false; + + return true; + } + + inline uint32_t get_major_axis(void) const + { + T m = fabs(m_s[0]); + uint32_t r = 0; + for (uint32_t i = 1; i < N; 
i++) + { + const T c = fabs(m_s[i]); + if (c > m) + { + m = c; + r = i; + } + } + return r; + } + + inline uint32_t get_minor_axis(void) const + { + T m = fabs(m_s[0]); + uint32_t r = 0; + for (uint32_t i = 1; i < N; i++) + { + const T c = fabs(m_s[i]); + if (c < m) + { + m = c; + r = i; + } + } + return r; + } + + inline void get_projection_axes(uint32_t& u, uint32_t& v) const + { + const int axis = get_major_axis(); + if (m_s[axis] < 0.0f) + { + v = basisu::next_wrap(axis, N); + u = basisu::next_wrap(v, N); + } + else + { + u = basisu::next_wrap(axis, N); + v = basisu::next_wrap(u, N); + } + } + + inline T get_absolute_minimum(void) const + { + T result = fabs(m_s[0]); + for (uint32_t i = 1; i < N; i++) + result = basisu::minimum(result, fabs(m_s[i])); + return result; + } + + inline T get_absolute_maximum(void) const + { + T result = fabs(m_s[0]); + for (uint32_t i = 1; i < N; i++) + result = basisu::maximum(result, fabs(m_s[i])); + return result; + } + + inline T get_minimum(void) const + { + T result = m_s[0]; + for (uint32_t i = 1; i < N; i++) + result = basisu::minimum(result, m_s[i]); + return result; + } + + inline T get_maximum(void) const + { + T result = m_s[0]; + for (uint32_t i = 1; i < N; i++) + result = basisu::maximum(result, m_s[i]); + return result; + } + + inline vec& remove_unit_direction(const vec& dir) + { + *this -= (dot(dir) * dir); + return *this; + } + + inline vec get_remove_unit_direction(const vec& dir) const + { + return *this - (dot(dir) * dir); + } + + inline bool all_less(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] >= b.m_s[i]) + return false; + return true; + } + + inline bool all_less_equal(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] > b.m_s[i]) + return false; + return true; + } + + inline bool all_greater(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] <= b.m_s[i]) + return false; + return true; + } + + inline bool all_greater_equal(const vec& b) 
const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] < b.m_s[i]) + return false; + return true; + } + + inline vec negate_xyz() const + { + vec ret; + + ret[0] = -m_s[0]; + if (N >= 2) + ret[1] = -m_s[1]; + if (N >= 3) + ret[2] = -m_s[2]; + + for (uint32_t i = 3; i < N; i++) + ret[i] = m_s[i]; + + return ret; + } + + inline vec& invert() + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] != 0.0f) + m_s[i] = 1.0f / m_s[i]; + return *this; + } + + inline scalar_type perp_dot(const vec& b) const + { + static_assert(N == 2); + return m_s[0] * b.m_s[1] - m_s[1] * b.m_s[0]; + } + + inline vec perp() const + { + static_assert(N == 2); + return vec(-m_s[1], m_s[0]); + } + + inline vec get_floor() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = floor(m_s[i]); + return result; + } + + inline vec get_ceil() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = ceil(m_s[i]); + return result; + } + + inline T get_total() const + { + T res = m_s[0]; + for (uint32_t i = 1; i < N; i++) + res += m_s[i]; + return res; + } + + // static helper methods + + static inline vec mul_components(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = lhs.m_s[i] * rhs.m_s[i]; + return result; + } + + static inline vec mul_add_components(const vec& a, const vec& b, const vec& c) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = a.m_s[i] * b.m_s[i] + c.m_s[i]; + return result; + } + + static inline vec make_axis(uint32_t i) + { + vec result; + result.clear(); + result[i] = 1; + return result; + } + + static inline vec equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] == b[i]); + return ret; + } + + static inline vec not_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] != b[i]); + return ret; + } + + static inline vec less_mask(const vec& a, const vec& b) + { + vec ret; + 
for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] < b[i]); + return ret; + } + + static inline vec less_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] <= b[i]); + return ret; + } + + static inline vec greater_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] >= b[i]); + return ret; + } + + static inline vec greater_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] > b[i]); + return ret; + } + + static inline vec component_max(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = basisu::maximum(a.m_s[i], b.m_s[i]); + return ret; + } + + static inline vec component_min(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = basisu::minimum(a.m_s[i], b.m_s[i]); + return ret; + } + + static inline vec lerp(const vec& a, const vec& b, float t) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = a.m_s[i] + (b.m_s[i] - a.m_s[i]) * t; + return ret; + } + + static inline bool equal_tol(const vec& a, const vec& b, float t) + { + for (uint32_t i = 0; i < N; i++) + if (!basisu::equal_tol(a.m_s[i], b.m_s[i], t)) + return false; + return true; + } + + inline bool equal_tol(const vec& b, float t) const + { + return equal_tol(*this, b, t); + } + + static inline vec make_random(basisu::rand& r, float l, float h) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = r.frand(l, h); + return result; + } + + static inline vec make_random(basisu::rand& r, const vec& l, const vec& h) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = r.frand(l[i], h[i]); + return result; + } + + void print() const + { + for (uint32_t c = 0; c < N; c++) + printf("%3.3f ", (*this)[c]); + printf("\n"); + } + + protected: + T m_s[N]; + }; + + typedef vec<1, double> vec1D; + typedef vec<2, double> vec2D; + typedef vec<3, double> 
vec3D; + typedef vec<4, double> vec4D; + + typedef vec<1, float> vec1F; + + typedef vec<2, float> vec2F; + typedef basisu::vector vec2F_array; + + typedef vec<3, float> vec3F; + typedef basisu::vector vec3F_array; + + typedef vec<4, float> vec4F; + typedef basisu::vector vec4F_array; + + typedef vec<2, uint32_t> vec2U; + typedef vec<3, uint32_t> vec3U; + typedef vec<2, int> vec2I; + typedef vec<3, int> vec3I; + typedef vec<4, int> vec4I; + + typedef vec<2, int16_t> vec2I16; + typedef vec<3, int16_t> vec3I16; + + inline vec2F rotate_point_2D(const vec2F& p, float rad) + { + float c = cosf(rad); + float s = sinf(rad); + + float x = p[0]; + float y = p[1]; + + return vec2F(x * c - y * s, x * s + y * c); + } + + //-------------------------------------------------------------- + + // Matrix/vector cheat sheet, because confusingly, depending on how matrices are stored in memory people can use opposite definitions of "rows", "cols", etc. + // See http://www.mindcontrol.org/~hplus/graphics/matrix-layout.html + // + // So in this simple row-major general matrix class: + // matrix=[NumRows][NumCols] or [R][C], i.e. a 3x3 matrix stored in memory will appear as: R0C0, R0C1, R0C2, R1C0, R1C1, R1C2, etc. + // Matrix multiplication: [R0,C0]*[R1,C1]=[R0,C1], C0 must equal R1 + // + // In this class: + // A "row vector" type is a vector of size # of matrix cols, 1xC. It's the vector type that is used to store the matrix rows. + // A "col vector" type is a vector of size # of matrix rows, Rx1. It's a vector type large enough to hold each matrix column. 
+ // + // Subrow/col vectors: last component is assumed to be either 0 (a "vector") or 1 (a "point") + // "subrow vector": vector/point of size # cols-1, 1x(C-1) + // "subcol vector": vector/point of size # rows-1, (R-1)x1 + // + // D3D style: + // vec*matrix, row vector on left (vec dotted against columns) + // [1,4]*[4,4]=[1,4] + // abcd * A B C D + // A B C D + // A B C D + // A B C D + // = e f g h + // + // Now confusingly, in the matrix transform method for vec*matrix below the vector's type is "col_vec", because col_vec will have the proper size for non-square matrices. But the vector on the left is written as row vector, argh. + // + // + // OGL style: + // matrix*vec, col vector on right (vec dotted against rows): + // [4,4]*[4,1]=[4,1] + // + // A B C D * e = e + // A B C D f f + // A B C D g g + // A B C D h h + + template + Z& matrix_mul_helper(Z& result, const X& lhs, const Y& rhs) + { + static_assert((int)Z::num_rows == (int)X::num_rows); + static_assert((int)Z::num_cols == (int)Y::num_cols); + static_assert((int)X::num_cols == (int)Y::num_rows); + assert(((void*)&result != (void*)&lhs) && ((void*)&result != (void*)&rhs)); + for (int r = 0; r < X::num_rows; r++) + for (int c = 0; c < Y::num_cols; c++) + { + typename Z::scalar_type s = lhs(r, 0) * rhs(0, c); + for (uint32_t i = 1; i < X::num_cols; i++) + s += lhs(r, i) * rhs(i, c); + result(r, c) = s; + } + return result; + } + + template + Z& matrix_mul_helper_transpose_lhs(Z& result, const X& lhs, const Y& rhs) + { + static_assert((int)Z::num_rows == (int)X::num_cols); + static_assert((int)Z::num_cols == (int)Y::num_cols); + static_assert((int)X::num_rows == (int)Y::num_rows); + assert(((void*)&result != (void*)&lhs) && ((void*)&result != (void*)&rhs)); + for (int r = 0; r < X::num_cols; r++) + for (int c = 0; c < Y::num_cols; c++) + { + typename Z::scalar_type s = lhs(0, r) * rhs(0, c); + for (uint32_t i = 1; i < X::num_rows; i++) + s += lhs(i, r) * rhs(i, c); + result(r, c) = s; + } + return 
result; + } + + template + Z& matrix_mul_helper_transpose_rhs(Z& result, const X& lhs, const Y& rhs) + { + static_assert((int)Z::num_rows == (int)X::num_rows); + static_assert((int)Z::num_cols == (int)Y::num_rows); + static_assert((int)X::num_cols == (int)Y::num_cols); + assert(((void*)&result != (void*)&lhs) && ((void*)&result != (void*)&rhs)); + for (int r = 0; r < X::num_rows; r++) + for (int c = 0; c < Y::num_rows; c++) + { + typename Z::scalar_type s = lhs(r, 0) * rhs(c, 0); + for (uint32_t i = 1; i < X::num_cols; i++) + s += lhs(r, i) * rhs(c, i); + result(r, c) = s; + } + return result; + } + + template + class matrix + { + public: + typedef T scalar_type; + enum + { + num_rows = R, + num_cols = C + }; + + typedef vec col_vec; + typedef vec < (R > 1) ? (R - 1) : 0, T > subcol_vec; + + typedef vec row_vec; + typedef vec < (C > 1) ? (C - 1) : 0, T > subrow_vec; + + inline matrix() + { + } + + inline matrix(basisu::eClear) + { + clear(); + } + + inline matrix(basisu::eIdentity) + { + set_identity_matrix(); + } + + inline matrix(const T* p) + { + set(p); + } + + inline matrix(const matrix& other) + { + for (uint32_t i = 0; i < R; i++) + m_rows[i] = other.m_rows[i]; + } + + inline matrix& operator=(const matrix& rhs) + { + if (this != &rhs) + for (uint32_t i = 0; i < R; i++) + m_rows[i] = rhs.m_rows[i]; + return *this; + } + + inline matrix(T val00, T val01, + T val10, T val11) + { + set(val00, val01, val10, val11); + } + + inline matrix(T val00, T val01, + T val10, T val11, + T val20, T val21) + { + set(val00, val01, val10, val11, val20, val21); + } + + inline matrix(T val00, T val01, T val02, + T val10, T val11, T val12, + T val20, T val21, T val22) + { + set(val00, val01, val02, val10, val11, val12, val20, val21, val22); + } + + inline matrix(T val00, T val01, T val02, T val03, + T val10, T val11, T val12, T val13, + T val20, T val21, T val22, T val23, + T val30, T val31, T val32, T val33) + { + set(val00, val01, val02, val03, val10, val11, val12, val13, 
val20, val21, val22, val23, val30, val31, val32, val33); + } + + inline matrix(T val00, T val01, T val02, T val03, + T val10, T val11, T val12, T val13, + T val20, T val21, T val22, T val23) + { + set(val00, val01, val02, val03, val10, val11, val12, val13, val20, val21, val22, val23); + } + + inline void set(const float* p) + { + for (uint32_t i = 0; i < R; i++) + { + m_rows[i].set(p); + p += C; + } + } + + inline void set(T val00, T val01, + T val10, T val11) + { + m_rows[0].set(val00, val01); + if (R >= 2) + { + m_rows[1].set(val10, val11); + + for (uint32_t i = 2; i < R; i++) + m_rows[i].clear(); + } + } + + inline void set(T val00, T val01, + T val10, T val11, + T val20, T val21) + { + m_rows[0].set(val00, val01); + if (R >= 2) + { + m_rows[1].set(val10, val11); + + if (R >= 3) + { + m_rows[2].set(val20, val21); + + for (uint32_t i = 3; i < R; i++) + m_rows[i].clear(); + } + } + } + + inline void set(T val00, T val01, T val02, + T val10, T val11, T val12, + T val20, T val21, T val22) + { + m_rows[0].set(val00, val01, val02); + if (R >= 2) + { + m_rows[1].set(val10, val11, val12); + if (R >= 3) + { + m_rows[2].set(val20, val21, val22); + + for (uint32_t i = 3; i < R; i++) + m_rows[i].clear(); + } + } + } + + inline void set(T val00, T val01, T val02, T val03, + T val10, T val11, T val12, T val13, + T val20, T val21, T val22, T val23, + T val30, T val31, T val32, T val33) + { + m_rows[0].set(val00, val01, val02, val03); + if (R >= 2) + { + m_rows[1].set(val10, val11, val12, val13); + if (R >= 3) + { + m_rows[2].set(val20, val21, val22, val23); + + if (R >= 4) + { + m_rows[3].set(val30, val31, val32, val33); + + for (uint32_t i = 4; i < R; i++) + m_rows[i].clear(); + } + } + } + } + + inline void set(T val00, T val01, T val02, T val03, + T val10, T val11, T val12, T val13, + T val20, T val21, T val22, T val23) + { + m_rows[0].set(val00, val01, val02, val03); + if (R >= 2) + { + m_rows[1].set(val10, val11, val12, val13); + if (R >= 3) + { + m_rows[2].set(val20, 
val21, val22, val23); + + for (uint32_t i = 3; i < R; i++) + m_rows[i].clear(); + } + } + } + + inline uint32_t get_num_rows() const + { + return num_rows; + } + + inline uint32_t get_num_cols() const + { + return num_cols; + } + + inline uint32_t get_total_elements() const + { + return num_rows * num_cols; + } + + inline T operator()(uint32_t r, uint32_t c) const + { + assert((r < R) && (c < C)); + return m_rows[r][c]; + } + + inline T& operator()(uint32_t r, uint32_t c) + { + assert((r < R) && (c < C)); + return m_rows[r][c]; + } + + inline const row_vec& operator[](uint32_t r) const + { + assert(r < R); + return m_rows[r]; + } + + inline row_vec& operator[](uint32_t r) + { + assert(r < R); + return m_rows[r]; + } + + inline const row_vec& get_row(uint32_t r) const + { + return (*this)[r]; + } + + inline row_vec& get_row(uint32_t r) + { + return (*this)[r]; + } + + inline void set_row(uint32_t r, const row_vec& v) + { + (*this)[r] = v; + } + + inline col_vec get_col(uint32_t c) const + { + assert(c < C); + col_vec result; + for (uint32_t i = 0; i < R; i++) + result[i] = m_rows[i][c]; + return result; + } + + inline void set_col(uint32_t c, const col_vec& col) + { + assert(c < C); + for (uint32_t i = 0; i < R; i++) + m_rows[i][c] = col[i]; + } + + inline void set_col(uint32_t c, const subcol_vec& col) + { + assert(c < C); + for (uint32_t i = 0; i < (R - 1); i++) + m_rows[i][c] = col[i]; + + m_rows[R - 1][c] = 0.0f; + } + + inline const row_vec& get_translate() const + { + return m_rows[R - 1]; + } + + inline matrix& set_translate(const row_vec& r) + { + m_rows[R - 1] = r; + return *this; + } + + inline matrix& set_translate(const subrow_vec& r) + { + m_rows[R - 1] = row_vec(r).as_point(); + return *this; + } + + inline const T* get_ptr() const + { + return reinterpret_cast(&m_rows[0]); + } + inline T* get_ptr() + { + return reinterpret_cast(&m_rows[0]); + } + + inline matrix& operator+=(const matrix& other) + { + for (uint32_t i = 0; i < R; i++) + m_rows[i] += 
other.m_rows[i]; + return *this; + } + + inline matrix& operator-=(const matrix& other) + { + for (uint32_t i = 0; i < R; i++) + m_rows[i] -= other.m_rows[i]; + return *this; + } + + inline matrix& operator*=(T val) + { + for (uint32_t i = 0; i < R; i++) + m_rows[i] *= val; + return *this; + } + + inline matrix& operator/=(T val) + { + for (uint32_t i = 0; i < R; i++) + m_rows[i] /= val; + return *this; + } + + inline matrix& operator*=(const matrix& other) + { + matrix result; + matrix_mul_helper(result, *this, other); + *this = result; + return *this; + } + + friend inline matrix operator+(const matrix& lhs, const matrix& rhs) + { + matrix result; + for (uint32_t i = 0; i < R; i++) + result[i] = lhs.m_rows[i] + rhs.m_rows[i]; + return result; + } + + friend inline matrix operator-(const matrix& lhs, const matrix& rhs) + { + matrix result; + for (uint32_t i = 0; i < R; i++) + result[i] = lhs.m_rows[i] - rhs.m_rows[i]; + return result; + } + + friend inline matrix operator*(const matrix& lhs, T val) + { + matrix result; + for (uint32_t i = 0; i < R; i++) + result[i] = lhs.m_rows[i] * val; + return result; + } + + friend inline matrix operator/(const matrix& lhs, T val) + { + matrix result; + for (uint32_t i = 0; i < R; i++) + result[i] = lhs.m_rows[i] / val; + return result; + } + + friend inline matrix operator*(T val, const matrix& rhs) + { + matrix result; + for (uint32_t i = 0; i < R; i++) + result[i] = val * rhs.m_rows[i]; + return result; + } + +#if 0 + template + friend inline matrix operator*(const matrix& lhs, const matrix& rhs) + { + matrix result; + return matrix_mul_helper(result, lhs, rhs); + } +#endif + friend inline matrix operator*(const matrix& lhs, const matrix& rhs) + { + matrix result; + return matrix_mul_helper(result, lhs, rhs); + } + + friend inline row_vec operator*(const col_vec& a, const matrix& b) + { + return transform(a, b); + } + + inline matrix operator+() const + { + return *this; + } + + inline matrix operator-() const + { + matrix 
result; + for (uint32_t i = 0; i < R; i++) + result[i] = -m_rows[i]; + return result; + } + + inline matrix& clear() + { + for (uint32_t i = 0; i < R; i++) + m_rows[i].clear(); + return *this; + } + + inline matrix& set_zero_matrix() + { + clear(); + return *this; + } + + inline matrix& set_identity_matrix() + { + for (uint32_t i = 0; i < R; i++) + { + m_rows[i].clear(); + m_rows[i][i] = 1.0f; + } + return *this; + } + + inline matrix& set_scale_matrix(float s) + { + clear(); + for (int i = 0; i < (R - 1); i++) + m_rows[i][i] = s; + m_rows[R - 1][C - 1] = 1.0f; + return *this; + } + + inline matrix& set_scale_matrix(const row_vec& s) + { + clear(); + for (uint32_t i = 0; i < R; i++) + m_rows[i][i] = s[i]; + return *this; + } + + inline matrix& set_scale_matrix(float x, float y) + { + set_identity_matrix(); + m_rows[0].set_x(x); + m_rows[1].set_y(y); + return *this; + } + + inline matrix& set_scale_matrix(float x, float y, float z) + { + set_identity_matrix(); + m_rows[0].set_x(x); + m_rows[1].set_y(y); + m_rows[2].set_z(z); + return *this; + } + + inline matrix& set_translate_matrix(const row_vec& s) + { + set_identity_matrix(); + set_translate(s); + return *this; + } + + inline matrix& set_translate_matrix(float x, float y) + { + set_identity_matrix(); + set_translate(row_vec(x, y).as_point()); + return *this; + } + + inline matrix& set_translate_matrix(float x, float y, float z) + { + set_identity_matrix(); + set_translate(row_vec(x, y, z).as_point()); + return *this; + } + + inline matrix get_transposed() const + { + static_assert(R == C); + + matrix result; + for (uint32_t i = 0; i < R; i++) + for (uint32_t j = 0; j < C; j++) + result.m_rows[i][j] = m_rows[j][i]; + return result; + } + + inline matrix get_transposed_nonsquare() const + { + matrix result; + for (uint32_t i = 0; i < R; i++) + for (uint32_t j = 0; j < C; j++) + result[j][i] = m_rows[i][j]; + return result; + } + + inline matrix& transpose_in_place() + { + matrix result; + for (uint32_t i = 0; i < 
R; i++) + for (uint32_t j = 0; j < C; j++) + result.m_rows[i][j] = m_rows[j][i]; + *this = result; + return *this; + } + + // Frobenius Norm + T get_norm() const + { + T result = 0; + + for (uint32_t i = 0; i < R; i++) + for (uint32_t j = 0; j < C; j++) + result += m_rows[i][j] * m_rows[i][j]; + + return static_cast(sqrt(result)); + } + + inline matrix get_power(T p) const + { + matrix result; + + for (uint32_t i = 0; i < R; i++) + for (uint32_t j = 0; j < C; j++) + result[i][j] = static_cast(pow(m_rows[i][j], p)); + + return result; + } + + inline matrix<1, R, T> numpy_dot(const matrix<1, C, T>& b) const + { + matrix<1, R, T> result; + + for (uint32_t r = 0; r < R; r++) + { + T sum = 0; + for (uint32_t c = 0; c < C; c++) + sum += m_rows[r][c] * b[0][c]; + + result[0][r] = static_cast(sum); + } + + return result; + } + + bool invert(matrix& result) const + { + static_assert(R == C); + + result.set_identity_matrix(); + + matrix mat(*this); + + for (uint32_t c = 0; c < C; c++) + { + uint32_t max_r = c; + for (uint32_t r = c + 1; r < R; r++) + if (fabs(mat[r][c]) > fabs(mat[max_r][c])) + max_r = r; + + if (mat[max_r][c] == 0.0f) + { + result.set_identity_matrix(); + return false; + } + + std::swap(mat[c], mat[max_r]); + std::swap(result[c], result[max_r]); + + result[c] /= mat[c][c]; + mat[c] /= mat[c][c]; + + for (uint32_t row = 0; row < R; row++) + { + if (row != c) + { + const row_vec temp(mat[row][c]); + mat[row] -= row_vec::mul_components(mat[c], temp); + result[row] -= row_vec::mul_components(result[c], temp); + } + } + } + + return true; + } + + matrix& invert_in_place() + { + matrix result; + invert(result); + *this = result; + return *this; + } + + matrix get_inverse() const + { + matrix result; + invert(result); + return result; + } + + T get_det() const + { + static_assert(R == C); + return det_helper(*this, R); + } + + bool equal_tol(const matrix& b, float tol) const + { + for (uint32_t r = 0; r < R; r++) + if (!row_vec::equal_tol(m_rows[r], b.m_rows[r], 
tol)) + return false; + return true; + } + + bool is_square() const + { + return R == C; + } + + double get_trace() const + { + static_assert(is_square()); + + T total = 0; + for (uint32_t i = 0; i < R; i++) + total += (*this)(i, i); + + return total; + } + + void print() const + { + for (uint32_t r = 0; r < R; r++) + { + for (uint32_t c = 0; c < C; c++) + printf("%3.7f ", (*this)(r, c)); + printf("\n"); + } + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left). + // Confusingly, note that the data type is named "col_vec", but mathematically it's actually written as a row vector (of size equal to the # matrix rows, which is why it's called a "col_vec" in this class). + // 1xR * RxC = 1xC + // This dots against the matrix columns. + static inline row_vec transform(const col_vec& a, const matrix& b) + { + row_vec result(b[0] * a[0]); + for (uint32_t r = 1; r < R; r++) + result += b[r] * a[r]; + return result; + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left). + // Last component of vec is assumed to be 1. + static inline row_vec transform_point(const col_vec& a, const matrix& b) + { + row_vec result(0); + for (int r = 0; r < (R - 1); r++) + result += b[r] * a[r]; + result += b[R - 1]; + return result; + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left). + // Last component of vec is assumed to be 0. + static inline row_vec transform_vector(const col_vec& a, const matrix& b) + { + row_vec result(0); + for (int r = 0; r < (R - 1); r++) + result += b[r] * a[r]; + return result; + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left). + // Last component of vec is assumed to be 1. + static inline subcol_vec transform_point(const subcol_vec& a, const matrix& b) + { + subcol_vec result(0); + for (int r = 0; r < static_cast(R); r++) + { + const T s = (r < subcol_vec::num_elements) ? 
a[r] : 1.0f; + for (int c = 0; c < static_cast(C - 1); c++) + result[c] += b[r][c] * s; + } + return result; + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left). + // Last component of vec is assumed to be 0. + static inline subcol_vec transform_vector(const subcol_vec& a, const matrix& b) + { + subcol_vec result(0); + for (int r = 0; r < static_cast(R - 1); r++) + { + const T s = a[r]; + for (int c = 0; c < static_cast(C - 1); c++) + result[c] += b[r][c] * s; + } + return result; + } + + // Like transform() above, but the matrix is effectively transposed before the multiply. + static inline col_vec transform_transposed(const col_vec& a, const matrix& b) + { + static_assert(R == C); + col_vec result; + for (uint32_t r = 0; r < R; r++) + result[r] = b[r].dot(a); + return result; + } + + // Like transform() above, but the matrix is effectively transposed before the multiply. + // Last component of vec is assumed to be 0. + static inline col_vec transform_vector_transposed(const col_vec& a, const matrix& b) + { + static_assert(R == C); + col_vec result; + for (uint32_t r = 0; r < R; r++) + { + T s = 0; + for (uint32_t c = 0; c < (C - 1); c++) + s += b[r][c] * a[c]; + + result[r] = s; + } + return result; + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left), but the matrix is effectively transposed before the multiply. + // Last component of vec is assumed to be 1. + static inline subcol_vec transform_point_transposed(const subcol_vec& a, const matrix& b) + { + static_assert(R == C); + subcol_vec result(0); + for (int r = 0; r < R; r++) + { + const T s = (r < subcol_vec::num_elements) ? a[r] : 1.0f; + for (int c = 0; c < (C - 1); c++) + result[c] += b[c][r] * s; + } + return result; + } + + // This method transforms a vec by a matrix (D3D-style: row vector on left), but the matrix is effectively transposed before the multiply. + // Last component of vec is assumed to be 0. 
+ static inline subcol_vec transform_vector_transposed(const subcol_vec& a, const matrix& b) + { + static_assert(R == C); + subcol_vec result(0); + for (int r = 0; r < static_cast(R - 1); r++) + { + const T s = a[r]; + for (int c = 0; c < static_cast(C - 1); c++) + result[c] += b[c][r] * s; + } + return result; + } + + // This method transforms a matrix by a vector (OGL style, col vector on the right). + // Note that the data type is named "row_vec", but mathematically it's actually written as a column vector (of size equal to the # matrix cols). + // RxC * Cx1 = Rx1 + // This dots against the matrix rows. + static inline col_vec transform(const matrix& b, const row_vec& a) + { + col_vec result; + for (int r = 0; r < static_cast(R); r++) + result[r] = b[r].dot(a); + return result; + } + + // This method transforms a matrix by a vector (OGL style, col vector on the right), except the matrix is effectively transposed before the multiply. + // Note that the data type is named "row_vec", but mathematically it's actually written as a column vector (of size equal to the # matrix cols). + // RxC * Cx1 = Rx1 + // This dots against the matrix cols. + static inline col_vec transform_transposed(const matrix& b, const row_vec& a) + { + static_assert(R == C); + row_vec result(b[0] * a[0]); + for (int r = 1; r < static_cast(R); r++) + result += b[r] * a[r]; + return col_vec(result); + } + + static inline matrix& mul_components(matrix& result, const matrix& lhs, const matrix& rhs) + { + for (uint32_t r = 0; r < R; r++) + result[r] = row_vec::mul_components(lhs[r], rhs[r]); + return result; + } + + static inline matrix& concat(matrix& lhs, const matrix& rhs) + { + return matrix_mul_helper(lhs, matrix(lhs), rhs); + } + + inline matrix& concat_in_place(const matrix& rhs) + { + return concat(*this, rhs); + } + + static inline matrix& multiply(matrix& result, const matrix& lhs, const matrix& rhs) + { + matrix temp; + matrix* pResult = ((&result == &lhs) || (&result == &rhs)) ? 
&temp : &result; + + matrix_mul_helper(*pResult, lhs, rhs); + if (pResult != &result) + result = *pResult; + + return result; + } + + static matrix make_zero_matrix() + { + matrix result; + result.clear(); + return result; + } + + static matrix make_identity_matrix() + { + matrix result; + result.set_identity_matrix(); + return result; + } + + static matrix make_translate_matrix(const row_vec& t) + { + return matrix(basisu::cIdentity).set_translate(t); + } + + static matrix make_translate_matrix(float x, float y) + { + return matrix(basisu::cIdentity).set_translate_matrix(x, y); + } + + static matrix make_translate_matrix(float x, float y, float z) + { + return matrix(basisu::cIdentity).set_translate_matrix(x, y, z); + } + + static inline matrix make_scale_matrix(float s) + { + return matrix().set_scale_matrix(s); + } + + static inline matrix make_scale_matrix(const row_vec& s) + { + return matrix().set_scale_matrix(s); + } + + static inline matrix make_scale_matrix(float x, float y) + { + static_assert(R >= 3 && C >= 3); + matrix result; + result.set_identity_matrix(); + result.m_rows[0][0] = x; + result.m_rows[1][1] = y; + return result; + } + + static inline matrix make_scale_matrix(float x, float y, float z) + { + static_assert(R >= 4 && C >= 4); + matrix result; + result.set_identity_matrix(); + result.m_rows[0][0] = x; + result.m_rows[1][1] = y; + result.m_rows[2][2] = z; + return result; + } + + // Helpers derived from Graphics Gems 1 and 2 (Matrices and Transformations, Ronald N. 
Goldman) + static matrix make_rotate_matrix(const vec<3, T>& axis, T ang) + { + static_assert(R >= 3 && C >= 3); + + vec<3, T> norm_axis(axis.get_normalized()); + + double cos_a = cos(ang); + double inv_cos_a = 1.0f - cos_a; + + double sin_a = sin(ang); + + const T x = norm_axis[0]; + const T y = norm_axis[1]; + const T z = norm_axis[2]; + + const double x2 = norm_axis[0] * norm_axis[0]; + const double y2 = norm_axis[1] * norm_axis[1]; + const double z2 = norm_axis[2] * norm_axis[2]; + + matrix result; + result.set_identity_matrix(); + + result[0][0] = (T)((inv_cos_a * x2) + cos_a); + result[1][0] = (T)((inv_cos_a * x * y) + (sin_a * z)); + result[2][0] = (T)((inv_cos_a * x * z) - (sin_a * y)); + + result[0][1] = (T)((inv_cos_a * x * y) - (sin_a * z)); + result[1][1] = (T)((inv_cos_a * y2) + cos_a); + result[2][1] = (T)((inv_cos_a * y * z) + (sin_a * x)); + + result[0][2] = (T)((inv_cos_a * x * z) + (sin_a * y)); + result[1][2] = (T)((inv_cos_a * y * z) - (sin_a * x)); + result[2][2] = (T)((inv_cos_a * z2) + cos_a); + + return result; + } + + static inline matrix make_rotate_matrix(T ang) + { + static_assert(R >= 2 && C >= 2); + + matrix ret(basisu::cIdentity); + + const T sin_a = static_cast(sin(ang)); + const T cos_a = static_cast(cos(ang)); + + ret[0][0] = +cos_a; + ret[0][1] = -sin_a; + ret[1][0] = +sin_a; + ret[1][1] = +cos_a; + + return ret; + } + + static inline matrix make_rotate_matrix(uint32_t axis, T ang) + { + vec<3, T> axis_vec; + axis_vec.clear(); + axis_vec[axis] = 1.0f; + return make_rotate_matrix(axis_vec, ang); + } + + static inline matrix make_cross_product_matrix(const vec<3, scalar_type>& c) + { + static_assert((num_rows >= 3) && (num_cols >= 3)); + matrix ret(basisu::cClear); + ret[0][1] = c[2]; + ret[0][2] = -c[1]; + ret[1][0] = -c[2]; + ret[1][2] = c[0]; + ret[2][0] = c[1]; + ret[2][1] = -c[0]; + return ret; + } + + static inline matrix make_reflection_matrix(const vec<4, scalar_type>& n, const vec<4, scalar_type>& q) + { + 
static_assert((num_rows == 4) && (num_cols == 4)); + matrix ret; + assert(n.is_vector() && q.is_vector()); + ret = make_identity_matrix() - 2.0f * make_tensor_product_matrix(n, n); + ret.set_translate((2.0f * q.dot(n) * n).as_point()); + return ret; + } + + static inline matrix make_tensor_product_matrix(const row_vec& v, const row_vec& w) + { + matrix ret; + for (int r = 0; r < num_rows; r++) + ret[r] = row_vec::mul_components(v.broadcast(r), w); + return ret; + } + + static inline matrix make_uniform_scaling_matrix(const vec<4, scalar_type>& q, scalar_type c) + { + static_assert((num_rows == 4) && (num_cols == 4)); + assert(q.is_vector()); + matrix ret; + ret = c * make_identity_matrix(); + ret.set_translate(((1.0f - c) * q).as_point()); + return ret; + } + + static inline matrix make_nonuniform_scaling_matrix(const vec<4, scalar_type>& q, scalar_type c, const vec<4, scalar_type>& w) + { + static_assert((num_rows == 4) && (num_cols == 4)); + assert(q.is_vector() && w.is_vector()); + matrix ret; + ret = make_identity_matrix() - (1.0f - c) * make_tensor_product_matrix(w, w); + ret.set_translate(((1.0f - c) * q.dot(w) * w).as_point()); + return ret; + } + + // n = normal of plane, q = point on plane + static inline matrix make_ortho_projection_matrix(const vec<4, scalar_type>& n, const vec<4, scalar_type>& q) + { + assert(n.is_vector() && q.is_vector()); + matrix ret; + ret = make_identity_matrix() - make_tensor_product_matrix(n, n); + ret.set_translate((q.dot(n) * n).as_point()); + return ret; + } + + static inline matrix make_parallel_projection(const vec<4, scalar_type>& n, const vec<4, scalar_type>& q, const vec<4, scalar_type>& w) + { + assert(n.is_vector() && q.is_vector() && w.is_vector()); + matrix ret; + ret = make_identity_matrix() - (make_tensor_product_matrix(n, w) / (w.dot(n))); + ret.set_translate(((q.dot(n) / w.dot(n)) * w).as_point()); + return ret; + } + + protected: + row_vec m_rows[R]; + + static T det_helper(const matrix& a, uint32_t n) + { + // 
Algorithm ported from Numerical Recipes in C. + T d; + matrix m; + if (n == 2) + d = a(0, 0) * a(1, 1) - a(1, 0) * a(0, 1); + else + { + d = 0; + for (uint32_t j1 = 1; j1 <= n; j1++) + { + for (uint32_t i = 2; i <= n; i++) + { + int j2 = 1; + for (uint32_t j = 1; j <= n; j++) + { + if (j != j1) + { + m(i - 2, j2 - 1) = a(i - 1, j - 1); + j2++; + } + } + } + d += (((1 + j1) & 1) ? -1.0f : 1.0f) * a(1 - 1, j1 - 1) * det_helper(m, n - 1); + } + } + return d; + } + }; + + typedef matrix<2, 2, float> matrix22F; + typedef matrix<2, 2, double> matrix22D; + + typedef matrix<3, 3, float> matrix33F; + typedef matrix<3, 3, double> matrix33D; + + typedef matrix<4, 4, float> matrix44F; + typedef matrix<4, 4, double> matrix44D; + + typedef matrix<8, 8, float> matrix88F; + + // These helpers create good old D3D-style matrices. + inline matrix44F matrix44F_make_perspective_offcenter_lh(float l, float r, float b, float t, float nz, float fz) + { + float two_nz = 2.0f * nz; + float one_over_width = 1.0f / (r - l); + float one_over_height = 1.0f / (t - b); + + matrix44F view_to_proj; + view_to_proj[0].set(two_nz * one_over_width, 0.0f, 0.0f, 0.0f); + view_to_proj[1].set(0.0f, two_nz * one_over_height, 0.0f, 0.0f); + view_to_proj[2].set(-(l + r) * one_over_width, -(t + b) * one_over_height, fz / (fz - nz), 1.0f); + view_to_proj[3].set(0.0f, 0.0f, -view_to_proj[2][2] * nz, 0.0f); + return view_to_proj; + } + + // fov_y: full Y field of view (radians) + // aspect: viewspace width/height + inline matrix44F matrix44F_make_perspective_fov_lh(float fov_y, float aspect, float nz, float fz) + { + double sin_fov = sin(0.5f * fov_y); + double cos_fov = cos(0.5f * fov_y); + + float y_scale = static_cast(cos_fov / sin_fov); + float x_scale = static_cast(y_scale / aspect); + + matrix44F view_to_proj; + view_to_proj[0].set(x_scale, 0, 0, 0); + view_to_proj[1].set(0, y_scale, 0, 0); + view_to_proj[2].set(0, 0, fz / (fz - nz), 1); + view_to_proj[3].set(0, 0, -nz * fz / (fz - nz), 0); + return 
view_to_proj; + } + + inline matrix44F matrix44F_make_ortho_offcenter_lh(float l, float r, float b, float t, float nz, float fz) + { + matrix44F view_to_proj; + view_to_proj[0].set(2.0f / (r - l), 0.0f, 0.0f, 0.0f); + view_to_proj[1].set(0.0f, 2.0f / (t - b), 0.0f, 0.0f); + view_to_proj[2].set(0.0f, 0.0f, 1.0f / (fz - nz), 0.0f); + view_to_proj[3].set((l + r) / (l - r), (t + b) / (b - t), nz / (nz - fz), 1.0f); + return view_to_proj; + } + + inline matrix44F matrix44F_make_ortho_lh(float w, float h, float nz, float fz) + { + return matrix44F_make_ortho_offcenter_lh(-w * .5f, w * .5f, -h * .5f, h * .5f, nz, fz); + } + + inline matrix44F matrix44F_make_projection_to_screen_d3d(int x, int y, int w, int h, float min_z, float max_z) + { + matrix44F proj_to_screen; + proj_to_screen[0].set(w * .5f, 0.0f, 0.0f, 0.0f); + proj_to_screen[1].set(0, h * -.5f, 0.0f, 0.0f); + proj_to_screen[2].set(0, 0.0f, max_z - min_z, 0.0f); + proj_to_screen[3].set(x + w * .5f, y + h * .5f, min_z, 1.0f); + return proj_to_screen; + } + + inline matrix44F matrix44F_make_lookat_lh(const vec3F& camera_pos, const vec3F& look_at, const vec3F& camera_up, float camera_roll_ang_in_radians) + { + vec4F col2(look_at - camera_pos); + assert(col2.is_vector()); + if (col2.normalize() == 0.0f) + col2.set(0, 0, 1, 0); + + vec4F col1(camera_up); + assert(col1.is_vector()); + if (!col2[0] && !col2[2]) + col1.set(-1.0f, 0.0f, 0.0f, 0.0f); + + if ((col1.dot(col2)) > .9999f) + col1.set(0.0f, 1.0f, 0.0f, 0.0f); + + vec4F col0(vec4F::cross3(col1, col2).normalize_in_place()); + col1 = vec4F::cross3(col2, col0).normalize_in_place(); + + matrix44F rotm(matrix44F::make_identity_matrix()); + rotm.set_col(0, col0); + rotm.set_col(1, col1); + rotm.set_col(2, col2); + return matrix44F::make_translate_matrix(-camera_pos[0], -camera_pos[1], -camera_pos[2]) * rotm * matrix44F::make_rotate_matrix(2, camera_roll_ang_in_radians); + } + + template R matrix_NxN_create_DCT() + { + assert(R::num_rows == R::num_cols); + + const 
uint32_t N = R::num_cols; + + R result; + for (uint32_t k = 0; k < N; k++) + { + for (uint32_t n = 0; n < N; n++) + { + double f; + + if (!k) + f = 1.0f / sqrt(float(N)); + else + f = sqrt(2.0f / float(N)) * cos((basisu::cPiD * (2.0f * float(n) + 1.0f) * float(k)) / (2.0f * float(N))); + + result(k, n) = static_cast(f); + } + } + + return result; + } + + template R matrix_NxN_DCT(const R& a, const R& dct) + { + R temp; + matrix_mul_helper(temp, dct, a); + R result; + matrix_mul_helper_transpose_rhs(result, temp, dct); + return result; + } + + template R matrix_NxN_IDCT(const R& b, const R& dct) + { + R temp; + matrix_mul_helper_transpose_lhs(temp, dct, b); + R result; + matrix_mul_helper(result, temp, dct); + return result; + } + + template matrix matrix_kronecker_product(const X& a, const Y& b) + { + matrix result; + + for (uint32_t r = 0; r < X::num_rows; r++) + { + for (uint32_t c = 0; c < X::num_cols; c++) + { + for (uint32_t i = 0; i < Y::num_rows; i++) + for (uint32_t j = 0; j < Y::num_cols; j++) + result(r * Y::num_rows + i, c * Y::num_cols + j) = a(r, c) * b(i, j); + } + } + + return result; + } + + template matrix matrix_combine_vertically(const X& a, const Y& b) + { + matrix result; + + for (uint32_t r = 0; r < X::num_rows; r++) + for (uint32_t c = 0; c < X::num_cols; c++) + result(r, c) = a(r, c); + + for (uint32_t r = 0; r < Y::num_rows; r++) + for (uint32_t c = 0; c < Y::num_cols; c++) + result(r + X::num_rows, c) = b(r, c); + + return result; + } + + inline matrix88F get_haar8() + { + matrix22F haar2( + 1, 1, + 1, -1); + matrix22F i2( + 1, 0, + 0, 1); + matrix44F i4( + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1); + + matrix<1, 2, float> b0; b0(0, 0) = 1; b0(0, 1) = 1; + matrix<1, 2, float> b1; b1(0, 0) = 1.0f; b1(0, 1) = -1.0f; + + matrix<2, 4, float> haar4_0 = matrix_kronecker_product(haar2, b0); + matrix<2, 4, float> haar4_1 = matrix_kronecker_product(i2, b1); + + matrix<4, 4, float> haar4 = matrix_combine_vertically(haar4_0, haar4_1); + + 
matrix<4, 8, float> haar8_0 = matrix_kronecker_product(haar4, b0); + matrix<4, 8, float> haar8_1 = matrix_kronecker_product(i4, b1); + + haar8_0[2] *= sqrtf(2); + haar8_0[3] *= sqrtf(2); + haar8_1 *= 2.0f; + + matrix<8, 8, float> haar8 = matrix_combine_vertically(haar8_0, haar8_1); + + return haar8; + } + + inline matrix44F get_haar4() + { + const float sqrt2 = 1.4142135623730951f; + + return matrix44F( + .5f * 1, .5f * 1, .5f * 1, .5f * 1, + .5f * 1, .5f * 1, .5f * -1, .5f * -1, + .5f * sqrt2, .5f * -sqrt2, 0, 0, + 0, 0, .5f * sqrt2, .5f * -sqrt2); + } + + template + inline matrix<2, 2, T> get_inverse_2x2(const matrix<2, 2, T>& m) + { + double a = m[0][0]; + double b = m[0][1]; + double c = m[1][0]; + double d = m[1][1]; + + double det = a * d - b * c; + if (det != 0.0f) + det = 1.0f / det; + + matrix<2, 2, T> result; + result[0][0] = static_cast(d * det); + result[0][1] = static_cast(-b * det); + result[1][0] = static_cast(-c * det); + result[1][1] = static_cast(a * det); + return result; + } + +} // namespace bu_math + +namespace basisu +{ + class tracked_stat + { + public: + tracked_stat() { clear(); } + + inline void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + inline void update(int32_t val) { m_num++; m_total += val; m_total2 += val * val; } + + inline tracked_stat& operator += (uint32_t val) { update(val); return *this; } + + inline uint32_t get_number_of_values() { return m_num; } + inline uint64_t get_total() const { return m_total; } + inline uint64_t get_total2() const { return m_total2; } + + inline float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }; + inline float get_std_dev() const { return m_num ? 
sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + inline float get_variance() const { float s = get_std_dev(); return s * s; } + + private: + uint32_t m_num; + int64_t m_total; + int64_t m_total2; + }; + + class tracked_stat_dbl + { + public: + tracked_stat_dbl() { clear(); } + + inline void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + inline void update(double val) { m_num++; m_total += val; m_total2 += val * val; } + + inline tracked_stat_dbl& operator += (double val) { update(val); return *this; } + + inline uint64_t get_number_of_values() { return m_num; } + inline double get_total() const { return m_total; } + inline double get_total2() const { return m_total2; } + + inline double get_average() const { return m_num ? m_total / (double)m_num : 0.0f; }; + inline double get_std_dev() const { return m_num ? sqrt((double)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + inline double get_variance() const { double s = get_std_dev(); return s * s; } + + private: + uint64_t m_num; + double m_total; + double m_total2; + }; + + template + struct stats + { + uint32_t m_n; + FloatType m_total, m_total_sq; // total, total of squares values + FloatType m_avg, m_avg_sq; // mean, mean of the squared values + FloatType m_rms; // sqrt(m_avg_sq) + FloatType m_std_dev, m_var; // population standard deviation and variance + FloatType m_mad; // mean absolute deviation + FloatType m_min, m_max, m_range; // min and max values, and max-min + FloatType m_len; // length of values as a vector (Euclidean norm or L2 norm) + FloatType m_coeff_of_var; // coefficient of variation (std_dev/mean), High CV: Indicates greater variability relative to the mean, meaning the data values are more spread out, + // Low CV : Indicates less variability relative to the mean, meaning the data values are more consistent. 
+ + FloatType m_skewness; // Skewness = 0: The data is perfectly symmetric around the mean, + // Skewness > 0: The data is positively skewed (right-skewed), + // Skewness < 0: The data is negatively skewed (left-skewed) + // 0-.5 approx. symmetry, .5-1 moderate skew, >= 1 highly skewed + + FloatType m_kurtosis; // Excess Kurtosis: Kurtosis = 0: The distribution has normal kurtosis (mesokurtic) + // Kurtosis > 0: The distribution is leptokurtic, with heavy tails and a sharp peak + // Kurtosis < 0: The distribution is platykurtic, with light tails and a flatter peak + + bool m_any_zero; + + FloatType m_median; + uint32_t m_median_index; + + stats() + { + clear(); + } + + void clear() + { + m_n = 0; + m_total = 0, m_total_sq = 0; + m_avg = 0, m_avg_sq = 0; + m_rms = 0; + m_std_dev = 0, m_var = 0; + m_mad = 0; + m_min = BIG_FLOAT_VAL, m_max = -BIG_FLOAT_VAL; m_range = 0.0f; + m_len = 0; + m_coeff_of_var = 0; + m_skewness = 0; + m_kurtosis = 0; + m_any_zero = false; + + m_median = 0; + m_median_index = 0; + } + + template + void calc_median(uint32_t n, const T* pVals, uint32_t stride = 1) + { + m_median = 0; + m_median_index = 0; + + if (!n) + return; + + basisu::vector< std::pair > vals(n); + + for (uint32_t i = 0; i < n; i++) + { + vals[i].first = pVals[i * stride]; + vals[i].second = i; + } + + std::sort(vals.begin(), vals.end(), [](const std::pair& a, const std::pair& b) { + return a.first < b.first; + }); + + m_median = vals[n / 2].first; + if ((n & 1) == 0) + m_median = (m_median + vals[(n / 2) - 1].first) * .5f; + + m_median_index = vals[n / 2].second; + } + + template + void calc(uint32_t n, const T* pVals, uint32_t stride = 1, bool calc_median_flag = false) + { + clear(); + + if (!n) + return; + + if (calc_median_flag) + calc_median(n, pVals, stride); + + m_n = n; + + for (uint32_t i = 0; i < n; i++) + { + FloatType v = (FloatType)pVals[i * stride]; + + if (v == 0.0f) + m_any_zero = true; + + m_total += v; + m_total_sq += v * v; + + if (!i) + { + m_min = v; + 
m_max = v; + } + else + { + m_min = minimum(m_min, v); + m_max = maximum(m_max, v); + } + } + + m_range = m_max - m_min; + + m_len = sqrt(m_total_sq); + + const FloatType nd = (FloatType)n; + + m_avg = m_total / nd; + m_avg_sq = m_total_sq / nd; + m_rms = sqrt(m_avg_sq); + + for (uint32_t i = 0; i < n; i++) + { + FloatType v = (FloatType)pVals[i * stride]; + FloatType d = v - m_avg; + + const FloatType d2 = d * d; + const FloatType d3 = d2 * d; + const FloatType d4 = d3 * d; + + m_var += d2; + m_mad += fabs(d); + m_skewness += d3; + m_kurtosis += d4; + } + + m_var /= nd; + m_mad /= nd; + + m_std_dev = sqrt(m_var); + + m_coeff_of_var = (m_avg != 0.0f) ? (m_std_dev / fabs(m_avg)) : 0.0f; + + FloatType k3 = m_std_dev * m_std_dev * m_std_dev; + FloatType k4 = k3 * m_std_dev; + m_skewness = (k3 != 0.0f) ? ((m_skewness / nd) / k3) : 0.0f; + m_kurtosis = (k4 != 0.0f) ? (((m_kurtosis / nd) / k4) - 3.0f) : 0.0f; + } + + // Only compute average, variance and standard deviation. + template + void calc_simplified(uint32_t n, const T* pVals, uint32_t stride = 1) + { + clear(); + + if (!n) + return; + + m_n = n; + + for (uint32_t i = 0; i < n; i++) + { + FloatType v = (FloatType)pVals[i * stride]; + + m_total += v; + } + + const FloatType nd = (FloatType)n; + + m_avg = m_total / nd; + + for (uint32_t i = 0; i < n; i++) + { + FloatType v = (FloatType)pVals[i * stride]; + FloatType d = v - m_avg; + + const FloatType d2 = d * d; + + m_var += d2; + } + + m_var /= nd; + m_std_dev = sqrt(m_var); + } + }; + + template + struct comparative_stats + { + FloatType m_cov; // covariance + FloatType m_pearson; // Pearson Correlation Coefficient (r) [-1,1] + FloatType m_mse; // mean squared error + FloatType m_rmse; // root mean squared error + FloatType m_mae; // mean abs error + FloatType m_rmsle; // root mean squared log error + FloatType m_euclidean_dist; // euclidean distance between values as vectors + FloatType m_cosine_sim; // normalized dot products of values as vectors + FloatType 
m_min_diff, m_max_diff; // minimum/maximum abs difference between values + + comparative_stats() + { + clear(); + } + + void clear() + { + m_cov = 0; + m_pearson = 0; + m_mse = 0; + m_rmse = 0; + m_mae = 0; + m_rmsle = 0; + m_euclidean_dist = 0; + m_cosine_sim = 0; + m_min_diff = 0; + m_max_diff = 0; + } + + template + void calc(uint32_t n, const T* pA, const T* pB, uint32_t a_stride = 1, uint32_t b_stride = 1, const stats *pA_stats = nullptr, const stats *pB_stats = nullptr) + { + clear(); + if (!n) + return; + + stats temp_a_stats; + if (!pA_stats) + { + pA_stats = &temp_a_stats; + temp_a_stats.calc(n, pA, a_stride); + } + + stats temp_b_stats; + if (!pB_stats) + { + pB_stats = &temp_b_stats; + temp_b_stats.calc(n, pB, b_stride); + } + + for (uint32_t i = 0; i < n; i++) + { + const FloatType fa = (FloatType)pA[i * a_stride]; + const FloatType fb = (FloatType)pB[i * b_stride]; + + if ((pA_stats->m_min >= 0.0f) && (pB_stats->m_min >= 0.0f)) + { + const FloatType ld = log(fa + 1.0f) - log(fb + 1.0f); + m_rmsle += ld * ld; + } + + const FloatType diff = fa - fb; + const FloatType abs_diff = fabs(diff); + + m_mse += diff * diff; + m_mae += abs_diff; + + m_min_diff = i ? 
minimum(m_min_diff, abs_diff) : abs_diff; + m_max_diff = maximum(m_max_diff, abs_diff); + + const FloatType da = fa - pA_stats->m_avg; + const FloatType db = fb - pB_stats->m_avg; + m_cov += da * db; + + m_cosine_sim += fa * fb; + } + + const FloatType nd = (FloatType)n; + + m_euclidean_dist = sqrt(m_mse); + + m_mse /= nd; + m_rmse = sqrt(m_mse); + + m_mae /= nd; + + m_cov /= nd; + + FloatType dv = (pA_stats->m_std_dev * pB_stats->m_std_dev); + if (dv != 0.0f) + m_pearson = m_cov / dv; + + if ((pA_stats->m_min >= 0.0) && (pB_stats->m_min >= 0.0f)) + m_rmsle = sqrt(m_rmsle / nd); + + FloatType c = pA_stats->m_len * pB_stats->m_len; + if (c != 0.0f) + m_cosine_sim /= c; + else + m_cosine_sim = 0.0f; + } + + // Only computes Pearson, cov, mse, rmse, Euclidean distance + template + void calc_pearson(uint32_t n, const T* pA, const T* pB, uint32_t a_stride = 1, uint32_t b_stride = 1, const stats* pA_stats = nullptr, const stats* pB_stats = nullptr) + { + clear(); + if (!n) + return; + + stats temp_a_stats; + if (!pA_stats) + { + pA_stats = &temp_a_stats; + temp_a_stats.calc(n, pA, a_stride); + } + + stats temp_b_stats; + if (!pB_stats) + { + pB_stats = &temp_b_stats; + temp_b_stats.calc(n, pB, b_stride); + } + + for (uint32_t i = 0; i < n; i++) + { + const FloatType fa = (FloatType)pA[i * a_stride]; + const FloatType fb = (FloatType)pB[i * b_stride]; + + const FloatType diff = fa - fb; + + m_mse += diff * diff; + + const FloatType da = fa - pA_stats->m_avg; + const FloatType db = fb - pB_stats->m_avg; + m_cov += da * db; + } + + const FloatType nd = (FloatType)n; + + m_euclidean_dist = sqrt(m_mse); + + m_mse /= nd; + m_rmse = sqrt(m_mse); + + m_cov /= nd; + + FloatType dv = (pA_stats->m_std_dev * pB_stats->m_std_dev); + if (dv != 0.0f) + m_pearson = m_cov / dv; + } + + // Only computes MSE, RMSE, eclidiean distance, and covariance. 
+ template + void calc_simplified(uint32_t n, const T* pA, const T* pB, uint32_t a_stride = 1, uint32_t b_stride = 1, const stats* pA_stats = nullptr, const stats* pB_stats = nullptr) + { + clear(); + if (!n) + return; + + stats temp_a_stats; + if (!pA_stats) + { + pA_stats = &temp_a_stats; + temp_a_stats.calc(n, pA, a_stride); + } + + stats temp_b_stats; + if (!pB_stats) + { + pB_stats = &temp_b_stats; + temp_b_stats.calc(n, pB, b_stride); + } + + for (uint32_t i = 0; i < n; i++) + { + const FloatType fa = (FloatType)pA[i * a_stride]; + const FloatType fb = (FloatType)pB[i * b_stride]; + + const FloatType diff = fa - fb; + + m_mse += diff * diff; + + const FloatType da = fa - pA_stats->m_avg; + const FloatType db = fb - pB_stats->m_avg; + m_cov += da * db; + } + + const FloatType nd = (FloatType)n; + + m_euclidean_dist = sqrt(m_mse); + + m_mse /= nd; + m_rmse = sqrt(m_mse); + + m_cov /= nd; + } + + // Only computes covariance. + template + void calc_cov(uint32_t n, const T* pA, const T* pB, uint32_t a_stride = 1, uint32_t b_stride = 1, const stats* pA_stats = nullptr, const stats* pB_stats = nullptr) + { + clear(); + if (!n) + return; + + stats temp_a_stats; + if (!pA_stats) + { + pA_stats = &temp_a_stats; + temp_a_stats.calc(n, pA, a_stride); + } + + stats temp_b_stats; + if (!pB_stats) + { + pB_stats = &temp_b_stats; + temp_b_stats.calc(n, pB, b_stride); + } + + for (uint32_t i = 0; i < n; i++) + { + const FloatType fa = (FloatType)pA[i * a_stride]; + const FloatType fb = (FloatType)pB[i * b_stride]; + + const FloatType da = fa - pA_stats->m_avg; + const FloatType db = fb - pB_stats->m_avg; + m_cov += da * db; + } + + const FloatType nd = (FloatType)n; + + m_cov /= nd; + } + }; + + class stat_history + { + public: + stat_history(uint32_t size) + { + init(size); + } + + void init(uint32_t size) + { + clear(); + + m_samples.reserve(size); + m_samples.resize(0); + m_max_samples = size; + } + + inline void clear() + { + m_samples.resize(0); + m_max_samples = 0; + } 
+ + inline void update(double val) + { + m_samples.push_back(val); + + if (m_samples.size() > m_max_samples) + m_samples.erase_index(0); + } + + inline size_t size() + { + return m_samples.size(); + } + + struct stats + { + double m_avg = 0; + double m_std_dev = 0; + double m_var = 0; + double m_mad = 0; + double m_min_val = 0; + double m_max_val = 0; + + void clear() + { + basisu::clear_obj(*this); + } + }; + + inline void get_stats(stats& s) + { + s.clear(); + + if (m_samples.empty()) + return; + + double total = 0, total2 = 0; + + for (size_t i = 0; i < m_samples.size(); i++) + { + const double v = m_samples[i]; + + total += v; + total2 += v * v; + + if (!i) + { + s.m_min_val = v; + s.m_max_val = v; + } + else + { + s.m_min_val = basisu::minimum(s.m_min_val, v); + s.m_max_val = basisu::maximum(s.m_max_val, v); + } + } + + const double n = (double)m_samples.size(); + + s.m_avg = total / n; + s.m_std_dev = sqrt((n * total2 - total * total)) / n; + s.m_var = (n * total2 - total * total) / (n * n); + + double sc = 0; + for (size_t i = 0; i < m_samples.size(); i++) + { + const double v = m_samples[i]; + s.m_mad += fabs(v - s.m_avg); + + sc += basisu::square(v - s.m_avg); + } + sc = sqrt(sc / n); + + s.m_mad /= n; + } + + private: + uint32_t m_max_samples; + basisu::vector m_samples; + }; + + // bfloat16 helpers, see: + // https://en.wikipedia.org/wiki/Bfloat16_floating-point_format + + typedef union + { + uint32_t u; + float f; + } float32_union; + + typedef uint16_t bfloat16; + + inline float bfloat16_to_float(bfloat16 bfloat16) + { + float32_union float_union; + float_union.u = ((uint32_t)bfloat16) << 16; + return float_union.f; + } + + inline bfloat16 float_to_bfloat16(float input, bool round_flag = true) + { + float32_union float_union; + float_union.f = input; + + uint32_t exponent = (float_union.u >> 23) & 0xFF; + + // Check if the number is denormalized in float32 (exponent == 0) + if (exponent == 0) + { + // Handle denormalized float32 as zero in bfloat16 + 
return 0x0000; + } + + // Extract the top 16 bits (sign, exponent, and 7 most significant bits of the mantissa) + uint32_t upperBits = float_union.u >> 16; + + if (round_flag) + { + // Check the most significant bit of the lower 16 bits for rounding + uint32_t lowerBits = float_union.u & 0xFFFF; + + // Round to nearest or even + if ((lowerBits & 0x8000) && + ((lowerBits > 0x8000) || ((lowerBits == 0x8000) && (upperBits & 1))) + ) + { + // Round up + upperBits += 1; + + // Check for overflow in the exponent after rounding up + if (((upperBits & 0x7F80) == 0x7F80) && ((upperBits & 0x007F) == 0)) + { + // Exponent overflow (the upper bits became all 1s) + // Set the result to infinity + upperBits = (upperBits & 0x8000) | 0x7F80; // Preserve the sign bit, set exponent to 0xFF, and mantissa to 0 + } + } + } + + return (bfloat16)upperBits; + } + + inline int bfloat16_get_exp(bfloat16 v) + { + return (int)((v >> 7) & 0xFF) - 127; + } + + inline int bfloat16_get_mantissa(bfloat16 v) + { + return (v & 0x7F); + } + + inline int bfloat16_get_sign(bfloat16 v) + { + return (v & 0x8000) ? -1 : 1; + } + + inline bool bfloat16_is_nan_or_inf(bfloat16 v) + { + return ((v >> 7) & 0xFF) == 0xFF; + } + + inline bool bfloat16_is_zero(bfloat16 v) + { + return (v & 0x7FFF) == 0; + } + + inline bfloat16 bfloat16_init(int sign, int exp, int mant) + { + uint16_t res = (sign < 0) ? 
0x8000 : 0; + + assert((exp >= -126) && (res <= 127)); + res |= ((exp + 127) << 7); + + assert((mant >= 0) && (mant < 128)); + res |= mant; + + return res; + } + + +} // namespace basisu + diff --git a/thirdparty/basisu/encoder/basisu_miniz.h b/thirdparty/basisu/encoder/basisu_miniz.h new file mode 100644 index 000000000..dab38f9f9 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_miniz.h @@ -0,0 +1,2531 @@ +/* miniz.c v1.15 - deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt + + Forked from the public domain/unlicense version at: https://code.google.com/archive/p/miniz/ + + Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef MINIZ_HEADER_INCLUDED +#define MINIZ_HEADER_INCLUDED + +#include + +// Defines to completely disable specific portions of miniz.c: +// If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. + +// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. +//#define MINIZ_NO_STDIO + +// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or +// get/set file times, and the C run-time funcs that get/set times won't be called. +// The current downside is the times written to your archives will be from 1979. 
+//#define MINIZ_NO_TIME + +// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_APIS + +// Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_WRITING_APIS + +// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. +//#define MINIZ_NO_ZLIB_APIS + +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. +//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. +// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc +// callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user +// functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. +//#define MINIZ_NO_MALLOC + +#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) + // TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux + #define MINIZ_NO_TIME +#endif + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) + #include +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) +// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. +#define MINIZ_X86_OR_X64_CPU 1 +#endif + +#if (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. +#define MINIZ_LITTLE_ENDIAN 1 +#endif + +#if MINIZ_X86_OR_X64_CPU +// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 +#endif + +// Using unaligned loads and stores causes errors when using UBSan. Jam it off. 
+#if defined(__has_feature) +#if __has_feature(undefined_behavior_sanitizer) +#undef MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 +#endif +#endif + +#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) +// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). +#define MINIZ_HAS_64BIT_REGISTERS 1 +#endif + +namespace buminiz { + +// ------------------- zlib-style API Definitions. + +// For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! +typedef unsigned long mz_ulong; + +// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. +void mz_free(void *p); + +#define MZ_ADLER32_INIT (1) +// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); + +#define MZ_CRC32_INIT (0) +// mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. +mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); + +// Compression strategies. +enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 }; + +// Method +#define MZ_DEFLATED 8 + +#ifndef MINIZ_NO_ZLIB_APIS + +// Heap allocation callbacks. +// Note that mz_alloc_func parameter types purpsosely differ from zlib's: items/size is size_t, not unsigned long. 
+typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); +typedef void (*mz_free_func)(void *opaque, void *address); +typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); + +#define MZ_VERSION "9.1.15" +#define MZ_VERNUM 0x91F0 +#define MZ_VER_MAJOR 9 +#define MZ_VER_MINOR 1 +#define MZ_VER_REVISION 15 +#define MZ_VER_SUBREVISION 0 + +// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). +enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 }; + +// Return status codes. MZ_PARAM_ERROR is non-standard. +enum { MZ_OK = 0, MZ_STREAM_END = 1, MZ_NEED_DICT = 2, MZ_ERRNO = -1, MZ_STREAM_ERROR = -2, MZ_DATA_ERROR = -3, MZ_MEM_ERROR = -4, MZ_BUF_ERROR = -5, MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 }; + +// Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. +enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 }; + +// Window bits +#define MZ_DEFAULT_WINDOW_BITS 15 + +struct mz_internal_state; + +// Compression/decompression stream struct. 
+typedef struct mz_stream_s +{ + const unsigned char *next_in; // pointer to next byte to read + unsigned int avail_in; // number of bytes available at next_in + mz_ulong total_in; // total number of bytes consumed so far + + unsigned char *next_out; // pointer to next byte to write + unsigned int avail_out; // number of bytes that can be written to next_out + mz_ulong total_out; // total number of bytes produced so far + + char *msg; // error msg (unused) + struct mz_internal_state *state; // internal state, allocated by zalloc/zfree + + mz_alloc_func zalloc; // optional heap allocation function (defaults to malloc) + mz_free_func zfree; // optional heap free function (defaults to free) + void *opaque; // heap alloc function user pointer + + int data_type; // data_type (unused) + mz_ulong adler; // adler32 of the source or uncompressed data + mz_ulong reserved; // not used +} mz_stream; + +typedef mz_stream *mz_streamp; + +// Returns the version string of miniz.c. +const char *mz_version(void); + +// mz_deflateInit() initializes a compressor with default options: +// Parameters: +// pStream must point to an initialized mz_stream struct. +// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. +// level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. +// (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if the input parameters are bogus. +// MZ_MEM_ERROR on out of memory. 
+int mz_deflateInit(mz_streamp pStream, int level); + +// mz_deflateInit2() is like mz_deflate(), except with more control: +// Additional parameters: +// method must be MZ_DEFLATED +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) +// mem_level must be between [1, 9] (it's checked but ignored by miniz.c) +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); + +// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). +int mz_deflateReset(mz_streamp pStream); + +// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. +// Return values: +// MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). +// MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) +int mz_deflate(mz_streamp pStream, int flush); + +// mz_deflateEnd() deinitializes a compressor: +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. 
+int mz_deflateEnd(mz_streamp pStream); + +// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); + +// Single-call compression functions mz_compress() and mz_compress2(): +// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level); + +// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). +mz_ulong mz_compressBound(mz_ulong source_len); + +// Initializes a decompressor. +int mz_inflateInit(mz_streamp pStream); + +// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). +int mz_inflateInit2(mz_streamp pStream, int window_bits); + +// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. +// On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). 
+// MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. +// Return values: +// MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. +// MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_DATA_ERROR if the deflate stream is invalid. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again +// with more input data, or with more room in the output buffer (except when using single call decompression, described above). +int mz_inflate(mz_streamp pStream, int flush); +int mz_inflate2(mz_streamp pStream, int flush, int adler32_checking); + +// Deinitializes a decompressor. +int mz_inflateEnd(mz_streamp pStream); + +// Single-call decompression. +// Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); + +// Returns a string description of the specified error code, or NULL if the error code is invalid. +const char *mz_error(int err); + +// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. 
+#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + typedef unsigned char Byte; + typedef unsigned int uInt; + typedef mz_ulong uLong; + typedef Byte Bytef; + typedef uInt uIntf; + typedef char charf; + typedef int intf; + typedef void *voidpf; + typedef uLong uLongf; + typedef void *voidp; + typedef void *const voidpc; + #define Z_NULL 0 + #define Z_NO_FLUSH MZ_NO_FLUSH + #define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH + #define Z_SYNC_FLUSH MZ_SYNC_FLUSH + #define Z_FULL_FLUSH MZ_FULL_FLUSH + #define Z_FINISH MZ_FINISH + #define Z_BLOCK MZ_BLOCK + #define Z_OK MZ_OK + #define Z_STREAM_END MZ_STREAM_END + #define Z_NEED_DICT MZ_NEED_DICT + #define Z_ERRNO MZ_ERRNO + #define Z_STREAM_ERROR MZ_STREAM_ERROR + #define Z_DATA_ERROR MZ_DATA_ERROR + #define Z_MEM_ERROR MZ_MEM_ERROR + #define Z_BUF_ERROR MZ_BUF_ERROR + #define Z_VERSION_ERROR MZ_VERSION_ERROR + #define Z_PARAM_ERROR MZ_PARAM_ERROR + #define Z_NO_COMPRESSION MZ_NO_COMPRESSION + #define Z_BEST_SPEED MZ_BEST_SPEED + #define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION + #define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION + #define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY + #define Z_FILTERED MZ_FILTERED + #define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY + #define Z_RLE MZ_RLE + #define Z_FIXED MZ_FIXED + #define Z_DEFLATED MZ_DEFLATED + #define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS + #define alloc_func mz_alloc_func + #define free_func mz_free_func + #define internal_state mz_internal_state + #define z_stream mz_stream + #define deflateInit mz_deflateInit + #define deflateInit2 mz_deflateInit2 + #define deflateReset mz_deflateReset + #define deflate mz_deflate + #define deflateEnd mz_deflateEnd + #define deflateBound mz_deflateBound + #define compress mz_compress + #define compress2 mz_compress2 + #define compressBound mz_compressBound + #define inflateInit mz_inflateInit + #define inflateInit2 mz_inflateInit2 + #define inflate mz_inflate + #define inflateEnd mz_inflateEnd + #define uncompress mz_uncompress + #define crc32 mz_crc32 + 
#define adler32 mz_adler32 + #define MAX_WBITS 15 + #define MAX_MEM_LEVEL 9 + #define zError mz_error + #define ZLIB_VERSION MZ_VERSION + #define ZLIB_VERNUM MZ_VERNUM + #define ZLIB_VER_MAJOR MZ_VER_MAJOR + #define ZLIB_VER_MINOR MZ_VER_MINOR + #define ZLIB_VER_REVISION MZ_VER_REVISION + #define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION + #define zlibVersion mz_version + #define zlib_version mz_version() +#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +#endif // MINIZ_NO_ZLIB_APIS + +// ------------------- Types and macros + +typedef unsigned char mz_uint8; +typedef signed short mz_int16; +typedef unsigned short mz_uint16; +typedef unsigned int mz_uint32; +typedef unsigned int mz_uint; +typedef long long mz_int64; +typedef unsigned long long mz_uint64; +typedef int mz_bool; + +#define MZ_FALSE (0) +#define MZ_TRUE (1) + +// An attempt to work around MSVC's spammy "warning C4127: conditional expression is constant" message. +#ifdef _MSC_VER + #define MZ_MACRO_END while (0, 0) +#else + #define MZ_MACRO_END while (0) +#endif + +// ------------------- Low-level Decompression API Definitions + +// Decompression flags used by tinfl_decompress(). +// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream. +// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. +// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). +// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. 
+enum +{ + TINFL_FLAG_PARSE_ZLIB_HEADER = 1, + TINFL_FLAG_HAS_MORE_INPUT = 2, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, + TINFL_FLAG_COMPUTE_ADLER32 = 8 +}; + +// High level decompression functions: +// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. +// On return: +// Function returns a pointer to the decompressed data, or NULL on failure. +// *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. +// The caller must call mz_free() on the returned block when it's no longer needed. +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. +// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. +#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. +// Returns 1 on success or 0 on failure. +typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor; + +// Max size of LZ dictionary. +#define TINFL_LZ_DICT_SIZE 32768 + +// Return status. 
+typedef enum +{ + TINFL_STATUS_BAD_PARAM = -3, + TINFL_STATUS_ADLER32_MISMATCH = -2, + TINFL_STATUS_FAILED = -1, + TINFL_STATUS_DONE = 0, + TINFL_STATUS_NEEDS_MORE_INPUT = 1, + TINFL_STATUS_HAS_MORE_OUTPUT = 2 +} tinfl_status; + +// Initializes the decompressor to its initial state. +#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END +#define tinfl_get_adler32(r) (r)->m_check_adler32 + +// Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. +// This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags); + +// Internal/private bits follow. 
+enum +{ + TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19, + TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS +}; + +typedef struct +{ + mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; + mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; +} tinfl_huff_table; + +#if MINIZ_HAS_64BIT_REGISTERS + #define TINFL_USE_64BIT_BITBUF 1 +#endif + +#if TINFL_USE_64BIT_BITBUF + typedef mz_uint64 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (64) +#else + typedef mz_uint32 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (32) +#endif + +struct tinfl_decompressor_tag +{ + mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; + tinfl_bit_buf_t m_bit_buf; + size_t m_dist_from_out_buf_start; + tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; + mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; +}; + +// ------------------- Low-level Compression API Definitions + +// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). +#define TDEFL_LESS_MEMORY 0 + +// tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): +// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). +enum +{ + TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF +}; + +// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. 
+// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). +// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. +// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). +// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) +// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. +// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. +// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. +// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK). +enum +{ + TDEFL_WRITE_ZLIB_HEADER = 0x01000, + TDEFL_COMPUTE_ADLER32 = 0x02000, + TDEFL_GREEDY_PARSING_FLAG = 0x04000, + TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, + TDEFL_RLE_MATCHES = 0x10000, + TDEFL_FILTER_MATCHES = 0x20000, + TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, + TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 +}; + +// High level compression functions: +// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of source block to compress. +// flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. +// The caller must free() the returned block when it's no longer needed. 
+void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +// tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. +// Returns 0 on failure. +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +// Compresses an image to a compressed PNG file in memory. +// On entry: +// pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. +// The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. +// level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL +// If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pLen_out will be set to the size of the PNG image file. +// The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip); +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out); + +// Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. +typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); + +// tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. 
+mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 }; + +// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). +#if TDEFL_LESS_MEMORY +enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#else +enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#endif + +// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. +typedef enum +{ + TDEFL_STATUS_BAD_PARAM = -2, + TDEFL_STATUS_PUT_BUF_FAILED = -1, + TDEFL_STATUS_OKAY = 0, + TDEFL_STATUS_DONE = 1, +} tdefl_status; + +// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums +typedef enum +{ + TDEFL_NO_FLUSH = 0, + TDEFL_SYNC_FLUSH = 2, + TDEFL_FULL_FLUSH = 3, + TDEFL_FINISH = 4 +} tdefl_flush; + +// tdefl's compression state structure. 
+typedef struct +{ + tdefl_put_buf_func_ptr m_pPut_buf_func; + void *m_pPut_buf_user; + mz_uint m_flags, m_max_probes[2]; + int m_greedy_parsing; + mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; + mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; + mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; + mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; + tdefl_status m_prev_return_status; + const void *m_pIn_buf; + void *m_pOut_buf; + size_t *m_pIn_buf_size, *m_pOut_buf_size; + tdefl_flush m_flush; + const mz_uint8 *m_pSrc; + size_t m_src_buf_left, m_out_buf_ofs; + mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; + mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; + mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; + mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; + mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; +} tdefl_compressor; + +// Initializes the compressor. +// There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. +// pBut_buf_func: If NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. +// If pBut_buf_func is NULL the user should always call the tdefl_compress() API. +// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) 
+tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +// Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush); + +// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. +// tdefl_compress_buffer() always consumes the entire input buffer. +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush); + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); +mz_uint32 tdefl_get_adler32(tdefl_compressor *d); + +// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't defined, because it uses some of its macros. +#ifndef MINIZ_NO_ZLIB_APIS +// Create tdefl_compress() flags given zlib-style compression parameters. +// level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) +// window_bits may be -15 (raw deflate) or 15 (zlib) +// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); +#endif // #ifndef MINIZ_NO_ZLIB_APIS + +} // namespace buminiz + +#endif // MINIZ_HEADER_INCLUDED + +// ------------------- End of Header: Implementation follows. (If you only want the header, define MINIZ_HEADER_FILE_ONLY.) + +#ifndef MINIZ_HEADER_FILE_ONLY + +#include +#include + +namespace buminiz { + +typedef unsigned char mz_validate_uint16[sizeof(mz_uint16)==2 ? 1 : -1]; +typedef unsigned char mz_validate_uint32[sizeof(mz_uint32)==4 ? 1 : -1]; +typedef unsigned char mz_validate_uint64[sizeof(mz_uint64)==8 ? 
1 : -1]; + +#define MZ_ASSERT(x) assert(x) + +#ifdef MINIZ_NO_MALLOC + #define MZ_MALLOC(x) NULL + #define MZ_FREE(x) (void)x, ((void)0) + #define MZ_REALLOC(p, x) NULL +#else + #define MZ_MALLOC(x) malloc(x) + #define MZ_FREE(x) free(x) + #define MZ_REALLOC(p, x) realloc(p, x) +#endif + +#define MZ_MAX(a,b) (((a)>(b))?(a):(b)) +#define MZ_MIN(a,b) (((a)<(b))?(a):(b)) +#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + #define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) + #define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) +#else + #define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) + #define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) +#endif + +#ifdef _MSC_VER + #define MZ_FORCEINLINE __forceinline +#elif defined(__GNUC__) + #define MZ_FORCEINLINE inline __attribute__((__always_inline__)) +#else + #define MZ_FORCEINLINE inline +#endif + +// ------------------- zlib-style API's + +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) +{ + mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); size_t block_len = buf_len % 5552; + if (!ptr) return MZ_ADLER32_INIT; + while (buf_len) { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + return (s2 << 16) + s1; +} + +// Karl Malbrain's compact CRC-32. 
See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/ +mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) +{ + static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, + 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; + mz_uint32 crcu32 = (mz_uint32)crc; + if (!ptr) return MZ_CRC32_INIT; + crcu32 = ~crcu32; while (buf_len--) { mz_uint8 b = *ptr++; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; } + return ~crcu32; +} + +void mz_free(void *p) +{ + MZ_FREE(p); +} + +#ifndef MINIZ_NO_ZLIB_APIS + +static void *def_alloc_func(void *opaque, size_t items, size_t size) { (void)opaque, (void)items, (void)size; return MZ_MALLOC(items * size); } +static void def_free_func(void *opaque, void *address) { (void)opaque, (void)address; MZ_FREE(address); } +//static void *def_realloc_func(void *opaque, void *address, size_t items, size_t size) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC(address, items * size); } + +const char *mz_version(void) +{ + return MZ_VERSION; +} + +int mz_deflateInit(mz_streamp pStream, int level) +{ + return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY); +} + +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) +{ + tdefl_compressor *pComp; + mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); + + if (!pStream) return MZ_STREAM_ERROR; + if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 
MZ_ADLER32_INIT; + pStream->msg = NULL; + pStream->reserved = 0; + pStream->total_in = 0; + pStream->total_out = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) + return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pComp; + + if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) + { + mz_deflateEnd(pStream); + return MZ_PARAM_ERROR; + } + + return MZ_OK; +} + +int mz_deflateReset(mz_streamp pStream) +{ + if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) return MZ_STREAM_ERROR; + pStream->total_in = pStream->total_out = 0; + tdefl_init((tdefl_compressor*)pStream->state, NULL, NULL, ((tdefl_compressor*)pStream->state)->m_flags); + return MZ_OK; +} + +int mz_deflate(mz_streamp pStream, int flush) +{ + size_t in_bytes, out_bytes; + mz_ulong orig_total_in, orig_total_out; + int mz_status = MZ_OK; + + if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) return MZ_STREAM_ERROR; + if (!pStream->avail_out) return MZ_BUF_ERROR; + + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + + if (((tdefl_compressor*)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE) + return (flush == MZ_FINISH) ? 
MZ_STREAM_END : MZ_BUF_ERROR; + + orig_total_in = pStream->total_in; orig_total_out = pStream->total_out; + for ( ; ; ) + { + tdefl_status defl_status; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + + defl_status = tdefl_compress((tdefl_compressor*)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush); + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32((tdefl_compressor*)pStream->state); + + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (defl_status < 0) + { + mz_status = MZ_STREAM_ERROR; + break; + } + else if (defl_status == TDEFL_STATUS_DONE) + { + mz_status = MZ_STREAM_END; + break; + } + else if (!pStream->avail_out) + break; + else if ((!pStream->avail_in) && (flush != MZ_FINISH)) + { + if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) + break; + return MZ_BUF_ERROR; // Can't make forward progress without some input. + } + } + return mz_status; +} + +int mz_deflateEnd(mz_streamp pStream) +{ + if (!pStream) return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) +{ + (void)pStream; + // This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) 
+ mz_uint64 a = 128ULL + (source_len * 110ULL) / 100ULL; + mz_uint64 b = 128ULL + (mz_uint64)source_len + ((source_len / (31 * 1024)) + 1ULL) * 5ULL; + + mz_uint64 t = MZ_MAX(a, b); + if (((mz_ulong)t) != t) + t = (mz_ulong)(-1); + + return (mz_ulong)t; +} + +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level) +{ + int status; + mz_stream stream; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_deflateInit(&stream, level); + if (status != MZ_OK) return status; + + status = mz_deflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_deflateEnd(&stream); + return (status == MZ_OK) ? MZ_BUF_ERROR : status; + } + + *pDest_len = stream.total_out; + return mz_deflateEnd(&stream); +} + +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); +} + +mz_ulong mz_compressBound(mz_ulong source_len) +{ + return mz_deflateBound(NULL, source_len); +} + +typedef struct +{ + tinfl_decompressor m_decomp; + mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits; + mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; + tinfl_status m_last_status; +} inflate_state; + +int mz_inflateInit2(mz_streamp pStream, int window_bits) +{ + inflate_state *pDecomp; + if (!pStream) return MZ_STREAM_ERROR; + if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 0; + pStream->msg = NULL; + pStream->total_in = 0; + pStream->total_out = 0; + pStream->reserved = 0; + if (!pStream->zalloc) 
pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pDecomp = (inflate_state*)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state)); + if (!pDecomp) return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pDecomp; + + tinfl_init(&pDecomp->m_decomp); + pDecomp->m_dict_ofs = 0; + pDecomp->m_dict_avail = 0; + pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; + pDecomp->m_first_call = 1; + pDecomp->m_has_flushed = 0; + pDecomp->m_window_bits = window_bits; + + return MZ_OK; +} + +int mz_inflateInit(mz_streamp pStream) +{ + return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); +} + +int mz_inflate2(mz_streamp pStream, int flush, int adler32_checking) +{ + inflate_state* pState; + mz_uint n, first_call, decomp_flags = adler32_checking ? TINFL_FLAG_COMPUTE_ADLER32 : 0; + size_t in_bytes, out_bytes, orig_avail_in; + tinfl_status status; + + if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + + pState = (inflate_state*)pStream->state; + if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; + orig_avail_in = pStream->avail_in; + + first_call = pState->m_first_call; pState->m_first_call = 0; + if (pState->m_last_status < 0) return MZ_DATA_ERROR; + + if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + pState->m_has_flushed |= (flush == MZ_FINISH); + + if ((flush == MZ_FINISH) && (first_call)) + { + // MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file. 
+ decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags); + pState->m_last_status = status; + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes; + + if (status < 0) + return MZ_DATA_ERROR; + else if (status != TINFL_STATUS_DONE) + { + pState->m_last_status = TINFL_STATUS_FAILED; + return MZ_BUF_ERROR; + } + return MZ_STREAM_END; + } + // flush != MZ_FINISH then we must assume there's more input. + if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; + + if (pState->m_dict_avail) + { + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; + } + + for ( ; ; ) + { + in_bytes = pStream->avail_in; + out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; + + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); + pState->m_last_status = status; + + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp); + + pState->m_dict_avail = (mz_uint)out_bytes; + + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + + if (status < 0) + return MZ_DATA_ERROR; // Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well). + else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) + return MZ_BUF_ERROR; // Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH. + else if (flush == MZ_FINISH) + { + // The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH. + if (status == TINFL_STATUS_DONE) + return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; + // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong. + else if (!pStream->avail_out) + return MZ_BUF_ERROR; + } + else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail)) + break; + } + + return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; +} + +int mz_inflate(mz_streamp pStream, int flush) +{ + return mz_inflate2(pStream, flush, MZ_TRUE); +} + +int mz_inflateEnd(mz_streamp pStream) +{ + if (!pStream) + return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + mz_stream stream; + int status; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_inflateInit(&stream); + if (status != MZ_OK) + return status; + + status = mz_inflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_inflateEnd(&stream); + return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? 
MZ_DATA_ERROR : status; + } + *pDest_len = stream.total_out; + + return mz_inflateEnd(&stream); +} + +const char *mz_error(int err) +{ + static struct { int m_err; const char *m_pDesc; } s_error_descs[] = + { + { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" }, + { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" } + }; + mz_uint i; for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; + return NULL; +} + +#endif //MINIZ_NO_ZLIB_APIS + +// ------------------- Low-level Decompression (completely independent from all compression API's) + +#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) +#define TINFL_MEMSET(p, c, l) memset(p, c, l) + +#define TINFL_CR_BEGIN switch(r->m_state) { case 0: +#define TINFL_CR_RETURN(state_index, result) do { status = result; r->m_state = state_index; goto common_exit; case state_index:; } MZ_MACRO_END +#define TINFL_CR_RETURN_FOREVER(state_index, result) do { for ( ; ; ) { TINFL_CR_RETURN(state_index, result); } } MZ_MACRO_END +#define TINFL_CR_FINISH } + +// TODO: If the caller has indicated that there's no more input, and we attempt to read beyond the input buf, then something is wrong with the input because the inflator never +// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of the stream with 0's in this scenario. 
// Fetches one input byte into c. If the input buffer is exhausted: with
// TINFL_FLAG_HAS_MORE_INPUT set, suspend via the coroutine macro and retry
// after resumption; otherwise pad the stream with 0 (see TODO above).
#define TINFL_GET_BYTE(state_index, c) do { \
  if (pIn_buf_cur >= pIn_buf_end) { \
    for ( ; ; ) { \
      if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \
        TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \
        if (pIn_buf_cur < pIn_buf_end) { \
          c = *pIn_buf_cur++; \
          break; \
        } \
      } else { \
        c = 0; \
        break; \
      } \
    } \
  } else c = *pIn_buf_cur++; } MZ_MACRO_END

// Appends whole input bytes to bit_buf until at least n bits are available.
#define TINFL_NEED_BITS(state_index, n) do { mz_uint c; TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; } while (num_bits < (mz_uint)(n))

// Discards the low n bits of the bit buffer, refilling first if needed.
#define TINFL_SKIP_BITS(state_index, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END

// Extracts the low n bits of the bit buffer into b, refilling first if needed.
#define TINFL_GET_BITS(state_index, b, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } b = bit_buf & ((1 << (n)) - 1); bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END

// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2.
// It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a
// Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the
// bit buffer contains >=15 bits (deflate's max. Huffman code size).
// Byte-at-a-time bit-buffer refill used near the end of the input stream.
// Tries the fast lookup table first (entry packs code length in the high bits);
// on failure walks the code tree with the bits currently available, and only
// reads another input byte when the buffered bits cannot resolve a full code.
#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \
  do { \
    temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \
    if (temp >= 0) { \
      code_len = temp >> 9; \
      if ((code_len) && (num_bits >= code_len)) \
      break; \
    } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \
       code_len = TINFL_FAST_LOOKUP_BITS; \
       do { \
          temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \
       } while ((temp < 0) && (num_bits >= (code_len + 1))); if (temp >= 0) break; \
    } TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; \
  } while (num_bits < 15);

// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read
// beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully
// decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32.
// The slow path is only executed at the very end of the input buffer.
+#define TINFL_HUFF_DECODE(state_index, sym, pHuff) do { \ + int temp; mz_uint code_len, c; \ + if (num_bits < 15) { \ + if ((pIn_buf_end - pIn_buf_cur) < 2) { \ + TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ + } else { \ + bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); pIn_buf_cur += 2; num_bits += 16; \ + } \ + } \ + if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \ + code_len = temp >> 9, temp &= 511; \ + else { \ + code_len = TINFL_FAST_LOOKUP_BITS; do { temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; } while (temp < 0); \ + } sym = temp; bit_buf >>= code_len; num_bits -= code_len; } MZ_MACRO_END + +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags) +{ + static const int s_length_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 }; + static const int s_length_extra[31]= { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + static const int s_dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + static const int s_dist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + static const mz_uint8 s_length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + static const int s_min_table_sizes[3] = { 257, 1, 4 }; + + tinfl_status status = TINFL_STATUS_FAILED; mz_uint32 num_bits, dist, counter, num_extra; tinfl_bit_buf_t bit_buf; + const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size; + mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size; + size_t out_buf_size_mask = (decomp_flags & 
TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start; + + // Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter). + if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) { *pIn_buf_size = *pOut_buf_size = 0; return TINFL_STATUS_BAD_PARAM; } + + num_bits = r->m_num_bits; bit_buf = r->m_bit_buf; dist = r->m_dist; counter = r->m_counter; num_extra = r->m_num_extra; dist_from_out_buf_start = r->m_dist_from_out_buf_start; + TINFL_CR_BEGIN + + bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; r->m_z_adler32 = r->m_check_adler32 = 1; + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_GET_BYTE(1, r->m_zhdr0); TINFL_GET_BYTE(2, r->m_zhdr1); + counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); + if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(1ULL << (8U + (r->m_zhdr0 >> 4))))); + if (counter) { TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); } + } + + do + { + TINFL_GET_BITS(3, r->m_final, 3); r->m_type = r->m_final >> 1; + if (r->m_type == 0) + { + TINFL_SKIP_BITS(5, num_bits & 7); + for (counter = 0; counter < 4; ++counter) { if (num_bits) TINFL_GET_BITS(6, r->m_raw_header[counter], 8); else TINFL_GET_BYTE(7, r->m_raw_header[counter]); } + if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); } + while ((counter) && (num_bits)) + { + TINFL_GET_BITS(51, dist, 8); + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)dist; + counter--; + } + while 
(counter) + { + size_t n; while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); } + while (pIn_buf_cur >= pIn_buf_end) + { + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) + { + TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT); + } + else + { + TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED); + } + } + n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter); + TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n; + } + } + else if (r->m_type == 3) + { + TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); + } + else + { + if (r->m_type == 1) + { + mz_uint8 *p = r->m_tables[0].m_code_size; mz_uint i; + r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); + for ( i = 0; i <= 143; ++i) *p++ = 8; for ( ; i <= 255; ++i) *p++ = 9; for ( ; i <= 279; ++i) *p++ = 7; for ( ; i <= 287; ++i) *p++ = 8; + } + else + { + for (counter = 0; counter < 3; counter++) { TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); r->m_table_sizes[counter] += s_min_table_sizes[counter]; } + MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); for (counter = 0; counter < r->m_table_sizes[2]; counter++) { mz_uint s; TINFL_GET_BITS(14, s, 3); r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; } + r->m_table_sizes[2] = 19; + } + for ( ; (int)r->m_type >= 0; r->m_type--) + { + int tree_next, tree_cur; tinfl_huff_table *pTable; + mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; MZ_CLEAR_OBJ(total_syms); MZ_CLEAR_OBJ(pTable->m_look_up); MZ_CLEAR_OBJ(pTable->m_tree); + for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pTable->m_code_size[i]]++; + used_syms = 0, total = 0; next_code[0] = next_code[1] = 0; + for (i = 1; i <= 15; ++i) { used_syms += total_syms[i]; next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); } + if 
((65536 != total) && (used_syms > 1)) + { + TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); + } + for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index) + { + mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; if (!code_size) continue; + cur_code = next_code[code_size]++; for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1); + if (code_size <= TINFL_FAST_LOOKUP_BITS) { mz_int16 k = (mz_int16)((code_size << 9) | sym_index); while (rev_code < TINFL_FAST_LOOKUP_SIZE) { pTable->m_look_up[rev_code] = k; rev_code += (1 << code_size); } continue; } + if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) { pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } + rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); + for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) + { + tree_cur -= ((rev_code >>= 1) & 1); + if (!pTable->m_tree[-tree_cur - 1]) { pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } else tree_cur = pTable->m_tree[-tree_cur - 1]; + } + tree_cur -= ((rev_code >>= 1) & 1); pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; + } + if (r->m_type == 2) + { + for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]); ) + { + mz_uint s; TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); if (dist < 16) { r->m_len_codes[counter++] = (mz_uint8)dist; continue; } + if ((dist == 16) && (!counter)) + { + TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); + } + num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS(18, s, num_extra); s += "\03\03\013"[dist - 16]; + TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? 
r->m_len_codes[counter - 1] : 0, s); counter += s; + } + if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) + { + TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); + } + TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]); + } + } + for ( ; ; ) + { + mz_uint8 *pSrc; + for ( ; ; ) + { + if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) + { + TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); + if (counter >= 256) + break; + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)counter; + } + else + { + int sym2; mz_uint code_len; +#if TINFL_USE_64BIT_BITBUF + if (num_bits < 30) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); pIn_buf_cur += 4; num_bits += 32; } +#else + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + counter = sym2; bit_buf >>= code_len; num_bits -= code_len; + if (counter & 256) + break; + +#if !TINFL_USE_64BIT_BITBUF + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + bit_buf >>= code_len; num_bits -= code_len; + + pOut_buf_cur[0] = (mz_uint8)counter; + if (sym2 & 256) + { + pOut_buf_cur++; + counter = sym2; + break; + } 
+ pOut_buf_cur[1] = (mz_uint8)sym2; + pOut_buf_cur += 2; + } + } + if ((counter &= 511) == 256) break; + + num_extra = s_length_extra[counter - 257]; counter = s_length_base[counter - 257]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(25, extra_bits, num_extra); counter += extra_bits; } + + TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); + num_extra = s_dist_extra[dist]; dist = s_dist_base[dist]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(27, extra_bits, num_extra); dist += extra_bits; } + + dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; + if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) + { + TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); + } + + pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask); + + if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) + { + while (counter--) + { + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask]; + } + continue; + } +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + else if ((counter >= 9) && (counter <= dist)) + { + const mz_uint8 *pSrc_end = pSrc + (counter & ~7); + do + { + ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; + ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; + pOut_buf_cur += 8; + } while ((pSrc += 8) < pSrc_end); + if ((counter &= 7) < 3) + { + if (counter) + { + pOut_buf_cur[0] = pSrc[0]; + if (counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + continue; + } + } +#endif + do + { + pOut_buf_cur[0] = pSrc[0]; + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur[2] = pSrc[2]; + pOut_buf_cur += 3; pSrc += 3; + } while ((int)(counter -= 3) > 2); + if ((int)counter > 0) + { + pOut_buf_cur[0] = pSrc[0]; + if ((int)counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + } + } + } while (!(r->m_final & 1)); + if 
(decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_SKIP_BITS(32, num_bits & 7); for (counter = 0; counter < 4; ++counter) { mz_uint s; if (num_bits) TINFL_GET_BITS(41, s, 8); else TINFL_GET_BYTE(42, s); r->m_z_adler32 = (r->m_z_adler32 << 8) | s; } + } + TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); + TINFL_CR_FINISH + +common_exit: + r->m_num_bits = num_bits; r->m_bit_buf = bit_buf; r->m_dist = dist; r->m_counter = counter; r->m_num_extra = num_extra; r->m_dist_from_out_buf_start = dist_from_out_buf_start; + *pIn_buf_size = pIn_buf_cur - pIn_buf_next; *pOut_buf_size = pOut_buf_cur - pOut_buf_next; + //if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0)) + if ((decomp_flags & TINFL_FLAG_COMPUTE_ADLER32) && (status >= 0)) + { + const mz_uint8 *ptr = pOut_buf_next; size_t buf_len = *pOut_buf_size; + mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552; + while (buf_len) + { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) + { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + r->m_check_adler32 = (s2 << 16) + s1; + if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) + status = TINFL_STATUS_ADLER32_MISMATCH; + } + return status; +} + +// Higher level helper functions. 
// Decompresses an entire in-memory deflate/zlib stream into a heap buffer that
// this function allocates and grows as needed.
// pSrc_buf/src_buf_len: compressed input. *pOut_len receives the decompressed size.
// flags: TINFL_FLAG_* bits (TINFL_FLAG_HAS_MORE_INPUT is masked off — the whole
// input must be present; NON_WRAPPING is forced because the output is linear).
// Returns the malloc'd buffer (caller frees with MZ_FREE) or NULL on failure,
// in which case *pOut_len is 0.
void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags)
{
    tinfl_decompressor decomp; void *pBuf = NULL, *pNew_buf; size_t src_buf_ofs = 0, out_buf_capacity = 0;
    *pOut_len = 0;
    tinfl_init(&decomp);
    for ( ; ; )
    {
        size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity;
        tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8*)pBuf, pBuf ? (mz_uint8*)pBuf + *pOut_len : NULL, &dst_buf_size,
            (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
        // NEEDS_MORE_INPUT is a hard failure here: the caller promised the full stream.
        if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT))
        {
            MZ_FREE(pBuf); *pOut_len = 0; return NULL;
        }
        src_buf_ofs += src_buf_size;
        *pOut_len += dst_buf_size;
        if (status == TINFL_STATUS_DONE) break;
        // Grow geometrically (min 128 bytes) and resume where the decompressor stopped.
        new_out_buf_capacity = out_buf_capacity * 2; if (new_out_buf_capacity < 128) new_out_buf_capacity = 128;
        pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity);
        if (!pNew_buf)
        {
            MZ_FREE(pBuf); *pOut_len = 0; return NULL;
        }
        pBuf = pNew_buf; out_buf_capacity = new_out_buf_capacity;
    }
    return pBuf;
}

// Decompresses an in-memory stream into a caller-supplied, fixed-size buffer.
// Returns the number of bytes written, or TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on
// any error (including the output buffer being too small).
size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags)
{
    tinfl_decompressor decomp; tinfl_status status; tinfl_init(&decomp);
    status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf, &src_buf_len, (mz_uint8*)pOut_buf, (mz_uint8*)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
    return (status != TINFL_STATUS_DONE) ? TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len;
}

// Decompresses an in-memory stream, delivering output through pPut_buf_func in
// chunks, using an internal TINFL_LZ_DICT_SIZE circular dictionary (so output
// can be arbitrarily large with bounded memory).
// On return *pIn_buf_size is the number of input bytes actually consumed.
// Returns 1 on success, 0 on failure (decode error, callback abort, or OOM).
int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags)
{
    int result = 0;
    tinfl_decompressor decomp;
    mz_uint8 *pDict = (mz_uint8*)MZ_MALLOC(TINFL_LZ_DICT_SIZE); size_t in_buf_ofs = 0, dict_ofs = 0;
    if (!pDict)
        return TINFL_STATUS_FAILED;
    tinfl_init(&decomp);
    for ( ; ; )
    {
        size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs;
        // NON_WRAPPING is masked off: the dictionary is used as a wrapping window.
        tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size,
            (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)));
        in_buf_ofs += in_buf_size;
        if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user)))
            break;
        if (status != TINFL_STATUS_HAS_MORE_OUTPUT)
        {
            result = (status == TINFL_STATUS_DONE);
            break;
        }
        dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1);
    }
    MZ_FREE(pDict);
    *pIn_buf_size = in_buf_ofs;
    return result;
}

// ------------------- Low-level Compression (independent from all decompression API's)

// Purposely making these tables static for faster init and thread safety.
// Maps a match length (0..255, i.e. actual length - 3) to its DEFLATE length symbol (257..285). See RFC 1951, section 3.2.5.
static const mz_uint16 s_tdefl_len_sym[256] = {
  257,258,259,260,261,262,263,264,265,265,266,266,267,267,268,268,269,269,269,269,270,270,270,270,271,271,271,271,272,272,272,272,
  273,273,273,273,273,273,273,273,274,274,274,274,274,274,274,274,275,275,275,275,275,275,275,275,276,276,276,276,276,276,276,276,
  277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,
  279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,
  281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,
  282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,
  283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,
  284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,285 };

// Number of extra bits to emit after each length symbol, indexed the same way as s_tdefl_len_sym.
static const mz_uint8 s_tdefl_len_extra[256] = {
  0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0 };

// Maps a match distance of 0..511 directly to its DEFLATE distance symbol.
static const mz_uint8 s_tdefl_small_dist_sym[512] = {
  0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,
  11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,
  13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,
  14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,
  14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
  15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,16,16,16,16,
  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17 };

// Extra bits for each small (0..511) distance, indexed the same way as s_tdefl_small_dist_sym.
static const mz_uint8 s_tdefl_small_dist_extra[512] = {
  0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7 };

// Maps (distance >> 8) for distances >= 512 to its DEFLATE distance symbol.
static const mz_uint8 s_tdefl_large_dist_sym[128] = {
  0,0,18,19,20,20,21,21,22,22,22,22,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,26,26,26,26,
  26,26,26,26,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,
  28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 };

// Extra bits for each large (>= 512) distance, indexed by (distance >> 8).
static const mz_uint8 s_tdefl_large_dist_extra[128] = {
  0,0,8,8,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,
  12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
  13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13 };

// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values.
+typedef struct { mz_uint16 m_key, m_sym_index; } tdefl_sym_freq; +static tdefl_sym_freq* tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq* pSyms0, tdefl_sym_freq* pSyms1) +{ + mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; tdefl_sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1; MZ_CLEAR_OBJ(hist); + for (i = 0; i < num_syms; i++) { mz_uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; } + while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--; + for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) + { + const mz_uint32* pHist = &hist[pass << 8]; + mz_uint offsets[256], cur_ofs = 0; + for (i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; } + for (i = 0; i < num_syms; i++) pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; + { tdefl_sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; } + } + return pCur_syms; +} + +// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. +static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) +{ + int root, leaf, next, avbl, used, dpth; + if (n==0) return; else if (n==1) { A[0].m_key = 1; return; } + A[0].m_key += A[1].m_key; root = 0; leaf = 2; + for (next=1; next < n-1; next++) + { + if (leaf>=n || A[root].m_key=n || (root=0; next--) A[next].m_key = A[A[next].m_key].m_key+1; + avbl = 1; used = dpth = 0; root = n-2; next = n-1; + while (avbl>0) + { + while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; } + while (avbl>used) { A[next--].m_key = (mz_uint16)(dpth); avbl--; } + avbl = 2*used; dpth++; used = 0; + } +} + +// Limits canonical Huffman code table's max code size. 
enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 };

// Kraft-sum repair: shrinks any code longer than max_code_size down to
// max_code_size, then shuffles counts until the lengths form a valid prefix code.
static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)
{
    int i; mz_uint32 total = 0; if (code_list_len <= 1) return;
    for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i];
    for (i = max_code_size; i > 0; i--) total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i));
    while (total != (1UL << max_code_size))
    {
        pNum_codes[max_code_size]--;
        for (i = max_code_size - 1; i > 0; i--) if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; }
        total--;
    }
}

// Builds the canonical Huffman codes for table table_num. For a dynamic table the
// code sizes are derived from the gathered symbol frequencies (m_huff_count); for a
// static table the preset code sizes in m_huff_code_sizes are used as-is.
static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table)
{
    int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; MZ_CLEAR_OBJ(num_codes);
    if (static_table)
    {
        for (i = 0; i < table_len; i++) num_codes[d->m_huff_code_sizes[table_num][i]]++;
    }
    else
    {
        tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms;
        int num_used_syms = 0;
        const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0];
        // Collect only the symbols that actually occur.
        for (i = 0; i < table_len; i++) if (pSym_count[i]) { syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; syms0[num_used_syms++].m_sym_index = (mz_uint16)i; }

        pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); tdefl_calculate_minimum_redundancy(pSyms, num_used_syms);

        for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++;

        tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit);

        // Re-derive each symbol's code size from the (possibly repaired) length histogram;
        // pSyms is frequency-sorted, so walk it backwards assigning short codes to frequent symbols.
        MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); MZ_CLEAR_OBJ(d->m_huff_codes[table_num]);
        for (i = 1, j = num_used_syms; i <= code_size_limit; i++)
            for (l = num_codes[i]; l > 0; l--) d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i);
    }

    // Canonical code assignment per RFC 1951, then bit-reverse each code for LSB-first emission.
    next_code[1] = 0; for (j = 0, i = 2; i <= code_size_limit; i++) next_code[i] = j = ((j + num_codes[i - 1]) << 1);

    for (i = 0; i < table_len; i++)
    {
        mz_uint rev_code = 0, code, code_size; if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue;
        code = next_code[code_size]++; for (l = code_size; l > 0; l--, code >>= 1) rev_code = (rev_code << 1) | (code & 1);
        d->m_huff_codes[table_num][i] = (mz_uint16)rev_code;
    }
}

// Appends `l` bits of `b` to the compressor's bit buffer, flushing whole bytes to
// the output buffer (expects a local `d` pointing at the tdefl_compressor).
#define TDEFL_PUT_BITS(b, l) do { \
    mz_uint bits = b; mz_uint len = l; MZ_ASSERT(bits <= ((1U << len) - 1U)); \
    d->m_bit_buffer |= (bits << d->m_bits_in); d->m_bits_in += len; \
    while (d->m_bits_in >= 8) { \
        if (d->m_pOutput_buf < d->m_pOutput_buf_end) \
            *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \
        d->m_bit_buffer >>= 8; \
        d->m_bits_in -= 8; \
    } \
} MZ_MACRO_END

// Flushes a pending run of repeated code sizes into packed_code_sizes[], either as
// literal sizes (run < 3) or as REP code 16 + repeat count (RFC 1951, section 3.2.7).
#define TDEFL_RLE_PREV_CODE_SIZE() { if (rle_repeat_count) { \
    if (rle_repeat_count < 3) { \
        d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \
        while (rle_repeat_count--) packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \
    } else { \
        d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); packed_code_sizes[num_packed_code_sizes++] = 16; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \
} rle_repeat_count = 0; } }

// Flushes a pending run of zero code sizes, using codes 17 (3-10 zeros) or 18 (11-138 zeros).
#define TDEFL_RLE_ZERO_CODE_SIZE() { if (rle_z_count) { \
    if (rle_z_count < 3) { \
        d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \
    } else if (rle_z_count <= 10) { \
        d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); packed_code_sizes[num_packed_code_sizes++] = 17; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \
    } else { \
        d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); packed_code_sizes[num_packed_code_sizes++] = 18; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \
} rle_z_count = 0; } }

// Transmission order of the code-length-code lengths (RFC 1951, section 3.2.7).
static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };

// Emits the header of a dynamic-Huffman block: builds the literal/length and
// distance tables from the gathered counts, RLE-packs their code sizes, and writes
// the block header bits.
static void tdefl_start_dynamic_block(tdefl_compressor *d)
{
    int num_lit_codes, num_dist_codes, num_bit_lengths; mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index;
    mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF;

    d->m_huff_count[0][256] = 1; // end-of-block symbol always occurs once

    tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE);
    tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE);

    // Trim trailing unused symbols (minimums 257 literal/length and 1 distance codes).
    for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break;
    for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break;

    memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes);
    memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes);
    total_code_sizes_to_pack = num_lit_codes + num_dist_codes; num_packed_code_sizes = 0; rle_z_count = 0; rle_repeat_count = 0;

    // RLE-encode the concatenated code sizes, gathering frequencies for table 2 as we go.
    memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2);
    for (i = 0; i < total_code_sizes_to_pack; i++)
    {
        mz_uint8 code_size = code_sizes_to_pack[i];
        if (!code_size)
        {
            TDEFL_RLE_PREV_CODE_SIZE();
            if (++rle_z_count == 138) { TDEFL_RLE_ZERO_CODE_SIZE(); }
        }
        else
        {
            TDEFL_RLE_ZERO_CODE_SIZE();
            if (code_size != prev_code_size)
            {
                TDEFL_RLE_PREV_CODE_SIZE();
                d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1); packed_code_sizes[num_packed_code_sizes++] = code_size;
            }
            else if (++rle_repeat_count == 6)
            {
                TDEFL_RLE_PREV_CODE_SIZE();
            }
        }
        prev_code_size = code_size;
    }
    if (rle_repeat_count) { TDEFL_RLE_PREV_CODE_SIZE(); } else { TDEFL_RLE_ZERO_CODE_SIZE(); }

    tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE);

    TDEFL_PUT_BITS(2, 2); // BTYPE = 10: dynamic Huffman

    TDEFL_PUT_BITS(num_lit_codes - 257, 5);
    TDEFL_PUT_BITS(num_dist_codes - 1, 5);

    for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) break;
    num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); TDEFL_PUT_BITS(num_bit_lengths - 4, 4);
    for (i = 0; (int)i < num_bit_lengths; i++) TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3);

    // Emit the RLE-packed code sizes themselves, Huffman-coded with table 2.
    for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes; )
    {
        mz_uint code = packed_code_sizes[packed_code_sizes_index++]; MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2);
        TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]);
        if (code >= 16) TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]);
    }
}

// Emits the header of a static-Huffman block using the fixed code sizes from RFC 1951, section 3.2.6.
static void tdefl_start_static_block(tdefl_compressor *d)
{
    mz_uint i;
    mz_uint8 *p = &d->m_huff_code_sizes[0][0];

    for (i = 0; i <= 143; ++i) *p++ = 8;
    for ( ; i <= 255; ++i) *p++ = 9;
    for ( ; i <= 279; ++i) *p++ = 7;
    for ( ; i <= 287; ++i) *p++ = 8;

    memset(d->m_huff_code_sizes[1], 5, 32);

    tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE);
    tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE);

    TDEFL_PUT_BITS(1, 2); // BTYPE = 01: fixed Huffman
}

// mz_bitmasks[n] == (1 << n) - 1, i.e. a mask of the low n bits.
static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF };

#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS
// Fast path: encodes the buffered LZ codes using a local 64-bit bit buffer and
// unaligned 64-bit stores (little-endian only). Returns MZ_FALSE on output overflow.
static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d)
{
    mz_uint flags;
    mz_uint8 *pLZ_codes;
    mz_uint8 *pOutput_buf = d->m_pOutput_buf;
    mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf;
    mz_uint64 bit_buffer = d->m_bit_buffer;
    mz_uint bits_in = d->m_bits_in;

#define TDEFL_PUT_BITS_FAST(b, l) { bit_buffer |= (((mz_uint64)(b)) << bits_in); bits_in += (l); }

    // Each flag byte covers the next 8 LZ records: bit set = match (3 bytes), clear = literal (1 byte).
    flags = 1;
    for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1)
    {
        if (flags == 1)
            flags = *pLZ_codes++ | 0x100;

        if (flags & 1)
        {
            mz_uint s0, s1, n0, n1, sym, num_extra_bits;
            mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16 *)(pLZ_codes + 1); pLZ_codes += 3;

            MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
            TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
            TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]);

            // This sequence coaxes MSVC into using cmov's vs. jmp's.
            s0 = s_tdefl_small_dist_sym[match_dist & 511];
            n0 = s_tdefl_small_dist_extra[match_dist & 511];
            s1 = s_tdefl_large_dist_sym[match_dist >> 8];
            n1 = s_tdefl_large_dist_extra[match_dist >> 8];
            sym = (match_dist < 512) ? s0 : s1;
            num_extra_bits = (match_dist < 512) ? n0 : n1;

            MZ_ASSERT(d->m_huff_code_sizes[1][sym]);
            TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]);
            TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits);
        }
        else
        {
            // Up to three consecutive literals are emitted per iteration to amortize the flush below.
            mz_uint lit = *pLZ_codes++;
            MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
            TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);

            if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end))
            {
                flags >>= 1;
                lit = *pLZ_codes++;
                MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
                TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);

                if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end))
                {
                    flags >>= 1;
                    lit = *pLZ_codes++;
                    MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
                    TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
                }
            }
        }

        if (pOutput_buf >= d->m_pOutput_buf_end)
            return MZ_FALSE;

        // Flush whole bytes of the 64-bit bit buffer with a single unaligned store.
        *(mz_uint64*)pOutput_buf = bit_buffer;
        pOutput_buf += (bits_in >> 3);
        bit_buffer >>= (bits_in & ~7);
        bits_in &= 7;
    }

#undef TDEFL_PUT_BITS_FAST

    d->m_pOutput_buf = pOutput_buf;
    d->m_bits_in = 0;
    d->m_bit_buffer = 0;

    // Drain any remaining bits through the generic slow-path macro.
    while (bits_in)
    {
        mz_uint32 n = MZ_MIN(bits_in, 16);
        TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n);
        bit_buffer >>= n;
        bits_in -= n;
    }

    TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); // end-of-block

    return (d->m_pOutput_buf < d->m_pOutput_buf_end);
}
#else
// Portable path: encodes the buffered LZ codes byte-at-a-time via TDEFL_PUT_BITS.
static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d)
{
    mz_uint flags;
    mz_uint8 *pLZ_codes;

    flags = 1;
    for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1)
    {
        if (flags == 1)
            flags = *pLZ_codes++ | 0x100;
        if (flags & 1)
        {
            mz_uint sym, num_extra_bits;
            mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); pLZ_codes += 3;

            MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
            TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
            TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]);

            if (match_dist < 512)
            {
                sym = s_tdefl_small_dist_sym[match_dist]; num_extra_bits = s_tdefl_small_dist_extra[match_dist];
            }
            else
            {
                sym = s_tdefl_large_dist_sym[match_dist >> 8]; num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8];
            }
            MZ_ASSERT(d->m_huff_code_sizes[1][sym]);
            TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]);
            TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits);
        }
        else
        {
            mz_uint lit = *pLZ_codes++;
            MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
            TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
        }
    }

    TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); // end-of-block

    return (d->m_pOutput_buf < d->m_pOutput_buf_end);
}
#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS

// Emits one complete DEFLATE block header + body from the buffered LZ codes.
static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block)
{
    if (static_block)
        tdefl_start_static_block(d);
    else
        tdefl_start_dynamic_block(d);
    return tdefl_compress_lz_codes(d);
}

// Flushes the currently buffered LZ codes as one DEFLATE block, choosing between
// dynamic, static and stored (raw) encodings, then hands the bytes to the user's
// callback or output buffer. Returns the number of output bytes still pending
// (non-zero only when the caller's output buffer filled up), or a negative
// tdefl_status on callback failure.
static int tdefl_flush_block(tdefl_compressor *d, int flush)
{
    mz_uint saved_bit_buf, saved_bits_in;
    mz_uint8 *pSaved_output_buf;
    mz_bool comp_block_succeeded = MZ_FALSE;
    int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size;
    // Compress straight into the user's buffer when it is big enough; otherwise into the internal bounce buffer.
    mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? ((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf;

    d->m_pOutput_buf = pOutput_buf_start;
    d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16;

    MZ_ASSERT(!d->m_output_flush_remaining);
    d->m_output_flush_ofs = 0;
    d->m_output_flush_remaining = 0;

    // Right-align the partially filled flags byte; drop it entirely if it is empty.
    *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left);
    d->m_pLZ_code_buf -= (d->m_num_flags_left == 8);

    if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index))
    {
        TDEFL_PUT_BITS(0x78, 8); TDEFL_PUT_BITS(0x01, 8); // zlib CMF/FLG header (RFC 1950)
    }

    TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); // BFINAL bit

    // Remember the output position so the block can be re-emitted if it expands.
    pSaved_output_buf = d->m_pOutput_buf; saved_bit_buf = d->m_bit_buffer; saved_bits_in = d->m_bits_in;

    if (!use_raw_block)
        comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48));

    // If the block gets expanded, forget the current contents of the output buffer and send a raw block instead.
    if ( ((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) &&
         ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size) )
    {
        mz_uint i; d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
        TDEFL_PUT_BITS(0, 2); // BTYPE = 00: stored
        if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); }
        // LEN then NLEN (ones' complement), per RFC 1951, section 3.2.4.
        for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF)
        {
            TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16);
        }
        for (i = 0; i < d->m_total_lz_bytes; ++i)
        {
            TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8);
        }
    }
    // Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes.
    else if (!comp_block_succeeded)
    {
        d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
        tdefl_compress_block(d, MZ_TRUE);
    }

    if (flush)
    {
        if (flush == TDEFL_FINISH)
        {
            if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); }
            if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { mz_uint i, a = d->m_adler32; for (i = 0; i < 4; i++) { TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); a <<= 8; } }
        }
        else
        {
            // Full flush: append an empty stored block so the stream is byte-aligned and decodable so far.
            mz_uint i, z = 0; TDEFL_PUT_BITS(0, 3); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, z ^= 0xFFFF) { TDEFL_PUT_BITS(z & 0xFFFF, 16); }
        }
    }

    MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end);

    // Reset per-block state for the next block.
    memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0);
    memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1);

    d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; d->m_total_lz_bytes = 0; d->m_block_index++;

    if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0)
    {
        if (d->m_pPut_buf_func)
        {
            *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
            if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user))
                return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED);
        }
        else if (pOutput_buf_start == d->m_output_buf)
        {
            // Copy what fits into the user's buffer; remember the rest for later flushing.
            int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs));
            memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy);
            d->m_out_buf_ofs += bytes_to_copy;
            if ((n -= bytes_to_copy) != 0)
            {
                d->m_output_flush_ofs = bytes_to_copy;
                d->m_output_flush_remaining = n;
            }
        }
        else
        {
            d->m_out_buf_ofs += n;
        }
    }

    return d->m_output_flush_remaining;
}

#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16*)(p)
// Hash-chain match finder (unaligned-load variant): searches the dictionary for
// the longest match at lookahead_pos, comparing 16 bits at a time. On success
// updates *pMatch_dist/*pMatch_len; only improves on the incoming *pMatch_len.
static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len)
{
    mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len;
    mz_uint num_probes_left = d->m_max_probes[match_len >= 32];
    const mz_uint16 *s = (const mz_uint16*)(d->m_dict + pos), *p, *q;
    // c01 = the two bytes a candidate must match at the current best length's tail; s01 = first two bytes at pos.
    mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD(s);
    MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return;
    for ( ; ; )
    {
        for ( ; ; )
        {
            if (--num_probes_left == 0) return;
            #define TDEFL_PROBE \
                next_probe_pos = d->m_next[probe_pos]; \
                if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \
                probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \
                if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) break;
            TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE;
        }
        if (!dist) break; q = (const mz_uint16*)(d->m_dict + probe_pos); if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; p = s; probe_len = 32;
        // Compare up to 32 word-pairs (TDEFL_MAX_MATCH_LEN bytes) two bytes at a time.
        do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) &&
                       (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) );
        if (!probe_len)
        {
            // All 32 word-pairs matched: maximum possible match length reached.
            *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, (mz_uint)TDEFL_MAX_MATCH_LEN); break;
        }
        else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len)
        {
            *pMatch_dist = dist; if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) break;
            c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]);
        }
    }
}
#else
static MZ_FORCEINLINE
void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint8 *s = d->m_dict + pos, *p, *q; + mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; + #define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; p = s; q = d->m_dict + probe_pos; for (probe_len = 0; probe_len < max_match_len; probe_len++) if (*p++ != *q++) break; + if (probe_len > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = probe_len) == max_match_len) return; + c0 = d->m_dict[pos + match_len]; c1 = d->m_dict[pos + match_len - 1]; + } + } +} +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +static mz_bool tdefl_compress_fast(tdefl_compressor *d) +{ + // Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. 
+ mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left; + mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; + mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + + while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) + { + const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; + mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); + d->m_src_buf_left -= num_bytes_to_process; + lookahead_size += num_bytes_to_process; + + while (num_bytes_to_process) + { + mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); + memcpy(d->m_dict + dst_pos, d->m_pSrc, n); + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); + d->m_pSrc += n; + dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; + num_bytes_to_process -= n; + } + + dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); + if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) break; + + while (lookahead_size >= 4) + { + mz_uint cur_match_dist, cur_match_len = 1; + mz_uint8 *pCur_dict = d->m_dict + cur_pos; + mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; + mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK; + mz_uint probe_pos = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)lookahead_pos; + + if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((*(const mz_uint32 *)(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) + { + const mz_uint16 *p = (const mz_uint16 *)pCur_dict; + const mz_uint16 *q = (const mz_uint16 
*)(d->m_dict + probe_pos); + mz_uint32 probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); + if (!probe_len) + cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; + + if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U))) + { + cur_match_len = 1; + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + else + { + mz_uint32 s0, s1; + cur_match_len = MZ_MIN(cur_match_len, lookahead_size); + + MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); + + cur_match_dist--; + + pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); + *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; + pLZ_code_buf += 3; + *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); + + s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; + s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; + d->m_huff_count[1][(cur_match_dist < 512) ? 
s0 : s1]++; + + d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; + } + } + else + { + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + total_lz_bytes += cur_match_len; + lookahead_pos += cur_match_len; + dict_size = MZ_MIN(dict_size + cur_match_len, (mz_uint)TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; + MZ_ASSERT(lookahead_size >= cur_match_len); + lookahead_size -= cur_match_len; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + + while (lookahead_size) + { + mz_uint8 lit = d->m_dict[cur_pos]; + + total_lz_bytes++; + *pLZ_code_buf++ = lit; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + d->m_huff_count[0][lit]++; + + lookahead_pos++; + dict_size = MZ_MIN(dict_size + 1, (mz_uint)TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + lookahead_size--; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + } + + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + return MZ_TRUE; +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + +static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit) +{ + d->m_total_lz_bytes++; + *d->m_pLZ_code_buf++ = lit; + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + d->m_huff_count[0][lit]++; +} + +static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist) +{ + mz_uint32 s0, s1; + + MZ_ASSERT((match_len >= 
TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE)); + + d->m_total_lz_bytes += match_len; + + d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); + + match_dist -= 1; + d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); + d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); d->m_pLZ_code_buf += 3; + + *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + + s0 = s_tdefl_small_dist_sym[match_dist & 511]; s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; + d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++; + + if (match_len >= TDEFL_MIN_MATCH_LEN) d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; +} + +static mz_bool tdefl_compress_normal(tdefl_compressor *d) +{ + const mz_uint8 *pSrc = d->m_pSrc; size_t src_buf_left = d->m_src_buf_left; + tdefl_flush flush = d->m_flush; + + while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) + { + mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; + // Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN. 
+ if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) + { + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; + mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); + const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; + src_buf_left -= num_bytes_to_process; + d->m_lookahead_size += num_bytes_to_process; + while (pSrc != pSrc_end) + { + mz_uint8 c = *pSrc++; d->m_dict[dst_pos] = c; if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; ins_pos++; + } + } + else + { + while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + { + mz_uint8 c = *pSrc++; + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + src_buf_left--; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) + { + mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; + mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + } + } + } + d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); + if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + break; + + 
// Simple lazy/greedy parsing state machine. + len_to_move = 1; cur_match_dist = 0; cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) + { + if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) + { + mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; + cur_match_len = 0; while (cur_match_len < d->m_lookahead_size) { if (d->m_dict[cur_pos + cur_match_len] != c) break; cur_match_len++; } + if (cur_match_len < TDEFL_MIN_MATCH_LEN) cur_match_len = 0; else cur_match_dist = 1; + } + } + else + { + tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len); + } + if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) + { + cur_match_dist = cur_match_len = 0; + } + if (d->m_saved_match_len) + { + if (cur_match_len > d->m_saved_match_len) + { + tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); + if (cur_match_len >= 128) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + d->m_saved_match_len = 0; len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[cur_pos]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + } + else + { + tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); + len_to_move = d->m_saved_match_len - 1; d->m_saved_match_len = 0; + } + } + else if (!cur_match_dist) + tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); + else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128)) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; 
d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + // Move the lookahead forward by len_to_move bytes. + d->m_lookahead_pos += len_to_move; + MZ_ASSERT(d->m_lookahead_size >= len_to_move); + d->m_lookahead_size -= len_to_move; + d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE); + // Check if it's time to flush the current LZ codes to the internal output buffer. + if ( (d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || + ( (d->m_total_lz_bytes > 31*1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) ) + { + int n; + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + } + } + + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + return MZ_TRUE; +} + +static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) +{ + if (d->m_pIn_buf_size) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + } + + if (d->m_pOut_buf_size) + { + size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n); + d->m_output_flush_ofs += (mz_uint)n; + d->m_output_flush_remaining -= (mz_uint)n; + d->m_out_buf_ofs += n; + + *d->m_pOut_buf_size = d->m_out_buf_ofs; + } + + return (d->m_finished && !d->m_output_flush_remaining) ? 
TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush) +{ + if (!d) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return TDEFL_STATUS_BAD_PARAM; + } + + d->m_pIn_buf = pIn_buf; d->m_pIn_buf_size = pIn_buf_size; + d->m_pOut_buf = pOut_buf; d->m_pOut_buf_size = pOut_buf_size; + d->m_pSrc = (const mz_uint8 *)(pIn_buf); d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0; + d->m_out_buf_ofs = 0; + d->m_flush = flush; + + if ( ((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) || + (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf) ) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); + } + d->m_wants_to_finish |= (flush == TDEFL_FINISH); + + if ((d->m_output_flush_remaining) || (d->m_finished)) + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && + ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && + ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0)) + { + if (!tdefl_compress_fast(d)) + return d->m_prev_return_status; + } + else +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + { + if (!tdefl_compress_normal(d)) + return d->m_prev_return_status; + } + + if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf)) + d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf); + + if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && 
(!d->m_output_flush_remaining)) + { + if (tdefl_flush_block(d, flush) < 0) + return d->m_prev_return_status; + d->m_finished = (flush == TDEFL_FINISH); + if (flush == TDEFL_FULL_FLUSH) { MZ_CLEAR_OBJ(d->m_hash); MZ_CLEAR_OBJ(d->m_next); d->m_dict_size = 0; } + } + + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); +} + +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush) +{ + MZ_ASSERT(d->m_pPut_buf_func); return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); +} + +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user; + d->m_flags = (mz_uint)(flags); d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; + d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; + if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); + d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; + d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; + d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY; + d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1; + d->m_pIn_buf = NULL; d->m_pOut_buf = NULL; + d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL; + d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0; + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * 
TDEFL_MAX_HUFF_SYMBOLS_1); + return TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) +{ + return d->m_prev_return_status; +} + +mz_uint32 tdefl_get_adler32(tdefl_compressor *d) +{ + return d->m_adler32; +} + +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + tdefl_compressor *pComp; mz_bool succeeded; if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; + pComp = (tdefl_compressor*)MZ_MALLOC(sizeof(tdefl_compressor)); if (!pComp) return MZ_FALSE; + succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY); + succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE); + MZ_FREE(pComp); return succeeded; +} + +typedef struct +{ + size_t m_size, m_capacity; + mz_uint8 *m_pBuf; + mz_bool m_expandable; +} tdefl_output_buffer; + +static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser) +{ + tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; + size_t new_size = p->m_size + len; + if (new_size > p->m_capacity) + { + size_t new_capacity = p->m_capacity; mz_uint8 *pNew_buf; if (!p->m_expandable) return MZ_FALSE; + do { new_capacity = MZ_MAX(128U, new_capacity << 1U); } while (new_size > new_capacity); + pNew_buf = (mz_uint8*)MZ_REALLOC(p->m_pBuf, new_capacity); if (!pNew_buf) return MZ_FALSE; + p->m_pBuf = pNew_buf; p->m_capacity = new_capacity; + } + memcpy((mz_uint8*)p->m_pBuf + p->m_size, pBuf, len); p->m_size = new_size; + return MZ_TRUE; +} + +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_len) return MZ_FALSE; else *pOut_len = 0; + out_buf.m_expandable = MZ_TRUE; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return NULL; + *pOut_len = 
out_buf.m_size; return out_buf.m_pBuf; +} + +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_buf) return 0; + out_buf.m_pBuf = (mz_uint8*)pOut_buf; out_buf.m_capacity = out_buf_len; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return 0; + return out_buf.m_size; +} + +#ifndef MINIZ_NO_ZLIB_APIS +static const mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + +// level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput to fall off a cliff on some files). +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) +{ + mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); + if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; + + if (!level) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; + else if (strategy == MZ_FILTERED) comp_flags |= TDEFL_FILTER_MATCHES; + else if (strategy == MZ_HUFFMAN_ONLY) comp_flags &= ~TDEFL_MAX_PROBES_MASK; + else if (strategy == MZ_FIXED) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; + else if (strategy == MZ_RLE) comp_flags |= TDEFL_RLE_MATCHES; + + return comp_flags; +} +#endif //MINIZ_NO_ZLIB_APIS + +#ifdef _MSC_VER +#pragma warning (push) +#pragma warning (disable:4204) // nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal) +#endif + +// Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at +// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. +// This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck. 
+void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip) +{ + // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined. + static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0; + if (!pComp) return NULL; + MZ_CLEAR_OBJ(out_buf); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57+MZ_MAX(64, (1+bpl)*h); if (NULL == (out_buf.m_pBuf = (mz_uint8*)MZ_MALLOC(out_buf.m_capacity))) { MZ_FREE(pComp); return NULL; } + // write dummy header + for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); + // compress image data + tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER | (level <= 3 ? TDEFL_GREEDY_PARSING_FLAG : 0)); + for (y = 0; y < h; ++y) { tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); tdefl_compress_buffer(pComp, (mz_uint8*)pImage + (flip ? 
(h - 1 - y) : y) * bpl, bpl, TDEFL_NO_FLUSH); } + if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) { MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + // write real header + *pLen_out = out_buf.m_size-41; + { + static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06}; + mz_uint8 pnghdr[41]={0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a,0x00,0x00,0x00,0x0d,0x49,0x48,0x44,0x52, + 0,0,(mz_uint8)(w>>8),(mz_uint8)w,0,0,(mz_uint8)(h>>8),(mz_uint8)h,8,chans[num_chans],0,0,0,0,0,0,0, + (mz_uint8)(*pLen_out>>24),(mz_uint8)(*pLen_out>>16),(mz_uint8)(*pLen_out>>8),(mz_uint8)*pLen_out,0x49,0x44,0x41,0x54}; + c=(mz_uint32)mz_crc32(MZ_CRC32_INIT,pnghdr+12,17); for (i=0; i<4; ++i, c<<=8) ((mz_uint8*)(pnghdr+29))[i]=(mz_uint8)(c>>24); + memcpy(out_buf.m_pBuf, pnghdr, 41); + } + // write footer (IDAT CRC-32, followed by IEND chunk) + if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { *pLen_out = 0; MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT,out_buf.m_pBuf+41-4, *pLen_out+4); for (i=0; i<4; ++i, c<<=8) (out_buf.m_pBuf+out_buf.m_size-16)[i] = (mz_uint8)(c >> 24); + // compute final size of file, grab compressed data buffer and return + *pLen_out += 57; MZ_FREE(pComp); return out_buf.m_pBuf; +} +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out) +{ + // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out) + return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE); +} + +#ifdef _MSC_VER +#pragma warning (pop) +#endif + +} // namespace buminiz + +#endif // MINIZ_HEADER_FILE_ONLY + diff --git a/thirdparty/basisu/encoder/basisu_ocl_kernels.h b/thirdparty/basisu/encoder/basisu_ocl_kernels.h new file mode 100644 index 000000000..46db61bf1 
--- /dev/null +++ b/thirdparty/basisu/encoder/basisu_ocl_kernels.h @@ -0,0 +1,1439 @@ +unsigned char ocl_kernels_cl[] = { + 0x2f, 0x2f, 0x23, 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x5f, 0x44, 0x45, 0x42, 0x55, 0x47, 0x0d, 0x0a, 0x0d, 0x0a, 0x23, 0x69, 0x66, 0x6e, 0x64, 0x65, 0x66, 0x20, 0x4e, 0x55, 0x4c, 0x4c, + 0x0d, 0x0a, 0x09, 0x23, 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x4e, 0x55, 0x4c, 0x4c, 0x20, 0x30, 0x4c, 0x0d, 0x0a, 0x23, 0x65, 0x6e, 0x64, 0x69, 0x66, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, + 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x63, 0x68, 0x61, 0x72, 0x20, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x3b, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x75, 0x63, 0x68, 0x61, + 0x72, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x68, 0x6f, 0x72, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x31, + 0x36, 0x5f, 0x74, 0x3b, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x75, 0x73, 0x68, 0x6f, 0x72, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x3b, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x69, + 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x3b, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x75, 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x75, 0x63, 0x68, 0x61, 0x72, 0x34, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x23, 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x55, 0x49, 0x4e, 0x54, 0x33, 0x32, 0x5f, 0x4d, 0x41, 0x58, 0x20, 
0x30, 0x78, 0x46, 0x46, 0x46, 0x46, 0x46, 0x46, 0x46, 0x46, 0x55, + 0x4c, 0x0d, 0x0a, 0x23, 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x5f, 0x4d, 0x41, 0x58, 0x20, 0x4c, 0x4f, 0x4e, 0x47, 0x5f, 0x4d, 0x41, 0x58, 0x0d, 0x0a, 0x23, + 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x55, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x5f, 0x4d, 0x41, 0x58, 0x20, 0x55, 0x4c, 0x4f, 0x4e, 0x47, 0x5f, 0x4d, 0x41, 0x58, 0x0d, 0x0a, 0x0d, 0x0a, 0x69, + 0x6e, 0x74, 0x20, 0x73, 0x71, 0x75, 0x61, 0x72, 0x65, 0x69, 0x28, 0x69, 0x6e, 0x74, 0x20, 0x61, 0x29, 0x20, 0x7b, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x61, 0x20, 0x2a, 0x20, 0x61, + 0x3b, 0x20, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x23, 0x69, 0x66, 0x64, 0x65, 0x66, 0x20, 0x5f, 0x44, 0x45, 0x42, 0x55, 0x47, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x76, 0x6f, + 0x69, 0x64, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x78, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x61, 0x6e, 0x74, 0x20, 0x63, 0x68, 0x61, 0x72, 0x20, 0x2a, 0x70, 0x4d, 0x73, 0x67, 0x2c, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, + 0x09, 0x69, 0x66, 0x20, 0x28, 0x21, 0x78, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x66, 0x28, 0x22, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x29, 0x20, 0x66, 0x61, + 0x69, 0x6c, 0x65, 0x64, 0x20, 0x6f, 0x6e, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x25, 0x69, 0x3a, 0x20, 0x25, 0x73, 0x5c, 0x6e, 0x22, 0x2c, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x2c, 0x20, 0x70, 0x4d, + 0x73, 0x67, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x23, 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x78, 0x29, 0x20, 0x69, 0x6e, 0x74, + 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x78, 0x2c, 0x20, 0x23, 0x78, 0x2c, 0x20, 0x5f, 0x5f, 0x4c, 0x49, 0x4e, 0x45, 0x5f, 0x5f, 0x29, 0x0d, 0x0a, 
0x23, + 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x09, 0x23, 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x78, 0x29, 0x0d, 0x0a, 0x23, 0x65, 0x6e, 0x64, 0x69, 0x66, + 0x0d, 0x0a, 0x0d, 0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x69, 0x6e, 0x74, 0x20, + 0x78, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x78, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, + 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x69, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, + 0x6c, 0x61, 0x67, 0x28, 0x69, 0x6e, 0x74, 0x20, 0x78, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x2a, 0x70, 0x44, 0x69, 0x64, 0x5f, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, + 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x78, 0x20, 0x3c, 0x20, 0x30, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x2a, 0x70, 0x44, 0x69, 0x64, 0x5f, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x20, + 0x3d, 0x20, 0x74, 0x72, 0x75, 0x65, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x65, 0x6c, 0x73, 0x65, 0x20, + 0x69, 0x66, 0x20, 0x28, 0x78, 0x20, 0x3e, 0x20, 0x32, 0x35, 0x35, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x2a, 0x70, 0x44, 0x69, 0x64, 0x5f, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x20, + 0x3d, 0x20, 0x74, 0x72, 0x75, 0x65, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x32, 0x35, 0x35, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x72, + 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x29, 0x28, 0x78, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, + 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 
0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, + 0x29, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x74, 0x61, 0x67, 0x0d, + 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x3b, 0x0d, 0x0a, 0x09, + 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x5f, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, + 0x70, 0x65, 0x72, 0x6d, 0x73, 0x3b, 0x0d, 0x0a, 0x7d, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, + 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x74, 0x61, 0x67, 0x0d, + 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x5b, 0x31, 0x36, 0x5d, 0x3b, 0x20, 0x2f, 0x2f, + 0x20, 0x5b, 0x79, 0x2a, 0x34, 0x2b, 0x78, 0x5d, 0x0d, 0x0a, 0x7d, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, + 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, + 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x65, 0x31, 0x2c, 0x20, 0x63, 0x6f, 
0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x65, 0x32, 0x2c, 0x20, 0x62, + 0x6f, 0x6f, 0x6c, 0x20, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x29, 0x0d, + 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x23, 0x69, 0x66, 0x20, 0x30, 0x0d, 0x0a, 0x09, 0x09, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x72, 0x67, 0x62, 0x20, 0x3d, + 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33, 0x29, 0x28, 0x65, 0x31, 0x2e, 0x78, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x78, 0x2c, 0x20, 0x65, 0x31, 0x2e, 0x79, 0x20, 0x2d, 0x20, 0x65, 0x32, + 0x2e, 0x79, 0x2c, 0x20, 0x65, 0x31, 0x2e, 0x7a, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x7a, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33, 0x20, 0x64, 0x65, + 0x6c, 0x74, 0x61, 0x5f, 0x79, 0x63, 0x62, 0x63, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x79, 0x63, 0x62, 0x63, 0x72, 0x2e, 0x78, 0x20, 0x3d, 0x20, 0x64, 0x6f, + 0x74, 0x28, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x72, 0x67, 0x62, 0x2c, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33, 0x29, 0x28, 0x2e, 0x32, 0x31, 0x32, 0x36, 0x66, 0x2c, 0x20, 0x2e, 0x37, + 0x31, 0x35, 0x32, 0x66, 0x2c, 0x20, 0x2e, 0x30, 0x37, 0x32, 0x32, 0x66, 0x29, 0x29, 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x79, 0x0d, 0x0a, 0x09, 0x09, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x79, 0x63, + 0x62, 0x63, 0x72, 0x2e, 0x79, 0x20, 0x3d, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x72, 0x67, 0x62, 0x2e, 0x78, 0x20, 0x2d, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x79, 0x63, 0x62, 0x63, + 0x72, 0x2e, 0x78, 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x63, 0x72, 0x0d, 0x0a, 0x09, 0x09, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x79, 0x63, 0x62, 0x63, 0x72, 0x2e, 0x7a, 0x20, 0x3d, 0x20, 0x64, 0x65, + 0x6c, 0x74, 0x61, 0x5f, 0x72, 0x67, 0x62, 0x2e, 0x7a, 0x20, 0x2d, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x79, 0x63, 0x62, 0x63, 0x72, 0x2e, 0x78, 0x3b, 0x20, 0x2f, 
0x2f, 0x20, 0x63, 0x62, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x79, 0x63, 0x62, 0x63, 0x72, 0x20, 0x2a, 0x3d, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x79, 0x63, 0x62, 0x63, 0x72, + 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x64, 0x20, 0x3d, 0x20, 0x64, 0x6f, 0x74, 0x28, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x79, 0x63, 0x62, 0x63, 0x72, + 0x2c, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33, 0x29, 0x28, 0x31, 0x2e, 0x30, 0x66, 0x2c, 0x20, 0x30, 0x2e, 0x32, 0x30, 0x33, 0x31, 0x32, 0x35, 0x66, 0x2c, 0x20, 0x30, 0x2e, 0x30, 0x32, + 0x33, 0x34, 0x33, 0x37, 0x35, 0x66, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x61, 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x77, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x77, 0x3b, 0x0d, 0x0a, 0x09, + 0x09, 0x09, 0x64, 0x20, 0x2b, 0x3d, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x61, 0x20, 0x2a, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x61, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x09, 0x64, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x64, 0x20, 0x2a, 0x20, 0x32, 0x35, 0x36, 0x2e, 0x30, 0x66, 0x20, 0x2b, 0x20, 0x2e, 0x35, 0x66, 0x2c, 0x20, + 0x30, 0x2e, 0x30, 0x66, 0x2c, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x55, 0x49, 0x4e, 0x54, 0x33, 0x32, 0x5f, 0x4d, 0x41, 0x58, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x72, + 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x64, 0x29, 0x3b, 0x0d, 0x0a, 0x23, 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x09, 0x09, 0x2f, 0x2f, 0x20, 0x54, 0x68, + 0x69, 0x73, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x43, 0x50, 0x55, 0x20, 0x63, 0x6f, 0x64, 0x65, 0x2c, 0x20, 0x77, 0x68, 0x69, 0x63, 0x68, 0x20, 0x69, + 0x73, 0x20, 0x75, 0x73, 0x65, 
0x66, 0x75, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x74, 0x65, 0x73, 0x74, 0x69, 0x6e, 0x67, 0x2e, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x72, 0x20, + 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x78, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x67, 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x79, 0x20, + 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x79, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x7a, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x7a, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x6c, 0x20, 0x3d, 0x20, 0x64, 0x72, 0x20, 0x2a, 0x20, 0x32, 0x37, 0x20, 0x2b, 0x20, 0x64, 0x67, + 0x20, 0x2a, 0x20, 0x39, 0x32, 0x20, 0x2b, 0x20, 0x64, 0x62, 0x20, 0x2a, 0x20, 0x39, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x63, 0x72, 0x20, + 0x3d, 0x20, 0x64, 0x72, 0x20, 0x2a, 0x20, 0x31, 0x32, 0x38, 0x20, 0x2d, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x6c, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x65, 0x6c, + 0x74, 0x61, 0x5f, 0x63, 0x62, 0x20, 0x3d, 0x20, 0x64, 0x62, 0x20, 0x2a, 0x20, 0x31, 0x32, 0x38, 0x20, 0x2d, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x6c, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, + 0x09, 0x75, 0x69, 0x6e, 0x74, 0x20, 0x69, 0x64, 0x20, 0x3d, 0x20, 0x28, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x6c, 0x20, 0x2a, 0x20, 0x64, 0x65, 0x6c, + 0x74, 0x61, 0x5f, 0x6c, 0x29, 0x20, 0x3e, 0x3e, 0x20, 0x37, 0x55, 0x29, 0x20, 0x2b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x28, 0x28, 0x28, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x64, 0x65, 0x6c, + 0x74, 0x61, 0x5f, 0x63, 0x72, 0x20, 0x2a, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x63, 0x72, 0x29, 0x20, 0x3e, 0x3e, 0x20, 0x37, 0x55, 0x29, 0x20, 0x2a, 0x20, 0x32, 0x36, 0x55, 0x29, 0x20, + 0x3e, 0x3e, 0x20, 0x37, 0x55, 0x29, 0x20, 0x2b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x28, 0x28, 
0x28, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x63, 0x62, 0x20, + 0x2a, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x63, 0x62, 0x29, 0x20, 0x3e, 0x3e, 0x20, 0x37, 0x55, 0x29, 0x20, 0x2a, 0x20, 0x33, 0x55, 0x29, 0x20, 0x3e, 0x3e, 0x20, 0x37, 0x55, 0x29, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x61, + 0x20, 0x3d, 0x20, 0x28, 0x65, 0x31, 0x2e, 0x77, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x77, 0x29, 0x20, 0x3c, 0x3c, 0x20, 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x64, 0x20, 0x2b, 0x3d, + 0x20, 0x28, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x64, 0x61, 0x20, 0x2a, 0x20, 0x64, 0x61, 0x29, 0x20, 0x3e, 0x3e, 0x20, 0x37, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, + 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x69, 0x64, 0x3b, 0x0d, 0x0a, 0x23, 0x65, 0x6e, 0x64, 0x69, 0x66, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x65, + 0x6c, 0x73, 0x65, 0x20, 0x69, 0x66, 0x20, 0x28, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x72, 0x20, 0x3d, 0x20, 0x65, + 0x31, 0x2e, 0x78, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x67, 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x79, 0x20, 0x2d, 0x20, 0x65, + 0x32, 0x2e, 0x79, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x7a, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x7a, 0x3b, 0x09, 0x0d, 0x0a, + 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x61, 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x77, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x77, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, + 0x6e, 0x20, 0x64, 0x72, 0x20, 0x2a, 0x20, 0x64, 0x72, 0x20, 0x2b, 0x20, 0x64, 0x67, 0x20, 0x2a, 0x20, 0x64, 0x67, 0x20, 0x2b, 0x20, 0x64, 0x62, 0x20, 
0x2a, 0x20, 0x64, 0x62, 0x20, 0x2b, 0x20, + 0x64, 0x61, 0x20, 0x2a, 0x20, 0x64, 0x61, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, + 0x72, 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x78, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x67, 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, + 0x79, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, 0x79, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x31, 0x2e, 0x7a, 0x20, 0x2d, 0x20, 0x65, 0x32, 0x2e, + 0x7a, 0x3b, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x64, 0x72, 0x20, 0x2a, 0x20, 0x64, 0x72, 0x20, 0x2b, 0x20, 0x64, 0x67, 0x20, 0x2a, 0x20, 0x64, 0x67, 0x20, + 0x2b, 0x20, 0x64, 0x62, 0x20, 0x2a, 0x20, 0x64, 0x62, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x62, 0x69, 0x67, 0x20, 0x65, 0x6e, 0x64, 0x69, 0x61, 0x6e, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x36, 0x34, 0x3a, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x62, 0x69, 0x74, 0x20, 0x6f, 0x66, 0x73, 0x3a, 0x20, 0x20, 0x35, 0x36, 0x20, 0x20, 0x34, 0x38, 0x20, 0x20, 0x34, 0x30, 0x20, + 0x20, 0x33, 0x32, 0x20, 0x20, 0x32, 0x34, 0x20, 0x20, 0x31, 0x36, 0x20, 0x20, 0x20, 0x38, 0x20, 0x20, 0x20, 0x30, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6f, 0x66, + 0x73, 0x3a, 0x20, 0x62, 0x30, 0x2c, 0x20, 0x62, 0x31, 0x2c, 0x20, 0x62, 0x32, 0x2c, 0x20, 0x62, 0x33, 0x2c, 0x20, 0x62, 0x34, 0x2c, 0x20, 0x62, 0x35, 0x2c, 0x20, 0x62, 0x36, 0x2c, 0x20, 0x62, + 0x37, 0x20, 
0x0d, 0x0a, 0x09, 0x75, 0x6e, 0x69, 0x6f, 0x6e, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x75, 0x69, 0x6e, + 0x74, 0x36, 0x34, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x38, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x7d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x65, 0x6e, 0x75, 0x6d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x61, 0x6e, 0x74, 0x73, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x79, 0x74, 0x65, 0x73, 0x50, 0x65, 0x72, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x3d, + 0x20, 0x38, 0x55, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x42, 0x69, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x32, 0x55, 0x2c, + 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x20, 0x3d, 0x20, 0x31, 0x55, 0x20, 0x3c, 0x3c, 0x20, 0x63, + 0x45, 0x54, 0x43, 0x31, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x42, 0x69, 0x74, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, + 0x72, 0x4d, 0x61, 0x73, 0x6b, 0x20, 0x3d, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x20, 0x2d, 0x20, 0x31, 0x55, + 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x53, 0x68, 0x69, 0x66, 0x74, 0x20, 0x3d, 0x20, 0x32, 0x55, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, + 0x54, 0x43, 0x31, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x53, 0x69, 0x7a, 0x65, 0x20, 0x3d, 0x20, 0x31, 0x55, 0x20, 0x3c, 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x53, + 0x68, 0x69, 0x66, 0x74, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x45, 
0x54, 0x43, 0x31, 0x4c, 0x53, 0x42, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x49, 0x6e, 0x64, 0x69, 0x63, 0x65, + 0x73, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x30, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x4d, 0x53, 0x42, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x49, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x31, 0x36, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, + 0x43, 0x31, 0x46, 0x6c, 0x69, 0x70, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x33, 0x32, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x69, 0x66, + 0x66, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x33, 0x33, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x4d, + 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, 0x72, 0x4e, 0x75, 0x6d, 0x42, 0x69, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x33, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x49, 0x6e, 0x74, 0x65, 0x6e, + 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, 0x72, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x20, 0x3d, 0x20, 0x31, 0x20, 0x3c, 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x49, 0x6e, 0x74, 0x65, 0x6e, + 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, 0x72, 0x4e, 0x75, 0x6d, 0x42, 0x69, 0x74, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x52, 0x69, 0x67, 0x68, 0x74, 0x49, 0x6e, 0x74, + 0x65, 0x6e, 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, 0x72, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x33, 0x34, 0x2c, 0x0d, 0x0a, + 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x4c, 0x65, 0x66, 0x74, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, 0x72, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x42, 0x69, 0x74, 0x4f, + 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x33, 0x37, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x42, 0x61, 0x73, 
0x65, 0x2b, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x20, 0x65, 0x6e, + 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x20, 0x28, 0x35, 0x20, 0x62, 0x69, 0x74, 0x20, 0x62, 0x61, 0x73, 0x65, 0x73, 0x2c, 0x20, 0x33, 0x20, 0x62, 0x69, 0x74, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, + 0x29, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4e, 0x75, 0x6d, 0x42, 0x69, 0x74, 0x73, 0x20, 0x3d, 0x20, + 0x35, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4d, 0x61, 0x78, 0x20, 0x3d, 0x20, 0x31, 0x20, 0x3c, + 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4e, 0x75, 0x6d, 0x42, 0x69, 0x74, 0x73, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4e, 0x75, 0x6d, 0x42, 0x69, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x33, 0x2c, + 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x20, 0x3d, 0x20, 0x31, 0x20, 0x3c, 0x3c, 0x20, 0x63, 0x45, + 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4e, 0x75, 0x6d, 0x42, 0x69, 0x74, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, + 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4d, 0x61, 0x78, 0x20, 0x3d, 0x20, 0x31, 0x20, 0x3c, 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, + 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4e, 0x75, 0x6d, 0x42, 0x69, 0x74, 0x73, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, + 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x52, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x35, 0x39, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 
+ 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x47, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x35, 0x31, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, + 0x54, 0x43, 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x42, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x34, 0x33, 0x2c, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x33, 0x52, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x35, + 0x36, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x33, 0x47, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, + 0x3d, 0x20, 0x34, 0x38, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x33, 0x42, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, + 0x65, 0x74, 0x20, 0x3d, 0x20, 0x34, 0x30, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x41, 0x62, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x65, 0x20, 0x28, 0x6e, 0x6f, 0x6e, 0x2d, 0x64, 0x65, + 0x6c, 0x74, 0x61, 0x29, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x20, 0x28, 0x74, 0x77, 0x6f, 0x20, 0x34, 0x2d, 0x62, 0x69, 0x74, 0x20, 0x70, 0x65, 0x72, 0x20, 0x63, 0x6f, 0x6d, + 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x20, 0x62, 0x61, 0x73, 0x65, 0x73, 0x29, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, + 0x70, 0x4e, 0x75, 0x6d, 0x42, 0x69, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x34, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, + 0x70, 0x4d, 0x61, 0x78, 0x20, 0x3d, 0x20, 0x31, 0x20, 0x3c, 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x4e, 0x75, 0x6d, + 0x42, 0x69, 0x74, 0x73, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 
0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x52, 0x31, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, + 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x36, 0x30, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x47, 0x31, 0x42, 0x69, 0x74, 0x4f, + 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x35, 0x32, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x42, 0x31, 0x42, 0x69, + 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x34, 0x34, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, + 0x52, 0x32, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x35, 0x36, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, + 0x72, 0x34, 0x47, 0x32, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x34, 0x38, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, + 0x6c, 0x6f, 0x72, 0x34, 0x42, 0x32, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x34, 0x30, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, + 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x69, 0x6e, 0x20, 0x3d, 0x20, 0x2d, 0x34, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, + 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x61, 0x78, 0x20, 0x3d, 0x20, 0x33, 0x2c, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x3a, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, + 0x20, 0x30, 0x20, 0x20, 0x20, 0x31, 0x20, 0x20, 0x20, 0x32, 0x20, 0x20, 0x20, 0x33, 0x20, 0x20, 0x20, 0x34, 0x20, 0x20, 0x20, 0x35, 0x20, 0x20, 0x20, 0x36, 0x20, 0x20, 0x20, 0x37, 0x0d, 0x0a, + 0x09, 0x2f, 0x2f, 0x20, 0x30, 0x30, 0x30, 0x20, 0x30, 0x30, 0x31, 0x20, 0x30, 0x31, 0x30, 0x20, 0x30, 0x31, 0x31, 
0x20, 0x31, 0x30, 0x30, 0x20, 0x31, 0x30, 0x31, 0x20, 0x31, 0x31, 0x30, 0x20, + 0x31, 0x31, 0x31, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x30, 0x20, 0x20, 0x20, 0x31, 0x20, 0x20, 0x20, 0x32, 0x20, 0x20, 0x20, 0x33, 0x20, 0x20, 0x20, 0x2d, 0x34, 0x20, 0x20, 0x2d, 0x33, 0x20, + 0x20, 0x2d, 0x32, 0x20, 0x20, 0x2d, 0x31, 0x0d, 0x0a, 0x7d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x23, 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x20, 0x42, 0x41, 0x53, 0x49, 0x53, 0x55, 0x5f, 0x45, 0x54, + 0x43, 0x31, 0x5f, 0x43, 0x4c, 0x55, 0x53, 0x54, 0x45, 0x52, 0x5f, 0x46, 0x49, 0x54, 0x5f, 0x4f, 0x52, 0x44, 0x45, 0x52, 0x5f, 0x54, 0x41, 0x42, 0x4c, 0x45, 0x5f, 0x53, 0x49, 0x5a, 0x45, 0x20, + 0x28, 0x31, 0x36, 0x35, 0x29, 0x0d, 0x0a, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x7b, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, + 0x20, 0x6d, 0x5f, 0x76, 0x5b, 0x34, 0x5d, 0x3b, 0x20, 0x7d, 0x20, 0x67, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x66, 0x69, 0x74, 0x5f, 0x6f, 0x72, 0x64, 0x65, 0x72, 0x5f, 0x74, + 0x61, 0x62, 0x5b, 0x42, 0x41, 0x53, 0x49, 0x53, 0x55, 0x5f, 0x45, 0x54, 0x43, 0x31, 0x5f, 0x43, 0x4c, 0x55, 0x53, 0x54, 0x45, 0x52, 0x5f, 0x46, 0x49, 0x54, 0x5f, 0x4f, 0x52, 0x44, 0x45, 0x52, + 0x5f, 0x54, 0x41, 0x42, 0x4c, 0x45, 0x5f, 0x53, 0x49, 0x5a, 0x45, 0x5d, 0x20, 0x3d, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, + 0x20, 0x38, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, + 0x20, 0x36, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x37, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x37, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 
0x30, 0x2c, 0x20, + 0x38, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x37, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x36, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x30, + 0x2c, 0x20, 0x37, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x37, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, + 0x20, 0x32, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, + 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, + 0x31, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 
0x30, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x31, + 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, + 0x7b, 0x20, 0x36, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x36, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x38, 0x2c, + 0x20, 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x36, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, + 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x31, 0x2c, 0x20, + 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, + 0x32, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, + 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x35, + 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, + 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 
0x7b, 0x20, 0x31, 0x2c, 0x20, 0x37, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x36, 0x2c, 0x20, 0x30, 0x2c, + 0x20, 0x32, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x32, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x32, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, + 0x36, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, + 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x32, 0x2c, + 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x20, 0x7d, 
0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x34, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, + 0x33, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x32, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x33, + 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x2c, + 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, + 0x20, 0x32, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x32, 0x2c, 0x20, + 0x31, 0x2c, 0x20, 
0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x36, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x37, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x31, + 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, + 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x37, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, + 0x20, 0x30, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x32, 0x2c, 0x20, + 0x34, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x32, 
0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x33, + 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x37, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x32, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x38, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, + 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x35, 0x2c, 0x20, + 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, + 0x33, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, + 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, + 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, + 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 
0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x20, + 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x32, 0x2c, + 0x20, 0x32, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, + 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, + 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, + 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x36, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, + 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x32, + 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, + 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, + 0x20, 0x30, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 
0x20, 0x35, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, + 0x31, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x36, 0x2c, 0x20, 0x30, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, + 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, + 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, + 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x35, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x2c, 0x20, + 0x31, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 
0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, + 0x20, 0x7b, 0x20, 0x36, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x34, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x32, + 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, + 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x37, 0x2c, 0x20, 0x30, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, + 0x20, 0x32, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, + 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x36, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x34, 0x20, 0x7d, + 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, + 0x33, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x37, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, + 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x37, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x33, + 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x34, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x20, 
0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x31, + 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x33, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x34, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, + 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x2c, + 0x20, 0x36, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x30, 0x2c, + 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x36, 0x20, 0x7d, 0x20, 0x7d, 0x2c, 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x35, 0x20, 0x7d, 0x20, 0x7d, 0x2c, + 0x7b, 0x20, 0x7b, 0x20, 0x31, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x36, 0x20, 0x7d, 0x20, 0x7d, 0x0d, 0x0a, 0x7d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, + 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x67, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5b, 0x63, 0x45, 0x54, 0x43, 0x31, + 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, 0x72, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x5d, 0x5b, 0x63, 0x45, 0x54, 0x43, 0x31, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x5d, 0x20, 0x3d, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x2d, 0x38, 0x2c, 0x20, 0x20, 0x2d, 0x32, 0x2c, 0x20, 0x20, 0x20, 0x32, 0x2c, + 0x20, 0x20, 0x20, 0x38, 0x20, 0x7d, 0x2c, 0x20, 0x7b, 0x20, 0x2d, 0x31, 0x37, 0x2c, 0x20, 0x20, 0x2d, 0x35, 0x2c, 0x20, 0x20, 0x35, 0x2c, 0x20, 0x20, 0x31, 0x37, 0x20, 0x7d, 0x2c, 0x20, 0x7b, + 0x20, 0x2d, 0x32, 0x39, 0x2c, 0x20, 0x20, 0x2d, 0x39, 0x2c, 0x20, 0x20, 0x20, 0x39, 0x2c, 0x20, 0x20, 0x32, 0x39, 0x20, 0x7d, 0x2c, 0x20, 0x7b, 0x20, 0x20, 0x2d, 0x34, 0x32, 0x2c, 
0x20, 0x2d, + 0x31, 0x33, 0x2c, 0x20, 0x31, 0x33, 0x2c, 0x20, 0x20, 0x34, 0x32, 0x20, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x2d, 0x36, 0x30, 0x2c, 0x20, 0x2d, 0x31, 0x38, 0x2c, 0x20, 0x31, 0x38, 0x2c, + 0x20, 0x20, 0x36, 0x30, 0x20, 0x7d, 0x2c, 0x20, 0x7b, 0x20, 0x2d, 0x38, 0x30, 0x2c, 0x20, 0x2d, 0x32, 0x34, 0x2c, 0x20, 0x32, 0x34, 0x2c, 0x20, 0x20, 0x38, 0x30, 0x20, 0x7d, 0x2c, 0x20, 0x7b, + 0x20, 0x2d, 0x31, 0x30, 0x36, 0x2c, 0x20, 0x2d, 0x33, 0x33, 0x2c, 0x20, 0x33, 0x33, 0x2c, 0x20, 0x31, 0x30, 0x36, 0x20, 0x7d, 0x2c, 0x20, 0x7b, 0x20, 0x2d, 0x31, 0x38, 0x33, 0x2c, 0x20, 0x2d, + 0x34, 0x37, 0x2c, 0x20, 0x34, 0x37, 0x2c, 0x20, 0x31, 0x38, 0x33, 0x20, 0x7d, 0x0d, 0x0a, 0x7d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x67, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5f, 0x74, 0x6f, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5b, 0x63, + 0x45, 0x54, 0x43, 0x31, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x5d, 0x20, 0x3d, 0x20, 0x7b, 0x20, 0x32, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x31, 0x2c, + 0x20, 0x30, 0x20, 0x7d, 0x3b, 0x0d, 0x0a, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x67, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x74, 0x6f, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5b, 0x63, 0x45, 0x54, 0x43, 0x31, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x56, 0x61, + 0x6c, 0x75, 0x65, 0x73, 0x5d, 0x20, 0x3d, 0x20, 0x7b, 0x20, 0x33, 0x2c, 0x20, 0x32, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x20, 0x7d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 
0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6f, 0x66, 0x73, 0x2c, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6e, 0x75, 0x6d, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, 0x6f, 0x66, 0x73, 0x20, 0x2b, 0x20, + 0x6e, 0x75, 0x6d, 0x29, 0x20, 0x3c, 0x3d, 0x20, 0x36, 0x34, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x6e, 0x75, 0x6d, 0x20, 0x26, 0x26, 0x20, 0x28, 0x6e, + 0x75, 0x6d, 0x20, 0x3c, 0x3d, 0x20, 0x38, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, 0x6f, 0x66, 0x73, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x20, + 0x3d, 0x3d, 0x20, 0x28, 0x28, 0x6f, 0x66, 0x73, 0x20, 0x2b, 0x20, 0x6e, 0x75, 0x6d, 0x20, 0x2d, 0x20, 0x31, 0x29, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x37, 0x20, 0x2d, 0x20, 0x28, 0x6f, 0x66, 0x73, + 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, + 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x6f, 0x66, 0x73, 0x20, 0x26, 0x20, 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, + 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x6f, 0x66, 0x73, 0x5d, 0x20, 0x3e, 0x3e, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x29, + 0x20, 0x26, 0x20, 0x28, 0x28, 0x31, 0x20, 0x3c, 0x3c, 0x20, 0x6e, 0x75, 0x6d, 0x29, 0x20, 0x2d, 0x20, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 
0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x20, 0x2a, 0x70, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6f, 0x66, 0x73, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6e, 0x75, 0x6d, + 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x69, 0x74, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, 0x6f, 0x66, + 0x73, 0x20, 0x2b, 0x20, 0x6e, 0x75, 0x6d, 0x29, 0x20, 0x3c, 0x3d, 0x20, 0x36, 0x34, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x6e, 0x75, 0x6d, 0x20, 0x26, + 0x26, 0x20, 0x28, 0x6e, 0x75, 0x6d, 0x20, 0x3c, 0x20, 0x33, 0x32, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, 0x6f, 0x66, 0x73, 0x20, 0x3e, 0x3e, + 0x20, 0x33, 0x29, 0x20, 0x3d, 0x3d, 0x20, 0x28, 0x28, 0x6f, 0x66, 0x73, 0x20, 0x2b, 0x20, 0x6e, 0x75, 0x6d, 0x20, 0x2d, 0x20, 0x31, 0x29, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x62, 0x69, 0x74, 0x73, 0x20, 0x3c, 0x20, 0x28, 0x31, 0x55, 0x20, 0x3c, 0x3c, 0x20, 0x6e, 0x75, 0x6d, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x37, 0x20, 0x2d, 0x20, 0x28, 0x6f, + 0x66, 0x73, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, + 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x6f, 0x66, 0x73, 0x20, 0x26, 0x20, 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x61, 0x73, 0x6b, 0x20, 0x3d, 0x20, 0x28, 0x31, 0x20, 0x3c, 0x3c, 0x20, 0x6e, 0x75, 0x6d, 0x29, 0x20, 0x2d, 0x20, 0x31, 0x3b, 0x0d, 
0x0a, 0x09, 0x70, 0x2d, 0x3e, + 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x6f, 0x66, 0x73, 0x5d, 0x20, 0x26, 0x3d, 0x20, 0x7e, 0x28, 0x6d, 0x61, 0x73, 0x6b, 0x20, 0x3c, 0x3c, 0x20, 0x62, + 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x62, 0x79, 0x74, 0x65, 0x5f, + 0x6f, 0x66, 0x73, 0x5d, 0x20, 0x7c, 0x3d, 0x20, 0x28, 0x62, 0x69, 0x74, 0x73, 0x20, 0x3c, 0x3c, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x29, 0x3b, 0x0d, + 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x66, 0x6c, 0x69, 0x70, 0x5f, 0x62, 0x69, + 0x74, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, + 0x72, 0x6e, 0x20, 0x28, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x26, 0x20, 0x31, 0x29, 0x20, 0x21, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, + 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x66, 0x6c, 0x69, 0x70, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x70, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x70, 0x2d, 0x3e, + 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x26, 0x3d, 0x20, 0x7e, 0x31, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x33, + 0x5d, 0x20, 0x7c, 0x3d, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x29, 0x28, 0x66, 0x6c, 0x69, 0x70, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x62, 0x6f, 0x6f, 0x6c, + 0x20, 0x65, 0x74, 0x63, 
0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x64, 0x69, 0x66, 0x66, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, + 0x79, 0x74, 0x65, 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x26, 0x20, 0x32, 0x29, 0x20, 0x21, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x64, 0x69, 0x66, 0x66, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, + 0x70, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x64, 0x69, 0x66, 0x66, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x33, 0x5d, + 0x20, 0x26, 0x3d, 0x20, 0x7e, 0x32, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x7c, 0x3d, 0x20, 0x28, 0x28, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x29, 0x28, 0x64, 0x69, 0x66, 0x66, 0x29, 0x20, 0x3c, 0x3c, 0x20, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x52, 0x65, 0x74, + 0x75, 0x72, 0x6e, 0x73, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x79, 0x20, 0x6d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, 0x72, 0x20, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, 0x28, 0x30, + 0x2d, 0x37, 0x29, 0x20, 0x75, 0x73, 0x65, 0x64, 0x20, 0x62, 0x79, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, + 0x2e, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, 0x3d, 0x30, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x2f, 0x74, 0x6f, 0x70, 0x20, 0x28, 0x43, 0x57, + 0x20, 0x31, 0x29, 0x2c, 0x20, 0x31, 0x3d, 0x72, 0x69, 0x67, 0x68, 0x74, 0x2f, 0x62, 
0x6f, 0x74, 0x74, 0x6f, 0x6d, 0x20, 0x28, 0x43, 0x57, 0x20, 0x32, 0x29, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x28, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x75, 0x62, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, + 0x64, 0x20, 0x3c, 0x20, 0x32, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x73, + 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, 0x20, 0x3f, 0x20, 0x32, 0x20, 0x3a, 0x20, 0x35, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x70, 0x2d, + 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x3e, 0x3e, 0x20, 0x6f, 0x66, 0x73, 0x29, 0x20, 0x26, 0x20, 0x37, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, + 0x2f, 0x20, 0x53, 0x65, 0x74, 0x73, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x79, 0x20, 0x6d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, 0x72, 0x20, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, + 0x28, 0x30, 0x2d, 0x37, 0x29, 0x20, 0x75, 0x73, 0x65, 0x64, 0x20, 0x62, 0x79, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x69, 0x64, 0x20, 0x28, 0x30, 0x20, 0x6f, 0x72, 0x20, 0x31, 0x29, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, + 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 
0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x74, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, + 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, 0x20, 0x3c, 0x20, 0x32, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, + 0x72, 0x74, 0x28, 0x74, 0x20, 0x3c, 0x20, 0x38, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6f, 0x66, 0x73, 0x20, + 0x3d, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x64, 0x20, 0x3f, 0x20, 0x32, 0x20, 0x3a, 0x20, 0x35, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, + 0x74, 0x65, 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x26, 0x3d, 0x20, 0x7e, 0x28, 0x37, 0x20, 0x3c, 0x3c, 0x20, 0x6f, 0x66, 0x73, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, + 0x74, 0x65, 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x7c, 0x3d, 0x20, 0x28, 0x74, 0x20, 0x3c, 0x3c, 0x20, 0x6f, 0x66, 0x73, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, + 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5f, 0x65, 0x74, 0x63, 0x31, + 0x73, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x74, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, + 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x28, 0x70, 0x2c, 0x20, 0x30, 0x2c, + 0x20, 0x74, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x28, 
0x70, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x74, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x72, 0x61, 0x77, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x78, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x79, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, 0x78, 0x20, 0x7c, 0x20, 0x79, 0x29, 0x20, 0x3c, 0x20, 0x34, 0x29, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, + 0x78, 0x20, 0x2a, 0x20, 0x34, 0x20, 0x2b, 0x20, 0x79, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x79, 0x74, 0x65, + 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x26, 0x20, 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x20, 0x3d, 0x20, 0x26, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, + 0x37, 0x20, 0x2d, 0x20, 0x28, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6c, 0x73, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x5b, 0x30, 0x5d, 0x20, 0x3e, 0x3e, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, + 0x6f, 0x66, 0x73, 0x29, 0x20, 0x26, 0x20, 0x31, 0x3b, 0x0d, 0x0a, 
0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x73, 0x62, 0x20, 0x3d, + 0x20, 0x28, 0x70, 0x5b, 0x2d, 0x32, 0x5d, 0x20, 0x3e, 0x3e, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x29, 0x20, 0x26, 0x20, 0x31, 0x3b, 0x0d, 0x0a, 0x09, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x76, 0x61, 0x6c, 0x20, 0x3d, 0x20, 0x6c, 0x73, 0x62, 0x20, 0x7c, 0x20, 0x28, 0x6d, 0x73, 0x62, 0x20, + 0x3c, 0x3c, 0x20, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x76, 0x61, 0x6c, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, + 0x52, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x65, 0x64, 0x20, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x73, 0x20, 0x66, + 0x72, 0x6f, 0x6d, 0x20, 0x30, 0x2d, 0x33, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x20, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x69, 0x6e, + 0x74, 0x6f, 0x20, 0x67, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x2e, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x78, 0x2c, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x79, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x67, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5f, 0x74, 0x6f, + 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5b, 0x65, 0x74, 0x63, 0x5f, 0x62, 
0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x72, 0x61, + 0x77, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x78, 0x2c, 0x20, 0x79, 0x29, 0x5d, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, + 0x0a, 0x2f, 0x2f, 0x20, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, 0x22, 0x76, 0x61, 0x6c, 0x22, 0x20, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x73, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x30, + 0x2d, 0x33, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x20, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x69, 0x6e, 0x74, 0x6f, 0x20, 0x67, 0x5f, + 0x65, 0x74, 0x63, 0x31, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x2e, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x78, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x79, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x20, 0x76, 0x61, 0x6c, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, 0x78, 0x20, 0x7c, 0x20, 0x79, 0x20, 0x7c, 0x20, 0x76, 0x61, + 0x6c, 0x29, 0x20, 0x3c, 0x20, 0x34, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, + 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x78, 0x20, 0x2a, 0x20, 0x34, 0x20, 0x2b, 0x20, 0x79, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x20, + 0x3d, 0x20, 0x26, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x37, 0x20, 0x2d, 0x20, 0x28, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, 0x64, 
0x65, + 0x78, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x79, 0x74, + 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x26, 0x20, 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x61, 0x73, 0x6b, 0x20, 0x3d, 0x20, 0x31, 0x20, 0x3c, 0x3c, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, + 0x5f, 0x6f, 0x66, 0x73, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x31, 0x5f, 0x76, 0x61, + 0x6c, 0x20, 0x3d, 0x20, 0x67, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x74, 0x6f, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5b, 0x76, 0x61, 0x6c, + 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6c, 0x73, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, + 0x31, 0x5f, 0x76, 0x61, 0x6c, 0x20, 0x26, 0x20, 0x31, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x73, 0x62, 0x20, + 0x3d, 0x20, 0x65, 0x74, 0x63, 0x31, 0x5f, 0x76, 0x61, 0x6c, 0x20, 0x3e, 0x3e, 0x20, 0x31, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x70, 0x5b, 0x30, 0x5d, 0x20, 0x26, 0x3d, 0x20, 0x7e, 0x6d, 0x61, + 0x73, 0x6b, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x5b, 0x30, 0x5d, 0x20, 0x7c, 0x3d, 0x20, 0x28, 0x6c, 0x73, 0x62, 0x20, 0x3c, 0x3c, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, + 0x66, 0x73, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x70, 0x5b, 0x2d, 0x32, 0x5d, 0x20, 0x26, 0x3d, 0x20, 0x7e, 0x6d, 0x61, 0x73, 0x6b, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x5b, 0x2d, 0x32, 0x5d, + 0x20, 0x7c, 0x3d, 0x20, 0x28, 0x6d, 0x73, 0x62, 
0x20, 0x3c, 0x3c, 0x20, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x5f, 0x6f, 0x66, 0x73, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, + 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x34, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x64, 0x78, 0x2c, + 0x20, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x63, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x69, 0x64, 0x78, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, + 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, + 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x52, 0x32, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x2c, 0x20, + 0x28, 0x63, 0x20, 0x3e, 0x3e, 0x20, 0x38, 0x29, 0x20, 0x26, 0x20, 0x31, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, + 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, + 0x34, 0x47, 0x32, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x28, 0x63, 0x20, 0x3e, 0x3e, 0x20, 0x34, 0x29, 0x20, 0x26, 0x20, 0x31, 0x35, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 
0x42, 0x32, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, + 0x2c, 0x20, 0x63, 0x20, 0x26, 0x20, 0x31, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, + 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x52, 0x31, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x28, 0x63, 0x20, 0x3e, 0x3e, + 0x20, 0x38, 0x29, 0x20, 0x26, 0x20, 0x31, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, + 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x47, 0x31, 0x42, 0x69, + 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x28, 0x63, 0x20, 0x3e, 0x3e, 0x20, 0x34, 0x29, 0x20, 0x26, 0x20, 0x31, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, + 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x42, 0x31, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x2c, 0x20, 0x63, 0x20, 0x26, + 0x20, 0x31, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x34, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 
0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x64, 0x78, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x72, 0x2c, 0x20, 0x67, 0x2c, 0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x69, 0x64, 0x78, 0x29, 0x0d, 0x0a, 0x09, 0x7b, + 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, + 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x52, 0x32, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, + 0x74, 0x2c, 0x20, 0x34, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x67, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, + 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x47, 0x32, 0x42, 0x69, + 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, + 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, + 0x72, 0x34, 0x42, 0x32, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x09, + 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, + 0x28, 0x70, 0x42, 0x6c, 0x6f, 
0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x52, 0x31, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, + 0x65, 0x74, 0x2c, 0x20, 0x34, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x67, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, + 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x47, 0x31, 0x42, + 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, + 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x41, 0x62, 0x73, 0x43, 0x6f, 0x6c, + 0x6f, 0x72, 0x34, 0x42, 0x31, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x34, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, + 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x29, 0x28, 0x62, 0x20, 0x7c, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3c, 0x20, 0x34, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x72, 0x20, 0x3c, + 0x3c, 0x20, 0x38, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, + 0x5f, 0x62, 0x61, 0x73, 0x65, 0x35, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x63, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, + 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 
0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x35, + 0x52, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x28, 0x63, 0x20, 0x3e, 0x3e, 0x20, 0x31, 0x30, 0x29, 0x20, 0x26, 0x20, 0x33, 0x31, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, + 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x47, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x35, 0x2c, 0x20, + 0x28, 0x63, 0x20, 0x3e, 0x3e, 0x20, 0x35, 0x29, 0x20, 0x26, 0x20, 0x33, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, + 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, + 0x35, 0x42, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x35, 0x2c, 0x20, 0x63, 0x20, 0x26, 0x20, 0x33, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, + 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x35, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x72, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, + 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 
0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, + 0x72, 0x35, 0x52, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x67, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, + 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x47, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, + 0x2c, 0x20, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, + 0x42, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x42, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, + 0x72, 0x6e, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x29, 0x28, 0x62, 0x20, 0x7c, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3c, 0x20, 0x35, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x72, + 0x20, 0x3c, 0x3c, 0x20, 0x31, 0x30, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x73, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x63, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, + 0x65, 0x74, 
0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, + 0x6f, 0x6c, 0x6f, 0x72, 0x33, 0x52, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x28, 0x63, 0x20, 0x3e, 0x3e, 0x20, 0x36, 0x29, 0x20, 0x26, 0x20, 0x37, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x33, 0x47, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, + 0x20, 0x33, 0x2c, 0x20, 0x28, 0x63, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x20, 0x26, 0x20, 0x37, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, + 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, + 0x6f, 0x6c, 0x6f, 0x72, 0x33, 0x42, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x33, 0x2c, 0x20, 0x63, 0x20, 0x26, 0x20, 0x37, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, + 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x29, 0x20, 0x0d, + 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x72, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 
0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, + 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x33, 0x52, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x67, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, + 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x33, 0x47, 0x42, 0x69, + 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x20, + 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x62, 0x69, 0x74, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, + 0x2c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x33, 0x42, 0x42, 0x69, 0x74, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x2c, 0x20, 0x33, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x29, 0x28, 0x62, 0x20, 0x7c, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3c, 0x20, + 0x33, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x72, 0x20, 0x3c, 0x3c, 0x20, 0x36, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x28, 0x69, 0x6e, 0x74, 0x20, 0x2a, 0x70, 0x52, 0x2c, 0x20, 0x69, 0x6e, + 0x74, 0x20, 0x2a, 0x70, 0x47, 0x2c, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x2a, 0x70, 0x42, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 
0x5f, 0x74, 0x20, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, + 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x64, 0x65, 0x6c, + 0x74, 0x61, 0x33, 0x20, 0x3e, 0x3e, 0x20, 0x36, 0x29, 0x20, 0x26, 0x20, 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x67, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, + 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x20, 0x3e, 0x3e, 0x20, 0x33, 0x29, 0x20, 0x26, 0x20, 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x70, 0x61, 0x63, + 0x6b, 0x65, 0x64, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x20, 0x26, 0x20, 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x72, 0x20, 0x3e, 0x3d, 0x20, 0x34, 0x29, 0x20, 0x72, 0x20, + 0x2d, 0x3d, 0x20, 0x38, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x67, 0x20, 0x3e, 0x3d, 0x20, 0x34, 0x29, 0x20, 0x67, 0x20, 0x2d, 0x3d, 0x20, 0x38, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x66, + 0x20, 0x28, 0x62, 0x20, 0x3e, 0x3d, 0x20, 0x34, 0x29, 0x20, 0x62, 0x20, 0x2d, 0x3d, 0x20, 0x38, 0x3b, 0x0d, 0x0a, 0x09, 0x2a, 0x70, 0x52, 0x20, 0x3d, 0x20, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x2a, + 0x70, 0x47, 0x20, 0x3d, 0x20, 0x67, 0x3b, 0x0d, 0x0a, 0x09, 0x2a, 0x70, 0x42, 0x20, 0x3d, 0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x28, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x70, 0x61, 0x63, 0x6b, 0x65, + 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 
+ 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x29, 0x0d, 0x0a, + 0x7b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x72, 0x2c, 0x20, 0x64, 0x67, 0x2c, 0x20, 0x64, 0x62, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x28, 0x26, 0x64, 0x72, 0x2c, 0x20, 0x26, 0x64, 0x67, 0x2c, 0x20, 0x26, 0x64, 0x62, 0x2c, 0x20, 0x70, 0x61, 0x63, + 0x6b, 0x65, 0x64, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x20, 0x26, 0x20, 0x33, 0x31, 0x55, 0x29, 0x20, 0x2b, 0x20, 0x64, 0x62, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x67, 0x20, 0x3d, 0x20, 0x28, + 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x20, 0x3e, 0x3e, 0x20, 0x35, 0x55, 0x29, 0x20, 0x26, 0x20, 0x33, 0x31, 0x55, 0x29, 0x20, 0x2b, 0x20, 0x64, + 0x67, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x20, 0x3e, 0x3e, 0x20, + 0x31, 0x30, 0x55, 0x29, 0x20, 0x26, 0x20, 0x33, 0x31, 0x55, 0x29, 0x20, 0x2b, 0x20, 0x64, 0x72, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x75, 0x63, 0x63, 0x65, + 0x73, 0x73, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x75, 0x65, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x29, 0x28, 0x72, 0x20, 0x7c, 0x20, + 0x67, 0x20, 0x7c, 0x20, 0x62, 0x29, 0x20, 0x3e, 0x20, 0x33, 0x31, 0x55, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x20, 0x3d, 0x20, 0x66, + 0x61, 0x6c, 0x73, 0x65, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 
0x72, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x72, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x09, 0x67, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x67, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, + 0x6d, 0x70, 0x28, 0x62, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x33, 0x31, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, + 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x62, 0x20, 0x3e, 0x3e, 0x20, 0x32, + 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x67, 0x20, 0x3d, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x67, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x55, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x72, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x72, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x55, 0x29, 0x3b, 0x0d, + 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x2a, 0x70, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, + 0x72, 0x2c, 0x20, 0x67, 0x2c, 0x20, 0x62, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x28, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, + 0x74, 0x75, 0x72, 0x6e, 0x20, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, + 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 
0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x70, 0x61, + 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x20, 0x26, 0x20, 0x33, 0x31, 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x67, 0x20, + 0x3d, 0x20, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x20, 0x3e, 0x3e, 0x20, 0x35, 0x55, 0x29, 0x20, 0x26, 0x20, 0x33, 0x31, 0x55, 0x3b, 0x0d, 0x0a, + 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x20, 0x3e, 0x3e, 0x20, + 0x31, 0x30, 0x55, 0x29, 0x20, 0x26, 0x20, 0x33, 0x31, 0x55, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, + 0x0a, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x62, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x09, 0x67, 0x20, 0x3d, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x67, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x72, + 0x20, 0x3d, 0x20, 0x28, 0x72, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x72, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x72, 0x2c, 0x20, 0x67, 0x2c, 0x20, 0x62, 0x2c, 0x20, 0x6d, + 0x69, 0x6e, 0x28, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 
0x5f, 0x72, 0x67, + 0x62, 0x61, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x31, + 0x36, 0x5f, 0x74, 0x20, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x20, + 0x3d, 0x20, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x20, 0x26, 0x20, 0x31, 0x35, 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x20, 0x67, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x20, 0x3e, 0x3e, 0x20, 0x34, 0x55, 0x29, 0x20, 0x26, 0x20, 0x31, 0x35, + 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, + 0x20, 0x3e, 0x3e, 0x20, 0x38, 0x55, 0x29, 0x20, 0x26, 0x20, 0x31, 0x35, 0x55, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, + 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x20, 0x3c, 0x3c, 0x20, 0x34, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x67, 0x20, 0x3d, 0x20, + 0x28, 0x67, 0x20, 0x3c, 0x3c, 0x20, 0x34, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x67, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x72, 0x20, 0x3c, 0x3c, 0x20, 0x34, 0x55, 0x29, 0x20, + 0x7c, 0x20, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, + 0x28, 0x72, 0x2c, 0x20, 0x67, 0x2c, 
0x20, 0x62, 0x2c, 0x20, 0x6d, 0x69, 0x6e, 0x28, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, + 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x20, 0x69, 0x66, 0x20, 0x64, 0x69, 0x64, 0x6e, 0x27, 0x74, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x2c, 0x20, 0x74, 0x72, 0x75, + 0x65, 0x20, 0x69, 0x66, 0x20, 0x61, 0x6e, 0x79, 0x20, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x65, 0x64, 0x0d, 0x0a, 0x62, 0x6f, 0x6f, 0x6c, + 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x28, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x2a, + 0x20, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x62, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x69, 0x66, 0x20, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x64, 0x69, 0x66, 0x66, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x70, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x29, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x64, 0x65, 0x6c, 0x74, + 0x61, 0x33, 0x28, 0x26, 0x62, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 
0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x35, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x29, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x29, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x6c, 0x73, + 0x65, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x35, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x35, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x29, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, + 0x0a, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x75, 0x6e, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x28, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x34, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, + 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, + 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x2a, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x20, 0x3d, 0x20, 0x67, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5b, 0x65, 0x74, 0x63, 
0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x64, 0x63, 0x20, 0x3d, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x3b, 0x0d, + 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x30, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, + 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x78, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, + 0x6c, 0x65, 0x5b, 0x30, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, + 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x30, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, + 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x30, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, + 0x29, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x31, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x78, 0x20, 0x2b, 0x20, 0x70, + 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x31, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, + 0x6c, 0x61, 0x67, 
0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x31, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, + 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, + 0x65, 0x5b, 0x31, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x73, 0x5b, 0x32, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, + 0x67, 0x28, 0x62, 0x2e, 0x78, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x32, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x63, + 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, + 0x32, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x70, 0x49, + 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x32, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, + 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x78, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x33, 0x5d, + 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 
0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, + 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x33, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x5f, 0x66, 0x6c, 0x61, 0x67, + 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x33, 0x5d, 0x2c, 0x20, 0x26, 0x64, 0x63, 0x29, 0x2c, 0x20, 0x32, 0x35, + 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x64, 0x63, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x67, 0x65, 0x74, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x35, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, 0x42, 0x61, 0x73, 0x65, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x2c, 0x20, 0x62, + 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x20, 0x2f, 0x2a, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x20, 0x2a, 0x2f, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x2a, 0x70, 0x42, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, + 0x69, 0x66, 0x20, 0x28, 0x21, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x2e, 0x78, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x2e, 0x78, 0x20, 0x3c, + 0x3c, 0x20, 0x33, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x62, 0x2e, 0x78, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 
0x2e, 0x79, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x2e, 0x79, + 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x2e, 0x7a, 0x20, 0x3d, 0x20, 0x28, 0x62, + 0x2e, 0x7a, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x2a, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, 0x3d, 0x20, 0x67, 0x5f, 0x65, 0x74, + 0x63, 0x31, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5b, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x30, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, + 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x78, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x30, 0x5d, + 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x30, + 0x5d, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, + 0x30, 0x5d, 0x29, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x31, 0x5d, 0x20, 0x3d, 0x20, + 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x78, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, + 
0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x31, 0x5d, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, + 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x31, 0x5d, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x70, 0x49, + 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x31, 0x5d, 0x29, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x32, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, + 0x28, 0x62, 0x2e, 0x78, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x32, 0x5d, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, + 0x35, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x32, 0x5d, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, + 0x35, 0x35, 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x32, 0x5d, 0x29, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x33, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, + 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x78, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x33, + 0x5d, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, + 0x33, 0x5d, 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 
0x32, 0x35, 0x35, 0x28, 0x62, 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x5b, 0x33, 0x5d, 0x29, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x2a, 0x20, 0x70, 0x53, + 0x6f, 0x75, 0x72, 0x63, 0x65, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x67, 0x69, 0x6e, 0x5f, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2f, 0x2a, 0x3d, 0x20, 0x30, 0x2a, 0x2f, 0x2c, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x65, 0x6e, 0x64, 0x5f, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2f, 0x2a, 0x3d, 0x20, 0x32, 0x2a, 0x2f, 0x29, 0x0d, 0x0a, + 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x3d, 0x20, 0x62, 0x65, 0x67, 0x69, 0x6e, + 0x5f, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3b, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x3c, 0x20, 0x65, 0x6e, 0x64, 0x5f, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x3b, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 
0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, + 0x61, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x34, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x67, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x2c, 0x20, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x67, 0x65, 0x74, 0x5f, 0x66, 0x6c, 0x69, 0x70, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x29, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x79, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x79, 0x20, 0x3c, 0x20, 0x32, 0x3b, 0x20, + 0x79, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x78, 0x20, + 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x78, 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x78, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x55, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x5f, 0x4d, 0x41, 0x58, + 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x20, 0x3d, 0x20, 0x30, 0x3b, 
0x20, 0x73, + 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x73, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, + 0x5f, 0x74, 0x20, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, + 0x61, 0x6c, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x73, 0x5d, 0x2c, 0x20, 0x70, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x5f, 0x70, 0x69, 0x78, + 0x65, 0x6c, 0x73, 0x5b, 0x78, 0x20, 0x2b, 0x20, 0x28, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x20, 0x32, 0x20, 0x2b, 0x20, 0x79, 0x29, 0x20, 0x2a, 0x20, 0x34, 0x5d, 0x2c, + 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x65, 0x72, 0x72, 0x20, 0x3c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, + 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x20, 0x3d, 0x20, 0x65, 0x72, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, 0x3d, + 0x20, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x78, 0x2c, 0x20, 0x73, + 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x20, 0x32, 0x20, 0x2b, 0x20, 0x79, 0x2c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x29, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 
0x09, 0x09, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x09, + 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x79, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x79, 0x20, 0x3c, 0x20, + 0x34, 0x3b, 0x20, 0x79, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, + 0x20, 0x78, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x78, 0x20, 0x3c, 0x20, 0x32, 0x3b, 0x20, 0x78, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x55, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x5f, + 0x4d, 0x41, 0x58, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x20, 0x3d, 0x20, 0x30, + 0x3b, 0x20, 0x73, 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x73, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, + 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x65, 0x72, 0x63, 0x65, + 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 
0x72, 0x73, 0x5b, 0x73, 0x5d, 0x2c, 0x20, 0x70, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x5f, + 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x5b, 0x28, 0x73, 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x20, 0x32, 0x29, 0x20, 0x2b, 0x20, 0x78, 0x20, 0x2b, 0x20, 0x79, 0x20, 0x2a, 0x20, + 0x34, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x65, 0x72, 0x72, 0x20, 0x3c, 0x20, 0x62, 0x65, 0x73, + 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, + 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x65, 0x72, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, + 0x72, 0x20, 0x3d, 0x20, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x73, + 0x75, 0x62, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x20, 0x32, 0x20, 0x2b, 0x20, 0x78, 0x2c, 0x20, 0x79, 0x2c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, + 0x72, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 
0x75, 0x69, 0x6e, 0x74, 0x31, + 0x36, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x5f, 0x72, 0x67, 0x62, 0x28, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x72, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x67, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, + 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x69, 0x61, + 0x73, 0x20, 0x3d, 0x20, 0x31, 0x32, 0x37, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, + 0x72, 0x20, 0x3d, 0x20, 0x28, 0x72, 0x20, 0x2a, 0x20, 0x31, 0x35, 0x55, 0x20, 0x2b, 0x20, 0x62, 0x69, 0x61, 0x73, 0x29, 0x20, 0x2f, 0x20, 0x32, 0x35, 0x35, 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x09, + 0x67, 0x20, 0x3d, 0x20, 0x28, 0x67, 0x20, 0x2a, 0x20, 0x31, 0x35, 0x55, 0x20, 0x2b, 0x20, 0x62, 0x69, 0x61, 0x73, 0x29, 0x20, 0x2f, 0x20, 0x32, 0x35, 0x35, 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x09, + 0x62, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x20, 0x2a, 0x20, 0x31, 0x35, 0x55, 0x20, 0x2b, 0x20, 0x62, 0x69, 0x61, 0x73, 0x29, 0x20, 0x2f, 0x20, 0x32, 0x35, 0x35, 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x6d, 0x69, 0x6e, 0x28, 0x72, 0x2c, 0x20, 0x31, 0x35, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x67, 0x20, 0x3d, 0x20, 0x6d, 0x69, 0x6e, 0x28, + 0x67, 0x2c, 0x20, 0x31, 0x35, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x6d, 0x69, 0x6e, 0x28, 0x62, 0x2c, 0x20, 0x31, 0x35, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, + 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x29, 0x28, 0x62, 0x20, 0x7c, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3c, 0x20, 0x34, 0x55, 0x29, 0x20, + 0x7c, 0x20, 0x28, 0x72, 
0x20, 0x3c, 0x3c, 0x20, 0x38, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x62, 0x69, 0x61, 0x73, 0x20, 0x3d, 0x20, 0x31, 0x32, 0x37, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, + 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x5f, 0x72, 0x67, 0x62, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x2c, + 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x2c, 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, + 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x28, 0x69, 0x6e, 0x74, 0x20, 0x72, 0x2c, 0x20, 0x69, + 0x6e, 0x74, 0x20, 0x67, 0x2c, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x62, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, 0x72, 0x20, 0x3e, 0x3d, 0x20, 0x63, + 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x69, 0x6e, 0x29, 0x20, 0x26, 0x26, 0x20, 0x28, 0x72, 0x20, 0x3c, 0x3d, 0x20, 0x63, 0x45, 0x54, 0x43, + 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x61, 0x78, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, 0x67, 0x20, 0x3e, 0x3d, + 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 
0x74, 0x61, 0x4d, 0x69, 0x6e, 0x29, 0x20, 0x26, 0x26, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3d, 0x20, 0x63, 0x45, + 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x61, 0x78, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x73, 0x73, 0x65, 0x72, 0x74, 0x28, 0x28, 0x62, 0x20, + 0x3e, 0x3d, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x69, 0x6e, 0x29, 0x20, 0x26, 0x26, 0x20, 0x28, 0x62, 0x20, 0x3c, 0x3d, 0x20, + 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x61, 0x78, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x72, 0x20, 0x3c, 0x20, + 0x30, 0x29, 0x20, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x38, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x20, 0x30, 0x29, 0x20, 0x67, 0x20, 0x2b, 0x3d, 0x20, 0x38, 0x3b, 0x0d, + 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x62, 0x20, 0x3c, 0x20, 0x30, 0x29, 0x20, 0x62, 0x20, 0x2b, 0x3d, 0x20, 0x38, 0x3b, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x75, + 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x29, 0x28, 0x62, 0x20, 0x7c, 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x72, 0x20, 0x3c, 0x3c, 0x20, 0x36, 0x29, + 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, + 0x31, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 
0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x64, + 0x69, 0x66, 0x66, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x34, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x30, 0x2c, + 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x28, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, + 0x65, 0x64, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, + 0x65, 0x34, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x31, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, + 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x34, 0x28, 0x63, 0x31, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x29, 0x3b, 0x0d, + 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x72, 0x67, 0x62, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x72, 0x2c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x67, 0x2c, + 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x69, 0x61, 0x73, 0x20, 0x3d, 0x20, 0x31, 0x32, 0x37, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x73, 0x63, 0x61, 0x6c, + 0x65, 
0x64, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x72, 0x20, 0x2a, 0x20, 0x33, 0x31, 0x55, 0x20, 0x2b, 0x20, 0x62, 0x69, 0x61, 0x73, 0x29, 0x20, + 0x2f, 0x20, 0x32, 0x35, 0x35, 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x67, 0x20, 0x3d, 0x20, 0x28, 0x67, 0x20, 0x2a, 0x20, 0x33, 0x31, 0x55, 0x20, 0x2b, 0x20, 0x62, 0x69, 0x61, 0x73, 0x29, 0x20, + 0x2f, 0x20, 0x32, 0x35, 0x35, 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x20, 0x2a, 0x20, 0x33, 0x31, 0x55, 0x20, 0x2b, 0x20, 0x62, 0x69, 0x61, 0x73, 0x29, 0x20, + 0x2f, 0x20, 0x32, 0x35, 0x35, 0x55, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x72, 0x20, 0x3d, 0x20, 0x6d, 0x69, 0x6e, 0x28, 0x72, 0x2c, 0x20, 0x33, 0x31, 0x55, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x67, 0x20, 0x3d, 0x20, 0x6d, 0x69, 0x6e, 0x28, 0x67, 0x2c, 0x20, 0x33, 0x31, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x62, 0x20, 0x3d, 0x20, 0x6d, 0x69, 0x6e, 0x28, 0x62, 0x2c, + 0x20, 0x33, 0x31, 0x55, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x29, 0x28, 0x62, 0x20, 0x7c, + 0x20, 0x28, 0x67, 0x20, 0x3c, 0x3c, 0x20, 0x35, 0x55, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x72, 0x20, 0x3c, 0x3c, 0x20, 0x31, 0x30, 0x55, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, + 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x28, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x2c, 0x20, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x72, + 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x72, 0x67, 0x62, 0x28, + 0x63, 0x2e, 0x78, 0x2c, 0x20, 0x63, 0x2e, 0x79, 0x2c, 0x20, 0x63, 
0x2e, 0x7a, 0x2c, 0x20, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, + 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x28, 0x65, 0x74, + 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x30, 0x5f, 0x75, + 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x31, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, + 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x64, 0x69, 0x66, 0x66, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x70, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x61, + 0x73, 0x65, 0x35, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x28, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x31, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x78, 0x20, 0x2d, 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, + 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x67, 0x20, 0x3d, 0x20, 0x63, 0x31, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, + 0x2e, 0x79, 0x20, 0x2d, 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x79, 0x3b, 0x0d, 0x0a, 
0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x62, 0x20, 0x3d, 0x20, 0x63, + 0x31, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x7a, 0x20, 0x2d, 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x7a, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x28, 0x64, 0x72, 0x2c, 0x20, 0x64, + 0x67, 0x2c, 0x20, 0x64, 0x62, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, + 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, + 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, + 0x7b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x64, 0x69, 0x66, 0x66, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, + 0x61, 0x73, 0x65, 0x35, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, + 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x28, 0x63, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 
0x09, + 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x28, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, + 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x31, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x64, 0x69, 0x66, 0x66, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x35, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x42, 0x6c, 0x6f, + 0x63, 0x6b, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x28, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, + 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x31, 0x5f, + 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 
0x2e, 0x78, 0x20, 0x2d, 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, + 0x74, 0x20, 0x64, 0x67, 0x20, 0x3d, 0x20, 0x63, 0x31, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x79, 0x20, 0x2d, 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, + 0x65, 0x64, 0x2e, 0x79, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x62, 0x20, 0x3d, 0x20, 0x63, 0x31, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x7a, 0x20, 0x2d, + 0x20, 0x63, 0x30, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x2e, 0x7a, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x28, 0x28, 0x64, 0x72, 0x20, 0x3c, 0x20, 0x63, + 0x45, 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x69, 0x6e, 0x29, 0x20, 0x7c, 0x7c, 0x20, 0x28, 0x64, 0x72, 0x20, 0x3e, 0x20, 0x63, 0x45, 0x54, 0x43, + 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x61, 0x78, 0x29, 0x29, 0x20, 0x7c, 0x7c, 0x0d, 0x0a, 0x09, 0x09, 0x28, 0x28, 0x64, 0x67, 0x20, 0x3c, 0x20, 0x63, 0x45, + 0x54, 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x69, 0x6e, 0x29, 0x20, 0x7c, 0x7c, 0x20, 0x28, 0x64, 0x67, 0x20, 0x3e, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, + 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x61, 0x78, 0x29, 0x29, 0x20, 0x7c, 0x7c, 0x0d, 0x0a, 0x09, 0x09, 0x28, 0x28, 0x64, 0x62, 0x20, 0x3c, 0x20, 0x63, 0x45, 0x54, + 0x43, 0x31, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x69, 0x6e, 0x29, 0x20, 0x7c, 0x7c, 0x20, 0x28, 0x64, 0x62, 0x20, 0x3e, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x43, + 0x6f, 0x6c, 0x6f, 0x72, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x4d, 0x61, 0x78, 0x29, 0x29, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x3b, + 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 
0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, + 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x33, 0x28, 0x64, 0x72, + 0x2c, 0x20, 0x64, 0x67, 0x2c, 0x20, 0x64, 0x62, 0x29, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x74, 0x72, 0x75, 0x65, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, + 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x72, 0x61, 0x77, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, + 0x74, 0x6f, 0x72, 0x73, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, + 0x20, 0x77, 0x6f, 0x72, 0x64, 0x33, 0x20, 0x3d, 0x20, 0x30, 0x2c, 0x20, 0x77, 0x6f, 0x72, 0x64, 0x32, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x79, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x79, 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x79, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, + 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x78, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x78, 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x78, 0x2b, 0x2b, + 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, + 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x78, 0x20, 0x2a, 0x20, 0x34, 0x20, 0x2b, 0x20, 0x79, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 
0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x20, 0x73, 0x20, 0x3d, 0x20, 0x70, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x78, 0x20, 0x2b, 0x20, 0x79, 0x20, 0x2a, 0x20, 0x34, 0x5d, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6c, 0x73, 0x62, 0x20, 0x3d, 0x20, 0x73, 0x20, 0x26, 0x20, + 0x31, 0x2c, 0x20, 0x6d, 0x73, 0x62, 0x20, 0x3d, 0x20, 0x73, 0x20, 0x3e, 0x3e, 0x20, 0x31, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x77, 0x6f, 0x72, 0x64, 0x33, 0x20, 0x7c, + 0x3d, 0x20, 0x28, 0x6c, 0x73, 0x62, 0x20, 0x3c, 0x3c, 0x20, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x77, 0x6f, 0x72, 0x64, 0x32, 0x20, + 0x7c, 0x3d, 0x20, 0x28, 0x6d, 0x73, 0x62, 0x20, 0x3c, 0x3c, 0x20, 0x62, 0x69, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, + 0x0a, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x37, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, + 0x74, 0x29, 0x28, 0x77, 0x6f, 0x72, 0x64, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x36, 0x5d, 0x20, + 0x3d, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x29, 0x28, 0x77, 0x6f, 0x72, 0x64, 0x33, 0x20, 0x3e, 0x3e, 0x20, 0x38, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, + 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x35, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x29, 0x28, 0x77, 0x6f, 0x72, 0x64, 0x32, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5b, 0x34, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, + 0x74, 0x29, 0x28, 0x77, 0x6f, 
0x72, 0x64, 0x32, 0x20, 0x3e, 0x3e, 0x20, 0x38, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x2d, 0x2d, 0x2d, 0x2d, 0x20, 0x45, 0x43, + 0x31, 0x53, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x2f, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x20, 0x6f, 0x70, 0x74, 0x69, 0x6d, + 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x0d, 0x0a, 0x0d, 0x0a, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x67, 0x5f, 0x65, 0x76, + 0x61, 0x6c, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5b, 0x38, 0x5d, 0x5b, 0x32, 0x35, 0x36, 0x5d, 0x20, 0x3d, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, + 0x20, 0x39, 0x39, 0x25, 0x20, 0x74, 0x68, 0x72, 0x65, 0x73, 0x68, 0x6f, 0x6c, 0x64, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 
0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 
0x20, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 
0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, + 0x20, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 
0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 
0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, + 0x2c, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, + 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 
+ 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 
0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 
0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 
0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x7d, 0x2c, 0x0d, 0x0a, 0x09, 0x7b, 0x20, 0x31, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 
0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, + 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 
0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, + 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x7d, 0x2c, 0x0d, + 0x0a, 0x09, 0x7b, 0x20, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, + 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 
0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x30, 0x2c, 0x30, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, 0x31, 0x2c, + 0x31, 0x2c, 0x31, 
0x2c, 0x7d, 0x0d, 0x0a, 0x7d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x65, 0x74, 0x63, 0x31, + 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x74, 0x65, 0x73, + 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x3b, + 0x0d, 0x0a, 0x7d, 0x20, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x63, 0x6f, 0x6f, + 0x72, 0x64, 0x69, 0x6e, 0x61, 0x74, 0x65, 0x73, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x73, 0x63, 0x61, 0x6c, + 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x29, 0x20, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x62, 0x72, 0x2c, 0x20, 0x62, 0x67, 0x2c, 0x20, 0x62, 0x62, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x62, + 0x72, 0x20, 0x3d, 0x20, 0x28, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x75, + 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x62, 0x67, 0x20, 0x3d, 0x20, 0x28, 0x75, + 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 
0x2e, 0x79, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, + 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x62, 0x62, 0x20, 0x3d, 0x20, 0x28, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, + 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x29, 0x20, 0x7c, 0x20, 0x28, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x2e, 0x7a, 0x20, 0x3c, 0x3c, 0x20, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, + 0x62, 0x61, 0x29, 0x28, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x29, 0x62, 0x72, 0x2c, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x29, 0x62, 0x67, 0x2c, 0x20, 0x28, 0x75, + 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x29, 0x62, 0x62, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, + 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, + 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x09, 0x09, 0x09, 0x09, 0x09, 0x6d, + 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, + 0x6f, 0x6e, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x74, 0x65, 0x73, 0x20, 0x6d, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x75, + 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 
0x5b, 0x31, 0x36, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x62, + 0x6f, 0x6f, 0x6c, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x6d, 0x5f, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x3b, 0x0d, 0x0a, 0x7d, 0x20, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, + 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, + 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, + 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x5f, 0x62, 0x72, 0x2c, 0x20, 0x6d, 0x5f, 0x62, 0x67, 0x2c, 0x20, 0x6d, 0x5f, 0x62, 0x62, 0x3b, 0x0d, + 0x0a, 0x09, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33, 0x20, 0x6d, 0x5f, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x5f, 0x6d, 0x61, + 0x78, 0x5f, 0x63, 0x6f, 0x6d, 0x70, 0x5f, 0x73, 0x70, 0x72, 0x65, 0x61, 0x64, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, + 0x5f, 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x3b, 0x0d, 0x0a, 0x7d, 0x20, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x65, 0x76, 0x61, 0x6c, 0x75, 0x61, 0x74, 0x65, 0x5f, + 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, + 
0x65, 0x20, 0x2a, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, + 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x0d, 0x0a, 0x09, + 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, + 0x6c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, + 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x74, 0x65, + 0x73, 0x20, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2c, 0x20, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x70, 0x6f, 0x74, + 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2a, 0x20, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x2c, 0x20, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x73, + 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2a, 0x20, 0x70, 0x42, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, + 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x74, 0x65, 0x6d, 0x70, 
0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x31, 0x36, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x70, 0x54, + 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x20, 0x3d, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, + 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x5f, 0x4d, 0x41, 0x58, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, + 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, + 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, 0x3c, 0x20, 0x63, 0x45, 0x54, 0x43, 0x31, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x4d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, 0x72, 0x56, + 0x61, 0x6c, 0x75, 0x65, 0x73, 0x3b, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x2f, 0x2f, 0x20, + 0x54, 0x4f, 0x44, 0x4f, 0x3a, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x20, 0x69, 0x73, 0x20, 0x65, 0x71, 0x75, 0x69, 0x76, 0x61, 0x6c, 0x65, 0x6e, 0x74, 0x20, 0x74, + 0x6f, 0x20, 0x6d, 0x65, 0x64, 0x69, 0x75, 0x6d, 0x20, 0x71, 0x75, 0x61, 0x6c, 0x69, 0x74, 0x79, 0x20, 0x69, 0x6e, 0x20, 
0x74, 0x68, 0x65, 0x20, 0x43, 0x2b, 0x2b, 0x20, 0x76, 0x65, 0x72, 0x73, + 0x69, 0x6f, 0x6e, 0x2e, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67, 0x5f, 0x65, 0x76, 0x61, 0x6c, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5b, + 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5d, 0x5b, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x6d, 0x61, 0x78, 0x5f, 0x63, 0x6f, 0x6d, 0x70, 0x5f, + 0x73, 0x70, 0x72, 0x65, 0x61, 0x64, 0x5d, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x69, 0x6e, 0x75, 0x65, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x61, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x2a, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, 0x3d, 0x20, 0x67, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5f, + 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5b, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x34, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x66, + 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x73, 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x73, 0x2b, 0x2b, 0x29, 0x0d, + 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x79, 0x64, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x73, + 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x73, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, + 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, 0x2b, 0x20, 
0x79, 0x64, + 0x29, 0x2c, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x79, 0x64, 0x29, 0x2c, 0x20, + 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x32, 0x35, 0x35, 0x28, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x79, 0x64, 0x29, 0x2c, 0x20, 0x32, 0x35, 0x35, + 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x63, 0x20, + 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x63, 0x20, 0x3c, 0x20, 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x3b, 0x20, 0x63, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x20, 0x3d, 0x20, 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, + 0x73, 0x5b, 0x63, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x33, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2d, 0x3e, + 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x30, 
0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, + 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x50, + 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2d, 0x3e, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x2c, 0x20, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x31, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, + 0x28, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7b, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x32, 0x3b, 0x0d, 0x0a, 0x09, + 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, + 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2d, 0x3e, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x73, 0x72, + 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x32, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 
0x72, 0x72, 0x6f, 0x72, 0x20, 0x3c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x69, 0x61, + 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, + 0x78, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, + 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2d, 0x3e, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, + 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x33, + 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3c, + 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, + 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x31, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, + 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x20, 0x3c, 0x3d, 0x20, 0x31, 0x36, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x74, 0x65, 0x6d, 0x70, 
0x5f, 0x73, 0x65, 0x6c, 0x65, + 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x63, 0x5d, 0x20, 0x3d, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x29, 0x28, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x2b, 0x3d, 0x20, + 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x20, 0x3f, 0x20, 0x28, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x2a, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, + 0x5f, 0x74, 0x29, 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5b, 0x63, 0x5d, 0x29, 0x20, 0x3a, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, + 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3e, 0x3d, 0x20, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, + 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3c, 0x20, 0x70, 0x54, 0x72, 0x69, + 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x70, + 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x63, 0x6f, + 0x6f, 0x72, 0x64, 0x73, 
0x2e, 0x6d, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, 0x3d, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x20, 0x3c, 0x3d, 0x20, 0x31, 0x36, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c, 0x20, + 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x3b, 0x20, 0x69, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, + 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x69, 0x5d, 0x20, 0x3d, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x5f, 0x73, 0x65, + 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x69, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x75, 0x65, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, + 0x0a, 0x09, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, + 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x62, 0x6f, 0x6f, 0x6c, 0x20, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x20, 0x3d, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, + 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x70, 0x42, 0x65, 0x73, 0x74, 0x5f, 
0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x69, + 0x66, 0x20, 0x28, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3c, 0x20, 0x70, 0x42, + 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x3e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x2a, 0x70, 0x42, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x3d, 0x20, 0x2a, 0x70, 0x54, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, + 0x6f, 0x6e, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x75, 0x65, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x7d, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, + 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x69, 0x74, 0x28, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, + 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x20, 0x2a, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, + 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 
0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, 0x50, + 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, + 0x2a, 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x20, + 0x3d, 0x20, 0x33, 0x31, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x6d, 0x69, 0x6e, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x20, 0x3d, 0x20, 0x32, 0x35, 0x35, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x6d, 0x61, 0x78, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x3d, + 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x20, 0x3d, 0x20, 0x30, 0x3b, + 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x73, 0x75, 0x6d, 0x5f, 0x72, 0x20, 0x3d, 0x20, 0x30, 0x2c, 0x20, 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x20, 0x3d, 0x20, 0x30, + 0x2c, 0x20, 0x73, 0x75, 0x6d, 0x5f, 0x62, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, + 0x5f, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c, 0x20, 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x3b, 0x20, 0x69, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, + 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x63, 0x20, 0x3d, 0x20, 0x70, 0x50, 0x69, 0x78, 0x65, + 0x6c, 0x73, 0x5b, 0x69, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x6d, 0x69, 0x6e, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x6d, 0x69, 0x6e, 0x28, 0x6d, 0x69, 0x6e, 0x5f, + 0x63, 
0x6f, 0x6c, 0x6f, 0x72, 0x2c, 0x20, 0x63, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x6d, 0x61, 0x78, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x6d, 0x61, 0x78, 0x28, 0x6d, 0x61, + 0x78, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2c, 0x20, 0x63, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x29, 0x0d, + 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x20, 0x3d, 0x20, 0x70, 0x57, 0x65, 0x69, 0x67, + 0x68, 0x74, 0x73, 0x5b, 0x69, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x73, 0x75, 0x6d, 0x5f, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x20, 0x2a, 0x20, + 0x63, 0x2e, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x20, 0x2b, 0x3d, 0x20, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x20, 0x2a, 0x20, 0x63, 0x2e, 0x79, 0x3b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x73, 0x75, 0x6d, 0x5f, 0x62, 0x20, 0x2b, 0x3d, 0x20, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x20, 0x2a, 0x20, 0x63, 0x2e, 0x7a, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, + 0x0a, 0x09, 0x09, 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x73, 0x75, 0x6d, 0x5f, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x2e, 0x78, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x09, 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x2e, 0x79, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x73, 0x75, 0x6d, 0x5f, 0x62, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x2e, + 0x7a, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x2b, 0x2b, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, + 0x7d, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x66, 
0x6c, 0x6f, 0x61, 0x74, 0x33, 0x20, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x76, + 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, 0x3d, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x73, 0x75, 0x6d, 0x5f, 0x72, 0x20, 0x2f, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x5f, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, 0x3d, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, + 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x20, 0x2f, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x3b, 0x0d, 0x0a, 0x09, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x2e, 0x7a, 0x20, 0x3d, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x73, 0x75, 0x6d, 0x5f, 0x62, 0x20, 0x2f, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x77, 0x65, 0x69, 0x67, 0x68, + 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x61, 0x76, 0x67, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x6d, 0x61, 0x78, 0x5f, 0x63, 0x6f, 0x6d, 0x70, 0x5f, 0x73, 0x70, 0x72, + 0x65, 0x61, 0x64, 0x20, 0x3d, 0x20, 0x6d, 0x61, 0x78, 0x28, 0x6d, 0x61, 0x78, 0x28, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x6d, 0x61, 0x78, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, 0x2d, + 0x20, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x6d, 0x69, 0x6e, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x2c, 0x20, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x6d, 0x61, 0x78, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x2e, 0x79, 0x20, 0x2d, 0x20, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x6d, 0x69, 0x6e, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x29, 0x2c, 0x20, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x6d, 0x61, + 0x78, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x20, 0x2d, 0x20, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x6d, 0x69, 0x6e, 0x5f, 
0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x29, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x2f, 0x2f, 0x20, 0x54, 0x4f, 0x44, 0x4f, 0x3a, 0x20, 0x54, 0x68, 0x65, 0x20, 0x72, 0x6f, 0x75, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x20, 0x68, 0x65, 0x72, 0x65, 0x20, + 0x63, 0x6f, 0x75, 0x6c, 0x64, 0x20, 0x62, 0x65, 0x20, 0x69, 0x6d, 0x70, 0x72, 0x6f, 0x76, 0x65, 0x64, 0x2c, 0x20, 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x44, 0x58, 0x54, + 0x31, 0x2f, 0x42, 0x43, 0x31, 0x2e, 0x0d, 0x0a, 0x09, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x28, 0x69, + 0x6e, 0x74, 0x29, 0x28, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, 0x2a, 0x20, 0x28, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x20, 0x2f, 0x20, 0x32, 0x35, 0x35, 0x2e, 0x30, + 0x66, 0x29, 0x20, 0x2b, 0x20, 0x2e, 0x35, 0x66, 0x29, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, + 0x6d, 0x5f, 0x62, 0x67, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, 0x2a, + 0x20, 0x28, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x20, 0x2f, 0x20, 0x32, 0x35, 0x35, 0x2e, 0x30, 0x66, 0x29, 0x20, 0x2b, 0x20, 0x2e, 0x35, 0x66, 0x29, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x4c, 0x49, 0x4d, + 0x49, 0x54, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x62, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x28, 0x69, 0x6e, 0x74, + 0x29, 0x28, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x20, 0x2a, 0x20, 0x28, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x20, 0x2f, 0x20, 0x32, 0x35, 0x35, 0x2e, 0x30, 0x66, 0x29, + 0x20, 0x2b, 0x20, 0x2e, 0x35, 0x66, 0x29, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 
0x3e, + 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x20, 0x3d, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x3b, + 0x0d, 0x0a, 0x09, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x55, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x5f, 0x4d, 0x41, 0x58, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x74, 0x63, 0x31, + 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x66, 0x69, 0x74, + 0x28, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x65, 0x72, 0x6d, 0x73, 0x5f, 0x74, 0x6f, 0x5f, 0x74, 0x72, 0x79, 0x2c, + 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x20, 0x2a, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, + 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x70, 0x61, + 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, + 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, + 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x2a, 0x70, 
0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, + 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x20, 0x3d, 0x20, 0x33, 0x31, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, + 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, + 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x74, 0x65, 0x73, 0x20, 0x63, 0x75, 0x72, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x75, 0x72, 0x5f, 0x63, 0x6f, + 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, + 0x67, 0x62, 0x61, 0x29, 0x28, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x72, 0x2c, 0x20, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x67, 0x2c, + 0x20, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x62, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, + 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x65, 0x76, 0x61, 0x6c, 0x75, 0x61, 0x74, 0x65, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2c, + 0x20, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x6e, 0x75, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x70, + 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x2c, 0x20, 0x63, 0x75, 0x72, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 
0x73, 0x2c, 0x20, 0x26, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x2c, 0x20, 0x26, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x3d, 0x20, 0x30, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, + 0x70, 0x65, 0x72, 0x6d, 0x73, 0x5f, 0x74, 0x6f, 0x5f, 0x74, 0x72, 0x79, 0x3b, 0x20, 0x69, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x64, 0x65, + 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x72, 0x20, 0x3d, 0x20, 0x30, 0x2c, 0x20, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x20, 0x3d, 0x20, 0x30, 0x2c, 0x20, + 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x62, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x20, 0x69, + 0x6e, 0x74, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x20, 0x3d, 0x20, 0x67, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, + 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5b, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, + 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 
0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x73, 0x63, 0x61, + 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x28, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x2e, 0x6d, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x4e, 0x75, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x73, 0x20, 0x3d, 0x20, 0x67, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x66, 0x69, 0x74, 0x5f, 0x6f, 0x72, 0x64, 0x65, 0x72, 0x5f, 0x74, 0x61, 0x62, 0x5b, 0x69, 0x5d, + 0x2e, 0x6d, 0x5f, 0x76, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x71, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, + 0x71, 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x71, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x79, + 0x64, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5b, 0x71, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, + 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x70, 0x4e, 0x75, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x71, 0x5d, + 0x20, 0x2a, 0x20, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, 0x2b, 0x20, 0x79, 0x64, 0x5f, 0x74, 0x65, 0x6d, 0x70, + 0x2c, 0x20, 0x30, 0x2c, 0x20, 
0x32, 0x35, 0x35, 0x29, 0x20, 0x2d, 0x20, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x64, + 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x20, 0x2b, 0x3d, 0x20, 0x70, 0x4e, 0x75, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x71, 0x5d, 0x20, + 0x2a, 0x20, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, 0x2b, 0x20, 0x79, 0x64, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x2c, + 0x20, 0x30, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x20, 0x2d, 0x20, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x64, 0x65, + 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x62, 0x20, 0x2b, 0x3d, 0x20, 0x70, 0x4e, 0x75, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x71, 0x5d, 0x20, 0x2a, + 0x20, 0x28, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x20, 0x2b, 0x20, 0x79, 0x64, 0x5f, 0x74, 0x65, 0x6d, 0x70, 0x2c, 0x20, + 0x30, 0x2c, 0x20, 0x32, 0x35, 0x35, 0x29, 0x20, 0x2d, 0x20, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, + 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x28, 0x21, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x72, 0x29, 0x20, 0x26, 0x26, 0x20, 0x28, 0x21, 0x64, 0x65, 0x6c, 0x74, 0x61, + 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x29, 0x20, 0x26, 0x26, 0x20, 0x28, 0x21, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x62, 0x29, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, + 0x6f, 0x6e, 0x74, 0x69, 0x6e, 0x75, 0x65, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x61, 0x76, 0x67, 0x5f, 0x64, 0x65, + 0x6c, 0x74, 0x61, 0x5f, 0x72, 0x5f, 0x66, 0x20, 0x3d, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 
0x74, 0x29, 0x28, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x72, 0x29, 0x20, 0x2f, + 0x20, 0x38, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x61, 0x76, 0x67, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x67, 0x5f, 0x66, + 0x20, 0x3d, 0x20, 0x28, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x29, 0x28, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x67, 0x29, 0x20, 0x2f, 0x20, 0x38, 0x3b, 0x0d, 0x0a, 0x09, 0x09, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x20, 0x61, 0x76, 0x67, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x62, 0x5f, 0x66, 0x20, 0x3d, 0x20, 0x28, 0x66, 0x6c, 0x6f, + 0x61, 0x74, 0x29, 0x28, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x73, 0x75, 0x6d, 0x5f, 0x62, 0x29, 0x20, 0x2f, 0x20, 0x38, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, + 0x20, 0x69, 0x6e, 0x74, 0x20, 0x62, 0x72, 0x31, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x28, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, + 0x6d, 0x5f, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x78, 0x20, 0x2d, 0x20, 0x61, 0x76, 0x67, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x72, 0x5f, 0x66, 0x29, 0x20, 0x2a, + 0x20, 0x28, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x20, 0x2f, 0x20, 0x32, 0x35, 0x35, 0x2e, 0x30, 0x66, 0x29, 0x20, 0x2b, 0x20, 0x2e, 0x35, 0x66, 0x29, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x4c, 0x49, 0x4d, + 0x49, 0x54, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x62, 0x67, 0x31, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x28, 0x69, + 0x6e, 0x74, 0x29, 0x28, 0x28, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x61, 0x76, 0x67, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x79, 0x20, 0x2d, 0x20, 0x61, 0x76, 0x67, + 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x67, 0x5f, 0x66, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x20, 0x2f, 0x20, 0x32, 0x35, 
0x35, 0x2e, 0x30, 0x66, 0x29, 0x20, 0x2b, + 0x20, 0x2e, 0x35, 0x66, 0x29, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x62, + 0x62, 0x31, 0x20, 0x3d, 0x20, 0x63, 0x6c, 0x61, 0x6d, 0x70, 0x28, 0x28, 0x69, 0x6e, 0x74, 0x29, 0x28, 0x28, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x61, 0x76, 0x67, 0x5f, + 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2e, 0x7a, 0x20, 0x2d, 0x20, 0x61, 0x76, 0x67, 0x5f, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x5f, 0x62, 0x5f, 0x66, 0x29, 0x20, 0x2a, 0x20, 0x28, 0x4c, 0x49, 0x4d, 0x49, + 0x54, 0x20, 0x2f, 0x20, 0x32, 0x35, 0x35, 0x2e, 0x30, 0x66, 0x29, 0x20, 0x2b, 0x20, 0x2e, 0x35, 0x66, 0x29, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x29, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x75, 0x72, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x20, 0x3d, 0x20, 0x28, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x29, 0x28, 0x62, 0x72, 0x31, 0x2c, 0x20, 0x62, 0x67, 0x31, 0x2c, 0x20, 0x62, 0x62, 0x31, 0x2c, 0x20, + 0x32, 0x35, 0x35, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x65, 0x76, 0x61, 0x6c, 0x75, + 0x61, 0x74, 0x65, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x2c, 0x20, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x6e, 0x75, + 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x2c, 0x20, 0x63, 0x75, 0x72, + 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2c, 0x20, 0x26, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2c, 0x20, 0x26, 0x70, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x2d, 
0x3e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x70, + 0x53, 0x74, 0x61, 0x74, 0x65, 0x2d, 0x3e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, + 0x3d, 0x20, 0x30, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x45, 0x6e, 0x63, + 0x6f, 0x64, 0x65, 0x20, 0x61, 0x6e, 0x20, 0x45, 0x54, 0x43, 0x31, 0x53, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x67, 0x69, 0x76, 0x65, 0x6e, 0x20, 0x61, 0x20, 0x34, 0x78, 0x34, 0x20, 0x70, + 0x69, 0x78, 0x65, 0x6c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x2e, 0x0d, 0x0a, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x5f, + 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x28, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, + 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x50, 0x61, 0x72, 0x61, + 0x6d, 0x73, 0x2c, 0x20, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x2c, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x65, + 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 
0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, + 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, + 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x3d, 0x20, 0x26, 0x70, 0x49, 0x6e, + 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, + 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, + 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x69, 0x74, 0x28, 0x26, 0x73, 0x74, 0x61, 0x74, 0x65, 0x2c, 0x20, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, + 0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x4e, 0x55, + 0x4c, 0x4c, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, + 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x66, 0x69, 0x74, 0x28, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2d, 0x3e, 0x6d, 0x5f, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x65, 0x72, + 0x6d, 0x73, 0x2c, 0x20, 0x26, 0x73, 0x74, 0x61, 0x74, 0x65, 0x2c, 0x20, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x2d, 0x3e, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x4e, 0x55, 0x4c, 0x4c, 0x29, 
0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x62, 0x6c, 0x6b, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x66, 0x6c, 0x69, 0x70, + 0x5f, 0x62, 0x69, 0x74, 0x28, 0x26, 0x62, 0x6c, 0x6b, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, + 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x28, 0x26, 0x62, 0x6c, 0x6b, 0x2c, 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, + 0x2e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, + 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, + 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x28, 0x26, 0x62, 0x6c, 0x6b, 0x2c, 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, 0x2e, 0x6d, 0x5f, 0x62, 0x65, + 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, + 0x6c, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x5f, 0x72, 0x61, 0x77, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x73, 0x28, 0x26, 0x62, 0x6c, 0x6b, 0x2c, 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, 0x2e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, + 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 
+ 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x62, 0x6c, 0x6b, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, + 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, + 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, + 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, + 0x36, 0x34, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x7d, 0x20, 0x70, 0x69, 0x78, + 0x65, 0x6c, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x44, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x20, 0x74, 0x68, 0x65, 0x20, + 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x61, 0x6c, 0x20, 0x45, 0x54, 0x43, 0x31, 0x53, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x2f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x79, 0x20, 0x67, + 0x69, 0x76, 0x65, 0x6e, 0x20, 0x61, 0x6e, 0x20, 0x61, 0x72, 0x62, 0x69, 0x74, 0x72, 0x61, 0x72, 0x79, 0x20, 0x6c, 0x61, 0x72, 0x67, 0x65, 0x20, 0x61, 0x72, 0x72, 0x61, 0x79, 0x20, 0x6f, 0x66, + 0x20, 0x34, 0x78, 0x34, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x2e, 0x0d, 0x0a, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, + 0x20, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x66, 0x72, 0x6f, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6c, + 0x75, 0x73, 0x74, 0x65, 0x72, 0x28, 0x0d, 0x0a, 0x20, 
0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x5f, + 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x0d, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x20, 0x2a, 0x70, + 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, + 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x0d, + 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, + 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x2c, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, + 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x20, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, + 0x64, 0x28, 0x30, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6c, + 0x75, 0x73, 0x74, 0x65, 0x72, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 
0x65, 0x72, 0x20, 0x3d, 0x20, 0x26, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, + 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x5b, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, + 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x2d, 0x3e, 0x6d, 0x5f, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x20, 0x3d, 0x20, + 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x2d, 0x3e, + 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, + 0x62, 0x61, 0x6c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, + 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x20, 0x2b, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x2d, 0x3e, 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, + 0x74, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, + 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 
0x69, 0x7a, 0x65, + 0x72, 0x5f, 0x69, 0x6e, 0x69, 0x74, 0x28, 0x26, 0x73, 0x74, 0x61, 0x74, 0x65, 0x2c, 0x20, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x69, + 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x31, + 0x73, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x66, 0x69, 0x74, + 0x28, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2d, 0x3e, 0x6d, 0x5f, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x65, 0x72, 0x6d, 0x73, 0x2c, 0x20, 0x26, 0x73, 0x74, 0x61, 0x74, 0x65, 0x2c, + 0x20, 0x70, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, 0x20, 0x70, 0x50, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x2c, + 0x20, 0x70, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x62, 0x6c, 0x6b, 0x3b, 0x0d, + 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x66, 0x6c, 0x69, 0x70, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x26, 0x62, 0x6c, 0x6b, 0x2c, 0x20, 0x74, + 0x72, 0x75, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x35, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x28, 0x26, 0x62, 0x6c, 0x6b, 0x2c, 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, 0x2e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6d, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 
0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5f, 0x65, 0x74, + 0x63, 0x31, 0x73, 0x28, 0x26, 0x62, 0x6c, 0x6b, 0x2c, 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, 0x2e, 0x6d, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, + 0x6d, 0x5f, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x73, 0x2e, 0x6d, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x5b, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, + 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x62, 0x6c, 0x6b, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x2d, 0x2d, 0x2d, 0x2d, 0x20, 0x72, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x5f, 0x65, + 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, + 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, + 0x72, 0x65, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, + 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x6f, 0x66, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, + 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x6d, 0x5f, + 0x63, 0x75, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 
0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x63, + 0x75, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x3b, 0x0d, 0x0a, 0x7d, 0x20, 0x72, 0x65, 0x63, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x5f, 0x5f, 0x61, + 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, 0x72, 0x65, 0x63, 0x5f, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, + 0x6e, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, + 0x74, 0x20, 0x6d, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x31, 0x36, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x63, 0x6c, 0x75, 0x73, + 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x7d, 0x20, 0x72, 0x65, 0x63, 0x5f, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, + 0x65, 0x72, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x5f, 0x5f, 0x61, + 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, 0x72, 0x65, 0x63, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, + 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 
0x5f, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x3b, 0x0d, 0x0a, 0x7d, 0x20, + 0x72, 0x65, 0x63, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x46, 0x6f, 0x72, 0x20, 0x65, 0x61, 0x63, 0x68, + 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3a, 0x20, 0x66, 0x69, 0x6e, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x62, 0x65, 0x73, 0x74, 0x20, 0x65, 0x6e, 0x64, 0x70, + 0x6f, 0x69, 0x6e, 0x74, 0x20, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x65, 0x73, 0x20, 0x69, 0x74, 0x2e, 0x0d, 0x0a, 0x6b, + 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x72, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x5f, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, + 0x65, 0x72, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x72, 0x65, 0x63, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, + 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, + 0x6c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x2c, 0x0d, 0x0a, 0x09, + 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x72, 0x65, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, + 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x20, 0x72, 0x65, 
0x63, 0x5f, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x49, + 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x53, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x2c, 0x0d, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x69, 0x6e, 0x64, + 0x69, 0x63, 0x65, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x6f, 0x72, 0x74, 0x65, 0x64, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, 0x29, 0x3b, + 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, + 0x70, 0x53, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x5b, 0x73, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x20, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, + 0x6c, 0x20, 0x3d, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x70, 0x69, 0x78, 
0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x20, 0x3d, 0x20, 0x26, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, + 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x70, 0x72, 0x69, 0x76, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, + 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3b, 0x0d, 0x0a, 0x09, 0x70, 0x72, 0x69, 0x76, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x3d, 0x20, 0x2a, 0x70, 0x49, + 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x66, + 0x69, 0x72, 0x73, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x6f, 0x66, 0x73, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x69, 0x6e, 0x66, 0x6f, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x5f, 0x6f, 0x66, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6e, 0x75, 0x6d, 0x5f, 0x63, 0x6c, 0x75, 0x73, + 0x74, 0x65, 0x72, 0x73, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, + 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x63, 0x75, 0x72, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 
0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, + 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x2e, + 0x6d, 0x5f, 0x63, 0x75, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x63, 0x75, 0x72, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, 0x74, 0x65, + 0x6e, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, + 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x63, 0x75, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, + 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x61, 0x6c, 0x6c, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x55, 0x49, 0x4e, + 0x54, 0x36, 0x34, 0x5f, 0x4d, 0x41, 0x58, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, + 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x20, + 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c, 0x20, 0x6e, 0x75, 0x6d, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x3b, 0x20, 0x69, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, + 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, + 
0x3d, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x6f, 0x66, 0x73, 0x20, 0x2b, 0x20, 0x69, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x63, + 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x5b, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, + 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x5f, 0x74, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, + 0x74, 0x65, 0x6e, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x5b, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, + 0x64, 0x65, 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x31, + 0x36, 0x5f, 0x74, 0x20, 0x6f, 0x72, 0x69, 0x67, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, + 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x5b, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x20, 0x3e, 0x20, 0x63, 0x75, + 0x72, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x63, + 0x6f, 0x6e, 0x74, 0x69, 0x6e, 0x75, 0x65, 0x3b, 0x0d, 0x0a, 
0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x34, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x35, 0x28, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x2c, 0x20, 0x26, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2c, 0x20, 0x65, 0x74, + 0x63, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x74, + 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x63, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x63, 0x20, 0x3c, 0x20, 0x31, 0x36, 0x3b, 0x20, 0x63, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, + 0x0a, 0x09, 0x09, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x20, 0x3d, 0x20, 0x70, 0x72, 0x69, 0x76, 0x5f, + 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x2e, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x5b, 0x63, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x75, + 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, + 0x6e, 0x63, 0x65, 0x28, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, + 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x30, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 
0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, + 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x73, 0x5b, 0x31, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, + 0x72, 0x6f, 0x72, 0x20, 0x3c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, + 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x32, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, + 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, + 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, + 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 
0x5f, 0x64, + 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x73, 0x72, 0x63, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x2c, 0x20, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x33, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, + 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, + 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3b, + 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x20, 0x28, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3c, 0x20, 0x6f, + 0x76, 0x65, 0x72, 0x61, 0x6c, 0x6c, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x29, 0x20, 0x7c, 0x7c, 0x0d, 0x0a, 0x09, 0x09, 0x20, 0x20, 0x20, 0x20, 0x20, 0x28, 0x28, 0x6f, 0x72, + 0x69, 0x67, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x3d, 0x20, 0x63, 0x75, 0x72, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6c, + 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x29, 0x20, 0x26, 0x26, 0x20, 0x28, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x3d, 0x20, + 0x6f, 0x76, 0x65, 0x72, 0x61, 0x6c, 0x6c, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x29, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, + 0x09, 0x09, 0x6f, 0x76, 0x65, 0x72, 0x61, 
0x6c, 0x6c, 0x5f, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, + 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x6f, 0x72, 0x69, 0x67, 0x5f, + 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x21, 0x6f, 0x76, 0x65, 0x72, 0x61, 0x6c, 0x6c, 0x5f, + 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, + 0x3d, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x2d, + 0x2d, 0x2d, 0x2d, 0x20, 0x66, 0x69, 0x6e, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x61, 0x6c, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x73, 0x5f, 0x66, 0x6f, 0x72, 0x5f, 0x65, 0x61, 0x63, 0x68, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, 0x66, 0x6f, 0x73, + 0x63, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x73, 0x65, 0x6c, 0x65, 
0x63, 0x74, 0x6f, 0x72, 0x73, 0x3b, 0x09, 0x2f, 0x2f, 0x20, 0x34, 0x78, 0x34, 0x20, 0x67, + 0x72, 0x69, 0x64, 0x20, 0x6f, 0x66, 0x20, 0x32, 0x2d, 0x62, 0x69, 0x74, 0x20, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x0d, 0x0a, 0x7d, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x73, + 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, + 0x74, 0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, + 0x20, 0x6d, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x3b, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x75, 0x6e, 0x73, 0x63, 0x61, 0x6c, 0x65, + 0x64, 0x20, 0x35, 0x2d, 0x62, 0x69, 0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x20, 0x52, 0x47, 0x42, 0x2c, 0x20, 0x61, 0x6c, 0x70, 0x68, + 0x61, 0x20, 0x68, 0x61, 0x73, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x27, 0x73, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x79, 0x20, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x0d, 0x0a, 0x09, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x3b, 0x09, 0x09, 0x2f, 0x2f, 0x20, 0x6f, + 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x6f, 0x20, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x3b, 0x09, 0x09, 0x2f, 0x2f, 0x20, 0x6e, 
0x75, 0x6d, 0x62, 0x65, 0x72, + 0x20, 0x6f, 0x66, 0x20, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x20, 0x74, 0x6f, 0x20, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x0d, 0x0a, 0x7d, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x62, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x5f, + 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x70, 0x61, 0x72, + 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6d, 0x5f, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x3b, 0x0d, + 0x0a, 0x7d, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, 0x46, 0x6f, 0x72, 0x20, + 0x65, 0x61, 0x63, 0x68, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3a, 0x20, 0x46, 0x69, 0x6e, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x71, 0x75, 0x61, 0x6e, 0x74, + 0x69, 0x7a, 0x65, 0x64, 0x20, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, 0x77, 0x68, 0x69, 0x63, 0x68, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x20, 0x69, 0x6e, 0x20, 0x74, + 0x68, 0x65, 0x20, 0x6c, 0x6f, 0x77, 0x65, 0x73, 0x74, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x0d, 0x0a, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x66, 0x69, + 0x6e, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x61, 0x6c, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x73, 0x5f, 0x66, 0x6f, 0x72, + 0x5f, 0x65, 0x61, 0x63, 
0x68, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x28, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x70, 0x61, 0x72, + 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, + 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x2c, + 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, + 0x74, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, + 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, + 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x2c, + 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x2a, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x73, + 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 
0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, + 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, + 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x3b, 0x0d, + 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x20, 0x3d, 0x20, 0x26, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, + 0x66, 0x6f, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, + 0x61, 0x6c, 0x20, 0x66, 0x6f, 0x73, 0x63, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x2a, 0x70, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x73, 0x20, 0x3d, 0x20, 0x26, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, + 0x6e, 0x66, 0x6f, 0x2d, 0x3e, 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x6e, 0x75, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x20, 0x3d, 
0x20, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x69, 0x6e, 0x66, 0x6f, 0x2d, 0x3e, 0x6d, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x34, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x20, 0x3d, 0x20, 0x70, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x2d, 0x3e, 0x6d, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x3b, 0x0d, 0x0a, 0x09, + 0x67, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x35, 0x28, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x73, 0x2c, 0x20, 0x26, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x2e, 0x77, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x5b, 0x34, 0x5d, 0x5b, 0x31, 0x36, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x69, 0x66, 0x20, 0x28, 0x70, + 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, + 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x65, 0x6c, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x73, 0x65, 0x6c, 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x2b, 0x2b, 0x73, 0x65, 0x6c, + 0x29, 
0x0d, 0x0a, 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c, 0x20, 0x31, + 0x36, 0x3b, 0x20, 0x2b, 0x2b, 0x69, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x5b, 0x73, 0x65, 0x6c, 0x5d, 0x5b, 0x69, + 0x5d, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x74, 0x72, 0x75, 0x65, 0x2c, 0x20, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, + 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x5b, 0x69, 0x5d, 0x2c, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x73, 0x65, + 0x6c, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x65, 0x6c, 0x73, 0x65, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x66, 0x6f, + 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x65, 0x6c, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x73, 0x65, 0x6c, 0x20, 0x3c, 0x20, 0x34, 0x3b, 0x20, 0x2b, 0x2b, + 0x73, 0x65, 0x6c, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, + 0x3c, 0x20, 0x31, 0x36, 0x3b, 0x20, 0x2b, 0x2b, 0x69, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x5b, 0x73, 0x65, 0x6c, + 0x5d, 0x5b, 0x69, 0x5d, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x2c, 0x20, 0x70, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x5b, 0x69, 0x5d, 0x2c, 0x20, 0x74, 0x72, 0x69, 0x61, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x73, 0x5b, 0x73, 0x65, 0x6c, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 
0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, + 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x55, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x5f, 0x4d, 0x41, 0x58, 0x3b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, + 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, + 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x65, 0x6c, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x73, 0x65, 0x6c, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3c, 0x20, + 0x6e, 0x75, 0x6d, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x3b, 0x20, 0x73, 0x65, 0x6c, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, + 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x73, 0x65, 0x6c, 0x73, 0x20, 0x3d, 0x20, 0x70, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x5b, 0x73, 0x65, + 0x6c, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x2e, 0x6d, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x09, + 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x5f, 0x74, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x66, + 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c, 0x20, 0x31, 0x36, 0x3b, 0x20, 0x69, 0x2b, 0x2b, 0x2c, + 0x20, 0x73, 0x65, 0x6c, 0x73, 0x20, 0x3e, 0x3e, 0x3d, 0x20, 0x32, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x2b, 0x3d, 0x20, 0x74, 0x72, + 0x69, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x5b, 0x73, 0x65, 0x6c, 0x73, 0x20, 0x26, 0x20, 0x33, 0x5d, 0x5b, 
0x69, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x69, 0x66, + 0x20, 0x28, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, + 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x72, 0x72, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, + 0x69, 0x6e, 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x73, 0x65, 0x6c, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x69, 0x66, 0x20, 0x28, 0x21, 0x62, 0x65, + 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x29, 0x0d, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x7d, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, + 0x73, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x70, 0x53, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6c, 0x75, 0x73, 0x74, + 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x63, 0x65, 0x73, 0x5b, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x2d, 0x3e, 0x6d, 0x5f, 0x66, 0x69, 0x72, 0x73, 0x74, 0x5f, + 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x20, 0x2b, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, + 0x20, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x0d, 0x0a, 0x0d, 0x0a, 0x74, 0x79, 0x70, 0x65, 0x64, 0x65, 0x66, 0x20, + 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x5f, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x5f, 0x20, 0x28, 0x28, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x29, 0x29, 
0x20, + 0x64, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x74, 0x61, 0x67, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, + 0x74, 0x20, 0x6d, 0x5f, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x3b, 0x0d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x20, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, + 0x74, 0x75, 0x61, 0x6c, 0x3b, 0x0d, 0x0a, 0x7d, 0x20, 0x64, 0x73, 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x2f, 0x2f, 0x20, + 0x46, 0x6f, 0x72, 0x20, 0x65, 0x61, 0x63, 0x68, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3a, 0x20, 0x44, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x20, + 0x74, 0x68, 0x65, 0x20, 0x45, 0x54, 0x43, 0x31, 0x53, 0x20, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x20, + 0x69, 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6c, 0x6f, 0x77, 0x65, 0x73, 0x74, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2c, 0x20, 0x67, 0x69, 0x76, 0x65, 0x6e, 0x20, 0x65, 0x61, 0x63, 0x68, 0x20, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x27, 0x73, 0x20, 0x70, 0x72, 0x65, 0x64, 0x65, 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x64, 0x20, 0x45, 0x54, 0x43, 0x31, 0x53, 0x20, 0x63, 0x6f, 0x6c, 0x6f, + 0x72, 0x35, 0x2f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x69, 0x65, 0x73, 0x2e, 0x20, 0x0d, 0x0a, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x20, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x64, 0x65, + 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x65, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x28, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x64, 0x73, + 0x5f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x5f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 
0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x73, 0x2c, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, + 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x61, 0x6e, 0x64, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x2c, 0x0d, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x2a, 0x70, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, + 0x6b, 0x73, 0x29, 0x0d, 0x0a, 0x7b, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, + 0x64, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x67, 0x65, 0x74, 0x5f, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x28, 0x30, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6e, + 0x73, 0x74, 0x20, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x2a, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x69, 0x78, + 0x65, 0x6c, 0x73, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, + 0x2e, 0x6d, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x20, 0x3d, 0x20, 0x70, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x61, + 0x6e, 0x64, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 
0x65, 0x78, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, + 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x34, 0x5d, 0x3b, 0x0d, 0x0a, 0x09, 0x67, 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x35, 0x28, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x2c, 0x20, 0x26, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, + 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x2e, 0x77, 0x2c, 0x20, + 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x20, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, + 0x63, 0x6b, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x66, 0x6c, 0x69, 0x70, 0x5f, 0x62, 0x69, 0x74, 0x28, 0x26, 0x6f, 0x75, + 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x74, 0x72, 0x75, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, + 0x65, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x28, 0x26, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, + 0x6f, 0x63, 0x6b, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x5f, 0x65, 0x74, 0x63, 0x31, 0x73, 0x28, 0x26, 0x6f, 0x75, 0x74, 0x70, + 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x65, 0x74, 0x63, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x35, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x2e, 0x77, 
0x29, 0x3b, 0x0d, 0x0a, + 0x0d, 0x0a, 0x09, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x69, 0x20, 0x3d, 0x20, 0x30, 0x3b, 0x20, 0x69, 0x20, 0x3c, 0x20, 0x31, 0x36, 0x3b, 0x20, + 0x69, 0x2b, 0x2b, 0x29, 0x0d, 0x0a, 0x09, 0x7b, 0x0d, 0x0a, 0x09, 0x09, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x72, 0x67, 0x62, 0x61, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x70, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x73, 0x5b, 0x69, 0x5d, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, + 0x20, 0x65, 0x72, 0x72, 0x30, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x6d, 0x5f, + 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, + 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x30, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x20, 0x65, 0x72, 0x72, 0x31, 0x20, 0x3d, + 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, + 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x31, + 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x20, 0x65, 0x72, 0x72, 0x32, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, + 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x70, 0x69, + 0x78, 0x65, 0x6c, 0x5f, 0x63, 
0x6f, 0x6c, 0x6f, 0x72, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x32, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, + 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x20, 0x65, 0x72, 0x72, 0x33, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x5f, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, + 0x65, 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x6d, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x70, 0x74, 0x75, 0x61, 0x6c, 0x2c, 0x20, 0x70, 0x69, 0x78, 0x65, 0x6c, 0x5f, 0x63, 0x6f, 0x6c, + 0x6f, 0x72, 0x2c, 0x20, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x73, 0x5b, 0x33, 0x5d, 0x2c, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, + 0x09, 0x09, 0x75, 0x69, 0x6e, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x20, 0x6d, 0x69, 0x6e, 0x28, 0x6d, 0x69, 0x6e, 0x28, 0x6d, 0x69, 0x6e, 0x28, 0x65, 0x72, + 0x72, 0x30, 0x2c, 0x20, 0x65, 0x72, 0x72, 0x31, 0x29, 0x2c, 0x20, 0x65, 0x72, 0x72, 0x32, 0x29, 0x2c, 0x20, 0x65, 0x72, 0x72, 0x33, 0x29, 0x3b, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x75, 0x69, + 0x6e, 0x74, 0x33, 0x32, 0x5f, 0x74, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x3d, 0x20, 0x65, + 0x72, 0x72, 0x32, 0x29, 0x20, 0x3f, 0x20, 0x32, 0x20, 0x3a, 0x20, 0x33, 0x3b, 0x0d, 0x0a, 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x65, 0x73, + 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x3d, 0x20, 0x65, 0x72, 0x72, 0x31, 0x29, 0x20, 0x3f, 0x20, 0x31, 0x20, 0x3a, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x3b, 0x0d, 0x0a, + 0x09, 0x09, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x20, 0x3d, 0x20, 0x28, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x65, 0x72, 0x72, 0x20, 0x3d, 0x3d, 0x20, 0x65, 0x72, 0x72, 0x30, 0x29, 0x20, + 0x3f, 0x20, 0x30, 0x20, 0x3a, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x3b, 
0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x09, 0x65, 0x74, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x73, + 0x65, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x28, 0x26, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x2c, 0x20, 0x69, 0x20, 0x26, 0x20, 0x33, + 0x2c, 0x20, 0x69, 0x20, 0x3e, 0x3e, 0x20, 0x32, 0x2c, 0x20, 0x62, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x6c, 0x29, 0x3b, 0x0d, 0x0a, 0x09, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a, 0x09, 0x70, 0x4f, 0x75, + 0x74, 0x70, 0x75, 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x5b, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5d, 0x20, 0x3d, 0x20, 0x6f, 0x75, 0x74, 0x70, 0x75, + 0x74, 0x5f, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x3b, 0x0d, 0x0a, 0x7d, 0x0d, 0x0a, 0x0d, 0x0a +}; +unsigned int ocl_kernels_cl_len = 45935; diff --git a/thirdparty/basisu/encoder/basisu_opencl.cpp b/thirdparty/basisu/encoder/basisu_opencl.cpp new file mode 100644 index 000000000..312e46b1a --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_opencl.cpp @@ -0,0 +1,1342 @@ +// basisu_opencl.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_opencl.h" + +// If 1, the kernel source code will come from encoders/ocl_kernels.h. Otherwise, it will be read from the "ocl_kernels.cl" file in the current directory (for development). 
+#define BASISU_USE_OCL_KERNELS_HEADER (1) +#define BASISU_OCL_KERNELS_FILENAME "ocl_kernels.cl" + +#if BASISU_SUPPORT_OPENCL + +#include "basisu_enc.h" + +// We only use OpenCL v1.2 or less. +#define CL_TARGET_OPENCL_VERSION 120 + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#ifndef BASISU_OPENCL_ASSERT_ON_ANY_ERRORS + #define BASISU_OPENCL_ASSERT_ON_ANY_ERRORS (0) +#endif + +namespace basisu +{ +#if BASISU_USE_OCL_KERNELS_HEADER +#include "basisu_ocl_kernels.h" +#endif + + static void ocl_error_printf(const char* pFmt, ...) + { + va_list args; + va_start(args, pFmt); + error_vprintf(pFmt, args); + va_end(args); + +#if BASISU_OPENCL_ASSERT_ON_ANY_ERRORS + assert(0); +#endif + } + + class ocl + { + public: + ocl() + { + memset(&m_dev_fp_config, 0, sizeof(m_dev_fp_config)); + + m_ocl_mutex.lock(); + m_ocl_mutex.unlock(); + } + + ~ocl() + { + } + + bool is_initialized() const { return m_device_id != nullptr; } + + cl_device_id get_device_id() const { return m_device_id; } + cl_context get_context() const { return m_context; } + cl_command_queue get_command_queue() { return m_command_queue; } + cl_program get_program() const { return m_program; } + + bool init(bool force_serialization) + { + deinit(); + + interval_timer tm; + tm.start(); + + cl_uint num_platforms = 0; + cl_int ret = clGetPlatformIDs(0, NULL, &num_platforms); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::init: clGetPlatformIDs() failed with %i\n", ret); + return false; + } + + if ((!num_platforms) || (num_platforms > INT_MAX)) + { + ocl_error_printf("ocl::init: clGetPlatformIDs() returned an invalid number of num_platforms\n"); + return false; + } + + std::vector platforms(num_platforms); + + ret = clGetPlatformIDs(num_platforms, platforms.data(), NULL); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::init: clGetPlatformIDs() failed\n"); + return false; + } + + cl_uint num_devices = 0; + ret = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 1, &m_device_id, &num_devices); + + 
if (ret == CL_DEVICE_NOT_FOUND) + { + ocl_error_printf("ocl::init: Couldn't get any GPU device ID's, trying CL_DEVICE_TYPE_CPU\n"); + + ret = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_CPU, 1, &m_device_id, &num_devices); + } + + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::init: Unable to get any device ID's\n"); + + m_device_id = nullptr; + return false; + } + + ret = clGetDeviceInfo(m_device_id, + CL_DEVICE_SINGLE_FP_CONFIG, + sizeof(m_dev_fp_config), + &m_dev_fp_config, + nullptr); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::init: clGetDeviceInfo() failed\n"); + return false; + } + + char plat_vers[256]; + size_t rv = 0; + ret = clGetPlatformInfo(platforms[0], CL_PLATFORM_VERSION, sizeof(plat_vers), plat_vers, &rv); + if (ret == CL_SUCCESS) + printf("OpenCL platform version: \"%s\"\n", plat_vers); + + // Serialize CL calls with the AMD driver to avoid lockups when multiple command queues per thread are used. This sucks, but what can we do? + m_use_mutex = (strstr(plat_vers, "AMD") != nullptr) || force_serialization; + + printf("Serializing OpenCL calls across threads: %u\n", (uint32_t)m_use_mutex); + + m_context = clCreateContext(nullptr, 1, &m_device_id, nullptr, nullptr, &ret); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::init: clCreateContext() failed\n"); + + m_device_id = nullptr; + m_context = nullptr; + return false; + } + + m_command_queue = clCreateCommandQueue(m_context, m_device_id, 0, &ret); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::init: clCreateCommandQueue() failed\n"); + + deinit(); + return false; + } + + printf("OpenCL init time: %3.3f secs\n", tm.get_elapsed_secs()); + + return true; + } + + bool deinit() + { + if (m_program) + { + clReleaseProgram(m_program); + m_program = nullptr; + } + + if (m_command_queue) + { + clReleaseCommandQueue(m_command_queue); + m_command_queue = nullptr; + } + + if (m_context) + { + clReleaseContext(m_context); + m_context = nullptr; + } + + m_device_id = nullptr; + + 
return true; + } + + cl_command_queue create_command_queue() + { + cl_serializer serializer(this); + + cl_int ret = 0; + cl_command_queue p = clCreateCommandQueue(m_context, m_device_id, 0, &ret); + if (ret != CL_SUCCESS) + return nullptr; + + return p; + } + + void destroy_command_queue(cl_command_queue p) + { + if (p) + { + cl_serializer serializer(this); + + clReleaseCommandQueue(p); + } + } + + bool init_program(const char* pSrc, size_t src_size) + { + cl_int ret; + + if (m_program != nullptr) + { + clReleaseProgram(m_program); + m_program = nullptr; + } + + m_program = clCreateProgramWithSource(m_context, 1, (const char**)&pSrc, (const size_t*)&src_size, &ret); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::init_program: clCreateProgramWithSource() failed!\n"); + return false; + } + + std::string options; + if (m_dev_fp_config & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) + { + options += "-cl-fp32-correctly-rounded-divide-sqrt"; + } + + options += " -cl-std=CL1.2"; + //options += " -cl-opt-disable"; + //options += " -cl-mad-enable"; + //options += " -cl-fast-relaxed-math"; + + ret = clBuildProgram(m_program, 1, &m_device_id, + options.size() ? 
options.c_str() : nullptr, // options + nullptr, // notify + nullptr); // user_data + + if (ret != CL_SUCCESS) + { + const cl_int build_program_result = ret; + + size_t ret_val_size; + ret = clGetProgramBuildInfo(m_program, m_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::init_program: clGetProgramBuildInfo() failed!\n"); + return false; + } + + std::vector build_log(ret_val_size + 1); + + ret = clGetProgramBuildInfo(m_program, m_device_id, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log.data(), NULL); + + ocl_error_printf("\nclBuildProgram() failed with error %i:\n%s", build_program_result, build_log.data()); + + return false; + } + + return true; + } + + cl_kernel create_kernel(const char* pName) + { + if (!m_program) + return nullptr; + + cl_serializer serializer(this); + + cl_int ret; + cl_kernel kernel = clCreateKernel(m_program, pName, &ret); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::create_kernel: clCreateKernel() failed!\n"); + return nullptr; + } + + return kernel; + } + + bool destroy_kernel(cl_kernel k) + { + if (k) + { + cl_serializer serializer(this); + + cl_int ret = clReleaseKernel(k); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::destroy_kernel: clReleaseKernel() failed!\n"); + return false; + } + } + return true; + } + + cl_mem alloc_read_buffer(size_t size) + { + cl_serializer serializer(this); + + cl_int ret; + cl_mem obj = clCreateBuffer(m_context, CL_MEM_READ_ONLY, size, NULL, &ret); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::alloc_read_buffer: clCreateBuffer() failed!\n"); + return nullptr; + } + + return obj; + } + + cl_mem alloc_and_init_read_buffer(cl_command_queue command_queue, const void *pInit, size_t size) + { + cl_serializer serializer(this); + + cl_int ret; + cl_mem obj = clCreateBuffer(m_context, CL_MEM_READ_ONLY, size, NULL, &ret); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::alloc_and_init_read_buffer: clCreateBuffer() 
failed!\n"); + return nullptr; + } + +#if 0 + if (!write_to_buffer(command_queue, obj, pInit, size)) + { + destroy_buffer(obj); + return nullptr; + } +#else + ret = clEnqueueWriteBuffer(command_queue, obj, CL_TRUE, 0, size, pInit, 0, NULL, NULL); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::alloc_and_init_read_buffer: clEnqueueWriteBuffer() failed!\n"); + return nullptr; + } +#endif + + return obj; + } + + cl_mem alloc_write_buffer(size_t size) + { + cl_serializer serializer(this); + + cl_int ret; + cl_mem obj = clCreateBuffer(m_context, CL_MEM_WRITE_ONLY, size, NULL, &ret); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::alloc_write_buffer: clCreateBuffer() failed!\n"); + return nullptr; + } + + return obj; + } + + bool destroy_buffer(cl_mem buf) + { + if (buf) + { + cl_serializer serializer(this); + + cl_int ret = clReleaseMemObject(buf); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::destroy_buffer: clReleaseMemObject() failed!\n"); + return false; + } + } + + return true; + } + + bool write_to_buffer(cl_command_queue command_queue, cl_mem clmem, const void* d, const size_t m) + { + cl_serializer serializer(this); + + cl_int ret = clEnqueueWriteBuffer(command_queue, clmem, CL_TRUE, 0, m, d, 0, NULL, NULL); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::write_to_buffer: clEnqueueWriteBuffer() failed!\n"); + return false; + } + + return true; + } + + bool read_from_buffer(cl_command_queue command_queue, const cl_mem clmem, void* d, size_t m) + { + cl_serializer serializer(this); + + cl_int ret = clEnqueueReadBuffer(command_queue, clmem, CL_TRUE, 0, m, d, 0, NULL, NULL); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::read_from_buffer: clEnqueueReadBuffer() failed!\n"); + return false; + } + + return true; + } + + cl_mem create_read_image_u8(uint32_t width, uint32_t height, const void* pPixels, uint32_t bytes_per_pixel, bool normalized) + { + cl_image_format fmt = get_image_format(bytes_per_pixel, normalized); + + cl_image_desc 
desc; + memset(&desc, 0, sizeof(desc)); + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = width; + desc.image_height = height; + desc.image_row_pitch = width * bytes_per_pixel; + + cl_serializer serializer(this); + + cl_int ret; + cl_mem img = clCreateImage(m_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &fmt, &desc, (void*)pPixels, &ret); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::create_read_image_u8: clCreateImage() failed!\n"); + return nullptr; + } + + return img; + } + + cl_mem create_write_image_u8(uint32_t width, uint32_t height, uint32_t bytes_per_pixel, bool normalized) + { + cl_image_format fmt = get_image_format(bytes_per_pixel, normalized); + + cl_image_desc desc; + memset(&desc, 0, sizeof(desc)); + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = width; + desc.image_height = height; + + cl_serializer serializer(this); + + cl_int ret; + cl_mem img = clCreateImage(m_context, CL_MEM_WRITE_ONLY, &fmt, &desc, nullptr, &ret); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::create_write_image_u8: clCreateImage() failed!\n"); + return nullptr; + } + + return img; + } + + bool read_from_image(cl_command_queue command_queue, cl_mem img, void* pPixels, uint32_t ofs_x, uint32_t ofs_y, uint32_t width, uint32_t height) + { + cl_serializer serializer(this); + + size_t origin[3] = { ofs_x, ofs_y, 0 }, region[3] = { width, height, 1 }; + + cl_int err = clEnqueueReadImage(command_queue, img, CL_TRUE, origin, region, 0, 0, pPixels, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + ocl_error_printf("ocl::read_from_image: clEnqueueReadImage() failed!\n"); + return false; + } + + return true; + } + + bool run_1D(cl_command_queue command_queue, const cl_kernel kernel, size_t num_items) + { + cl_serializer serializer(this); + + cl_int ret = clEnqueueNDRangeKernel(command_queue, kernel, + 1, // work_dim + nullptr, // global_work_offset + &num_items, // global_work_size + nullptr, // local_work_size + 0, // num_events_in_wait_list + 
nullptr, // event_wait_list + nullptr // event + ); + + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::run_1D: clEnqueueNDRangeKernel() failed!\n"); + return false; + } + + return true; + } + + bool run_2D(cl_command_queue command_queue, const cl_kernel kernel, size_t width, size_t height) + { + cl_serializer serializer(this); + + size_t num_global_items[2] = { width, height }; + //size_t num_local_items[2] = { 1, 1 }; + + cl_int ret = clEnqueueNDRangeKernel(command_queue, kernel, + 2, // work_dim + nullptr, // global_work_offset + num_global_items, // global_work_size + nullptr, // local_work_size + 0, // num_events_in_wait_list + nullptr, // event_wait_list + nullptr // event + ); + + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::run_2D: clEnqueueNDRangeKernel() failed!\n"); + return false; + } + + return true; + } + + bool run_2D(cl_command_queue command_queue, const cl_kernel kernel, size_t ofs_x, size_t ofs_y, size_t width, size_t height) + { + cl_serializer serializer(this); + + size_t global_ofs[2] = { ofs_x, ofs_y }; + size_t num_global_items[2] = { width, height }; + //size_t num_local_items[2] = { 1, 1 }; + + cl_int ret = clEnqueueNDRangeKernel(command_queue, kernel, + 2, // work_dim + global_ofs, // global_work_offset + num_global_items, // global_work_size + nullptr, // local_work_size + 0, // num_events_in_wait_list + nullptr, // event_wait_list + nullptr // event + ); + + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::run_2D: clEnqueueNDRangeKernel() failed!\n"); + return false; + } + + return true; + } + + void flush(cl_command_queue command_queue) + { + cl_serializer serializer(this); + + clFlush(command_queue); + clFinish(command_queue); + } + + template + bool set_kernel_arg(cl_kernel kernel, uint32_t index, const T& obj) + { + cl_serializer serializer(this); + + cl_int ret = clSetKernelArg(kernel, index, sizeof(T), (void*)&obj); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::set_kernel_arg: clSetKernelArg() failed!\n"); + 
return false; + } + return true; + } + + template + bool set_kernel_args(cl_kernel kernel, const T& obj1) + { + cl_serializer serializer(this); + + cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); + if (ret != CL_SUCCESS) + { + ocl_error_printf("ocl::set_kernel_arg: clSetKernelArg() failed!\n"); + return false; + } + return true; + } + +#define BASISU_CHECK_ERR if (ret != CL_SUCCESS) { ocl_error_printf("ocl::set_kernel_args: clSetKernelArg() failed!\n"); return false; } + + template + bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2) + { + cl_serializer serializer(this); + cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR + return true; + } + + template + bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3) + { + cl_serializer serializer(this); + cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR + return true; + } + + template + bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4) + { + cl_serializer serializer(this); + cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR + return true; + } + + template + bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4, const X& obj5) + { + cl_serializer serializer(this); + cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); 
BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 4, sizeof(X), (void*)&obj5); BASISU_CHECK_ERR + return true; + } + + template + bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4, const X& obj5, const Y& obj6) + { + cl_serializer serializer(this); + cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 4, sizeof(X), (void*)&obj5); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 5, sizeof(Y), (void*)&obj6); BASISU_CHECK_ERR + return true; + } + + template + bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4, const X& obj5, const Y& obj6, const Z& obj7) + { + cl_serializer serializer(this); + cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 4, sizeof(X), (void*)&obj5); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 5, sizeof(Y), (void*)&obj6); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 6, sizeof(Z), (void*)&obj7); BASISU_CHECK_ERR + return true; + } + + template + bool set_kernel_args(cl_kernel kernel, const T& obj1, const U& obj2, const V& obj3, const W& obj4, const X& obj5, const Y& obj6, const Z& obj7, const A& obj8) + { + cl_serializer serializer(this); + cl_int ret = clSetKernelArg(kernel, 0, sizeof(T), (void*)&obj1); BASISU_CHECK_ERR + 
ret = clSetKernelArg(kernel, 1, sizeof(U), (void*)&obj2); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 2, sizeof(V), (void*)&obj3); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 3, sizeof(W), (void*)&obj4); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 4, sizeof(X), (void*)&obj5); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 5, sizeof(Y), (void*)&obj6); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 6, sizeof(Z), (void*)&obj7); BASISU_CHECK_ERR + ret = clSetKernelArg(kernel, 7, sizeof(A), (void*)&obj8); BASISU_CHECK_ERR + return true; + } +#undef BASISU_CHECK_ERR + + private: + cl_device_id m_device_id = nullptr; + cl_context m_context = nullptr; + cl_command_queue m_command_queue = nullptr; + cl_program m_program = nullptr; + cl_device_fp_config m_dev_fp_config; + + bool m_use_mutex = false; + std::mutex m_ocl_mutex; + + // This helper object is used to optionally serialize all calls to the CL driver after initialization. + // Currently this is only used to work around race conditions in the Windows AMD driver. + struct cl_serializer + { + inline cl_serializer(const cl_serializer&); + cl_serializer& operator= (const cl_serializer&); + + inline cl_serializer(ocl *p) : m_p(p) + { + if (m_p->m_use_mutex) + m_p->m_ocl_mutex.lock(); + } + + inline ~cl_serializer() + { + if (m_p->m_use_mutex) + m_p->m_ocl_mutex.unlock(); + } + + private: + ocl* m_p; + }; + + cl_image_format get_image_format(uint32_t bytes_per_pixel, bool normalized) + { + cl_image_format fmt; + switch (bytes_per_pixel) + { + case 1: fmt.image_channel_order = CL_LUMINANCE; break; + case 2: fmt.image_channel_order = CL_RG; break; + case 3: fmt.image_channel_order = CL_RGB; break; + case 4: fmt.image_channel_order = CL_RGBA; break; + default: assert(0); fmt.image_channel_order = CL_LUMINANCE; break; + } + + fmt.image_channel_data_type = normalized ? 
CL_UNORM_INT8 : CL_UNSIGNED_INT8; + return fmt; + } + }; + + // Library blobal state + ocl g_ocl; + + bool opencl_init(bool force_serialization) + { + if (g_ocl.is_initialized()) + { + assert(0); + return false; + } + + if (!g_ocl.init(force_serialization)) + { + ocl_error_printf("opencl_init: Failed initializing OpenCL\n"); + return false; + } + + const char* pKernel_src = nullptr; + size_t kernel_src_size = 0; + uint8_vec kernel_src; + +#if BASISU_USE_OCL_KERNELS_HEADER + pKernel_src = reinterpret_cast(ocl_kernels_cl); + kernel_src_size = ocl_kernels_cl_len; +#else + if (!read_file_to_vec(BASISU_OCL_KERNELS_FILENAME, kernel_src)) + { + ocl_error_printf("opencl_init: Cannot read OpenCL kernel source file \"%s\"\n", BASISU_OCL_KERNELS_FILENAME); + g_ocl.deinit(); + return false; + } + + pKernel_src = (char*)kernel_src.data(); + kernel_src_size = kernel_src.size(); +#endif + + if (!kernel_src_size) + { + ocl_error_printf("opencl_init: Invalid OpenCL kernel source file \"%s\"\n", BASISU_OCL_KERNELS_FILENAME); + g_ocl.deinit(); + return false; + } + + if (!g_ocl.init_program(pKernel_src, kernel_src_size)) + { + ocl_error_printf("opencl_init: Failed compiling OpenCL program\n"); + g_ocl.deinit(); + return false; + } + + printf("OpenCL support initialized successfully\n"); + + return true; + } + + void opencl_deinit() + { + g_ocl.deinit(); + } + + bool opencl_is_available() + { + return g_ocl.is_initialized(); + } + + struct opencl_context + { + size_t m_ocl_total_pixel_blocks; + cl_mem m_ocl_pixel_blocks; + + cl_command_queue m_command_queue; + + cl_kernel m_ocl_encode_etc1s_blocks_kernel; + cl_kernel m_ocl_refine_endpoint_clusterization_kernel; + cl_kernel m_ocl_encode_etc1s_from_pixel_cluster_kernel; + cl_kernel m_ocl_find_optimal_selector_clusters_for_each_block_kernel; + cl_kernel m_ocl_determine_selectors_kernel; + }; + + opencl_context_ptr opencl_create_context() + { + if (!opencl_is_available()) + { + ocl_error_printf("opencl_create_context: OpenCL not 
initialized\n"); + assert(0); + return nullptr; + } + + interval_timer tm; + tm.start(); + + opencl_context* pContext = static_cast(calloc(sizeof(opencl_context), 1)); + if (!pContext) + return nullptr; + + // To avoid driver bugs in some drivers - serialize this. Likely not necessary, we don't know. + // https://community.intel.com/t5/OpenCL-for-CPU/Bug-report-clCreateKernelsInProgram-is-not-thread-safe/td-p/1159771 + + pContext->m_command_queue = g_ocl.create_command_queue(); + if (!pContext->m_command_queue) + { + ocl_error_printf("opencl_create_context: Failed creating OpenCL command queue!\n"); + opencl_destroy_context(pContext); + return nullptr; + } + + pContext->m_ocl_encode_etc1s_blocks_kernel = g_ocl.create_kernel("encode_etc1s_blocks"); + if (!pContext->m_ocl_encode_etc1s_blocks_kernel) + { + ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel encode_etc1s_block\n"); + opencl_destroy_context(pContext); + return nullptr; + } + + pContext->m_ocl_refine_endpoint_clusterization_kernel = g_ocl.create_kernel("refine_endpoint_clusterization"); + if (!pContext->m_ocl_refine_endpoint_clusterization_kernel) + { + ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel refine_endpoint_clusterization\n"); + opencl_destroy_context(pContext); + return nullptr; + } + + pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel = g_ocl.create_kernel("encode_etc1s_from_pixel_cluster"); + if (!pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel) + { + ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel encode_etc1s_from_pixel_cluster\n"); + opencl_destroy_context(pContext); + return nullptr; + } + + pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel = g_ocl.create_kernel("find_optimal_selector_clusters_for_each_block"); + if (!pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel) + { + ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel 
find_optimal_selector_clusters_for_each_block\n"); + opencl_destroy_context(pContext); + return nullptr; + } + + pContext->m_ocl_determine_selectors_kernel = g_ocl.create_kernel("determine_selectors"); + if (!pContext->m_ocl_determine_selectors_kernel) + { + ocl_error_printf("opencl_create_context: Failed creating OpenCL kernel determine_selectors\n"); + opencl_destroy_context(pContext); + return nullptr; + } + + debug_printf("opencl_create_context: Elapsed time: %f secs\n", tm.get_elapsed_secs()); + + return pContext; + } + + void opencl_destroy_context(opencl_context_ptr pContext) + { + if (!pContext) + return; + + interval_timer tm; + tm.start(); + + g_ocl.destroy_buffer(pContext->m_ocl_pixel_blocks); + + g_ocl.destroy_kernel(pContext->m_ocl_determine_selectors_kernel); + g_ocl.destroy_kernel(pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel); + g_ocl.destroy_kernel(pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel); + g_ocl.destroy_kernel(pContext->m_ocl_encode_etc1s_blocks_kernel); + g_ocl.destroy_kernel(pContext->m_ocl_refine_endpoint_clusterization_kernel); + + g_ocl.destroy_command_queue(pContext->m_command_queue); + + memset(pContext, 0, sizeof(opencl_context)); + + free(pContext); + + debug_printf("opencl_destroy_context: Elapsed time: %f secs\n", tm.get_elapsed_secs()); + } + +#pragma pack(push, 1) + struct cl_encode_etc1s_param_struct + { + int m_total_blocks; + int m_perceptual; + int m_total_perms; + }; +#pragma pack(pop) + + bool opencl_set_pixel_blocks(opencl_context_ptr pContext, size_t total_blocks, const cl_pixel_block* pPixel_blocks) + { + if (!opencl_is_available()) + return false; + + if (pContext->m_ocl_pixel_blocks) + { + g_ocl.destroy_buffer(pContext->m_ocl_pixel_blocks); + pContext->m_ocl_pixel_blocks = nullptr; + } + + pContext->m_ocl_pixel_blocks = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pPixel_blocks, sizeof(cl_pixel_block) * total_blocks); + if (!pContext->m_ocl_pixel_blocks) + return false; 
+ + pContext->m_ocl_total_pixel_blocks = total_blocks; + + return true; + } + + bool opencl_encode_etc1s_blocks(opencl_context_ptr pContext, etc_block* pOutput_blocks, bool perceptual, uint32_t total_perms) + { + if (!opencl_is_available()) + return false; + + interval_timer tm; + tm.start(); + + assert(pContext->m_ocl_pixel_blocks); + if (!pContext->m_ocl_pixel_blocks) + return false; + + assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX); + + cl_encode_etc1s_param_struct ps; + ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks; + ps.m_perceptual = perceptual; + ps.m_total_perms = total_perms; + + bool status = false; + + cl_mem vars = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue , &ps, sizeof(ps)); + cl_mem block_buf = g_ocl.alloc_write_buffer(sizeof(etc_block) * pContext->m_ocl_total_pixel_blocks); + + if (!vars || !block_buf) + goto exit; + + if (!g_ocl.set_kernel_args(pContext->m_ocl_encode_etc1s_blocks_kernel, vars, pContext->m_ocl_pixel_blocks, block_buf)) + goto exit; + + if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_encode_etc1s_blocks_kernel, pContext->m_ocl_total_pixel_blocks, 1)) + goto exit; + + if (!g_ocl.read_from_buffer(pContext->m_command_queue, block_buf, pOutput_blocks, pContext->m_ocl_total_pixel_blocks * sizeof(etc_block))) + goto exit; + + status = true; + + debug_printf("opencl_encode_etc1s_blocks: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); + +exit: + g_ocl.destroy_buffer(block_buf); + g_ocl.destroy_buffer(vars); + + return status; + } + + bool opencl_encode_etc1s_pixel_clusters( + opencl_context_ptr pContext, + etc_block* pOutput_blocks, + uint32_t total_clusters, + const cl_pixel_cluster* pClusters, + uint64_t total_pixels, + const color_rgba* pPixels, const uint32_t* pPixel_weights, + bool perceptual, uint32_t total_perms) + { + if (!opencl_is_available()) + return false; + + interval_timer tm; + tm.start(); + + cl_encode_etc1s_param_struct ps; + ps.m_total_blocks = total_clusters; + 
ps.m_perceptual = perceptual; + ps.m_total_perms = total_perms; + + bool status = false; + + if (sizeof(size_t) == sizeof(uint32_t)) + { + if ( ((sizeof(cl_pixel_cluster) * total_clusters) > UINT32_MAX) || + ((sizeof(color_rgba) * total_pixels) > UINT32_MAX) || + ((sizeof(uint32_t) * total_pixels) > UINT32_MAX) ) + { + return false; + } + } + + cl_mem vars = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue , &ps, sizeof(ps)); + cl_mem input_clusters = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pClusters, (size_t)(sizeof(cl_pixel_cluster) * total_clusters)); + cl_mem input_pixels = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pPixels, (size_t)(sizeof(color_rgba) * total_pixels)); + cl_mem weights_buf = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pPixel_weights, (size_t)(sizeof(uint32_t) * total_pixels)); + cl_mem block_buf = g_ocl.alloc_write_buffer(sizeof(etc_block) * total_clusters); + + if (!vars || !input_clusters || !input_pixels || !weights_buf || !block_buf) + goto exit; + + if (!g_ocl.set_kernel_args(pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel, vars, input_clusters, input_pixels, weights_buf, block_buf)) + goto exit; + + if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_encode_etc1s_from_pixel_cluster_kernel, total_clusters, 1)) + goto exit; + + if (!g_ocl.read_from_buffer(pContext->m_command_queue, block_buf, pOutput_blocks, sizeof(etc_block) * total_clusters)) + goto exit; + + status = true; + + debug_printf("opencl_encode_etc1s_pixel_clusters: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); + + exit: + g_ocl.destroy_buffer(block_buf); + g_ocl.destroy_buffer(weights_buf); + g_ocl.destroy_buffer(input_pixels); + g_ocl.destroy_buffer(input_clusters); + g_ocl.destroy_buffer(vars); + + return status; + } + +#pragma pack(push, 1) + struct cl_rec_param_struct + { + int m_total_blocks; + int m_perceptual; + }; +#pragma pack(pop) + + bool opencl_refine_endpoint_clusterization( + 
opencl_context_ptr pContext, + const cl_block_info_struct* pPixel_block_info, + uint32_t total_clusters, + const cl_endpoint_cluster_struct* pCluster_info, + const uint32_t* pSorted_block_indices, + uint32_t* pOutput_cluster_indices, + bool perceptual) + { + if (!opencl_is_available()) + return false; + + interval_timer tm; + tm.start(); + + assert(pContext->m_ocl_pixel_blocks); + if (!pContext->m_ocl_pixel_blocks) + return false; + + assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX); + + cl_rec_param_struct ps; + ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks; + ps.m_perceptual = perceptual; + + bool status = false; + + cl_mem pixel_block_info = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pPixel_block_info, sizeof(cl_block_info_struct) * pContext->m_ocl_total_pixel_blocks); + cl_mem cluster_info = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pCluster_info, sizeof(cl_endpoint_cluster_struct) * total_clusters); + cl_mem sorted_block_indices = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pSorted_block_indices, sizeof(uint32_t) * pContext->m_ocl_total_pixel_blocks); + cl_mem output_buf = g_ocl.alloc_write_buffer(sizeof(uint32_t) * pContext->m_ocl_total_pixel_blocks); + + if (!pixel_block_info || !cluster_info || !sorted_block_indices || !output_buf) + goto exit; + + if (!g_ocl.set_kernel_args(pContext->m_ocl_refine_endpoint_clusterization_kernel, ps, pContext->m_ocl_pixel_blocks, pixel_block_info, cluster_info, sorted_block_indices, output_buf)) + goto exit; + + if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_refine_endpoint_clusterization_kernel, pContext->m_ocl_total_pixel_blocks, 1)) + goto exit; + + if (!g_ocl.read_from_buffer(pContext->m_command_queue, output_buf, pOutput_cluster_indices, pContext->m_ocl_total_pixel_blocks * sizeof(uint32_t))) + goto exit; + + debug_printf("opencl_refine_endpoint_clusterization: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); + + status = true; + 
+exit: + g_ocl.destroy_buffer(pixel_block_info); + g_ocl.destroy_buffer(cluster_info); + g_ocl.destroy_buffer(sorted_block_indices); + g_ocl.destroy_buffer(output_buf); + + return status; + } + + bool opencl_find_optimal_selector_clusters_for_each_block( + opencl_context_ptr pContext, + const fosc_block_struct* pInput_block_info, // one per block + uint32_t total_input_selectors, + const fosc_selector_struct* pInput_selectors, + const uint32_t* pSelector_cluster_indices, + uint32_t* pOutput_selector_cluster_indices, // one per block + bool perceptual) + { + if (!opencl_is_available()) + return false; + + interval_timer tm; + tm.start(); + + assert(pContext->m_ocl_pixel_blocks); + if (!pContext->m_ocl_pixel_blocks) + return false; + + assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX); + + fosc_param_struct ps; + ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks; + ps.m_perceptual = perceptual; + + bool status = false; + + cl_mem input_block_info = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pInput_block_info, sizeof(fosc_block_struct) * pContext->m_ocl_total_pixel_blocks); + cl_mem input_selectors = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pInput_selectors, sizeof(fosc_selector_struct) * total_input_selectors); + cl_mem selector_cluster_indices = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pSelector_cluster_indices, sizeof(uint32_t) * total_input_selectors); + cl_mem output_selector_cluster_indices = g_ocl.alloc_write_buffer(sizeof(uint32_t) * pContext->m_ocl_total_pixel_blocks); + + if (!input_block_info || !input_selectors || !selector_cluster_indices || !output_selector_cluster_indices) + goto exit; + + if (!g_ocl.set_kernel_args(pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel, ps, pContext->m_ocl_pixel_blocks, input_block_info, input_selectors, selector_cluster_indices, output_selector_cluster_indices)) + goto exit; + + if (!g_ocl.run_2D(pContext->m_command_queue, 
pContext->m_ocl_find_optimal_selector_clusters_for_each_block_kernel, pContext->m_ocl_total_pixel_blocks, 1)) + goto exit; + + if (!g_ocl.read_from_buffer(pContext->m_command_queue, output_selector_cluster_indices, pOutput_selector_cluster_indices, pContext->m_ocl_total_pixel_blocks * sizeof(uint32_t))) + goto exit; + + debug_printf("opencl_find_optimal_selector_clusters_for_each_block: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); + + status = true; + + exit: + g_ocl.destroy_buffer(input_block_info); + g_ocl.destroy_buffer(input_selectors); + g_ocl.destroy_buffer(selector_cluster_indices); + g_ocl.destroy_buffer(output_selector_cluster_indices); + + return status; + } + + bool opencl_determine_selectors( + opencl_context_ptr pContext, + const color_rgba* pInput_etc_color5_and_inten, + etc_block* pOutput_blocks, + bool perceptual) + { + if (!opencl_is_available()) + return false; + + interval_timer tm; + tm.start(); + + assert(pContext->m_ocl_pixel_blocks); + if (!pContext->m_ocl_pixel_blocks) + return false; + + assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX); + + ds_param_struct ps; + ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks; + ps.m_perceptual = perceptual; + + bool status = false; + + cl_mem input_etc_color5_intens = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pInput_etc_color5_and_inten, sizeof(color_rgba) * pContext->m_ocl_total_pixel_blocks); + cl_mem output_blocks = g_ocl.alloc_write_buffer(sizeof(etc_block) * pContext->m_ocl_total_pixel_blocks); + + if (!input_etc_color5_intens || !output_blocks) + goto exit; + + if (!g_ocl.set_kernel_args(pContext->m_ocl_determine_selectors_kernel, ps, pContext->m_ocl_pixel_blocks, input_etc_color5_intens, output_blocks)) + goto exit; + + if (!g_ocl.run_2D(pContext->m_command_queue, pContext->m_ocl_determine_selectors_kernel, pContext->m_ocl_total_pixel_blocks, 1)) + goto exit; + + if (!g_ocl.read_from_buffer(pContext->m_command_queue, output_blocks, pOutput_blocks, 
pContext->m_ocl_total_pixel_blocks * sizeof(etc_block))) + goto exit; + + debug_printf("opencl_determine_selectors: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); + + status = true; + + exit: + g_ocl.destroy_buffer(input_etc_color5_intens); + g_ocl.destroy_buffer(output_blocks); + + return status; + } + +#else +namespace basisu +{ + // No OpenCL support - all dummy functions that return false; + bool opencl_init(bool force_serialization) + { + BASISU_NOTE_UNUSED(force_serialization); + + return false; + } + + void opencl_deinit() + { + } + + bool opencl_is_available() + { + return false; + } + + opencl_context_ptr opencl_create_context() + { + return nullptr; + } + + void opencl_destroy_context(opencl_context_ptr context) + { + BASISU_NOTE_UNUSED(context); + } + + bool opencl_set_pixel_blocks(opencl_context_ptr pContext, size_t total_blocks, const cl_pixel_block* pPixel_blocks) + { + BASISU_NOTE_UNUSED(pContext); + BASISU_NOTE_UNUSED(total_blocks); + BASISU_NOTE_UNUSED(pPixel_blocks); + + return false; + } + + bool opencl_encode_etc1s_blocks(opencl_context_ptr pContext, etc_block* pOutput_blocks, bool perceptual, uint32_t total_perms) + { + BASISU_NOTE_UNUSED(pContext); + BASISU_NOTE_UNUSED(pOutput_blocks); + BASISU_NOTE_UNUSED(perceptual); + BASISU_NOTE_UNUSED(total_perms); + + return false; + } + + bool opencl_encode_etc1s_pixel_clusters( + opencl_context_ptr pContext, + etc_block* pOutput_blocks, + uint32_t total_clusters, + const cl_pixel_cluster* pClusters, + uint64_t total_pixels, + const color_rgba* pPixels, const uint32_t *pPixel_weights, + bool perceptual, uint32_t total_perms) + { + BASISU_NOTE_UNUSED(pContext); + BASISU_NOTE_UNUSED(pOutput_blocks); + BASISU_NOTE_UNUSED(total_clusters); + BASISU_NOTE_UNUSED(pClusters); + BASISU_NOTE_UNUSED(total_pixels); + BASISU_NOTE_UNUSED(pPixels); + BASISU_NOTE_UNUSED(pPixel_weights); + BASISU_NOTE_UNUSED(perceptual); + BASISU_NOTE_UNUSED(total_perms); + + return false; + } + + bool 
opencl_refine_endpoint_clusterization( + opencl_context_ptr pContext, + const cl_block_info_struct* pPixel_block_info, + uint32_t total_clusters, + const cl_endpoint_cluster_struct* pCluster_info, + const uint32_t* pSorted_block_indices, + uint32_t* pOutput_cluster_indices, + bool perceptual) + { + BASISU_NOTE_UNUSED(pContext); + BASISU_NOTE_UNUSED(pPixel_block_info); + BASISU_NOTE_UNUSED(total_clusters); + BASISU_NOTE_UNUSED(pCluster_info); + BASISU_NOTE_UNUSED(pSorted_block_indices); + BASISU_NOTE_UNUSED(pOutput_cluster_indices); + BASISU_NOTE_UNUSED(perceptual); + + return false; + } + + bool opencl_find_optimal_selector_clusters_for_each_block( + opencl_context_ptr pContext, + const fosc_block_struct* pInput_block_info, // one per block + uint32_t total_input_selectors, + const fosc_selector_struct* pInput_selectors, + const uint32_t* pSelector_cluster_indices, + uint32_t* pOutput_selector_cluster_indices, // one per block + bool perceptual) + { + BASISU_NOTE_UNUSED(pContext); + BASISU_NOTE_UNUSED(pInput_block_info); + BASISU_NOTE_UNUSED(total_input_selectors); + BASISU_NOTE_UNUSED(pInput_selectors); + BASISU_NOTE_UNUSED(pSelector_cluster_indices); + BASISU_NOTE_UNUSED(pOutput_selector_cluster_indices); + BASISU_NOTE_UNUSED(perceptual); + + return false; + } + + bool opencl_determine_selectors( + opencl_context_ptr pContext, + const color_rgba* pInput_etc_color5_and_inten, + etc_block* pOutput_blocks, + bool perceptual) + { + BASISU_NOTE_UNUSED(pContext); + BASISU_NOTE_UNUSED(pInput_etc_color5_and_inten); + BASISU_NOTE_UNUSED(pOutput_blocks); + BASISU_NOTE_UNUSED(perceptual); + + return false; + } + +#endif // BASISU_SUPPORT_OPENCL + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_opencl.h b/thirdparty/basisu/encoder/basisu_opencl.h new file mode 100644 index 000000000..b44f288b7 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_opencl.h @@ -0,0 +1,143 @@ +// basisu_opencl.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. 
+// +// Note: Undefine or set BASISU_SUPPORT_OPENCL to 0 to completely OpenCL support. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "../transcoder/basisu.h" +#include "basisu_enc.h" +#include "basisu_etc.h" + +namespace basisu +{ + bool opencl_init(bool force_serialization); + void opencl_deinit(); + bool opencl_is_available(); + + struct opencl_context; + + // Each thread calling OpenCL should have its own opencl_context_ptr. This corresponds to a OpenCL command queue. (Confusingly, we only use a single OpenCL device "context".) 
+ typedef opencl_context* opencl_context_ptr; + + opencl_context_ptr opencl_create_context(); + void opencl_destroy_context(opencl_context_ptr context); + +#pragma pack(push, 1) + struct cl_pixel_block + { + color_rgba m_pixels[16]; // [y*4+x] + }; +#pragma pack(pop) + + // Must match BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE + const uint32_t OPENCL_ENCODE_ETC1S_MAX_PERMS = 165; + + bool opencl_set_pixel_blocks(opencl_context_ptr pContext, size_t total_blocks, const cl_pixel_block* pPixel_blocks); + + bool opencl_encode_etc1s_blocks(opencl_context_ptr pContext, etc_block* pOutput_blocks, bool perceptual, uint32_t total_perms); + + // opencl_encode_etc1s_pixel_clusters + +#pragma pack(push, 1) + struct cl_pixel_cluster + { + uint64_t m_total_pixels; + uint64_t m_first_pixel_index; + }; +#pragma pack(pop) + + bool opencl_encode_etc1s_pixel_clusters( + opencl_context_ptr pContext, + etc_block* pOutput_blocks, + uint32_t total_clusters, + const cl_pixel_cluster *pClusters, + uint64_t total_pixels, + const color_rgba *pPixels, + const uint32_t *pPixel_weights, + bool perceptual, uint32_t total_perms); + + // opencl_refine_endpoint_clusterization + +#pragma pack(push, 1) + struct cl_block_info_struct + { + uint16_t m_first_cluster_ofs; + uint16_t m_num_clusters; + uint16_t m_cur_cluster_index; + uint8_t m_cur_cluster_etc_inten; + }; + + struct cl_endpoint_cluster_struct + { + color_rgba m_unscaled_color; + uint8_t m_etc_inten; + uint16_t m_cluster_index; + }; +#pragma pack(pop) + + bool opencl_refine_endpoint_clusterization( + opencl_context_ptr pContext, + const cl_block_info_struct *pPixel_block_info, + uint32_t total_clusters, + const cl_endpoint_cluster_struct *pCluster_info, + const uint32_t *pSorted_block_indices, + uint32_t* pOutput_cluster_indices, + bool perceptual); + + // opencl_find_optimal_selector_clusters_for_each_block + +#pragma pack(push, 1) + struct fosc_selector_struct + { + uint32_t m_packed_selectors; // 4x4 grid of 2-bit selectors + }; + + struct 
fosc_block_struct + { + color_rgba m_etc_color5_inten; // unscaled 5-bit block color in RGB, alpha has block's intensity index + uint32_t m_first_selector; // offset into selector table + uint32_t m_num_selectors; // number of selectors to check + }; + + struct fosc_param_struct + { + uint32_t m_total_blocks; + int m_perceptual; + }; +#pragma pack(pop) + + bool opencl_find_optimal_selector_clusters_for_each_block( + opencl_context_ptr pContext, + const fosc_block_struct* pInput_block_info, // one per block + uint32_t total_input_selectors, + const fosc_selector_struct* pInput_selectors, + const uint32_t* pSelector_cluster_indices, + uint32_t* pOutput_selector_cluster_indices, // one per block + bool perceptual); + +#pragma pack(push, 1) + struct ds_param_struct + { + uint32_t m_total_blocks; + int m_perceptual; + }; +#pragma pack(pop) + + bool opencl_determine_selectors( + opencl_context_ptr pContext, + const color_rgba* pInput_etc_color5_and_inten, + etc_block* pOutput_blocks, + bool perceptual); + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_pvrtc1_4.cpp b/thirdparty/basisu/encoder/basisu_pvrtc1_4.cpp new file mode 100644 index 000000000..4bf9516f9 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_pvrtc1_4.cpp @@ -0,0 +1,564 @@ +// basisu_pvrtc1_4.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_pvrtc1_4.h" + +namespace basisu +{ +#if 0 + static const uint8_t g_pvrtc_5[32] = { 0,8,16,24,33,41,49,57,66,74,82,90,99,107,115,123,132,140,148,156,165,173,181,189,198,206,214,222,231,239,247,255 }; + static const uint8_t g_pvrtc_4[16] = { 0,16,33,49,66,82,99,115,140,156,173,189,206,222,239,255 }; + static const uint8_t g_pvrtc_3[8] = { 0,33,74,107,148,181,222,255 }; + static const uint8_t g_pvrtc_alpha[9] = { 0,34,68,102,136,170,204,238,255 }; +#endif + + static const uint8_t g_pvrtc_5_nearest[256] = { 0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,20,20,20,20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23,24,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31 }; + static const uint8_t g_pvrtc_4_nearest[256] = { 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15 }; +#if 0 + static const uint8_t g_pvrtc_3_nearest[256] = { 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; + static const uint8_t g_pvrtc_alpha_nearest[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8 }; +#endif + +#if 0 + static const uint8_t g_pvrtc_5_floor[256] = + { + 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3, + 3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7, + 7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11, + 11,11,11,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15, + 15,15,15,15,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19, + 19,19,19,19,19,20,20,20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23, + 23,23,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27, + 27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31 + }; + + static const uint8_t g_pvrtc_5_ceil[256] = + { + 0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4, + 
4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8, + 8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,12,12,12,12,12, + 12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,16,16,16,16, + 16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,20,20,20, + 20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23,24,24, + 24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28, + 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31,31,31,31,31 + }; + + static const uint8_t g_pvrtc_4_floor[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11, + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13, + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15 + }; + + static const uint8_t g_pvrtc_4_ceil[256] = + { + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12, + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14, + 
14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15 + }; + + static const uint8_t g_pvrtc_3_floor[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7 + }; + + static const uint8_t g_pvrtc_3_ceil[256] = + { + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 + }; + + static const uint8_t g_pvrtc_alpha_floor[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8 + }; + + static const uint8_t g_pvrtc_alpha_ceil[256] = + { + 
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 + }; +#endif + + uint32_t pvrtc4_swizzle_uv(uint32_t width, uint32_t height, uint32_t x, uint32_t y) + { + assert((x < width) && (y < height) && basisu::is_pow2(height) && basisu::is_pow2(width)); + + uint32_t min_d = width, max_v = y; + if (height < width) + { + min_d = height; + max_v = x; + } + + // Interleave the XY LSB's + uint32_t shift_ofs = 0, swizzled = 0; + for (uint32_t s_bit = 1, d_bit = 1; s_bit < min_d; s_bit <<= 1, d_bit <<= 2, ++shift_ofs) + { + if (y & s_bit) swizzled |= d_bit; + if (x & s_bit) swizzled |= (2 * d_bit); + } + + max_v >>= shift_ofs; + + // OR in the rest of the bits from the largest dimension + swizzled |= (max_v << (2 * shift_ofs)); + + return swizzled; + } + + color_rgba pvrtc4_block::get_endpoint(uint32_t endpoint_index, bool unpack) const + { + assert(endpoint_index < 2); + const uint32_t packed = m_endpoints >> (endpoint_index * 16); + + uint32_t r, g, b, a; + if (packed & 0x8000) + { + // opaque 554 or 555 + if (!endpoint_index) + { + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = (packed >> 1) & 15; + + if (unpack) + { + b = (b << 1) | (b >> 3); + } + } + else + { + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = packed & 31; + } + + a = unpack ? 
255 : 7; + } + else + { + // translucent 4433 or 4443 + if (!endpoint_index) + { + a = (packed >> 12) & 7; + r = (packed >> 8) & 15; + g = (packed >> 4) & 15; + b = (packed >> 1) & 7; + + if (unpack) + { + a = (a << 1); + a = (a << 4) | a; + + r = (r << 1) | (r >> 3); + g = (g << 1) | (g >> 3); + b = (b << 2) | (b >> 1); + } + } + else + { + a = (packed >> 12) & 7; + r = (packed >> 8) & 15; + g = (packed >> 4) & 15; + b = packed & 15; + + if (unpack) + { + a = (a << 1); + a = (a << 4) | a; + + r = (r << 1) | (r >> 3); + g = (g << 1) | (g >> 3); + b = (b << 1) | (b >> 3); + } + } + } + + if (unpack) + { + r = (r << 3) | (r >> 2); + g = (g << 3) | (g >> 2); + b = (b << 3) | (b >> 2); + } + + assert((r < 256) && (g < 256) && (b < 256) && (a < 256)); + + return color_rgba(r, g, b, a); + } + + color_rgba pvrtc4_block::get_endpoint_5554(uint32_t endpoint_index) const + { + assert(endpoint_index < 2); + const uint32_t packed = m_endpoints >> (endpoint_index * 16); + + uint32_t r, g, b, a; + if (packed & 0x8000) + { + // opaque 554 or 555 + if (!endpoint_index) + { + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = (packed >> 1) & 15; + + b = (b << 1) | (b >> 3); + } + else + { + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = packed & 31; + } + + a = 15; + } + else + { + // translucent 4433 or 4443 + if (!endpoint_index) + { + a = (packed >> 12) & 7; + r = (packed >> 8) & 15; + g = (packed >> 4) & 15; + b = (packed >> 1) & 7; + + a = a << 1; + + r = (r << 1) | (r >> 3); + g = (g << 1) | (g >> 3); + b = (b << 2) | (b >> 1); + } + else + { + a = (packed >> 12) & 7; + r = (packed >> 8) & 15; + g = (packed >> 4) & 15; + b = packed & 15; + + a = a << 1; + + r = (r << 1) | (r >> 3); + g = (g << 1) | (g >> 3); + b = (b << 1) | (b >> 3); + } + } + + assert((r < 32) && (g < 32) && (b < 32) && (a < 16)); + + return color_rgba(r, g, b, a); + } + + bool pvrtc4_image::get_interpolated_colors(uint32_t x, uint32_t y, color_rgba* pColors) const + { + assert((x < m_width) 
&& (y < m_height)); + + int block_x0 = (static_cast<int>(x) - 2) >> 2; + int block_x1 = block_x0 + 1; + int block_y0 = (static_cast<int>(y) - 2) >> 2; + int block_y1 = block_y0 + 1; + + block_x0 = posmod(block_x0, m_block_width); + block_x1 = posmod(block_x1, m_block_width); + block_y0 = posmod(block_y0, m_block_height); + block_y1 = posmod(block_y1, m_block_height); + + pColors[0] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); + pColors[3] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); + + if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) + { + for (uint32_t c = 0; c < 4; c++) + { + uint32_t m = (pColors[0][c] + pColors[3][c]) / 2; + pColors[1][c] = static_cast<uint8_t>(m); + pColors[2][c] = static_cast<uint8_t>(m); + } + pColors[2][3] = 0; + return true; + } + + for (uint32_t c = 0; c < 4; c++) + { + pColors[1][c] = static_cast<uint8_t>((pColors[0][c] * 5 + pColors[3][c] * 3) / 8); + pColors[2][c] = static_cast<uint8_t>((pColors[0][c] * 3 + pColors[3][c] * 5) / 8); + } + + return false; + } + + color_rgba pvrtc4_image::get_pixel(uint32_t x, uint32_t y, uint32_t m) const + { + assert((x < m_width) && (y < m_height)); + + int block_x0 = (static_cast<int>(x) - 2) >> 2; + int block_x1 = block_x0 + 1; + int block_y0 = (static_cast<int>(y) - 2) >> 2; + int block_y1 = block_y0 + 1; + + block_x0 = posmod(block_x0, m_block_width); + block_x1 = posmod(block_x1, m_block_width); + block_y0 = posmod(block_y0, m_block_height); + block_y1 = posmod(block_y1, m_block_height); + + if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) + { + if (m == 0) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1,
block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); + else if (m == 3) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); + + color_rgba l(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0))); + color_rgba h(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1))); + + return color_rgba((l[0] + h[0]) / 2, (l[1] + h[1]) / 2, (l[2] + h[2]) / 2, (m == 2) ? 0 : (l[3] + h[3]) / 2); + } + else + { + if (m == 0) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); + else if (m == 3) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); + + color_rgba l(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0))); + color_rgba h(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1))); + + if (m == 2) + return 
color_rgba((l[0] * 3 + h[0] * 5) / 8, (l[1] * 3 + h[1] * 5) / 8, (l[2] * 3 + h[2] * 5) / 8, (l[3] * 3 + h[3] * 5) / 8); + else + return color_rgba((l[0] * 5 + h[0] * 3) / 8, (l[1] * 5 + h[1] * 3) / 8, (l[2] * 5 + h[2] * 3) / 8, (l[3] * 5 + h[3] * 3) / 8); + } + } + + uint64_t pvrtc4_image::local_endpoint_optimization_opaque(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual) + { + uint64_t initial_error = evaluate_1x1_endpoint_error(bx, by, orig_img, perceptual, false); + if (!initial_error) + return initial_error; + + vec3F c_avg_orig(0); + + for (int y = 0; y < 7; y++) + { + const uint32_t py = wrap_y(by * 4 + y - 1); + for (uint32_t x = 0; x < 7; x++) + { + const uint32_t px = wrap_x(bx * 4 + x - 1); + + const color_rgba& c = orig_img(px, py); + + c_avg_orig[0] += c[0]; + c_avg_orig[1] += c[1]; + c_avg_orig[2] += c[2]; + } + } + + c_avg_orig *= 1.0f / 49.0f; + + vec3F quant_colors[2]; + quant_colors[0].set(c_avg_orig); + quant_colors[0] -= vec3F(.0125f); + + quant_colors[1].set(c_avg_orig); + quant_colors[1] += vec3F(.0125f); + + float total_weight[2]; + + bool success = true; + + for (uint32_t pass = 0; pass < 4; pass++) + { + vec3F new_colors[2] = { vec3F(0), vec3F(0) }; + memset(total_weight, 0, sizeof(total_weight)); + + static const float s_weights[7][7] = + { + { 1.000000f, 1.637089f, 2.080362f, 2.242640f, 2.080362f, 1.637089f, 1.000000f }, + { 1.637089f, 2.414213f, 3.006572f, 3.242640f, 3.006572f, 2.414213f, 1.637089f }, + { 2.080362f, 3.006572f, 3.828426f, 4.242640f, 3.828426f, 3.006572f, 2.080362f }, + { 2.242640f, 3.242640f, 4.242640f, 5.000000f, 4.242640f, 3.242640f, 2.242640f }, + { 2.080362f, 3.006572f, 3.828426f, 4.242640f, 3.828426f, 3.006572f, 2.080362f }, + { 1.637089f, 2.414213f, 3.006572f, 3.242640f, 3.006572f, 2.414213f, 1.637089f }, + { 1.000000f, 1.637089f, 2.080362f, 2.242640f, 2.080362f, 1.637089f, 1.000000f } + }; + + for (int y = 0; y < 7; y++) + { + const uint32_t py = wrap_y(by * 4 + y - 1); + for (uint32_t x = 0; x < 7; 
x++) + { + const uint32_t px = wrap_x(bx * 4 + x - 1); + + const color_rgba& orig_c = orig_img(px, py); + + vec3F color(orig_c[0], orig_c[1], orig_c[2]); + + uint32_t c = quant_colors[0].squared_distance(color) > quant_colors[1].squared_distance(color); + + const float weight = s_weights[y][x]; + new_colors[c] += color * weight; + + total_weight[c] += weight; + } + } + + if (!total_weight[0] || !total_weight[1]) + success = false; + + quant_colors[0] = new_colors[0] / (float)total_weight[0]; + quant_colors[1] = new_colors[1] / (float)total_weight[1]; + } + + if (!success) + { + quant_colors[0] = c_avg_orig; + quant_colors[1] = c_avg_orig; + } + + vec4F colors[2] = { quant_colors[0], quant_colors[1] }; + + colors[0] += vec3F(.5f); + colors[1] += vec3F(.5f); + color_rgba color_0((int)colors[0][0], (int)colors[0][1], (int)colors[0][2], 0); + color_rgba color_1((int)colors[1][0], (int)colors[1][1], (int)colors[1][2], 0); + + pvrtc4_block cur_blocks[3][3]; + + for (int y = -1; y <= 1; y++) + { + for (int x = -1; x <= 1; x++) + { + const uint32_t block_x = wrap_block_x(bx + x); + const uint32_t block_y = wrap_block_y(by + y); + cur_blocks[x + 1][y + 1] = m_blocks(block_x, block_y); + } + } + + color_rgba l1(0), h1(0); + + l1[0] = g_pvrtc_5_nearest[color_0[0]]; + h1[0] = g_pvrtc_5_nearest[color_1[0]]; + + l1[1] = g_pvrtc_5_nearest[color_0[1]]; + h1[1] = g_pvrtc_5_nearest[color_1[1]]; + + l1[2] = g_pvrtc_4_nearest[color_0[2]]; + h1[2] = g_pvrtc_5_nearest[color_1[2]]; + + l1[3] = 0; + h1[3] = 0; + + m_blocks(bx, by).set_endpoint_raw(0, l1, true); + m_blocks(bx, by).set_endpoint_raw(1, h1, true); + + uint64_t e03_err_0 = remap_pixels_influenced_by_endpoint(bx, by, orig_img, perceptual, false); + + pvrtc4_block blocks0[3][3]; + for (int y = -1; y <= 1; y++) + { + for (int x = -1; x <= 1; x++) + { + const uint32_t block_x = wrap_block_x(bx + x); + const uint32_t block_y = wrap_block_y(by + y); + blocks0[x + 1][y + 1] = m_blocks(block_x, block_y); + } + } + + l1[0] =
g_pvrtc_5_nearest[color_1[0]]; + h1[0] = g_pvrtc_5_nearest[color_0[0]]; + + l1[1] = g_pvrtc_5_nearest[color_1[1]]; + h1[1] = g_pvrtc_5_nearest[color_0[1]]; + + l1[2] = g_pvrtc_4_nearest[color_1[2]]; + h1[2] = g_pvrtc_5_nearest[color_0[2]]; + + l1[3] = 0; + h1[3] = 0; + + m_blocks(bx, by).set_endpoint_raw(0, l1, true); + m_blocks(bx, by).set_endpoint_raw(1, h1, true); + + uint64_t e03_err_1 = remap_pixels_influenced_by_endpoint(bx, by, orig_img, perceptual, false); + + if (initial_error < basisu::minimum(e03_err_0, e03_err_1)) + { + for (int y = -1; y <= 1; y++) + { + for (int x = -1; x <= 1; x++) + { + const uint32_t block_x = wrap_block_x(bx + x); + const uint32_t block_y = wrap_block_y(by + y); + m_blocks(block_x, block_y) = cur_blocks[x + 1][y + 1]; + } + } + return initial_error; + } + else if (e03_err_0 < e03_err_1) + { + for (int y = -1; y <= 1; y++) + { + for (int x = -1; x <= 1; x++) + { + const uint32_t block_x = wrap_block_x(bx + x); + const uint32_t block_y = wrap_block_y(by + y); + m_blocks(block_x, block_y) = blocks0[x + 1][y + 1]; + } + } + assert(e03_err_0 == evaluate_1x1_endpoint_error(bx, by, orig_img, perceptual, false)); + return e03_err_0; + } + + assert(e03_err_1 == evaluate_1x1_endpoint_error(bx, by, orig_img, perceptual, false)); + return e03_err_1; + } + +} // basisu diff --git a/thirdparty/basisu/encoder/basisu_pvrtc1_4.h b/thirdparty/basisu/encoder/basisu_pvrtc1_4.h new file mode 100644 index 000000000..a9fe6b27a --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_pvrtc1_4.h @@ -0,0 +1,468 @@ +// basisu_pvrtc1_4.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "basisu_gpu_texture.h" + +namespace basisu +{ + enum + { + PVRTC2_MIN_WIDTH = 16, + PVRTC2_MIN_HEIGHT = 8, + PVRTC4_MIN_WIDTH = 8, + PVRTC4_MIN_HEIGHT = 8 + }; + + struct pvrtc4_block + { + uint32_t m_modulation; + uint32_t m_endpoints; + + pvrtc4_block() : m_modulation(0), m_endpoints(0) { } + + inline bool operator== (const pvrtc4_block& rhs) const + { + return (m_modulation == rhs.m_modulation) && (m_endpoints == rhs.m_endpoints); + } + + inline void clear() + { + m_modulation = 0; + m_endpoints = 0; + } + + inline bool get_block_uses_transparent_modulation() const + { + return (m_endpoints & 1) != 0; + } + + inline bool is_endpoint_opaque(uint32_t endpoint_index) const + { + static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U }; + return (m_endpoints & s_bitmasks[open_range_check(endpoint_index, 2U)]) != 0; + } + + // Returns raw endpoint or 8888 + color_rgba get_endpoint(uint32_t endpoint_index, bool unpack) const; + + color_rgba get_endpoint_5554(uint32_t endpoint_index) const; + + static uint32_t get_component_precision_in_bits(uint32_t c, uint32_t endpoint_index, bool opaque_endpoint) + { + static const uint32_t s_comp_prec[4][4] = + { + // R0 G0 B0 A0 R1 G1 B1 A1 + { 4, 4, 3, 3 }, { 4, 4, 4, 3 }, // transparent endpoint + + { 5, 5, 4, 0 }, { 5, 5, 5, 0 } // opaque endpoint + }; + return s_comp_prec[open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)][open_range_check(c, 4U)]; + } + + static color_rgba get_color_precision_in_bits(uint32_t endpoint_index, bool opaque_endpoint) + { + static const 
color_rgba s_color_prec[4] = + { + color_rgba(4, 4, 3, 3), color_rgba(4, 4, 4, 3), // transparent endpoint + color_rgba(5, 5, 4, 0), color_rgba(5, 5, 5, 0) // opaque endpoint + }; + return s_color_prec[open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)]; + } + + inline uint32_t get_modulation(uint32_t x, uint32_t y) const + { + assert((x < 4) && (y < 4)); + return (m_modulation >> ((y * 4 + x) * 2)) & 3; + } + + inline void set_modulation(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < 4) && (y < 4) && (s < 4)); + uint32_t n = (y * 4 + x) * 2; + m_modulation = (m_modulation & (~(3 << n))) | (s << n); + assert(get_modulation(x, y) == s); + } + + // Scaled by 8 + inline const uint32_t* get_scaled_modulation_values(bool block_uses_transparent_modulation) const + { + static const uint32_t s_block_scales[2][4] = { { 0, 3, 5, 8 }, { 0, 4, 4, 8 } }; + return s_block_scales[block_uses_transparent_modulation]; + } + + // Scaled by 8 + inline uint32_t get_scaled_modulation(uint32_t x, uint32_t y) const + { + return get_scaled_modulation_values(get_block_uses_transparent_modulation())[get_modulation(x, y)]; + } + + inline void byte_swap() + { + m_modulation = byteswap32(m_modulation); + m_endpoints = byteswap32(m_endpoints); + } + + // opaque endpoints: 554, 555 + // transparent endpoints: 3443, 3444 + inline void set_endpoint_raw(uint32_t endpoint_index, const color_rgba& c, bool opaque_endpoint) + { + assert(endpoint_index < 2); + const uint32_t m = m_endpoints & 1; + uint32_t r = c[0], g = c[1], b = c[2], a = c[3]; + + uint32_t packed; + + if (opaque_endpoint) + { + if (!endpoint_index) + { + // 554 + // 1RRRRRGGGGGBBBBM + assert((r < 32) && (g < 32) && (b < 16)); + packed = 0x8000 | (r << 10) | (g << 5) | (b << 1) | m; + } + else + { + // 555 + // 1RRRRRGGGGGBBBBB + assert((r < 32) && (g < 32) && (b < 32)); + packed = 0x8000 | (r << 10) | (g << 5) | b; + } + } + else + { + if (!endpoint_index) + { + // 3443 + // 0AAA RRRR GGGG BBBM + assert((r < 16) && (g 
< 16) && (b < 8) && (a < 8)); + packed = (a << 12) | (r << 8) | (g << 4) | (b << 1) | m; + } + else + { + // 3444 + // 0AAA RRRR GGGG BBBB + assert((r < 16) && (g < 16) && (b < 16) && (a < 8)); + packed = (a << 12) | (r << 8) | (g << 4) | b; + } + } + + assert(packed <= 0xFFFF); + + if (endpoint_index) + m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16); + else + m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; + } + }; + + typedef vector2D<pvrtc4_block> pvrtc4_block_vector2D; + + uint32_t pvrtc4_swizzle_uv(uint32_t XSize, uint32_t YSize, uint32_t XPos, uint32_t YPos); + + class pvrtc4_image + { + public: + inline pvrtc4_image() : + m_width(0), m_height(0), m_block_width(0), m_block_height(0), m_uses_alpha(false) + { + } + + inline pvrtc4_image(uint32_t width, uint32_t height) : + m_width(0), m_height(0), m_block_width(0), m_block_height(0), m_uses_alpha(false) + { + resize(width, height); + } + + inline void clear() + { + m_width = 0; + m_height = 0; + m_block_width = 0; + m_block_height = 0; + m_blocks.clear(); + m_uses_alpha = false; + } + + inline void resize(uint32_t width, uint32_t height) + { + if ((width == m_width) && (height == m_height)) + return; + + m_width = width; + m_height = height; + + m_block_width = (width + 3) >> 2; + m_block_height = (height + 3) >> 2; + + m_blocks.resize(m_block_width, m_block_height); + } + + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + + inline uint32_t get_block_width() const { return m_block_width; } + inline uint32_t get_block_height() const { return m_block_height; } + + inline const pvrtc4_block_vector2D &get_blocks() const { return m_blocks; } + inline pvrtc4_block_vector2D &get_blocks() { return m_blocks; } + + inline uint32_t get_total_blocks() const { return m_block_width * m_block_height; } + + inline bool get_uses_alpha() const { return m_uses_alpha; } + inline void set_uses_alpha(bool uses_alpha) { m_uses_alpha = uses_alpha; } + + inline bool
are_blocks_equal(const pvrtc4_image& rhs) const + { + return m_blocks == rhs.m_blocks; + } + + inline void set_to_black() + { +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif +#endif + memset(m_blocks.get_ptr(), 0, m_blocks.size_in_bytes()); +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif + } + + inline bool get_block_uses_transparent_modulation(uint32_t bx, uint32_t by) const + { + return m_blocks(bx, by).get_block_uses_transparent_modulation(); + } + + inline bool is_endpoint_opaque(uint32_t bx, uint32_t by, uint32_t endpoint_index) const + { + return m_blocks(bx, by).is_endpoint_opaque(endpoint_index); + } + + color_rgba get_endpoint(uint32_t bx, uint32_t by, uint32_t endpoint_index, bool unpack) const + { + assert((bx < m_block_width) && (by < m_block_height)); + return m_blocks(bx, by).get_endpoint(endpoint_index, unpack); + } + + inline uint32_t get_modulation(uint32_t x, uint32_t y) const + { + assert((x < m_width) && (y < m_height)); + return m_blocks(x >> 2, y >> 2).get_modulation(x & 3, y & 3); + } + + // Returns true if the block uses transparent modulation. 
+ bool get_interpolated_colors(uint32_t x, uint32_t y, color_rgba* pColors) const; + + color_rgba get_pixel(uint32_t x, uint32_t y, uint32_t m) const; + + inline color_rgba get_pixel(uint32_t x, uint32_t y) const + { + assert((x < m_width) && (y < m_height)); + return get_pixel(x, y, m_blocks(x >> 2, y >> 2).get_modulation(x & 3, y & 3)); + } + + void deswizzle() + { + pvrtc4_block_vector2D temp(m_blocks); + + for (uint32_t y = 0; y < m_block_height; y++) + for (uint32_t x = 0; x < m_block_width; x++) + m_blocks(x, y) = temp[pvrtc4_swizzle_uv(m_block_width, m_block_height, x, y)]; + } + + void swizzle() + { + pvrtc4_block_vector2D temp(m_blocks); + + for (uint32_t y = 0; y < m_block_height; y++) + for (uint32_t x = 0; x < m_block_width; x++) + m_blocks[pvrtc4_swizzle_uv(m_block_width, m_block_height, x, y)] = temp(x, y); + } + + void unpack_all_pixels(image& img) const + { + img.crop(m_width, m_height); + + for (uint32_t y = 0; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + img(x, y) = get_pixel(x, y); + } + + void unpack_block(image &dst, uint32_t block_x, uint32_t block_y) + { + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + dst(x, y) = get_pixel(block_x * 4 + x, block_y * 4 + y); + } + + inline int wrap_x(int x) const + { + return posmod(x, m_width); + } + + inline int wrap_y(int y) const + { + return posmod(y, m_height); + } + + inline int wrap_block_x(int bx) const + { + return posmod(bx, m_block_width); + } + + inline int wrap_block_y(int by) const + { + return posmod(by, m_block_height); + } + + inline vec2F get_interpolation_factors(uint32_t x, uint32_t y) const + { + // 0 1 2 3 + // 2 3 0 1 + // .5 .75 0 .25 + static const float s_interp[4] = { 2, 3, 0, 1 }; + return vec2F(s_interp[x & 3], s_interp[y & 3]); + } + + inline color_rgba interpolate(int x, int y, + const color_rgba& p, const color_rgba& q, + const color_rgba& r, const color_rgba& s) const + { + static const int s_interp[4] = { 2, 3, 0, 1 }; + const int 
u_interp = s_interp[x & 3]; + const int v_interp = s_interp[y & 3]; + + color_rgba result; + + for (uint32_t c = 0; c < 4; c++) + { + int t = p[c] * 4 + u_interp * ((int)q[c] - (int)p[c]); + int b = r[c] * 4 + u_interp * ((int)s[c] - (int)r[c]); + int v = t * 4 + v_interp * (b - t); + if (c < 3) + { + v >>= 1; + v += (v >> 5); + } + else + { + v += (v >> 4); + } + assert((v >= 0) && (v < 256)); + result[c] = static_cast<uint8_t>(v); + } + + return result; + } + + inline void set_modulation(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < m_width) && (y < m_height)); + return m_blocks(x >> 2, y >> 2).set_modulation(x & 3, y & 3, s); + } + + inline uint64_t map_pixel(uint32_t x, uint32_t y, const color_rgba& c, bool perceptual, bool alpha_is_significant, bool record = true) + { + color_rgba v[4]; + get_interpolated_colors(x, y, v); + + uint64_t best_dist = color_distance(perceptual, c, v[0], alpha_is_significant); + uint32_t best_v = 0; + for (uint32_t i = 1; i < 4; i++) + { + uint64_t dist = color_distance(perceptual, c, v[i], alpha_is_significant); + if (dist < best_dist) + { + best_dist = dist; + best_v = i; + } + } + + if (record) + set_modulation(x, y, best_v); + + return best_dist; + } + + inline uint64_t remap_pixels_influenced_by_endpoint(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual, bool alpha_is_significant) + { + uint64_t total_error = 0; + + for (int yd = -3; yd <= 3; yd++) + { + const int y = wrap_y((int)by * 4 + 2 + yd); + + for (int xd = -3; xd <= 3; xd++) + { + const int x = wrap_x((int)bx * 4 + 2 + xd); + + total_error += map_pixel(x, y, orig_img(x, y), perceptual, alpha_is_significant); + } + } + + return total_error; + } + + inline uint64_t evaluate_1x1_endpoint_error(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual, bool alpha_is_significant, uint64_t threshold_error = 0) const + { + uint64_t total_error = 0; + + for (int yd = -3; yd <= 3; yd++) + { + const int y = wrap_y((int)by * 4 + 2 + yd); + + for (int xd = -3;
xd <= 3; xd++) + { + const int x = wrap_x((int)bx * 4 + 2 + xd); + + total_error += color_distance(perceptual, get_pixel(x, y), orig_img(x, y), alpha_is_significant); + + if ((threshold_error) && (total_error >= threshold_error)) + return total_error; + } + } + + return total_error; + } + + uint64_t local_endpoint_optimization_opaque(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual); + + inline uint64_t map_all_pixels(const image& img, bool perceptual, bool alpha_is_significant) + { + assert(m_width == img.get_width()); + assert(m_height == img.get_height()); + + uint64_t total_error = 0; + for (uint32_t y = 0; y < img.get_height(); y++) + for (uint32_t x = 0; x < img.get_width(); x++) + total_error += map_pixel(x, y, img(x, y), perceptual, alpha_is_significant); + + return total_error; + } + + public: + uint32_t m_width, m_height; + pvrtc4_block_vector2D m_blocks; + uint32_t m_block_width, m_block_height; + + bool m_uses_alpha; + }; + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_resample_filters.cpp b/thirdparty/basisu/encoder/basisu_resample_filters.cpp new file mode 100644 index 000000000..11c7ec2f6 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_resample_filters.cpp @@ -0,0 +1,336 @@ +// basisu_resampler_filters.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_resampler_filters.h" + +#ifndef M_PI + #define M_PI 3.14159265358979323846 +#endif + +namespace basisu +{ + float box_filter(float t) /* pulse/Fourier window */ + { + // make_clist() calls the filter function with t inverted (pos = left, neg = right) + if ((t >= -0.5f) && (t < 0.5f)) + return 1.0f; + else + return 0.0f; + } + + float tent_filter(float t) /* box (*) box, bilinear/triangle */ + { + if (t < 0.0f) + t = -t; + + if (t < 1.0f) + return 1.0f - t; + else + return 0.0f; + } + + float bell_filter(float t) /* box (*) box (*) box */ + { + if (t < 0.0f) + t = -t; + + if (t < .5f) + return (.75f - (t * t)); + + if (t < 1.5f) + { + t = (t - 1.5f); + return (.5f * (t * t)); + } + + return (0.0f); + } + +#define B_SPLINE_SUPPORT (2.0f) + static float B_spline_filter(float t) /* box (*) box (*) box (*) box */ + { + float tt; + + if (t < 0.0f) + t = -t; + + if (t < 1.0f) + { + tt = t * t; + return ((.5f * tt * t) - tt + (2.0f / 3.0f)); + } + else if (t < 2.0f) + { + t = 2.0f - t; + return ((1.0f / 6.0f) * (t * t * t)); + } + + return (0.0f); + } + + // Dodgson, N., "Quadratic Interpolation for Image Resampling" +#define QUADRATIC_SUPPORT 1.5f + static float quadratic(float t, const float R) + { + if (t < 0.0f) + t = -t; + if (t < QUADRATIC_SUPPORT) + { + float tt = t * t; + if (t <= .5f) + return (-2.0f * R) * tt + .5f * (R + 1.0f); + else + return (R * tt) + (-2.0f * R - .5f) * t + (3.0f / 4.0f) * (R + 1.0f); + } + else + return 0.0f; + } + + static float quadratic_interp_filter(float t) + { + return quadratic(t, 1.0f); + } + + static float quadratic_approx_filter(float t) + { + return quadratic(t, .5f); + } + + static float quadratic_mix_filter(float t) + { + return quadratic(t, .8f); + } + + // Mitchell, D. and A. Netravali, "Reconstruction Filters in Computer Graphics." + // Computer Graphics, Vol. 22, No. 4, pp. 221-228. 
+ // (B, C) + // (1/3, 1/3) - Defaults recommended by Mitchell and Netravali + // (1, 0) - Equivalent to the Cubic B-Spline + // (0, 0.5) - Equivalent to the Catmull-Rom Spline + // (0, C) - The family of Cardinal Cubic Splines + // (B, 0) - Duff's tensioned B-Splines. + static float mitchell(float t, const float B, const float C) + { + float tt; + + tt = t * t; + + if (t < 0.0f) + t = -t; + + if (t < 1.0f) + { + t = (((12.0f - 9.0f * B - 6.0f * C) * (t * tt)) + ((-18.0f + 12.0f * B + 6.0f * C) * tt) + (6.0f - 2.0f * B)); + + return (t / 6.0f); + } + else if (t < 2.0f) + { + t = (((-1.0f * B - 6.0f * C) * (t * tt)) + ((6.0f * B + 30.0f * C) * tt) + ((-12.0f * B - 48.0f * C) * t) + (8.0f * B + 24.0f * C)); + + return (t / 6.0f); + } + + return (0.0f); + } + +#define MITCHELL_SUPPORT (2.0f) + static float mitchell_filter(float t) + { + return mitchell(t, 1.0f / 3.0f, 1.0f / 3.0f); + } + +#define CATMULL_ROM_SUPPORT (2.0f) + static float catmull_rom_filter(float t) + { + return mitchell(t, 0.0f, .5f); + } + + static double sinc(double x) + { + x = (x * M_PI); + + if ((x < 0.01f) && (x > -0.01f)) + return 1.0f + x * x * (-1.0f / 6.0f + x * x * 1.0f / 120.0f); + + return sin(x) / x; + } + + static float clean(double t) + { + const float EPSILON = .0000125f; + if (fabs(t) < EPSILON) + return 0.0f; + return (float)t; + } + + //static double blackman_window(double x) + //{ + // return .42f + .50f * cos(M_PI*x) + .08f * cos(2.0f*M_PI*x); + //} + + static double blackman_exact_window(double x) + { + return 0.42659071f + 0.49656062f * cos(M_PI * x) + 0.07684867f * cos(2.0f * M_PI * x); + } + +#define BLACKMAN_SUPPORT (3.0f) + static float blackman_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 3.0f) + //return clean(sinc(t) * blackman_window(t / 3.0f)); + return clean(sinc(t) * blackman_exact_window(t / 3.0f)); + else + return (0.0f); + } + + float gaussian_filter(float t) // with blackman window + { + if (t < 0) + t = -t; + if (t < BASISU_GAUSSIAN_FILTER_SUPPORT) 
+ return clean(exp(-2.0f * t * t) * sqrt(2.0f / M_PI) * blackman_exact_window(t / BASISU_GAUSSIAN_FILTER_SUPPORT)); + else + return 0.0f; + } + + // Windowed sinc -- see "Jimm Blinn's Corner: Dirty Pixels" pg. 26. +#define LANCZOS3_SUPPORT (3.0f) + static float lanczos3_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 3.0f) + return clean(sinc(t) * sinc(t / 3.0f)); + else + return (0.0f); + } + +#define LANCZOS4_SUPPORT (4.0f) + static float lanczos4_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 4.0f) + return clean(sinc(t) * sinc(t / 4.0f)); + else + return (0.0f); + } + +#define LANCZOS6_SUPPORT (6.0f) + static float lanczos6_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 6.0f) + return clean(sinc(t) * sinc(t / 6.0f)); + else + return (0.0f); + } + +#define LANCZOS12_SUPPORT (12.0f) + static float lanczos12_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 12.0f) + return clean(sinc(t) * sinc(t / 12.0f)); + else + return (0.0f); + } + + static double bessel0(double x) + { + const double EPSILON_RATIO = 1E-16; + double xh, sum, pow, ds; + int k; + + xh = 0.5 * x; + sum = 1.0; + pow = 1.0; + k = 0; + ds = 1.0; + while (ds > sum * EPSILON_RATIO) // FIXME: Shouldn't this stop after X iterations for max. safety? 
+ { + ++k; + pow = pow * (xh / k); + ds = pow * pow; + sum = sum + ds; + } + + return sum; + } + + //static const float KAISER_ALPHA = 4.0; + static double kaiser(double alpha, double half_width, double x) + { + const double ratio = (x / half_width); + return bessel0(alpha * sqrt(1 - ratio * ratio)) / bessel0(alpha); + } + +#define KAISER_SUPPORT 3 + static float kaiser_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < KAISER_SUPPORT) + { + // db atten + const float att = 40.0f; + const float alpha = (float)(exp(log((double)0.58417 * (att - 20.96)) * 0.4) + 0.07886 * (att - 20.96)); + //const float alpha = KAISER_ALPHA; + return (float)clean(sinc(t) * kaiser(alpha, KAISER_SUPPORT, t)); + } + + return 0.0f; + } + + const resample_filter g_resample_filters[] = + { + { "box", box_filter, BASISU_BOX_FILTER_SUPPORT }, + { "tent", tent_filter, BASISU_TENT_FILTER_SUPPORT }, + { "bell", bell_filter, BASISU_BELL_FILTER_SUPPORT }, + { "b-spline", B_spline_filter, B_SPLINE_SUPPORT }, + { "mitchell", mitchell_filter, MITCHELL_SUPPORT }, + { "blackman", blackman_filter, BLACKMAN_SUPPORT }, + { "lanczos3", lanczos3_filter, LANCZOS3_SUPPORT }, + { "lanczos4", lanczos4_filter, LANCZOS4_SUPPORT }, + { "lanczos6", lanczos6_filter, LANCZOS6_SUPPORT }, + { "lanczos12", lanczos12_filter, LANCZOS12_SUPPORT }, + { "kaiser", kaiser_filter, KAISER_SUPPORT }, + { "gaussian", gaussian_filter, BASISU_GAUSSIAN_FILTER_SUPPORT }, + { "catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT }, + { "quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT }, + { "quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT }, + { "quadratic_mix", quadratic_mix_filter, QUADRATIC_SUPPORT }, + }; + + const int g_num_resample_filters = BASISU_ARRAY_SIZE(g_resample_filters); + + int find_resample_filter(const char *pName) + { + for (int i = 0; i < g_num_resample_filters; i++) + if (strcmp(pName, g_resample_filters[i].name) == 0) + return i; + return -1; + } +} // namespace basisu diff --git 
a/thirdparty/basisu/encoder/basisu_resampler.cpp b/thirdparty/basisu/encoder/basisu_resampler.cpp new file mode 100644 index 000000000..fa0629852 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_resampler.cpp @@ -0,0 +1,844 @@ +// basisu_resampler.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_resampler.h" +#include "basisu_resampler_filters.h" + +#define RESAMPLER_DEBUG 0 + +namespace basisu +{ + static inline int resampler_range_check(int v, int h) + { + BASISU_NOTE_UNUSED(h); + assert((v >= 0) && (v < h)); + return v; + } + + // Float to int cast with truncation. + static inline int cast_to_int(Resample_Real i) + { + return (int)i; + } + + // Ensure that the contributing source sample is within bounds. If not, reflect, clamp, or wrap. 
+ int Resampler::reflect(const int j, const int src_x, const Boundary_Op boundary_op) + { + int n; + + if (j < 0) + { + if (boundary_op == BOUNDARY_REFLECT) + { + n = -j; + + if (n >= src_x) + n = src_x - 1; + } + else if (boundary_op == BOUNDARY_WRAP) + n = posmod(j, src_x); + else + n = 0; + } + else if (j >= src_x) + { + if (boundary_op == BOUNDARY_REFLECT) + { + n = (src_x - j) + (src_x - 1); + + if (n < 0) + n = 0; + } + else if (boundary_op == BOUNDARY_WRAP) + n = posmod(j, src_x); + else + n = src_x - 1; + } + else + n = j; + + return n; + } + + // The make_clist() method generates, for all destination samples, + // the list of all source samples with non-zero weighted contributions. + Resampler::Contrib_List * Resampler::make_clist( + int src_x, int dst_x, Boundary_Op boundary_op, + Resample_Real(*Pfilter)(Resample_Real), + Resample_Real filter_support, + Resample_Real filter_scale, + Resample_Real src_ofs) + { + struct Contrib_Bounds + { + // The center of the range in DISCRETE coordinates (pixel center = 0.0f). + Resample_Real center; + int left, right; + }; + + int i, j, k, n, left, right; + Resample_Real total_weight; + Resample_Real xscale, center, half_width, weight; + Contrib_List* Pcontrib; + Contrib* Pcpool; + Contrib* Pcpool_next; + Contrib_Bounds* Pcontrib_bounds; + + if ((Pcontrib = (Contrib_List*)calloc(dst_x, sizeof(Contrib_List))) == NULL) + return NULL; + + Pcontrib_bounds = (Contrib_Bounds*)calloc(dst_x, sizeof(Contrib_Bounds)); + if (!Pcontrib_bounds) + { + free(Pcontrib); + return (NULL); + } + + const Resample_Real oo_filter_scale = 1.0f / filter_scale; + + const Resample_Real NUDGE = 0.5f; + xscale = dst_x / (Resample_Real)src_x; + + if (xscale < 1.0f) + { + int total; + (void)total; + + // Handle case when there are fewer destination samples than source samples (downsampling/minification). 
+ + // stretched half width of filter + half_width = (filter_support / xscale) * filter_scale; + + // Find the range of source sample(s) that will contribute to each destination sample. + + for (i = 0, n = 0; i < dst_x; i++) + { + // Convert from discrete to continuous coordinates, scale, then convert back to discrete. + center = ((Resample_Real)i + NUDGE) / xscale; + center -= NUDGE; + center += src_ofs; + + left = cast_to_int((Resample_Real)floor(center - half_width)); + right = cast_to_int((Resample_Real)ceil(center + half_width)); + + Pcontrib_bounds[i].center = center; + Pcontrib_bounds[i].left = left; + Pcontrib_bounds[i].right = right; + + n += (right - left + 1); + } + + // Allocate memory for contributors. + + if ((n == 0) || ((Pcpool = (Contrib*)calloc(n, sizeof(Contrib))) == NULL)) + { + free(Pcontrib); + free(Pcontrib_bounds); + return NULL; + } + total = n; + + Pcpool_next = Pcpool; + + // Create the list of source samples which contribute to each destination sample. + + for (i = 0; i < dst_x; i++) + { + int max_k = -1; + Resample_Real max_w = -1e+20f; + + center = Pcontrib_bounds[i].center; + left = Pcontrib_bounds[i].left; + right = Pcontrib_bounds[i].right; + + Pcontrib[i].n = 0; + Pcontrib[i].p = Pcpool_next; + Pcpool_next += (right - left + 1); + assert((Pcpool_next - Pcpool) <= total); + + total_weight = 0; + + for (j = left; j <= right; j++) + total_weight += (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale); + const Resample_Real norm = static_cast(1.0f / total_weight); + + total_weight = 0; + +#if RESAMPLER_DEBUG + printf("%i: ", i); +#endif + + for (j = left; j <= right; j++) + { + weight = (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale) * norm; + if (weight == 0.0f) + continue; + + n = reflect(j, src_x, boundary_op); + +#if RESAMPLER_DEBUG + printf("%i(%f), ", n, weight); +#endif + + // Increment the number of source samples which contribute to the current destination sample. 
+ + k = Pcontrib[i].n++; + + Pcontrib[i].p[k].pixel = (unsigned short)n; /* store src sample number */ + Pcontrib[i].p[k].weight = weight; /* store src sample weight */ + + total_weight += weight; /* total weight of all contributors */ + + if (weight > max_w) + { + max_w = weight; + max_k = k; + } + } + +#if RESAMPLER_DEBUG + printf("\n\n"); +#endif + + //assert(Pcontrib[i].n); + //assert(max_k != -1); + if ((max_k == -1) || (Pcontrib[i].n == 0)) + { + free(Pcpool); + free(Pcontrib); + free(Pcontrib_bounds); + return NULL; + } + + if (total_weight != 1.0f) + Pcontrib[i].p[max_k].weight += 1.0f - total_weight; + } + } + else + { + // Handle case when there are more destination samples than source samples (upsampling). + + half_width = filter_support * filter_scale; + + // Find the source sample(s) that contribute to each destination sample. + + for (i = 0, n = 0; i < dst_x; i++) + { + // Convert from discrete to continuous coordinates, scale, then convert back to discrete. + center = ((Resample_Real)i + NUDGE) / xscale; + center -= NUDGE; + center += src_ofs; + + left = cast_to_int((Resample_Real)floor(center - half_width)); + right = cast_to_int((Resample_Real)ceil(center + half_width)); + + Pcontrib_bounds[i].center = center; + Pcontrib_bounds[i].left = left; + Pcontrib_bounds[i].right = right; + + n += (right - left + 1); + } + + /* Allocate memory for contributors. */ + + int total = n; + if ((total == 0) || ((Pcpool = (Contrib*)calloc(total, sizeof(Contrib))) == NULL)) + { + free(Pcontrib); + free(Pcontrib_bounds); + return NULL; + } + + Pcpool_next = Pcpool; + + // Create the list of source samples which contribute to each destination sample. 
+ + for (i = 0; i < dst_x; i++) + { + int max_k = -1; + Resample_Real max_w = -1e+20f; + + center = Pcontrib_bounds[i].center; + left = Pcontrib_bounds[i].left; + right = Pcontrib_bounds[i].right; + + Pcontrib[i].n = 0; + Pcontrib[i].p = Pcpool_next; + Pcpool_next += (right - left + 1); + assert((Pcpool_next - Pcpool) <= total); + + total_weight = 0; + for (j = left; j <= right; j++) + total_weight += (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale); + + const Resample_Real norm = static_cast(1.0f / total_weight); + + total_weight = 0; + +#if RESAMPLER_DEBUG + printf("%i: ", i); +#endif + + for (j = left; j <= right; j++) + { + weight = (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale) * norm; + if (weight == 0.0f) + continue; + + n = reflect(j, src_x, boundary_op); + +#if RESAMPLER_DEBUG + printf("%i(%f), ", n, weight); +#endif + + // Increment the number of source samples which contribute to the current destination sample. + + k = Pcontrib[i].n++; + + Pcontrib[i].p[k].pixel = (unsigned short)n; /* store src sample number */ + Pcontrib[i].p[k].weight = weight; /* store src sample weight */ + + total_weight += weight; /* total weight of all contributors */ + + if (weight > max_w) + { + max_w = weight; + max_k = k; + } + } + +#if RESAMPLER_DEBUG + printf("\n\n"); +#endif + + //assert(Pcontrib[i].n); + //assert(max_k != -1); + + if ((max_k == -1) || (Pcontrib[i].n == 0)) + { + free(Pcpool); + free(Pcontrib); + free(Pcontrib_bounds); + return NULL; + } + + if (total_weight != 1.0f) + Pcontrib[i].p[max_k].weight += 1.0f - total_weight; + } + } + +#if RESAMPLER_DEBUG + printf("*******\n"); +#endif + + free(Pcontrib_bounds); + + return Pcontrib; + } + + void Resampler::resample_x(Sample * Pdst, const Sample * Psrc) + { + assert(Pdst); + assert(Psrc); + + int i, j; + Sample total; + Contrib_List* Pclist = m_Pclist_x; + Contrib* p; + + for (i = m_resample_dst_x; i > 0; i--, Pclist++) + { +#if BASISU_RESAMPLER_DEBUG_OPS + total_ops += Pclist->n; +#endif + + 
for (j = Pclist->n, p = Pclist->p, total = 0; j > 0; j--, p++) + total += Psrc[p->pixel] * p->weight; + + *Pdst++ = total; + } + } + + void Resampler::scale_y_mov(Sample * Ptmp, const Sample * Psrc, Resample_Real weight, int dst_x) + { + int i; + +#if BASISU_RESAMPLER_DEBUG_OPS + total_ops += dst_x; +#endif + + // Not += because temp buf wasn't cleared. + for (i = dst_x; i > 0; i--) + * Ptmp++ = *Psrc++ * weight; + } + + void Resampler::scale_y_add(Sample * Ptmp, const Sample * Psrc, Resample_Real weight, int dst_x) + { +#if BASISU_RESAMPLER_DEBUG_OPS + total_ops += dst_x; +#endif + + for (int i = dst_x; i > 0; i--) + (*Ptmp++) += *Psrc++ * weight; + } + + void Resampler::clamp(Sample * Pdst, int n) + { + while (n > 0) + { + Sample x = *Pdst; + *Pdst++ = clamp_sample(x); + n--; + } + } + + void Resampler::resample_y(Sample * Pdst) + { + int i, j; + Sample* Psrc; + Contrib_List* Pclist = &m_Pclist_y[m_cur_dst_y]; + + Sample* Ptmp = m_delay_x_resample ? m_Ptmp_buf : Pdst; + assert(Ptmp); + + /* Process each contributor. */ + + for (i = 0; i < Pclist->n; i++) + { + // locate the contributor's location in the scan buffer -- the contributor must always be found! + for (j = 0; j < MAX_SCAN_BUF_SIZE; j++) + if (m_Pscan_buf->scan_buf_y[j] == Pclist->p[i].pixel) + break; + + assert(j < MAX_SCAN_BUF_SIZE); + + Psrc = m_Pscan_buf->scan_buf_l[j]; + + if (!i) + scale_y_mov(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x); + else + scale_y_add(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x); + + /* If this source line doesn't contribute to any + * more destination lines then mark the scanline buffer slot + * which holds this source line as free. + * (The max. number of slots used depends on the Y + * axis sampling factor and the scaled filter width.) 
+ */ + + if (--m_Psrc_y_count[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] == 0) + { + m_Psrc_y_flag[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] = false; + m_Pscan_buf->scan_buf_y[j] = -1; + } + } + + /* Now generate the destination line */ + + if (m_delay_x_resample) // Was X resampling delayed until after Y resampling? + { + assert(Pdst != Ptmp); + resample_x(Pdst, Ptmp); + } + else + { + assert(Pdst == Ptmp); + } + + if (m_lo < m_hi) + clamp(Pdst, m_resample_dst_x); + } + + bool Resampler::put_line(const Sample * Psrc) + { + int i; + + if (m_cur_src_y >= m_resample_src_y) + return false; + + /* Does this source line contribute + * to any destination line? if not, + * exit now. + */ + + if (!m_Psrc_y_count[resampler_range_check(m_cur_src_y, m_resample_src_y)]) + { + m_cur_src_y++; + return true; + } + + /* Find an empty slot in the scanline buffer. (FIXME: Perf. is terrible here with extreme scaling ratios.) */ + + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + if (m_Pscan_buf->scan_buf_y[i] == -1) + break; + + /* If the buffer is full, exit with an error. */ + + if (i == MAX_SCAN_BUF_SIZE) + { + m_status = STATUS_SCAN_BUFFER_FULL; + return false; + } + + m_Psrc_y_flag[resampler_range_check(m_cur_src_y, m_resample_src_y)] = true; + m_Pscan_buf->scan_buf_y[i] = m_cur_src_y; + + /* Does this slot have any memory allocated to it? */ + + if (!m_Pscan_buf->scan_buf_l[i]) + { + if ((m_Pscan_buf->scan_buf_l[i] = (Sample*)malloc(m_intermediate_x * sizeof(Sample))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return false; + } + } + + // Resampling on the X axis first? 
+ if (m_delay_x_resample) + { + assert(m_intermediate_x == m_resample_src_x); + + // Y-X resampling order + memcpy(m_Pscan_buf->scan_buf_l[i], Psrc, m_intermediate_x * sizeof(Sample)); + } + else + { + assert(m_intermediate_x == m_resample_dst_x); + + // X-Y resampling order + resample_x(m_Pscan_buf->scan_buf_l[i], Psrc); + } + + m_cur_src_y++; + + return true; + } + + const Resampler::Sample* Resampler::get_line() + { + int i; + + /* If all the destination lines have been + * generated, then always return NULL. + */ + + if (m_cur_dst_y == m_resample_dst_y) + return NULL; + + /* Check to see if all the required + * contributors are present, if not, + * return NULL. + */ + + for (i = 0; i < m_Pclist_y[m_cur_dst_y].n; i++) + if (!m_Psrc_y_flag[resampler_range_check(m_Pclist_y[m_cur_dst_y].p[i].pixel, m_resample_src_y)]) + return NULL; + + resample_y(m_Pdst_buf); + + m_cur_dst_y++; + + return m_Pdst_buf; + } + + Resampler::~Resampler() + { + int i; + +#if BASISU_RESAMPLER_DEBUG_OPS + printf("actual ops: %i\n", total_ops); +#endif + + free(m_Pdst_buf); + m_Pdst_buf = NULL; + + if (m_Ptmp_buf) + { + free(m_Ptmp_buf); + m_Ptmp_buf = NULL; + } + + /* Don't deallocate a contibutor list + * if the user passed us one of their own. 
+ */ + + if ((m_Pclist_x) && (!m_clist_x_forced)) + { + free(m_Pclist_x->p); + free(m_Pclist_x); + m_Pclist_x = NULL; + } + + if ((m_Pclist_y) && (!m_clist_y_forced)) + { + free(m_Pclist_y->p); + free(m_Pclist_y); + m_Pclist_y = NULL; + } + + free(m_Psrc_y_count); + m_Psrc_y_count = NULL; + + free(m_Psrc_y_flag); + m_Psrc_y_flag = NULL; + + if (m_Pscan_buf) + { + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + free(m_Pscan_buf->scan_buf_l[i]); + + free(m_Pscan_buf); + m_Pscan_buf = NULL; + } + } + + void Resampler::restart() + { + if (STATUS_OKAY != m_status) + return; + + m_cur_src_y = m_cur_dst_y = 0; + + int i, j; + for (i = 0; i < m_resample_src_y; i++) + { + m_Psrc_y_count[i] = 0; + m_Psrc_y_flag[i] = false; + } + + for (i = 0; i < m_resample_dst_y; i++) + { + for (j = 0; j < m_Pclist_y[i].n; j++) + m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++; + } + + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + { + m_Pscan_buf->scan_buf_y[i] = -1; + + free(m_Pscan_buf->scan_buf_l[i]); + m_Pscan_buf->scan_buf_l[i] = NULL; + } + } + + Resampler::Resampler(int src_x, int src_y, + int dst_x, int dst_y, + Boundary_Op boundary_op, + Resample_Real sample_low, Resample_Real sample_high, + const char* Pfilter_name, + Contrib_List * Pclist_x, + Contrib_List * Pclist_y, + Resample_Real filter_x_scale, + Resample_Real filter_y_scale, + Resample_Real src_x_ofs, + Resample_Real src_y_ofs) + { + int i, j; + Resample_Real support, (*func)(Resample_Real); + + assert(src_x > 0); + assert(src_y > 0); + assert(dst_x > 0); + assert(dst_y > 0); + +#if BASISU_RESAMPLER_DEBUG_OPS + total_ops = 0; +#endif + + m_lo = sample_low; + m_hi = sample_high; + + m_delay_x_resample = false; + m_intermediate_x = 0; + m_Pdst_buf = NULL; + m_Ptmp_buf = NULL; + m_clist_x_forced = false; + m_Pclist_x = NULL; + m_clist_y_forced = false; + m_Pclist_y = NULL; + m_Psrc_y_count = NULL; + m_Psrc_y_flag = NULL; + m_Pscan_buf = NULL; + m_status = STATUS_OKAY; + + m_resample_src_x = src_x; + 
m_resample_src_y = src_y; + m_resample_dst_x = dst_x; + m_resample_dst_y = dst_y; + + m_boundary_op = boundary_op; + + if ((m_Pdst_buf = (Sample*)malloc(m_resample_dst_x * sizeof(Sample))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + // Find the specified filter. + + if (Pfilter_name == NULL) + Pfilter_name = BASISU_RESAMPLER_DEFAULT_FILTER; + + for (i = 0; i < g_num_resample_filters; i++) + if (strcmp(Pfilter_name, g_resample_filters[i].name) == 0) + break; + + if (i == g_num_resample_filters) + { + m_status = STATUS_BAD_FILTER_NAME; + return; + } + + func = g_resample_filters[i].func; + support = g_resample_filters[i].support; + + /* Create contributor lists, unless the user supplied custom lists. */ + + if (!Pclist_x) + { + m_Pclist_x = make_clist(m_resample_src_x, m_resample_dst_x, m_boundary_op, func, support, filter_x_scale, src_x_ofs); + if (!m_Pclist_x) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + } + else + { + m_Pclist_x = Pclist_x; + m_clist_x_forced = true; + } + + if (!Pclist_y) + { + m_Pclist_y = make_clist(m_resample_src_y, m_resample_dst_y, m_boundary_op, func, support, filter_y_scale, src_y_ofs); + if (!m_Pclist_y) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + } + else + { + m_Pclist_y = Pclist_y; + m_clist_y_forced = true; + } + + if ((m_Psrc_y_count = (int*)calloc(m_resample_src_y, sizeof(int))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + if ((m_Psrc_y_flag = (unsigned char*)calloc(m_resample_src_y, sizeof(unsigned char))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + // Count how many times each source line contributes to a destination line. 
+ + for (i = 0; i < m_resample_dst_y; i++) + for (j = 0; j < m_Pclist_y[i].n; j++) + m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++; + + if ((m_Pscan_buf = (Scan_Buf*)malloc(sizeof(Scan_Buf))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + { + m_Pscan_buf->scan_buf_y[i] = -1; + m_Pscan_buf->scan_buf_l[i] = NULL; + } + + m_cur_src_y = m_cur_dst_y = 0; + { + // Determine which axis to resample first by comparing the number of multiplies required + // for each possibility. + int x_ops = count_ops(m_Pclist_x, m_resample_dst_x); + int y_ops = count_ops(m_Pclist_y, m_resample_dst_y); + + // Hack 10/2000: Weight Y axis ops a little more than X axis ops. + // (Y axis ops use more cache resources.) + int xy_ops = x_ops * m_resample_src_y + + (4 * y_ops * m_resample_dst_x) / 3; + + int yx_ops = (4 * y_ops * m_resample_src_x) / 3 + + x_ops * m_resample_dst_y; + +#if BASISU_RESAMPLER_DEBUG_OPS + printf("src: %i %i\n", m_resample_src_x, m_resample_src_y); + printf("dst: %i %i\n", m_resample_dst_x, m_resample_dst_y); + printf("x_ops: %i\n", x_ops); + printf("y_ops: %i\n", y_ops); + printf("xy_ops: %i\n", xy_ops); + printf("yx_ops: %i\n", yx_ops); +#endif + + // Now check which resample order is better. In case of a tie, choose the order + // which buffers the least amount of data. 
+ if ((xy_ops > yx_ops) || + ((xy_ops == yx_ops) && (m_resample_src_x < m_resample_dst_x))) + { + m_delay_x_resample = true; + m_intermediate_x = m_resample_src_x; + } + else + { + m_delay_x_resample = false; + m_intermediate_x = m_resample_dst_x; + } +#if BASISU_RESAMPLER_DEBUG_OPS + printf("delaying: %i\n", m_delay_x_resample); +#endif + } + + if (m_delay_x_resample) + { + if ((m_Ptmp_buf = (Sample*)malloc(m_intermediate_x * sizeof(Sample))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + } + } + + void Resampler::get_clists(Contrib_List * *ptr_clist_x, Contrib_List * *ptr_clist_y) + { + if (ptr_clist_x) + * ptr_clist_x = m_Pclist_x; + + if (ptr_clist_y) + * ptr_clist_y = m_Pclist_y; + } + + int Resampler::get_filter_num() + { + return g_num_resample_filters; + } + + const char* Resampler::get_filter_name(int filter_num) + { + if ((filter_num < 0) || (filter_num >= g_num_resample_filters)) + return NULL; + else + return g_resample_filters[filter_num].name; + } + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_resampler.h b/thirdparty/basisu/encoder/basisu_resampler.h new file mode 100644 index 000000000..fc1918ec8 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_resampler.h @@ -0,0 +1,198 @@ +// basisu_resampler.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "../transcoder/basisu.h" + +#define BASISU_RESAMPLER_DEBUG_OPS (0) +#define BASISU_RESAMPLER_DEFAULT_FILTER "lanczos4" +#define BASISU_RESAMPLER_MAX_DIMENSION (16384) + +namespace basisu +{ + // float or double + typedef float Resample_Real; + + class Resampler + { + public: + typedef Resample_Real Sample; + + struct Contrib + { + Resample_Real weight; + uint16_t pixel; + }; + + struct Contrib_List + { + uint16_t n; + Contrib *p; + }; + + enum Boundary_Op + { + BOUNDARY_WRAP = 0, + BOUNDARY_REFLECT = 1, + BOUNDARY_CLAMP = 2 + }; + + enum Status + { + STATUS_OKAY = 0, + STATUS_OUT_OF_MEMORY = 1, + STATUS_BAD_FILTER_NAME = 2, + STATUS_SCAN_BUFFER_FULL = 3 + }; + + // src_x/src_y - Input dimensions + // dst_x/dst_y - Output dimensions + // boundary_op - How to sample pixels near the image boundaries + // sample_low/sample_high - Clamp output samples to specified range, or disable clamping if sample_low >= sample_high + // Pclist_x/Pclist_y - Optional pointers to contributor lists from another instance of a Resampler + // src_x_ofs/src_y_ofs - Offset input image by specified amount (fractional values okay) + Resampler( + int src_x, int src_y, + int dst_x, int dst_y, + Boundary_Op boundary_op = BOUNDARY_CLAMP, + Resample_Real sample_low = 0.0f, Resample_Real sample_high = 0.0f, + const char *Pfilter_name = BASISU_RESAMPLER_DEFAULT_FILTER, + Contrib_List *Pclist_x = NULL, + Contrib_List *Pclist_y = NULL, + Resample_Real filter_x_scale = 1.0f, + Resample_Real filter_y_scale = 1.0f, + Resample_Real src_x_ofs = 0.0f, + Resample_Real src_y_ofs = 0.0f); + + ~Resampler(); + + // Reinits resampler so it can handle another frame. + void restart(); + + // false on out of memory. + bool put_line(const Sample *Psrc); + + // NULL if no scanlines are currently available (give the resampler more scanlines!) + const Sample *get_line(); + + Status status() const + { + return m_status; + } + + // Returned contributor lists can be shared with another Resampler. 
+ void get_clists(Contrib_List **ptr_clist_x, Contrib_List **ptr_clist_y); + Contrib_List *get_clist_x() const + { + return m_Pclist_x; + } + Contrib_List *get_clist_y() const + { + return m_Pclist_y; + } + + // Filter accessors. + static int get_filter_num(); + static const char *get_filter_name(int filter_num); + + static Contrib_List *make_clist( + int src_x, int dst_x, Boundary_Op boundary_op, + Resample_Real(*Pfilter)(Resample_Real), + Resample_Real filter_support, + Resample_Real filter_scale, + Resample_Real src_ofs); + + static void free_clist(Contrib_List* p) { if (p) { free(p->p); free(p); } } + + private: + Resampler(); + Resampler(const Resampler &o); + Resampler &operator=(const Resampler &o); + +#ifdef BASISU_RESAMPLER_DEBUG_OPS + int total_ops; +#endif + + int m_intermediate_x; + + int m_resample_src_x; + int m_resample_src_y; + int m_resample_dst_x; + int m_resample_dst_y; + + Boundary_Op m_boundary_op; + + Sample *m_Pdst_buf; + Sample *m_Ptmp_buf; + + Contrib_List *m_Pclist_x; + Contrib_List *m_Pclist_y; + + bool m_clist_x_forced; + bool m_clist_y_forced; + + bool m_delay_x_resample; + + int *m_Psrc_y_count; + uint8_t *m_Psrc_y_flag; + + // The maximum number of scanlines that can be buffered at one time. 
+ enum + { + MAX_SCAN_BUF_SIZE = BASISU_RESAMPLER_MAX_DIMENSION + }; + + struct Scan_Buf + { + int scan_buf_y[MAX_SCAN_BUF_SIZE]; + Sample *scan_buf_l[MAX_SCAN_BUF_SIZE]; + }; + + Scan_Buf *m_Pscan_buf; + + int m_cur_src_y; + int m_cur_dst_y; + + Status m_status; + + void resample_x(Sample *Pdst, const Sample *Psrc); + void scale_y_mov(Sample *Ptmp, const Sample *Psrc, Resample_Real weight, int dst_x); + void scale_y_add(Sample *Ptmp, const Sample *Psrc, Resample_Real weight, int dst_x); + void clamp(Sample *Pdst, int n); + void resample_y(Sample *Pdst); + + static int reflect(const int j, const int src_x, const Boundary_Op boundary_op); + + inline int count_ops(Contrib_List *Pclist, int k) + { + int i, t = 0; + for (i = 0; i < k; i++) + t += Pclist[i].n; + return (t); + } + + Resample_Real m_lo; + Resample_Real m_hi; + + inline Resample_Real clamp_sample(Resample_Real f) const + { + if (f < m_lo) + f = m_lo; + else if (f > m_hi) + f = m_hi; + return f; + } + }; + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_resampler_filters.h b/thirdparty/basisu/encoder/basisu_resampler_filters.h new file mode 100644 index 000000000..c96416b1a --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_resampler_filters.h @@ -0,0 +1,47 @@ +// basisu_resampler_filters.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "../transcoder/basisu.h" + +namespace basisu +{ + typedef float (*resample_filter_func)(float t); + + struct resample_filter + { + const char *name; + resample_filter_func func; + float support; + }; + + extern const resample_filter g_resample_filters[]; + extern const int g_num_resample_filters; + + const float BASISU_BOX_FILTER_SUPPORT = 0.5f; + float box_filter(float t); /* pulse/Fourier window */ + + const float BASISU_TENT_FILTER_SUPPORT = 1.0f; + float tent_filter(float t); /* box (*) box, bilinear/triangle */ + + const float BASISU_GAUSSIAN_FILTER_SUPPORT = 1.25f; + float gaussian_filter(float t); // with blackman window + + const float BASISU_BELL_FILTER_SUPPORT = 1.5f; + float bell_filter(float t); /* box (*) box (*) box */ + + int find_resample_filter(const char *pName); + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_ssim.cpp b/thirdparty/basisu/encoder/basisu_ssim.cpp new file mode 100644 index 000000000..4cdf3a48d --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_ssim.cpp @@ -0,0 +1,410 @@ +// basisu_ssim.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_ssim.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +namespace basisu +{ + float gauss(int x, int y, float sigma_sqr) + { + float pow = expf(-((x * x + y * y) / (2.0f * sigma_sqr))); + float g = (1.0f / (sqrtf((float)(2.0f * M_PI * sigma_sqr)))) * pow; + return g; + } + + // size_x/y should be odd + void compute_gaussian_kernel(float *pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags) + { + assert(size_x & size_y & 1); + + if (!(size_x | size_y)) + return; + + int mid_x = size_x / 2; + int mid_y = size_y / 2; + + double sum = 0; + for (int x = 0; x < size_x; x++) + { + for (int y = 0; y < size_y; y++) + { + float g; + if ((x > mid_x) && (y < mid_y)) + g = pDst[(size_x - x - 1) + y * size_x]; + else if ((x < mid_x) && (y > mid_y)) + g = pDst[x + (size_y - y - 1) * size_x]; + else if ((x > mid_x) && (y > mid_y)) + g = pDst[(size_x - x - 1) + (size_y - y - 1) * size_x]; + else + g = gauss(x - mid_x, y - mid_y, sigma_sqr); + + pDst[x + y * size_x] = g; + sum += g; + } + } + + if (flags & cComputeGaussianFlagNormalizeCenterToOne) + { + sum = pDst[mid_x + mid_y * size_x]; + } + + if (flags & (cComputeGaussianFlagNormalizeCenterToOne | cComputeGaussianFlagNormalize)) + { + double one_over_sum = 1.0f / sum; + for (int i = 0; i < size_x * size_y; i++) + pDst[i] = static_cast(pDst[i] * one_over_sum); + + if (flags & cComputeGaussianFlagNormalizeCenterToOne) + pDst[mid_x + mid_y * size_x] = 1.0f; + } + + if (flags & cComputeGaussianFlagPrint) + { + printf("{\n"); + for (int y = 0; y < size_y; y++) + { + printf(" "); + for (int x = 0; x < size_x; x++) + { + printf("%f, ", pDst[x + y * size_x]); + } + printf("\n"); + } + printf("}"); + } + } + + void gaussian_filter(imagef &dst, const imagef &orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping, uint32_t width_divisor, uint32_t height_divisor) + { + assert(&dst != &orig_img); + + assert(odd_filter_width && (odd_filter_width & 1)); + odd_filter_width |= 1; + + 
vector2D kernel(odd_filter_width, odd_filter_width); + compute_gaussian_kernel(kernel.get_ptr(), odd_filter_width, odd_filter_width, sigma_sqr, cComputeGaussianFlagNormalize); + + const int dst_width = orig_img.get_width() / width_divisor; + const int dst_height = orig_img.get_height() / height_divisor; + + const int H = odd_filter_width / 2; + const int L = -H; + + dst.crop(dst_width, dst_height); + +//#pragma omp parallel for + for (int oy = 0; oy < dst_height; oy++) + { + for (int ox = 0; ox < dst_width; ox++) + { + vec4F c(0.0f); + + for (int yd = L; yd <= H; yd++) + { + int y = oy * height_divisor + (height_divisor >> 1) + yd; + + for (int xd = L; xd <= H; xd++) + { + int x = ox * width_divisor + (width_divisor >> 1) + xd; + + const vec4F &p = orig_img.get_clamped_or_wrapped(x, y, wrapping, wrapping); + + float w = kernel(xd + H, yd + H); + c[0] += p[0] * w; + c[1] += p[1] * w; + c[2] += p[2] * w; + c[3] += p[3] * w; + } + } + + dst(ox, oy).set(c[0], c[1], c[2], c[3]); + } + } + } + + void pow_image(const imagef &src, imagef &dst, const vec4F &power) + { + dst.resize(src); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &p = src(x, y); + + if ((power[0] == 2.0f) && (power[1] == 2.0f) && (power[2] == 2.0f) && (power[3] == 2.0f)) + dst(x, y).set(p[0] * p[0], p[1] * p[1], p[2] * p[2], p[3] * p[3]); + else + dst(x, y).set(powf(p[0], power[0]), powf(p[1], power[1]), powf(p[2], power[2]), powf(p[3], power[3])); + } + } + } + + void mul_image(const imagef &src, imagef &dst, const vec4F &mul) + { + dst.resize(src); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &p = src(x, y); + dst(x, y).set(p[0] * mul[0], p[1] * mul[1], p[2] * mul[2], p[3] * mul[3]); + } + } + } + + void scale_image(const imagef &src, imagef &dst, const vec4F &scale, const vec4F &shift) + { + 
dst.resize(src); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &p = src(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + d[c] = scale[c] * p[c] + shift[c]; + + dst(x, y).set(d[0], d[1], d[2], d[3]); + } + } + } + + void add_weighted_image(const imagef &src1, const vec4F &alpha, const imagef &src2, const vec4F &beta, const vec4F &gamma, imagef &dst) + { + dst.resize(src1); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &s1 = src1(x, y); + const vec4F &s2 = src2(x, y); + + dst(x, y).set( + s1[0] * alpha[0] + s2[0] * beta[0] + gamma[0], + s1[1] * alpha[1] + s2[1] * beta[1] + gamma[1], + s1[2] * alpha[2] + s2[2] * beta[2] + gamma[2], + s1[3] * alpha[3] + s2[3] * beta[3] + gamma[3]); + } + } + } + + void add_image(const imagef &src1, const imagef &src2, imagef &dst) + { + dst.resize(src1); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &s1 = src1(x, y); + const vec4F &s2 = src2(x, y); + + dst(x, y).set(s1[0] + s2[0], s1[1] + s2[1], s1[2] + s2[2], s1[3] + s2[3]); + } + } + } + + void adds_image(const imagef &src, const vec4F &value, imagef &dst) + { + dst.resize(src); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &p = src(x, y); + + dst(x, y).set(p[0] + value[0], p[1] + value[1], p[2] + value[2], p[3] + value[3]); + } + } + } + + void mul_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale) + { + dst.resize(src1); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &s1 = src1(x, y); + const vec4F &s2 = src2(x, y); + + vec4F d; + 
+ for (uint32_t c = 0; c < 4; c++) + { + float v1 = s1[c]; + float v2 = s2[c]; + d[c] = v1 * v2 * scale[c]; + } + + dst(x, y) = d; + } + } + } + + void div_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale) + { + dst.resize(src1); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &s1 = src1(x, y); + const vec4F &s2 = src2(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + { + float v = s2[c]; + if (v == 0.0f) + d[c] = 0.0f; + else + d[c] = (s1[c] * scale[c]) / v; + } + + dst(x, y) = d; + } + } + } + + vec4F avg_image(const imagef &src) + { + vec4F avg(0.0f); + + for (uint32_t y = 0; y < src.get_height(); y++) + { + for (uint32_t x = 0; x < src.get_width(); x++) + { + const vec4F &s = src(x, y); + + avg += vec4F(s[0], s[1], s[2], s[3]); + } + } + + avg /= static_cast(src.get_total_pixels()); + + return avg; + } + + // Reference: https://ece.uwaterloo.ca/~z70wang/research/ssim/index.html + vec4F compute_ssim(const imagef &a, const imagef &b) + { + imagef axb, a_sq, b_sq, mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, s1_sq, s2_sq, s12, smap, t1, t2, t3; + + const float C1 = 6.50250f, C2 = 58.52250f; + + pow_image(a, a_sq, vec4F(2)); + pow_image(b, b_sq, vec4F(2)); + mul_image(a, b, axb, vec4F(1.0f)); + + gaussian_filter(mu1, a, 11, 1.5f * 1.5f); + gaussian_filter(mu2, b, 11, 1.5f * 1.5f); + + pow_image(mu1, mu1_sq, vec4F(2)); + pow_image(mu2, mu2_sq, vec4F(2)); + mul_image(mu1, mu2, mu1_mu2, vec4F(1.0f)); + + gaussian_filter(s1_sq, a_sq, 11, 1.5f * 1.5f); + add_weighted_image(s1_sq, vec4F(1), mu1_sq, vec4F(-1), vec4F(0), s1_sq); + + gaussian_filter(s2_sq, b_sq, 11, 1.5f * 1.5f); + add_weighted_image(s2_sq, vec4F(1), mu2_sq, vec4F(-1), vec4F(0), s2_sq); + + gaussian_filter(s12, axb, 11, 1.5f * 1.5f); + add_weighted_image(s12, vec4F(1), mu1_mu2, vec4F(-1), vec4F(0), s12); + + scale_image(mu1_mu2, t1, vec4F(2), vec4F(0)); + adds_image(t1, 
vec4F(C1), t1); + + scale_image(s12, t2, vec4F(2), vec4F(0)); + adds_image(t2, vec4F(C2), t2); + + mul_image(t1, t2, t3, vec4F(1)); + + add_image(mu1_sq, mu2_sq, t1); + adds_image(t1, vec4F(C1), t1); + + add_image(s1_sq, s2_sq, t2); + adds_image(t2, vec4F(C2), t2); + + mul_image(t1, t2, t1, vec4F(1)); + + div_image(t3, t1, smap, vec4F(1)); + + return avg_image(smap); + } + + vec4F compute_ssim(const image &a, const image &b, bool luma, bool luma_601) + { + image ta(a), tb(b); + + if ((ta.get_width() != tb.get_width()) || (ta.get_height() != tb.get_height())) + { + debug_printf("compute_ssim: Cropping input images to equal dimensions\n"); + + const uint32_t w = minimum(a.get_width(), b.get_width()); + const uint32_t h = minimum(a.get_height(), b.get_height()); + ta.crop(w, h); + tb.crop(w, h); + } + + if (!ta.get_width() || !ta.get_height()) + { + assert(0); + return vec4F(0); + } + + if (luma) + { + for (uint32_t y = 0; y < ta.get_height(); y++) + { + for (uint32_t x = 0; x < ta.get_width(); x++) + { + ta(x, y).set(ta(x, y).get_luma(luma_601), ta(x, y).a); + tb(x, y).set(tb(x, y).get_luma(luma_601), tb(x, y).a); + } + } + } + + imagef fta, ftb; + + fta.set(ta); + ftb.set(tb); + + return compute_ssim(fta, ftb); + } + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_ssim.h b/thirdparty/basisu/encoder/basisu_ssim.h new file mode 100644 index 000000000..51cd2d78f --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_ssim.h @@ -0,0 +1,44 @@ +// basisu_ssim.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "basisu_enc.h" + +namespace basisu +{ + float gauss(int x, int y, float sigma_sqr); + + enum + { + cComputeGaussianFlagNormalize = 1, + cComputeGaussianFlagPrint = 2, + cComputeGaussianFlagNormalizeCenterToOne = 4 + }; + + void compute_gaussian_kernel(float *pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags = 0); + + void scale_image(const imagef &src, imagef &dst, const vec4F &scale, const vec4F &shift); + void add_weighted_image(const imagef &src1, const vec4F &alpha, const imagef &src2, const vec4F &beta, const vec4F &gamma, imagef &dst); + void add_image(const imagef &src1, const imagef &src2, imagef &dst); + void adds_image(const imagef &src, const vec4F &value, imagef &dst); + void mul_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale); + void div_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale); + vec4F avg_image(const imagef &src); + + void gaussian_filter(imagef &dst, const imagef &orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping = false, uint32_t width_divisor = 1, uint32_t height_divisor = 1); + + vec4F compute_ssim(const imagef &a, const imagef &b); + vec4F compute_ssim(const image &a, const image &b, bool luma, bool luma_601); + +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_uastc_enc.cpp b/thirdparty/basisu/encoder/basisu_uastc_enc.cpp new file mode 100644 index 000000000..7e0a2b1df --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_uastc_enc.cpp @@ -0,0 +1,4163 @@ +// basisu_uastc_enc.cpp +// 
Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_uastc_enc.h" +#include "3rdparty/android_astc_decomp.h" +#include "basisu_gpu_texture.h" +#include "basisu_bc7enc.h" + +#ifdef _DEBUG +// When BASISU_VALIDATE_UASTC_ENC is 1, we pack and unpack to/from UASTC and ASTC, then validate that each codec returns the exact same results. This is slower. +#define BASISU_VALIDATE_UASTC_ENC 1 +#endif + +#define BASISU_SUPPORT_FORCE_MODE 0 + +using namespace basist; + +namespace basisu +{ + const uint32_t MAX_ENCODE_RESULTS = 512; + +#if BASISU_VALIDATE_UASTC_ENC + static void validate_func(bool condition, int line) + { + if (!condition) + { + fprintf(stderr, "basisu_uastc_enc: Internal validation failed on line %u!\n", line); + } + } + + #define VALIDATE(c) validate_func(c, __LINE__); +#else + #define VALIDATE(c) +#endif + + enum dxt_constants + { + cDXT1SelectorBits = 2U, cDXT1SelectorValues = 1U << cDXT1SelectorBits, cDXT1SelectorMask = cDXT1SelectorValues - 1U, + cDXT5SelectorBits = 3U, cDXT5SelectorValues = 1U << cDXT5SelectorBits, cDXT5SelectorMask = cDXT5SelectorValues - 1U, + }; + + struct dxt1_block + { + enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 }; + + uint8_t m_low_color[cTotalEndpointBytes]; + uint8_t m_high_color[cTotalEndpointBytes]; + uint8_t m_selectors[cTotalSelectorBytes]; + + inline void clear() { basisu::clear_obj(*this); } + + inline uint32_t get_high_color() 
const { return m_high_color[0] | (m_high_color[1] << 8U); } + inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } + inline void set_low_color(uint16_t c) { m_low_color[0] = static_cast(c & 0xFF); m_low_color[1] = static_cast((c >> 8) & 0xFF); } + inline void set_high_color(uint16_t c) { m_high_color[0] = static_cast(c & 0xFF); m_high_color[1] = static_cast((c >> 8) & 0xFF); } + inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits))& cDXT1SelectorMask; } + inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); } + + static uint16_t pack_color(const color_rgba& color, bool scaled, uint32_t bias = 127U) + { + uint32_t r = color.r, g = color.g, b = color.b; + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 63U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + return static_cast(basisu::minimum(b, 31U) | (basisu::minimum(g, 63U) << 5U) | (basisu::minimum(r, 31U) << 11U)); + } + + static uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast(b | (g << 5U) | (r << 11U)); } + }; + +#define UASTC_WRITE_MODE_DESCS 0 + + static inline void uastc_write_bits(uint8_t* pBuf, uint32_t& bit_offset, uint64_t code, uint32_t codesize, const char* pDesc) + { + (void)pDesc; + +#if UASTC_WRITE_MODE_DESCS + if (pDesc) + printf("%s: %u %u\n", pDesc, bit_offset, codesize); +#endif + + assert((codesize == 64) || (code < (1ULL << codesize))); + + while (codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_write = basisu::minimum(codesize, 8 - byte_bit_offset); + + pBuf[bit_offset >> 3] |= (code << byte_bit_offset); + + code >>= bits_to_write; + codesize -= bits_to_write; + bit_offset += bits_to_write; + } + } + + void 
pack_uastc(basist::uastc_block& blk, const uastc_encode_results& result, const etc_block& etc1_blk, uint32_t etc1_bias, const eac_a8_block& etc_eac_a8_blk, bool bc1_hint0, bool bc1_hint1) + { + if ((g_uastc_mode_has_alpha[result.m_uastc_mode]) && (result.m_uastc_mode != UASTC_MODE_INDEX_SOLID_COLOR)) + { + assert(etc_eac_a8_blk.m_multiplier >= 1); + } + + uint8_t buf[32]; + memset(buf, 0, sizeof(buf)); + + uint32_t block_bit_offset = 0; + +#if UASTC_WRITE_MODE_DESCS + printf("**** Mode: %u\n", result.m_uastc_mode); +#endif + + uastc_write_bits(buf, block_bit_offset, g_uastc_mode_huff_codes[result.m_uastc_mode][0], g_uastc_mode_huff_codes[result.m_uastc_mode][1], "mode"); + + if (result.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + uastc_write_bits(buf, block_bit_offset, result.m_solid_color.r, 8, "R"); + uastc_write_bits(buf, block_bit_offset, result.m_solid_color.g, 8, "G"); + uastc_write_bits(buf, block_bit_offset, result.m_solid_color.b, 8, "B"); + uastc_write_bits(buf, block_bit_offset, result.m_solid_color.a, 8, "A"); + + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_diff_bit(), 1, "ETC1D"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(0), 3, "ETC1I"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_selector(0, 0), 2, "ETC1S"); + + uint32_t r, g, b; + if (etc1_blk.get_diff_bit()) + etc_block::unpack_color5(r, g, b, etc1_blk.get_base5_color(), false); + else + etc_block::unpack_color4(r, g, b, etc1_blk.get_base4_color(0), false); + + uastc_write_bits(buf, block_bit_offset, r, 5, "ETC1R"); + uastc_write_bits(buf, block_bit_offset, g, 5, "ETC1G"); + uastc_write_bits(buf, block_bit_offset, b, 5, "ETC1B"); + + memcpy(&blk, buf, sizeof(blk)); + return; + } + + if (g_uastc_mode_has_bc1_hint0[result.m_uastc_mode]) + uastc_write_bits(buf, block_bit_offset, bc1_hint0, 1, "BC1H0"); + else + { + assert(bc1_hint0 == false); + } + + if (g_uastc_mode_has_bc1_hint1[result.m_uastc_mode]) + uastc_write_bits(buf, block_bit_offset, 
bc1_hint1, 1, "BC1H1"); + else + { + assert(bc1_hint1 == false); + } + + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_flip_bit(), 1, "ETC1F"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_diff_bit(), 1, "ETC1D"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(0), 3, "ETC1I0"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(1), 3, "ETC1I1"); + + if (g_uastc_mode_has_etc1_bias[result.m_uastc_mode]) + uastc_write_bits(buf, block_bit_offset, etc1_bias, 5, "ETC1BIAS"); + else + { + assert(etc1_bias == 0); + } + + if (g_uastc_mode_has_alpha[result.m_uastc_mode]) + { + const uint32_t etc2_hints = etc_eac_a8_blk.m_table | (etc_eac_a8_blk.m_multiplier << 4); + + assert(etc2_hints > 0 && etc2_hints <= 0xFF); + uastc_write_bits(buf, block_bit_offset, etc2_hints, 8, "ETC2TM"); + } + + uint32_t subsets = 1; + switch (result.m_uastc_mode) + { + case 2: + case 4: + case 7: + case 9: + case 16: + uastc_write_bits(buf, block_bit_offset, result.m_common_pattern, 5, "PAT"); + subsets = 2; + break; + case 3: + uastc_write_bits(buf, block_bit_offset, result.m_common_pattern, 4, "PAT"); + subsets = 3; + break; + default: + break; + } + +#ifdef _DEBUG + uint32_t part_seed = 0; + switch (result.m_uastc_mode) + { + case 2: + case 4: + case 9: + case 16: + part_seed = g_astc_bc7_common_partitions2[result.m_common_pattern].m_astc; + break; + case 3: + part_seed = g_astc_bc7_common_partitions3[result.m_common_pattern].m_astc; + break; + case 7: + part_seed = g_bc7_3_astc2_common_partitions[result.m_common_pattern].m_astc2; + break; + default: + break; + } +#endif + + uint32_t total_planes = 1; + switch (result.m_uastc_mode) + { + case 6: + case 11: + case 13: + uastc_write_bits(buf, block_bit_offset, result.m_astc.m_ccs, 2, "COMPSEL"); + total_planes = 2; + break; + case 17: + // CCS field is always 3 for dual plane LA. 
+ assert(result.m_astc.m_ccs == 3); + total_planes = 2; + break; + default: + break; + } + + uint8_t weights[32]; + memcpy(weights, result.m_astc.m_weights, 16 * total_planes); + + uint8_t endpoints[18]; + memcpy(endpoints, result.m_astc.m_endpoints, sizeof(endpoints)); + + const uint32_t total_comps = g_uastc_mode_comps[result.m_uastc_mode]; + + // LLAA + // LLAA LLAA + // LLAA LLAA LLAA + // RRGGBB + // RRGGBB RRGGBB + // RRGGBB RRGGBB RRGGBB + // RRGGBBAA + // RRGGBBAA RRGGBBAA + + const uint32_t weight_bits = g_uastc_mode_weight_bits[result.m_uastc_mode]; + + const uint8_t* pPartition_pattern; + const uint8_t* pSubset_anchor_indices = basist::get_anchor_indices(subsets, result.m_uastc_mode, result.m_common_pattern, pPartition_pattern); + + for (uint32_t plane_index = 0; plane_index < total_planes; plane_index++) + { + for (uint32_t subset_index = 0; subset_index < subsets; subset_index++) + { + const uint32_t anchor_index = pSubset_anchor_indices[subset_index]; + +#ifdef _DEBUG + if (subsets >= 2) + { + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t part_index = astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true); + if (part_index == subset_index) + { + assert(anchor_index == i); + break; + } + } + } + else + { + assert(!anchor_index); + } +#endif + + // Check anchor weight's MSB - if it's set then invert this subset's weights and swap the endpoints + if (weights[anchor_index * total_planes + plane_index] & (1 << (weight_bits - 1))) + { + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t part_index = pPartition_pattern[i]; + +#ifdef _DEBUG + if (subsets >= 2) + { + assert(part_index == (uint32_t)astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true)); + } + else + { + assert(!part_index); + } +#endif + + if (part_index == subset_index) + weights[i * total_planes + plane_index] = ((1 << weight_bits) - 1) - weights[i * total_planes + plane_index]; + } + + if (total_planes == 2) + { + for (int c = 0; c < 
(int)total_comps; c++) + { + const uint32_t comp_plane = (total_comps == 2) ? c : ((c == result.m_astc.m_ccs) ? 1 : 0); + + if (comp_plane == plane_index) + std::swap(endpoints[c * 2 + 0], endpoints[c * 2 + 1]); + } + } + else + { + for (uint32_t c = 0; c < total_comps; c++) + std::swap(endpoints[subset_index * total_comps * 2 + c * 2 + 0], endpoints[subset_index * total_comps * 2 + c * 2 + 1]); + } + } + } // subset_index + } // plane_index + + const uint32_t total_values = total_comps * 2 * subsets; + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[result.m_uastc_mode]; + + uint32_t bit_values[18]; + uint32_t tq_values[8]; + uint32_t total_tq_values = 0; + uint32_t tq_accum = 0; + uint32_t tq_mul = 1; + + const uint32_t ep_bits = g_astc_bise_range_table[endpoint_range][0]; + const uint32_t ep_trits = g_astc_bise_range_table[endpoint_range][1]; + const uint32_t ep_quints = g_astc_bise_range_table[endpoint_range][2]; + + for (uint32_t i = 0; i < total_values; i++) + { + uint32_t val = endpoints[i]; + + uint32_t bits = val & ((1 << ep_bits) - 1); + uint32_t tq = val >> ep_bits; + + bit_values[i] = bits; + + if (ep_trits) + { + assert(tq < 3); + tq_accum += tq * tq_mul; + tq_mul *= 3; + if (tq_mul == 243) + { + tq_values[total_tq_values++] = tq_accum; + tq_accum = 0; + tq_mul = 1; + } + } + else if (ep_quints) + { + assert(tq < 5); + tq_accum += tq * tq_mul; + tq_mul *= 5; + if (tq_mul == 125) + { + tq_values[total_tq_values++] = tq_accum; + tq_accum = 0; + tq_mul = 1; + } + } + } + + uint32_t total_endpoint_bits = 0; + (void)total_endpoint_bits; + + for (uint32_t i = 0; i < total_tq_values; i++) + { + const uint32_t num_bits = ep_trits ? 
8 : 7; + uastc_write_bits(buf, block_bit_offset, tq_values[i], num_bits, "ETQ"); + total_endpoint_bits += num_bits; + } + + if (tq_mul > 1) + { + uint32_t num_bits; + if (ep_trits) + { + if (tq_mul == 3) + num_bits = 2; + else if (tq_mul == 9) + num_bits = 4; + else if (tq_mul == 27) + num_bits = 5; + else //if (tq_mul == 81) + num_bits = 7; + } + else + { + if (tq_mul == 5) + num_bits = 3; + else //if (tq_mul == 25) + num_bits = 5; + } + uastc_write_bits(buf, block_bit_offset, tq_accum, num_bits, "ETQ"); + total_endpoint_bits += num_bits; + } + + for (uint32_t i = 0; i < total_values; i++) + { + uastc_write_bits(buf, block_bit_offset, bit_values[i], ep_bits, "EBITS"); + total_endpoint_bits += ep_bits; + } + +#if UASTC_WRITE_MODE_DESCS + uint32_t weight_start = block_bit_offset; +#endif + + uint32_t total_weight_bits = 0; + (void)total_weight_bits; + + const uint32_t plane_shift = (total_planes == 2) ? 1 : 0; + for (uint32_t i = 0; i < 16 * total_planes; i++) + { + uint32_t numbits = weight_bits; + for (uint32_t s = 0; s < subsets; s++) + { + if (pSubset_anchor_indices[s] == (i >> plane_shift)) + { + numbits--; + break; + } + } + + uastc_write_bits(buf, block_bit_offset, weights[i], numbits, nullptr); + + total_weight_bits += numbits; + } + +#if UASTC_WRITE_MODE_DESCS + printf("WEIGHTS: %u %u\n", weight_start, total_weight_bits); +#endif + + assert(block_bit_offset <= 128); + memcpy(&blk, buf, sizeof(blk)); + +#if UASTC_WRITE_MODE_DESCS + printf("Total bits: %u, endpoint bits: %u, weight bits: %u\n", block_bit_offset, total_endpoint_bits, total_weight_bits); +#endif + } + + // MODE 0 + // 0. DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 19 (192) MODE6 RGB + // 18. 
DualPlane: 0, WeightRange: 11 (32), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 11 (32) MODE6 RGB + static void astc_mode0_or_18(uint32_t mode, const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, const uint8_t *pForce_selectors = nullptr) + { + const uint32_t endpoint_range = (mode == 18) ? 11 : 19; + const uint32_t weight_range = (mode == 18) ? 11 : 8; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = (mode == 18) ? 32 : 16; + ccell_params.m_pSelector_weights = (mode == 18) ? g_astc_weights5 : g_astc_weights4; + ccell_params.m_pSelector_weightsx = (mode == 18) ? (const bc7enc_vec4F*)g_astc_weights5x : (const bc7enc_vec4F*)g_astc_weights4x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + ccell_params.m_pForce_selectors = pForce_selectors; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = weight_range;// (mode == 18) ? 
11 : 8; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 8; + + astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + + bool invert = false; + + if (pForce_selectors == nullptr) + { + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + invert = true; + } + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + astc_results.m_weights[x + y * 4] = ((mode == 18) ? 
31 : 15) - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = mode; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + // MODE 1 + // 1-subset, 2-bit indices, 8-bit endpoints, BC7 mode 3 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) MODE3 or MODE5 RGB + static void astc_mode1(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = 4; + ccell_params.m_pSelector_weights = g_bc7_weights2; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params.m_astc_endpoint_range = 20; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 2; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 8; + + astc_results.m_endpoints[0] = 
ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + + const uint32_t range = 20; + + bool invert = false; + + int s0 = g_astc_unquant[range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 1; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + static uint32_t estimate_partition2(uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeights, const color_rgba block[4][4], const uint32_t weights[4]) + { + assert(pWeights[0] == 0 && pWeights[num_weights - 1] == 64); + + uint64_t best_err = UINT64_MAX; + uint32_t best_common_pattern = 0; + + 
for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS2; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + const uint8_t* pPartition = &g_bc7_partition2[bc7_pattern * 16]; + + color_quad_u8 subset_colors[2][16]; + uint32_t subset_total_colors[2] = { 0, 0 }; + for (uint32_t index = 0; index < 16; index++) + subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index]; + + uint64_t total_subset_err = 0; + for (uint32_t subset = 0; (subset < 2) && (total_subset_err < best_err); subset++) + total_subset_err += color_cell_compression_est_astc(num_weights, num_comps, pWeights, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights); + + if (total_subset_err < best_err) + { + best_err = total_subset_err; + best_common_pattern = common_pattern; + } + } + + return best_common_pattern; + } + + // MODE 2 + // 2-subset, 3-bit indices, 4-bit endpoints, BC7 mode 1 + // DualPlane: 0, WeightRange: 5 (8), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 8 (16) MODE1 + static void astc_mode2(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition) + { + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2; + + if (estimate_partition) + { + const uint32_t weights[4] = { 1, 1, 1, 1 }; + first_common_pattern = estimate_partition2(8, 3, g_bc7_weights3, block, weights); + last_common_pattern = first_common_pattern + 1; + } + + for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + color_rgba part_pixels[2][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[2] = { 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t 
x = 0; x < 4; x++) + { + const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + part_pixel_index[y][x] = num_part_pixels[part]; + part_pixels[part][num_part_pixels[part]++] = block[y][x]; + } + } + + color_cell_compressor_params ccell_params[2]; + color_cell_compressor_results ccell_results[2]; + uint8_t ccell_result_selectors[2][16]; + uint8_t ccell_result_selectors_temp[2][16]; + + uint64_t total_part_err = 0; + for (uint32_t part = 0; part < 2; part++) + { + memset(&ccell_params[part], 0, sizeof(ccell_params[part])); + + ccell_params[part].m_num_pixels = num_part_pixels[part]; + ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0]; + ccell_params[part].m_num_selector_weights = 8; + ccell_params[part].m_pSelector_weights = g_bc7_weights3; + ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x; + ccell_params[part].m_astc_endpoint_range = 8; + ccell_params[part].m_weights[0] = 1; + ccell_params[part].m_weights[1] = 1; + ccell_params[part].m_weights[2] = 1; + ccell_params[part].m_weights[3] = 1; + + memset(&ccell_results[part], 0, sizeof(ccell_results[part])); + ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0]; + ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params); + total_part_err += part_err; + } // part + + { + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 5; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 2; + astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc; + astc_results.m_cem = 8; + + uint32_t p0 = 0; + uint32_t p1 = 1; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + std::swap(p0, p1); + + astc_results.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0]; + 
astc_results.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2]; + + const uint32_t range = 8; + + bool invert[2] = { false, false }; + + int s0 = g_astc_unquant[range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + invert[0] = true; + } + + astc_results.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2]; + + s0 = g_astc_unquant[range][astc_results.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4 + 6]].m_unquant; + s1 = g_astc_unquant[range][astc_results.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5 + 
/* NOTE(review): this region is a flattened diff hunk — the original patch's '+' line markers have been fused into single physical lines, so the text below is preserved byte-for-byte and only standalone review comments are added. */
/* First fragment: tail of the UASTC mode-2 encoder — second-subset endpoint canonicalization (swap low/high so s0 <= s1, flipping the 3-bit weights 7-w for inverted subsets), then the result is appended to pResults guarded by MAX_ENCODE_RESULTS. */
/* Then astc_mode3: UASTC mode 3 — 3 subsets, 2-bit weights, CEM 8 (RGB direct), ASTC endpoint range 7, sharing partitions with BC7 mode 2 via g_astc_bc7_common_partitions3. When estimate_partition is set, a cheap estimator (color_cell_compression_est_astc) picks a single best common pattern instead of trying all TOTAL_ASTC_BC7_COMMON_PARTITIONS3. */
6]].m_unquant; + + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0 + 6], astc_results.m_endpoints[1 + 6]); + std::swap(astc_results.m_endpoints[2 + 6], astc_results.m_endpoints[3 + 6]); + std::swap(astc_results.m_endpoints[4 + 6], astc_results.m_endpoints[5 + 6]); + invert[1] = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + + astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]]; + + uint32_t astc_part = bc7_part; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + astc_part = 1 - astc_part; + + if (invert[astc_part]) + astc_results.m_weights[x + y * 4] = 7 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 2; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_part_err; + total_results++; + } + } + + } // common_pattern + } + + // MODE 3 + // 3-subsets, 2-bit indices, [0,11] endpoints, BC7 mode 2 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 3, CEM: 8 (RGB Direct ), EndpointRange: 7 (12) MODE2 + static void astc_mode3(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition) + { + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS3; + + if (estimate_partition) + { + uint64_t best_err = UINT64_MAX; + uint32_t best_common_pattern = 0; + const uint32_t weights[4] = { 1, 1, 1, 1 }; + + for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS3; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions3[common_pattern].m_bc7; + + const uint8_t* pPartition = 
&g_bc7_partition3[bc7_pattern * 16]; + + color_quad_u8 subset_colors[3][16]; + uint32_t subset_total_colors[3] = { 0, 0 }; + for (uint32_t index = 0; index < 16; index++) + subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index]; + + uint64_t total_subset_err = 0; + for (uint32_t subset = 0; (subset < 3) && (total_subset_err < best_err); subset++) + total_subset_err += color_cell_compression_est_astc(4, 3, g_bc7_weights2, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights); + + if (total_subset_err < best_err) + { + best_err = total_subset_err; + best_common_pattern = common_pattern; + } + } + + first_common_pattern = best_common_pattern; + last_common_pattern = best_common_pattern + 1; + } + + for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++) + { + const uint32_t endpoint_range = 7; + + const uint32_t bc7_pattern = g_astc_bc7_common_partitions3[common_pattern].m_bc7; + + color_rgba part_pixels[3][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[3] = { 0, 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition3[16 * bc7_pattern + x + y * 4]; + part_pixel_index[y][x] = num_part_pixels[bc7_part]; + part_pixels[bc7_part][num_part_pixels[bc7_part]++] = block[y][x]; + } + } + + color_cell_compressor_params ccell_params[3]; + color_cell_compressor_results ccell_results[3]; + uint8_t ccell_result_selectors[3][16]; + uint8_t ccell_result_selectors_temp[3][16]; + + uint64_t total_part_err = 0; + for (uint32_t bc7_part = 0; bc7_part < 3; bc7_part++) + { + memset(&ccell_params[bc7_part], 0, sizeof(ccell_params[bc7_part])); + + ccell_params[bc7_part].m_num_pixels = num_part_pixels[bc7_part]; + ccell_params[bc7_part].m_pPixels = (color_quad_u8*)&part_pixels[bc7_part][0]; + ccell_params[bc7_part].m_num_selector_weights = 4; + 
ccell_params[bc7_part].m_pSelector_weights = g_bc7_weights2; + ccell_params[bc7_part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params[bc7_part].m_astc_endpoint_range = endpoint_range; + ccell_params[bc7_part].m_weights[0] = 1; + ccell_params[bc7_part].m_weights[1] = 1; + ccell_params[bc7_part].m_weights[2] = 1; + ccell_params[bc7_part].m_weights[3] = 1; + + memset(&ccell_results[bc7_part], 0, sizeof(ccell_results[bc7_part])); + ccell_results[bc7_part].m_pSelectors = &ccell_result_selectors[bc7_part][0]; + ccell_results[bc7_part].m_pSelectors_temp = &ccell_result_selectors_temp[bc7_part][0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params[bc7_part], &ccell_results[bc7_part], &comp_params); + total_part_err += part_err; + } // part + + { + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 2; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 3; + astc_results.m_partition_seed = g_astc_bc7_common_partitions3[common_pattern].m_astc; + astc_results.m_cem = 8; + + uint32_t astc_to_bc7_part[3]; // converts ASTC to BC7 partition index + const uint32_t perm = g_astc_bc7_common_partitions3[common_pattern].m_astc_to_bc7_perm; + astc_to_bc7_part[0] = g_astc_to_bc7_partition_index_perm_tables[perm][0]; + astc_to_bc7_part[1] = g_astc_to_bc7_partition_index_perm_tables[perm][1]; + astc_to_bc7_part[2] = g_astc_to_bc7_partition_index_perm_tables[perm][2]; + + bool invert_astc_part[3] = { false, false, false }; + + for (uint32_t astc_part = 0; astc_part < 3; astc_part++) + { + uint8_t* pEndpoints = &astc_results.m_endpoints[6 * astc_part]; + + pEndpoints[0] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[0]; + pEndpoints[1] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[0]; + pEndpoints[2] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[1]; + pEndpoints[3] = 
ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[1]; + pEndpoints[4] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[2]; + pEndpoints[5] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[2]; + + int s0 = g_astc_unquant[endpoint_range][pEndpoints[0]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[2]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][pEndpoints[1]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[3]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(pEndpoints[0], pEndpoints[1]); + std::swap(pEndpoints[2], pEndpoints[3]); + std::swap(pEndpoints[4], pEndpoints[5]); + invert_astc_part[astc_part] = true; + } + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition3[16 * bc7_pattern + x + y * 4]; + + astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]]; + + uint32_t astc_part = 0; + for (uint32_t i = 0; i < 3; i++) + { + if (astc_to_bc7_part[i] == bc7_part) + { + astc_part = i; + break; + } + } + + if (invert_astc_part[astc_part]) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 3; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_part_err; + total_results++; + } + + } + + } // common_pattern + } + + // MODE 4 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 12 (40) MODE3 + static void astc_mode4(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition) + { + 
/* NOTE(review): flattened diff hunk — text preserved byte-for-byte; only standalone review comments added. */
/* Body of astc_mode4: UASTC mode 4 — 2 subsets, 2-bit weights, CEM 8 (RGB direct), ASTC endpoint range 12, partitions shared with BC7 via g_astc_bc7_common_partitions2. Each subset is compressed with color_cell_compression; endpoints for each subset are then canonicalized (swap low/high so the unquantized component sum satisfies s0 <= s1, remembering invert[] so the 2-bit weights can be flipped 3-w). m_invert on the common-partition entry swaps which ccell result feeds ASTC subset 0 vs 1. */
//const uint32_t weight_range = 2; + const uint32_t endpoint_range = 12; + + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2; + + if (estimate_partition) + { + const uint32_t weights[4] = { 1, 1, 1, 1 }; + first_common_pattern = estimate_partition2(4, 3, g_bc7_weights2, block, weights); + last_common_pattern = first_common_pattern + 1; + } + + for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + color_rgba part_pixels[2][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[2] = { 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + part_pixel_index[y][x] = num_part_pixels[part]; + part_pixels[part][num_part_pixels[part]++] = block[y][x]; + } + } + + color_cell_compressor_params ccell_params[2]; + color_cell_compressor_results ccell_results[2]; + uint8_t ccell_result_selectors[2][16]; + uint8_t ccell_result_selectors_temp[2][16]; + + uint64_t total_part_err = 0; + for (uint32_t part = 0; part < 2; part++) + { + memset(&ccell_params[part], 0, sizeof(ccell_params[part])); + + ccell_params[part].m_num_pixels = num_part_pixels[part]; + ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0]; + ccell_params[part].m_num_selector_weights = 4; + ccell_params[part].m_pSelector_weights = g_bc7_weights2; + ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params[part].m_astc_endpoint_range = endpoint_range; + ccell_params[part].m_weights[0] = 1; + ccell_params[part].m_weights[1] = 1; + ccell_params[part].m_weights[2] = 1; + ccell_params[part].m_weights[3] = 1; + + memset(&ccell_results[part], 0, sizeof(ccell_results[part])); + ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0]; + 
ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params); + total_part_err += part_err; + } // part + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 2; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 2; + astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc; + astc_results.m_cem = 8; + + uint32_t p0 = 0; + uint32_t p1 = 1; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + std::swap(p0, p1); + + astc_results.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2]; + + bool invert[2] = { false, false }; + + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + invert[0] = true; + } + + astc_results.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0]; + 
astc_results.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2]; + + s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4 + 6]].m_unquant; + s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5 + 6]].m_unquant; + + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0 + 6], astc_results.m_endpoints[1 + 6]); + std::swap(astc_results.m_endpoints[2 + 6], astc_results.m_endpoints[3 + 6]); + std::swap(astc_results.m_endpoints[4 + 6], astc_results.m_endpoints[5 + 6]); + invert[1] = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + + astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]]; + + uint32_t astc_part = bc7_part; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + astc_part = 1 - astc_part; + + if (invert[astc_part]) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 4; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_part_err; + total_results++; + } + + } // common_pattern + } + + // 
/* NOTE(review): flattened diff hunk — text preserved byte-for-byte; only standalone review comments added. */
/* astc_mode5: UASTC mode 5 — single subset, 3-bit weights (g_bc7_weights3, hence the 7-w flip when endpoints are swapped), CEM 8 (RGB direct), ASTC endpoint range 20; the whole 4x4 block is compressed with one color_cell_compression call and always appended (no partition search). The tail of this span opens astc_mode6 (dual-plane): for each candidate rot_comp it builds block_rgb (rot_comp channel forced to 255) and block_a (rot_comp channel splatted to RGB, alpha 255) so RGB and the rotated component are fit independently. */
MODE 5 + // DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) BC7 MODE 6 (or MODE 1 1-subset) + static void astc_mode5(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + const uint32_t weight_range = 5; + const uint32_t endpoint_range = 20; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = 8; + ccell_params.m_pSelector_weights = g_bc7_weights3; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc blk; + memset(&blk, 0, sizeof(blk)); + + blk.m_dual_plane = false; + blk.m_weight_range = weight_range; + + blk.m_ccs = 0; + blk.m_subsets = 1; + blk.m_partition_seed = 0; + blk.m_cem = 8; + + blk.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + + bool invert = false; + + int s0 = 
g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + blk.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + blk.m_weights[x + y * 4] = 7 - blk.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 5; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = blk; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + // MODE 6 + // DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 18 (160) BC7 MODE5 + static void astc_mode6(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + for (uint32_t rot_comp = 0; rot_comp < 3; rot_comp++) + { + const uint32_t weight_range = 2; + const uint32_t endpoint_range = 18; + + color_quad_u8 block_rgb[16]; + color_quad_u8 block_a[16]; + for (uint32_t i = 0; i < 16; i++) + { + block_rgb[i] = ((color_quad_u8*)&block[0][0])[i]; + block_a[i] = block_rgb[i]; + + uint8_t c = block_a[i].m_c[rot_comp]; + block_a[i].m_c[0] = c; + block_a[i].m_c[1] = c; + block_a[i].m_c[2] = c; + block_a[i].m_c[3] = 255; + + block_rgb[i].m_c[rot_comp] = 255; + } + + uint8_t ccell_result_selectors_temp[16]; + + color_cell_compressor_params ccell_params_rgb; + 
/* NOTE(review): flattened diff hunk — text preserved byte-for-byte; only standalone review comments added. */
/* Body of astc_mode6 (dual-plane, per rot_comp iteration): two independent color_cell_compression fits — RGB (rot_comp channel masked to 255) and the scalar plane (rot_comp channel splatted). The scalar plane's error is divided by 3, presumably because the splatted fit counts the same component three times — TODO confirm against the rest of the encoder's error weighting. Per-endpoint-pair selection via (rot_comp == k ? ccell_results_a : ccell_results_rgb) routes the rotated component's endpoints into the matching channel slot; m_ccs records which component rides the second weight plane; weights are interleaved rgb/a per texel, flipped 3-w if the endpoints were swapped into canonical order. */
memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb)); + + ccell_params_rgb.m_num_pixels = 16; + ccell_params_rgb.m_pPixels = block_rgb; + ccell_params_rgb.m_num_selector_weights = 4; + ccell_params_rgb.m_pSelector_weights = g_bc7_weights2; + ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params_rgb.m_astc_endpoint_range = endpoint_range; + ccell_params_rgb.m_weights[0] = 1; + ccell_params_rgb.m_weights[1] = 1; + ccell_params_rgb.m_weights[2] = 1; + ccell_params_rgb.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_rgb; + uint8_t ccell_result_selectors_rgb[16]; + memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb)); + ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0]; + ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &comp_params); + + color_cell_compressor_params ccell_params_a; + memset(&ccell_params_a, 0, sizeof(ccell_params_a)); + + ccell_params_a.m_num_pixels = 16; + ccell_params_a.m_pPixels = block_a; + ccell_params_a.m_num_selector_weights = 4; + ccell_params_a.m_pSelector_weights = g_bc7_weights2; + ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params_a.m_astc_endpoint_range = endpoint_range; + ccell_params_a.m_weights[0] = 1; + ccell_params_a.m_weights[1] = 1; + ccell_params_a.m_weights[2] = 1; + ccell_params_a.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_a; + uint8_t ccell_result_selectors_a[16]; + memset(&ccell_results_a, 0, sizeof(ccell_results_a)); + ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0]; + ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &comp_params) / 3; + + uint64_t total_err = part_err_rgb + part_err_a; + + // ASTC + astc_block_desc blk; + memset(&blk, 0, sizeof(blk)); + + 
blk.m_dual_plane = true; + blk.m_weight_range = weight_range; + + blk.m_ccs = rot_comp; + blk.m_subsets = 1; + blk.m_partition_seed = 0; + blk.m_cem = 8; + + blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2]; + + bool invert = false; + + int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4]; + uint32_t a_index = ccell_result_selectors_a[x + y * 4]; + + if (invert) + { + rgb_index = 3 - rgb_index; + a_index = 3 - a_index; + } + + blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index; + blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 6; + pResults[total_results].m_common_pattern = 
/* NOTE(review): flattened diff hunk — text preserved byte-for-byte; only standalone review comments added. */
/* First fragment: tail of astc_mode6's result-recording. Then astc_mode7: UASTC mode 7 — 2-subset ASTC paired with 3-subset BC7 partitions (g_bc7_3_astc2_common_partitions); bc7_convert_partition_index_3_to_2 folds the BC7 3-subset index down to the 2-subset ASTC index using the pattern's k, with _DEBUG-only cross-checks against astc_compute_texel_partition and the precomputed g_bc7_3_astc2_patterns2 table. Estimation path mirrors the other modes: pick the single best common pattern by estimated per-subset error. */
0; + pResults[total_results].m_astc = blk; + pResults[total_results].m_astc_err = total_err; + total_results++; + } + } // rot_comp + } + + // MODE 7 - 2 subset ASTC, 3 subset BC7 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 12 (40) MODE2 + static void astc_mode7(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition) + { + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS; + + if (estimate_partition) + { + uint64_t best_err = UINT64_MAX; + uint32_t best_common_pattern = 0; + const uint32_t weights[4] = { 1, 1, 1, 1 }; + + for (uint32_t common_pattern = 0; common_pattern < TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS; common_pattern++) + { + const uint8_t* pPartition = &g_bc7_3_astc2_patterns2[common_pattern][0]; + +#ifdef _DEBUG + const uint32_t astc_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_astc2; + const uint32_t bc7_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_bc73; + const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[common_pattern].k; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k); + assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true)); + assert(astc_part == pPartition[x + y * 4]); + } + } +#endif + + color_quad_u8 subset_colors[2][16]; + uint32_t subset_total_colors[2] = { 0, 0 }; + for (uint32_t index = 0; index < 16; index++) + subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index]; + + uint64_t total_subset_err = 0; + for (uint32_t subset = 0; (subset < 2) && (total_subset_err < best_err); subset++) + total_subset_err += color_cell_compression_est_astc(4, 3, g_bc7_weights2, 
subset_total_colors[subset], &subset_colors[subset][0], best_err, weights); + + if (total_subset_err < best_err) + { + best_err = total_subset_err; + best_common_pattern = common_pattern; + } + } + + first_common_pattern = best_common_pattern; + last_common_pattern = best_common_pattern + 1; + } + + //const uint32_t weight_range = 2; + const uint32_t endpoint_range = 12; + + for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++) + { + const uint32_t astc_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_astc2; + const uint32_t bc7_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_bc73; + const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[common_pattern].k; + + color_rgba part_pixels[2][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[2] = { 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k); +#ifdef _DEBUG + assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true)); +#endif + + part_pixel_index[y][x] = num_part_pixels[astc_part]; + part_pixels[astc_part][num_part_pixels[astc_part]++] = block[y][x]; + } + } + + color_cell_compressor_params ccell_params[2]; + color_cell_compressor_results ccell_results[2]; + uint8_t ccell_result_selectors[2][16]; + uint8_t ccell_result_selectors_temp[2][16]; + + uint64_t total_part_err = 0; + for (uint32_t part = 0; part < 2; part++) + { + memset(&ccell_params[part], 0, sizeof(ccell_params[part])); + + ccell_params[part].m_num_pixels = num_part_pixels[part]; + ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0]; + ccell_params[part].m_num_selector_weights = 4; + ccell_params[part].m_pSelector_weights = g_bc7_weights2; + ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + 
ccell_params[part].m_astc_endpoint_range = endpoint_range; + ccell_params[part].m_weights[0] = 1; + ccell_params[part].m_weights[1] = 1; + ccell_params[part].m_weights[2] = 1; + ccell_params[part].m_weights[3] = 1; + + memset(&ccell_results[part], 0, sizeof(ccell_results[part])); + ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0]; + ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params); + total_part_err += part_err; + } // part + + // ASTC + astc_block_desc blk; + memset(&blk, 0, sizeof(blk)); + + blk.m_dual_plane = false; + blk.m_weight_range = 2; + + blk.m_ccs = 0; + blk.m_subsets = 2; + blk.m_partition_seed = astc_pattern; + blk.m_cem = 8; + + const uint32_t p0 = 0; + const uint32_t p1 = 1; + + blk.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2]; + + bool invert[2] = { false, false }; + + int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert[0] = true; + } + + blk.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0]; + 
/* NOTE(review): flattened diff hunk — text preserved byte-for-byte; only standalone review comments added. */
/* First fragment: tail of astc_mode7 — second-subset endpoint canonicalization, weight emission with per-subset 3-w inversion, and result recording. Then estimate_partition2_list: ranks all TOTAL_ASTC_BC7_COMMON_PARTITIONS2 two-subset common partitions by estimated error (color_cell_compression_est_astc per subset, no early-out bound) and keeps the best max_parts indices in pParts via insertion into a fixed 8-entry array; part_error is seeded to all-0xFF bytes (i.e. UINT64_MAX sentinels) and the _DEBUG block verifies the output is sorted ascending. */
blk.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2]; + + s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4 + 6]].m_unquant; + s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5 + 6]].m_unquant; + + if (s1 < s0) + { + std::swap(blk.m_endpoints[0 + 6], blk.m_endpoints[1 + 6]); + std::swap(blk.m_endpoints[2 + 6], blk.m_endpoints[3 + 6]); + std::swap(blk.m_endpoints[4 + 6], blk.m_endpoints[5 + 6]); + invert[1] = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k); + + blk.m_weights[x + y * 4] = ccell_result_selectors[astc_part][part_pixel_index[y][x]]; + + if (invert[astc_part]) + blk.m_weights[x + y * 4] = 3 - blk.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 7; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = blk; + pResults[total_results].m_astc_err = total_part_err; + total_results++; + } + + } // common_pattern + } + + static void estimate_partition2_list(uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeights, const color_rgba block[4][4], uint32_t* pParts, uint32_t max_parts, const uint32_t weights[4]) + { + assert(pWeights[0] == 0 && pWeights[num_weights - 1] == 
64); + + const uint32_t MAX_PARTS = 8; + assert(max_parts <= MAX_PARTS); + + uint64_t part_error[MAX_PARTS]; + memset(part_error, 0xFF, sizeof(part_error)); + memset(pParts, 0, sizeof(pParts[0]) * max_parts); + + for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS2; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + const uint8_t* pPartition = &g_bc7_partition2[bc7_pattern * 16]; + + color_quad_u8 subset_colors[2][16]; + uint32_t subset_total_colors[2] = { 0, 0 }; + for (uint32_t index = 0; index < 16; index++) + subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index]; + + uint64_t total_subset_err = 0; + for (uint32_t subset = 0; subset < 2; subset++) + total_subset_err += color_cell_compression_est_astc(num_weights, num_comps, pWeights, subset_total_colors[subset], &subset_colors[subset][0], UINT64_MAX, weights); + + for (int i = 0; i < (int)max_parts; i++) + { + if (total_subset_err < part_error[i]) + { + for (int j = max_parts - 1; j > i; --j) + { + pParts[j] = pParts[j - 1]; + part_error[j] = part_error[j - 1]; + } + + pParts[i] = common_pattern; + part_error[i] = total_subset_err; + + break; + } + } + } + +#ifdef _DEBUG + for (uint32_t i = 0; i < max_parts - 1; i++) + { + assert(part_error[i] <= part_error[i + 1]); + } +#endif + } + + // 9. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 12 (RGBA Direct), EndpointRange: 8 (16) - BC7 MODE 7 + // 16. 
DualPlane: 0, WeightRange : 2 (4), Subsets : 2, CEM: 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE 7 + static void astc_mode9_or_16(uint32_t mode, const color_rgba source_block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, uint32_t estimate_partition_list_size) + { + assert(mode == 9 || mode == 16); + + const color_rgba* pBlock = &source_block[0][0]; + + color_rgba temp_block[16]; + if (mode == 16) + { + for (uint32_t i = 0; i < 16; i++) + { + if (mode == 16) + { + assert(pBlock[i].r == pBlock[i].g); + assert(pBlock[i].r == pBlock[i].b); + } + + const uint32_t l = pBlock[i].r; + const uint32_t a = pBlock[i].a; + + // Use (l,0,0,a) not (l,l,l,a) so both components are treated equally. + temp_block[i].set_noclamp_rgba(l, 0, 0, a); + } + + pBlock = temp_block; + } + + const uint32_t weights[4] = { 1, 1, 1, 1 }; + + //const uint32_t weight_range = 2; + const uint32_t endpoint_range = (mode == 16) ? 20 : 8; + + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2; + bool use_part_list = false; + + const uint32_t MAX_PARTS = 8; + uint32_t parts[MAX_PARTS]; + + if (estimate_partition_list_size == 1) + { + first_common_pattern = estimate_partition2(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, weights); + last_common_pattern = first_common_pattern + 1; + } + else if (estimate_partition_list_size > 0) + { + assert(estimate_partition_list_size <= MAX_PARTS); + estimate_partition_list_size = basisu::minimum(estimate_partition_list_size, MAX_PARTS); + + estimate_partition2_list(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, parts, estimate_partition_list_size, weights); + + first_common_pattern = 0; + last_common_pattern = estimate_partition_list_size; + use_part_list = true; + +#ifdef _DEBUG + assert(parts[0] == estimate_partition2(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, weights)); +#endif + } + + for (uint32_t common_pattern_iter = 
first_common_pattern; common_pattern_iter < last_common_pattern; common_pattern_iter++) + { + const uint32_t common_pattern = use_part_list ? parts[common_pattern_iter] : common_pattern_iter; + + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + color_rgba part_pixels[2][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[2] = { 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + part_pixel_index[y][x] = num_part_pixels[part]; + part_pixels[part][num_part_pixels[part]++] = pBlock[y * 4 + x]; + } + } + + color_cell_compressor_params ccell_params[2]; + color_cell_compressor_results ccell_results[2]; + uint8_t ccell_result_selectors[2][16]; + uint8_t ccell_result_selectors_temp[2][16]; + + uint64_t total_err = 0; + for (uint32_t subset = 0; subset < 2; subset++) + { + memset(&ccell_params[subset], 0, sizeof(ccell_params[subset])); + + ccell_params[subset].m_num_pixels = num_part_pixels[subset]; + ccell_params[subset].m_pPixels = (color_quad_u8*)&part_pixels[subset][0]; + ccell_params[subset].m_num_selector_weights = 4; + ccell_params[subset].m_pSelector_weights = g_bc7_weights2; + ccell_params[subset].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params[subset].m_astc_endpoint_range = endpoint_range; + ccell_params[subset].m_weights[0] = weights[0]; + ccell_params[subset].m_weights[1] = weights[1]; + ccell_params[subset].m_weights[2] = weights[2]; + ccell_params[subset].m_weights[3] = weights[3]; + ccell_params[subset].m_has_alpha = true; + + memset(&ccell_results[subset], 0, sizeof(ccell_results[subset])); + ccell_results[subset].m_pSelectors = &ccell_result_selectors[subset][0]; + ccell_results[subset].m_pSelectors_temp = &ccell_result_selectors_temp[subset][0]; + + uint64_t subset_err = color_cell_compression(255, &ccell_params[subset], &ccell_results[subset], &comp_params); + + if 
(mode == 16) + { + color_rgba colors[4]; + for (uint32_t c = 0; c < 4; c++) + { + colors[0].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results[subset].m_astc_low_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant; + colors[3].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results[subset].m_astc_high_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant; + } + + for (uint32_t i = 1; i < 4 - 1; i++) + for (uint32_t c = 0; c < 4; c++) + colors[i].m_comps[c] = (uint8_t)astc_interpolate(colors[0].m_comps[c], colors[3].m_comps[c], g_bc7_weights2[i], false); + + for (uint32_t p = 0; p < ccell_params[subset].m_num_pixels; p++) + { + color_rgba orig_pix(part_pixels[subset][p]); + orig_pix.g = orig_pix.r; + orig_pix.b = orig_pix.r; + total_err += color_distance_la(orig_pix, colors[ccell_result_selectors[subset][p]]); + } + } + else + { + total_err += subset_err; + } + } // subset + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 2; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 2; + astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc; + astc_results.m_cem = (mode == 16) ? 
4 : 12; + + uint32_t part[2] = { 0, 1 }; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + std::swap(part[0], part[1]); + + bool invert[2] = { false, false }; + + for (uint32_t p = 0; p < 2; p++) + { + if (mode == 16) + { + astc_results.m_endpoints[p * 4 + 0] = ccell_results[part[p]].m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[p * 4 + 1] = ccell_results[part[p]].m_astc_high_endpoint.m_c[0]; + + astc_results.m_endpoints[p * 4 + 2] = ccell_results[part[p]].m_astc_low_endpoint.m_c[3]; + astc_results.m_endpoints[p * 4 + 3] = ccell_results[part[p]].m_astc_high_endpoint.m_c[3]; + } + else + { + for (uint32_t c = 0; c < 4; c++) + { + astc_results.m_endpoints[p * 8 + c * 2] = ccell_results[part[p]].m_astc_low_endpoint.m_c[c]; + astc_results.m_endpoints[p * 8 + c * 2 + 1] = ccell_results[part[p]].m_astc_high_endpoint.m_c[c]; + } + + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 0]].m_unquant + + g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 2]].m_unquant + + g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 4]].m_unquant; + + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 1]].m_unquant + + g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 3]].m_unquant + + g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 5]].m_unquant; + + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[p * 8 + 0], astc_results.m_endpoints[p * 8 + 1]); + std::swap(astc_results.m_endpoints[p * 8 + 2], astc_results.m_endpoints[p * 8 + 3]); + std::swap(astc_results.m_endpoints[p * 8 + 4], astc_results.m_endpoints[p * 8 + 5]); + std::swap(astc_results.m_endpoints[p * 8 + 6], astc_results.m_endpoints[p * 8 + 7]); + invert[p] = true; + } + } + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + + astc_results.m_weights[x + y * 4] = 
ccell_result_selectors[bc7_part][part_pixel_index[y][x]]; + + uint32_t astc_part = bc7_part; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + astc_part = 1 - astc_part; + + if (invert[astc_part]) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = mode; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_err; + total_results++; + } + + } // common_pattern + } + + // MODE 10 + // DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 13 (48) MODE6 + static void astc_mode10(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + const uint32_t weight_range = 8; + const uint32_t endpoint_range = 13; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = 16; + ccell_params.m_pSelector_weights = g_astc_weights4; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_astc_weights4x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + ccell_params.m_has_alpha = true; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + 
// ASTC
    // Build the ASTC block description for the single-subset CEM 12 result.
    astc_block_desc astc_results;
    memset(&astc_results, 0, sizeof(astc_results));

    astc_results.m_dual_plane = false;
    astc_results.m_weight_range = weight_range;

    astc_results.m_ccs = 0;
    astc_results.m_subsets = 1;
    astc_results.m_partition_seed = 0;
    astc_results.m_cem = 12;

    astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
    astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
    astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
    astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
    astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
    astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
    astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3];
    astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3];

    bool invert = false;

    // Canonicalize endpoint order (low RGB sum first); invert weights if swapped.
    int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
    int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
    if (s1 < s0)
    {
        std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
        std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
        std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
        std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]);
        invert = true;
    }

    for (uint32_t y = 0; y < 4; y++)
    {
        for (uint32_t x = 0; x < 4; x++)
        {
            astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];

            if (invert)
                astc_results.m_weights[x + y * 4] = 15 - astc_results.m_weights[x + y * 4];
        }
    }

    assert(total_results < MAX_ENCODE_RESULTS);
    if (total_results < MAX_ENCODE_RESULTS)
    {
        pResults[total_results].m_uastc_mode = 10;
        pResults[total_results].m_common_pattern = 0;
        pResults[total_results].m_astc = astc_results;
        pResults[total_results].m_astc_err = part_err;
        total_results++;
    }
}

// 11. DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 12 (RGBA Direct), EndpointRange: 13 (48) MODE5
// 17. DualPlane: 1, WeightRange : 2 (4), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) BC7 MODE5
// Encodes UASTC mode 11 (dual-plane RGBA) or mode 17 (dual-plane LA).
// Mode 11 tries every component (R/G/B/A) as the second plane; mode 17 always
// puts alpha in the second plane (single iteration).
static void astc_mode11_or_17(uint32_t mode, const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
{
    assert((mode == 11) || (mode == 17));

    const uint32_t weight_range = 2;
    const uint32_t endpoint_range = (mode == 17) ? 20 : 13;

    // Use uniform, non-perceptual channel weights for the dual-plane search.
    bc7enc_compress_block_params local_comp_params(comp_params);
    local_comp_params.m_perceptual = false;
    local_comp_params.m_weights[0] = 1;
    local_comp_params.m_weights[1] = 1;
    local_comp_params.m_weights[2] = 1;
    local_comp_params.m_weights[3] = 1;

    const uint32_t last_rot_comp = (mode == 17) ?
1 : 4;

    for (uint32_t rot_comp = 0; rot_comp < last_rot_comp; rot_comp++)
    {
        // Split each pixel into an RGB block (with the rotated component replaced
        // by alpha) and a separate single-component "alpha plane" block.
        color_quad_u8 block_rgb[16];
        color_quad_u8 block_a[16];
        for (uint32_t i = 0; i < 16; i++)
        {
            block_rgb[i] = ((color_quad_u8*)&block[0][0])[i];
            block_a[i] = block_rgb[i];

            if (mode == 17)
            {
                // LA input: luma must already be replicated across R/G/B.
                assert(block_rgb[i].m_c[0] == block_rgb[i].m_c[1]);
                assert(block_rgb[i].m_c[0] == block_rgb[i].m_c[2]);

                block_a[i].m_c[0] = block_rgb[i].m_c[3];
                block_a[i].m_c[1] = block_rgb[i].m_c[3];
                block_a[i].m_c[2] = block_rgb[i].m_c[3];
                block_a[i].m_c[3] = 255;

                block_rgb[i].m_c[1] = block_rgb[i].m_c[0];
                block_rgb[i].m_c[2] = block_rgb[i].m_c[0];
                block_rgb[i].m_c[3] = 255;
            }
            else
            {
                // Replicate the rotated component into RGB of the alpha-plane block
                // so the compressor treats it as a grayscale cell.
                uint8_t c = block_a[i].m_c[rot_comp];
                block_a[i].m_c[0] = c;
                block_a[i].m_c[1] = c;
                block_a[i].m_c[2] = c;
                block_a[i].m_c[3] = 255;

                block_rgb[i].m_c[rot_comp] = block_rgb[i].m_c[3];
                block_rgb[i].m_c[3] = 255;
            }
        }

        uint8_t ccell_result_selectors_temp[16];

        // Compress the first (RGB) plane.
        color_cell_compressor_params ccell_params_rgb;
        memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb));

        ccell_params_rgb.m_num_pixels = 16;
        ccell_params_rgb.m_pPixels = block_rgb;
        ccell_params_rgb.m_num_selector_weights = 4;
        ccell_params_rgb.m_pSelector_weights = g_bc7_weights2;
        ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
        ccell_params_rgb.m_astc_endpoint_range = endpoint_range;
        ccell_params_rgb.m_weights[0] = 1;
        ccell_params_rgb.m_weights[1] = 1;
        ccell_params_rgb.m_weights[2] = 1;
        ccell_params_rgb.m_weights[3] = 1;

        color_cell_compressor_results ccell_results_rgb;
        uint8_t ccell_result_selectors_rgb[16];
        memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb));
        ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0];
        ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0];

        uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &local_comp_params);

        // Compress the second (alpha) plane.
        color_cell_compressor_params ccell_params_a;
        memset(&ccell_params_a, 0, sizeof(ccell_params_a));

        ccell_params_a.m_num_pixels = 16;
        ccell_params_a.m_pPixels = block_a;
        ccell_params_a.m_num_selector_weights = 4;
        ccell_params_a.m_pSelector_weights = g_bc7_weights2;
        ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
        ccell_params_a.m_astc_endpoint_range = endpoint_range;
        ccell_params_a.m_weights[0] = 1;
        ccell_params_a.m_weights[1] = 1;
        ccell_params_a.m_weights[2] = 1;
        ccell_params_a.m_weights[3] = 1;

        color_cell_compressor_results ccell_results_a;
        uint8_t ccell_result_selectors_a[16];
        memset(&ccell_results_a, 0, sizeof(ccell_results_a));
        ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0];
        ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0];

        // The alpha plane's error was computed over 3 replicated channels, so
        // divide by 3 to count the component once.
        uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &local_comp_params) / 3;

        // Mode 17: the RGB plane is replicated luma, so scale its error down by 3 too.
        uint64_t total_err = (mode == 17) ? ((part_err_rgb / 3) + part_err_a) : (part_err_rgb + part_err_a);

        // ASTC
        astc_block_desc blk;
        memset(&blk, 0, sizeof(blk));

        blk.m_dual_plane = true;
        blk.m_weight_range = weight_range;

        // ccs = index of the component carried by the second weight plane.
        blk.m_ccs = (mode == 17) ? 3 : rot_comp;
        blk.m_subsets = 1;
        blk.m_partition_seed = 0;
        blk.m_cem = (mode == 17) ?
4 : 12;

        bool invert = false;

        if (mode == 17)
        {
            assert(ccell_results_rgb.m_astc_low_endpoint.m_c[0] == ccell_results_rgb.m_astc_low_endpoint.m_c[1]);
            assert(ccell_results_rgb.m_astc_low_endpoint.m_c[0] == ccell_results_rgb.m_astc_low_endpoint.m_c[2]);

            assert(ccell_results_rgb.m_astc_high_endpoint.m_c[0] == ccell_results_rgb.m_astc_high_endpoint.m_c[1]);
            assert(ccell_results_rgb.m_astc_high_endpoint.m_c[0] == ccell_results_rgb.m_astc_high_endpoint.m_c[2]);

            // CEM 4 (LA): L endpoints from the RGB plane, A endpoints from the A plane.
            blk.m_endpoints[0] = ccell_results_rgb.m_astc_low_endpoint.m_c[0];
            blk.m_endpoints[1] = ccell_results_rgb.m_astc_high_endpoint.m_c[0];

            blk.m_endpoints[2] = ccell_results_a.m_astc_low_endpoint.m_c[0];
            blk.m_endpoints[3] = ccell_results_a.m_astc_high_endpoint.m_c[0];
        }
        else
        {
            // CEM 12 (RGBA): each component's endpoints come from whichever plane
            // actually carried that component after the rotation.
            blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0];
            blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0];
            blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1];
            blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1];
            blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2];
            blk.m_endpoints[5] = (rot_comp == 2 ?
ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2];
            if (rot_comp == 3)
            {
                // Alpha rotated into the second plane: its endpoints live in the A-plane
                // results (replicated into component 0 there).
                blk.m_endpoints[6] = ccell_results_a.m_astc_low_endpoint.m_c[0];
                blk.m_endpoints[7] = ccell_results_a.m_astc_high_endpoint.m_c[0];
            }
            else
            {
                // The RGB plane's rot_comp slot holds the block's original alpha.
                blk.m_endpoints[6] = ccell_results_rgb.m_astc_low_endpoint.m_c[rot_comp];
                blk.m_endpoints[7] = ccell_results_rgb.m_astc_high_endpoint.m_c[rot_comp];
            }

            // Canonicalize endpoint order (low RGB sum first); invert both weight
            // planes below if swapped.
            int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
            int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
            if (s1 < s0)
            {
                std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
                std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
                std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
                std::swap(blk.m_endpoints[6], blk.m_endpoints[7]);
                invert = true;
            }
        }

        // Interleave the two planes' selectors into the dual-plane weight array.
        for (uint32_t y = 0; y < 4; y++)
        {
            for (uint32_t x = 0; x < 4; x++)
            {
                uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4];
                uint32_t a_index = ccell_result_selectors_a[x + y * 4];

                if (invert)
                {
                    rgb_index = 3 - rgb_index;
                    a_index = 3 - a_index;
                }

                blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index;
                blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index;
            }
        }

        assert(total_results < MAX_ENCODE_RESULTS);
        if (total_results < MAX_ENCODE_RESULTS)
        {
            pResults[total_results].m_uastc_mode = mode;
            pResults[total_results].m_common_pattern = 0;
            pResults[total_results].m_astc = blk;
            pResults[total_results].m_astc_err = total_err;
            total_results++;
        }
    } // rot_comp
}

// MODE 12
// DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 19 (192) MODE6
// Encodes UASTC mode 12: single-subset RGBA with 8-level weights.
static void astc_mode12(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
{
    const uint32_t weight_range = 5;
    const uint32_t endpoint_range = 19;

    color_cell_compressor_params ccell_params;
    memset(&ccell_params, 0, sizeof(ccell_params));

    ccell_params.m_num_pixels = 16;
    ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
    ccell_params.m_num_selector_weights = 8;
    ccell_params.m_pSelector_weights = g_bc7_weights3;
    ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x;
    ccell_params.m_astc_endpoint_range = endpoint_range;
    ccell_params.m_weights[0] = 1;
    ccell_params.m_weights[1] = 1;
    ccell_params.m_weights[2] = 1;
    ccell_params.m_weights[3] = 1;
    ccell_params.m_has_alpha = true;

    color_cell_compressor_results ccell_results;
    uint8_t ccell_result_selectors[16];
    uint8_t ccell_result_selectors_temp[16];
    memset(&ccell_results, 0, sizeof(ccell_results));
    ccell_results.m_pSelectors = &ccell_result_selectors[0];
    ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];

    uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);

    // ASTC
    astc_block_desc astc_results;
    memset(&astc_results, 0, sizeof(astc_results));

    astc_results.m_dual_plane = false;
    astc_results.m_weight_range = weight_range;

    astc_results.m_ccs = 0;
    astc_results.m_subsets = 1;
    astc_results.m_partition_seed = 0;
    astc_results.m_cem = 12;

    astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
    astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
    astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
    astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
    astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
    astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
    astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3];
    astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3];

    bool invert = false;

    // Canonicalize endpoint order (low RGB sum first); invert weights if swapped.
    int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
    int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
    if (s1 < s0)
    {
        std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
        std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
        std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
        std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]);
        invert = true;
    }

    for (uint32_t y = 0; y < 4; y++)
    {
        for (uint32_t x = 0; x < 4; x++)
        {
            astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];

            if (invert)
                astc_results.m_weights[x + y * 4] = 7 - astc_results.m_weights[x + y * 4];
        }
    }

    assert(total_results < MAX_ENCODE_RESULTS);
    if (total_results < MAX_ENCODE_RESULTS)
    {
        pResults[total_results].m_uastc_mode = 12;
        pResults[total_results].m_common_pattern = 0;
        pResults[total_results].m_astc = astc_results;
        pResults[total_results].m_astc_err = part_err;
        total_results++;
    }
}

// 13.
// DualPlane: 1, WeightRange: 0 (2), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 20 (256) MODE5
// Encodes UASTC mode 13: dual-plane RGBA with 2-level weights; tries every
// component (R/G/B/A) as the rotated second plane.
static void astc_mode13(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
{
    // Use uniform, non-perceptual channel weights for the dual-plane search.
    bc7enc_compress_block_params local_comp_params(comp_params);
    local_comp_params.m_perceptual = false;
    local_comp_params.m_weights[0] = 1;
    local_comp_params.m_weights[1] = 1;
    local_comp_params.m_weights[2] = 1;
    local_comp_params.m_weights[3] = 1;

    for (uint32_t rot_comp = 0; rot_comp < 4; rot_comp++)
    {
        const uint32_t weight_range = 0;
        const uint32_t endpoint_range = 20;

        // Split into an RGB plane (rotated component replaced by alpha) and a
        // grayscale plane holding the rotated component.
        color_quad_u8 block_rgb[16];
        color_quad_u8 block_a[16];
        for (uint32_t i = 0; i < 16; i++)
        {
            block_rgb[i] = ((color_quad_u8*)&block[0][0])[i];
            block_a[i] = block_rgb[i];

            uint8_t c = block_a[i].m_c[rot_comp];
            block_a[i].m_c[0] = c;
            block_a[i].m_c[1] = c;
            block_a[i].m_c[2] = c;
            block_a[i].m_c[3] = 255;

            block_rgb[i].m_c[rot_comp] = block_rgb[i].m_c[3];
            block_rgb[i].m_c[3] = 255;
        }

        uint8_t ccell_result_selectors_temp[16];

        // Compress the RGB plane with 2-level selectors.
        color_cell_compressor_params ccell_params_rgb;
        memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb));

        ccell_params_rgb.m_num_pixels = 16;
        ccell_params_rgb.m_pPixels = block_rgb;
        ccell_params_rgb.m_num_selector_weights = 2;
        ccell_params_rgb.m_pSelector_weights = g_bc7_weights1;
        ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights1x;
        ccell_params_rgb.m_astc_endpoint_range = endpoint_range;
        ccell_params_rgb.m_weights[0] = 1;
        ccell_params_rgb.m_weights[1] = 1;
        ccell_params_rgb.m_weights[2] = 1;
        ccell_params_rgb.m_weights[3] = 1;

        color_cell_compressor_results ccell_results_rgb;
        uint8_t ccell_result_selectors_rgb[16];
        memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb));
        ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0];
        ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0];

        uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &local_comp_params);

        // Compress the second (rotated-component) plane.
        color_cell_compressor_params ccell_params_a;
        memset(&ccell_params_a, 0, sizeof(ccell_params_a));

        ccell_params_a.m_num_pixels = 16;
        ccell_params_a.m_pPixels = block_a;
        ccell_params_a.m_num_selector_weights = 2;
        ccell_params_a.m_pSelector_weights = g_bc7_weights1;
        ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights1x;
        ccell_params_a.m_astc_endpoint_range = endpoint_range;
        ccell_params_a.m_weights[0] = 1;
        ccell_params_a.m_weights[1] = 1;
        ccell_params_a.m_weights[2] = 1;
        ccell_params_a.m_weights[3] = 1;

        color_cell_compressor_results ccell_results_a;
        uint8_t ccell_result_selectors_a[16];
        memset(&ccell_results_a, 0, sizeof(ccell_results_a));
        ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0];
        ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0];

        // Divide by 3: the plane's error was computed over 3 replicated channels.
        uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &local_comp_params) / 3;

        uint64_t total_err = part_err_rgb + part_err_a;

        // ASTC
        astc_block_desc blk;
        memset(&blk, 0, sizeof(blk));

        blk.m_dual_plane = true;
        blk.m_weight_range = weight_range;

        blk.m_ccs = rot_comp;
        blk.m_subsets = 1;
        blk.m_partition_seed = 0;
        blk.m_cem = 12;

        // Each component's endpoints come from whichever plane carried it.
        blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0];
        blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0];
        blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1];
        blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1];
        blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2];
        blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2];
        if (rot_comp == 3)
        {
            blk.m_endpoints[6] = ccell_results_a.m_astc_low_endpoint.m_c[0];
            blk.m_endpoints[7] = ccell_results_a.m_astc_high_endpoint.m_c[0];
        }
        else
        {
            blk.m_endpoints[6] = ccell_results_rgb.m_astc_low_endpoint.m_c[rot_comp];
            blk.m_endpoints[7] = ccell_results_rgb.m_astc_high_endpoint.m_c[rot_comp];
        }

        bool invert = false;

        // Canonicalize endpoint order; invert both planes' weights if swapped.
        int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
        int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
        if (s1 < s0)
        {
            std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
            std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
            std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
            std::swap(blk.m_endpoints[6], blk.m_endpoints[7]);
            invert = true;
        }

        // Interleave the two planes' 1-bit selectors.
        for (uint32_t y = 0; y < 4; y++)
        {
            for (uint32_t x = 0; x < 4; x++)
            {
                uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4];
                uint32_t a_index = ccell_result_selectors_a[x + y * 4];

                if (invert)
                {
                    rgb_index = 1 - rgb_index;
                    a_index = 1 - a_index;
                }

                blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index;
                blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index;
            }
        }

        assert(total_results < MAX_ENCODE_RESULTS);
        if (total_results < MAX_ENCODE_RESULTS)
        {
            pResults[total_results].m_uastc_mode = 13;
            pResults[total_results].m_common_pattern = 0;
            pResults[total_results].m_astc = blk;
            pResults[total_results].m_astc_err = total_err;
            total_results++;
        }
    } // rot_comp
}

// MODE14
// DualPlane: 0, WeightRange: 2 (4), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 20 (256) MODE6
// Encodes UASTC mode 14: single-subset RGBA with 4-level weights.
static void astc_mode14(const color_rgba block[4][4], uastc_encode_results*
pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
{
    const uint32_t weight_range = 2;
    const uint32_t endpoint_range = 20;

    color_cell_compressor_params ccell_params;
    memset(&ccell_params, 0, sizeof(ccell_params));

    ccell_params.m_num_pixels = 16;
    ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
    ccell_params.m_num_selector_weights = 4;
    ccell_params.m_pSelector_weights = g_bc7_weights2;
    ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
    ccell_params.m_astc_endpoint_range = endpoint_range;
    ccell_params.m_weights[0] = 1;
    ccell_params.m_weights[1] = 1;
    ccell_params.m_weights[2] = 1;
    ccell_params.m_weights[3] = 1;
    ccell_params.m_has_alpha = true;

    color_cell_compressor_results ccell_results;
    uint8_t ccell_result_selectors[16];
    uint8_t ccell_result_selectors_temp[16];
    memset(&ccell_results, 0, sizeof(ccell_results));
    ccell_results.m_pSelectors = &ccell_result_selectors[0];
    ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];

    uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);

    // ASTC
    astc_block_desc astc_results;
    memset(&astc_results, 0, sizeof(astc_results));

    astc_results.m_dual_plane = false;
    astc_results.m_weight_range = weight_range;

    astc_results.m_ccs = 0;
    astc_results.m_subsets = 1;
    astc_results.m_partition_seed = 0;
    astc_results.m_cem = 12;

    astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
    astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
    astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
    astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
    astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
    astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
    astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3];
    astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3];

    bool invert = false;

    // Canonicalize endpoint order (low RGB sum first); invert weights if swapped.
    int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
    int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
    if (s1 < s0)
    {
        std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
        std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
        std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
        std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]);
        invert = true;
    }

    for (uint32_t y = 0; y < 4; y++)
    {
        for (uint32_t x = 0; x < 4; x++)
        {
            astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];

            if (invert)
                astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
        }
    }

    assert(total_results < MAX_ENCODE_RESULTS);
    if (total_results < MAX_ENCODE_RESULTS)
    {
        pResults[total_results].m_uastc_mode = 14;
        pResults[total_results].m_common_pattern = 0;
        pResults[total_results].m_astc = astc_results;
        pResults[total_results].m_astc_err = part_err;
        total_results++;
    }
}

// MODE 15
// DualPlane: 0, WeightRange : 8 (16), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) BC7 MODE6
// Encodes UASTC mode 15: single-subset luma+alpha with 16-level weights.
static void astc_mode15(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
{
    const uint32_t weight_range = 8;
    const uint32_t endpoint_range = 20;

    color_cell_compressor_params ccell_params;
    memset(&ccell_params, 0, sizeof(ccell_params));

    // Repack the LA input so luma and alpha are weighted equally by the compressor.
    color_rgba temp_block[16];
    for (uint32_t i = 0; i < 16; i++)
    {
        const uint32_t l = ((const
color_rgba*)block)[i].r;
        const uint32_t a = ((const color_rgba*)block)[i].a;

        // Use (l,0,0,a) not (l,l,l,a) so both components are treated equally.
        temp_block[i].set_noclamp_rgba(l, 0, 0, a);
    }

    ccell_params.m_num_pixels = 16;
    //ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
    ccell_params.m_pPixels = (color_quad_u8*)temp_block;
    ccell_params.m_num_selector_weights = 16;
    ccell_params.m_pSelector_weights = g_astc_weights4;
    ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_astc_weights4x;
    ccell_params.m_astc_endpoint_range = endpoint_range;
    ccell_params.m_weights[0] = 1;
    ccell_params.m_weights[1] = 1;
    ccell_params.m_weights[2] = 1;
    ccell_params.m_weights[3] = 1;
    ccell_params.m_has_alpha = true;

    color_cell_compressor_results ccell_results;
    uint8_t ccell_result_selectors[16];
    uint8_t ccell_result_selectors_temp[16];
    memset(&ccell_results, 0, sizeof(ccell_results));
    ccell_results.m_pSelectors = &ccell_result_selectors[0];
    ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];

    // The compressor's error is for the repacked pixels; the true LA error is
    // recomputed below, so the return value is intentionally ignored here.
    color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);

    // ASTC
    astc_block_desc astc_results;
    memset(&astc_results, 0, sizeof(astc_results));

    astc_results.m_dual_plane = false;
    astc_results.m_weight_range = weight_range;

    astc_results.m_ccs = 0;
    astc_results.m_subsets = 1;
    astc_results.m_partition_seed = 0;
    astc_results.m_cem = 4;

    // CEM 4 (LA): L endpoints from component 0, A endpoints from component 3.
    astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
    astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];

    astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[3];
    astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[3];

    for (uint32_t y = 0; y < 4; y++)
        for (uint32_t x = 0; x < 4; x++)
            astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];

    // Reconstruct the 16 interpolated colors and measure the true LA-space error
    // against the original (unpacked) block.
    color_rgba colors[16];
    for (uint32_t c = 0; c < 4; c++)
    {
        colors[0].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results.m_astc_low_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant;
        colors[15].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results.m_astc_high_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant;
    }

    for (uint32_t i = 1; i < 16 - 1; i++)
        for (uint32_t c = 0; c < 4; c++)
            colors[i].m_comps[c] = (uint8_t)astc_interpolate(colors[0].m_comps[c], colors[15].m_comps[c], g_astc_weights4[i], false);

    uint64_t total_err = 0;
    for (uint32_t p = 0; p < 16; p++)
        total_err += color_distance_la(((const color_rgba*)block)[p], colors[ccell_result_selectors[p]]);

    assert(total_results < MAX_ENCODE_RESULTS);
    if (total_results < MAX_ENCODE_RESULTS)
    {
        pResults[total_results].m_uastc_mode = 15;
        pResults[total_results].m_common_pattern = 0;
        pResults[total_results].m_astc = astc_results;
        pResults[total_results].m_astc_err = total_err;
        total_results++;
    }
}

// Sums squared per-channel differences between an original and decoded 4x4
// block, and returns the combined RGB, RGBA and LA (R+A) error totals via the
// output references.
static void compute_block_error(const color_rgba block[4][4], const color_rgba decoded_block[4][4], uint64_t &total_rgb_err, uint64_t &total_rgba_err, uint64_t &total_la_err)
{
    uint64_t total_err_r = 0, total_err_g = 0, total_err_b = 0, total_err_a = 0;

    for (uint32_t y = 0; y < 4; y++)
    {
        for (uint32_t x = 0; x < 4; x++)
        {
            const int dr = (int)block[y][x].m_comps[0] - (int)decoded_block[y][x].m_comps[0];
            const int dg = (int)block[y][x].m_comps[1] - (int)decoded_block[y][x].m_comps[1];
            const int db = (int)block[y][x].m_comps[2] - (int)decoded_block[y][x].m_comps[2];
            const int da = (int)block[y][x].m_comps[3] - (int)decoded_block[y][x].m_comps[3];

            total_err_r += dr * dr;
            total_err_g += dg * dg;
            total_err_b += db * db;
            total_err_a += da * da;
        }
    }

    // LA error counts only the R (luma) and A channels.
    total_la_err = total_err_r + total_err_a;
    total_rgb_err = total_err_r + total_err_g + total_err_b;
    total_rgba_err = total_rgb_err + total_err_a;
}

static void compute_bc1_hints(bool &bc1_hint0, bool &bc1_hint1, const uastc_encode_results &best_results, const color_rgba block[4][4], const
color_rgba decoded_uastc_block[4][4]) + { + const uint32_t best_mode = best_results.m_uastc_mode; + const bool perceptual = false; + + bc1_hint0 = false; + bc1_hint1 = false; + + if (best_mode == UASTC_MODE_INDEX_SOLID_COLOR) + return; + + if (!g_uastc_mode_has_bc1_hint0[best_mode] && !g_uastc_mode_has_bc1_hint1[best_mode]) + return; + + color_rgba tblock_bc1[4][4]; + dxt1_block tbc1_block[8]; + basist::encode_bc1(tbc1_block, (const uint8_t*)&decoded_uastc_block[0][0], 0); + unpack_block(texture_format::cBC1, tbc1_block, &tblock_bc1[0][0]); + + color_rgba tblock_hint0_bc1[4][4]; + color_rgba tblock_hint1_bc1[4][4]; + + etc_block etc1_blk; + memset(&etc1_blk, 0, sizeof(etc1_blk)); + + eac_a8_block etc2_blk; + memset(&etc2_blk, 0, sizeof(etc2_blk)); + etc2_blk.m_multiplier = 1; + + // Pack to UASTC, then unpack, because the endpoints may be swapped. + + uastc_block temp_ublock; + pack_uastc(temp_ublock, best_results, etc1_blk, 0, etc2_blk, false, false); + + unpacked_uastc_block temp_ublock_unpacked; + unpack_uastc(temp_ublock, temp_ublock_unpacked, false); + + unpacked_uastc_block ublock; + memset(&ublock, 0, sizeof(ublock)); + ublock.m_mode = best_results.m_uastc_mode; + ublock.m_common_pattern = best_results.m_common_pattern; + ublock.m_astc = temp_ublock_unpacked.m_astc; + + dxt1_block b; + + // HINT1 + if (!g_uastc_mode_has_bc1_hint1[best_mode]) + { + memset(tblock_hint1_bc1, 0, sizeof(tblock_hint1_bc1)); + } + else + { + transcode_uastc_to_bc1_hint1(ublock, (color32 (*)[4]) decoded_uastc_block, &b, false); + + unpack_block(texture_format::cBC1, &b, &tblock_hint1_bc1[0][0]); + } + + // HINT0 + if (!g_uastc_mode_has_bc1_hint0[best_mode]) + { + memset(tblock_hint0_bc1, 0, sizeof(tblock_hint0_bc1)); + } + else + { + transcode_uastc_to_bc1_hint0(ublock, &b); + + unpack_block(texture_format::cBC1, &b, &tblock_hint0_bc1[0][0]); + } + + // Compute block errors + uint64_t total_t_err = 0, total_hint0_err = 0, total_hint1_err = 0; + for (uint32_t y = 0; y < 4; y++) + { + 
for (uint32_t x = 0; x < 4; x++) + { + total_t_err += color_distance(perceptual, block[y][x], tblock_bc1[y][x], false); + total_hint0_err += color_distance(perceptual, block[y][x], tblock_hint0_bc1[y][x], false); + total_hint1_err += color_distance(perceptual, block[y][x], tblock_hint1_bc1[y][x], false); + } + } + + const float t_err = sqrtf((float)total_t_err); + const float t_err_hint0 = sqrtf((float)total_hint0_err); + const float t_err_hint1 = sqrtf((float)total_hint1_err); + + const float err_thresh0 = 1.075f; + const float err_thresh1 = 1.075f; + + if ((g_uastc_mode_has_bc1_hint0[best_mode]) && (t_err_hint0 <= t_err * err_thresh0)) + bc1_hint0 = true; + + if ((g_uastc_mode_has_bc1_hint1[best_mode]) && (t_err_hint1 <= t_err * err_thresh1)) + bc1_hint1 = true; + } + + struct ycbcr + { + int32_t m_y; + int32_t m_cb; + int32_t m_cr; + }; + + static inline void rgb_to_y_cb_cr(const color_rgba& c, ycbcr& dst) + { + const int y = c.r * 54 + c.g * 183 + c.b * 19; + dst.m_y = y; + dst.m_cb = (c.b << 8) - y; + dst.m_cr = (c.r << 8) - y; + } + + static inline uint64_t color_diff(const ycbcr& a, const ycbcr& b) + { + const int y_delta = a.m_y - b.m_y; + const int cb_delta = a.m_cb - b.m_cb; + const int cr_delta = a.m_cr - b.m_cr; + return ((int64_t)y_delta * y_delta * 4) + ((int64_t)cr_delta * cr_delta) + ((int64_t)cb_delta * cb_delta); + } + + static inline int gray_distance2(const color_rgba& c, int r, int g, int b) + { + int gray_dist = (((int)c[0] - r) + ((int)c[1] - g) + ((int)c[2] - b) + 1) / 3; + + int gray_point_r = clamp255(r + gray_dist); + int gray_point_g = clamp255(g + gray_dist); + int gray_point_b = clamp255(b + gray_dist); + + int dist_to_gray_point_r = c[0] - gray_point_r; + int dist_to_gray_point_g = c[1] - gray_point_g; + int dist_to_gray_point_b = c[2] - gray_point_b; + + return (dist_to_gray_point_r * dist_to_gray_point_r) + (dist_to_gray_point_g * dist_to_gray_point_g) + (dist_to_gray_point_b * dist_to_gray_point_b); + } + + static bool 
pack_etc1_estimate_flipped(const color_rgba* pSrc_pixels) + { + int sums[3][2][2]; + +#define GET_XY(x, y, c) pSrc_pixels[(x) + ((y) * 4)][c] + + for (uint32_t c = 0; c < 3; c++) + { + sums[c][0][0] = GET_XY(0, 0, c) + GET_XY(0, 1, c) + GET_XY(1, 0, c) + GET_XY(1, 1, c); + sums[c][1][0] = GET_XY(2, 0, c) + GET_XY(2, 1, c) + GET_XY(3, 0, c) + GET_XY(3, 1, c); + sums[c][0][1] = GET_XY(0, 2, c) + GET_XY(0, 3, c) + GET_XY(1, 2, c) + GET_XY(1, 3, c); + sums[c][1][1] = GET_XY(2, 2, c) + GET_XY(2, 3, c) + GET_XY(3, 2, c) + GET_XY(3, 3, c); + } + + int upper_avg[3], lower_avg[3], left_avg[3], right_avg[3]; + for (uint32_t c = 0; c < 3; c++) + { + upper_avg[c] = (sums[c][0][0] + sums[c][1][0] + 4) / 8; + lower_avg[c] = (sums[c][0][1] + sums[c][1][1] + 4) / 8; + left_avg[c] = (sums[c][0][0] + sums[c][0][1] + 4) / 8; + right_avg[c] = (sums[c][1][0] + sums[c][1][1] + 4) / 8; + } + +#undef GET_XY +#define GET_XY(x, y, a) gray_distance2(pSrc_pixels[(x) + ((y) * 4)], a[0], a[1], a[2]) + + int upper_gray_dist = 0, lower_gray_dist = 0, left_gray_dist = 0, right_gray_dist = 0; + for (uint32_t i = 0; i < 4; i++) + { + for (uint32_t j = 0; j < 2; j++) + { + upper_gray_dist += GET_XY(i, j, upper_avg); + lower_gray_dist += GET_XY(i, 2 + j, lower_avg); + left_gray_dist += GET_XY(j, i, left_avg); + right_gray_dist += GET_XY(2 + j, i, right_avg); + } + } + +#undef GET_XY + + int upper_lower_sum = upper_gray_dist + lower_gray_dist; + int left_right_sum = left_gray_dist + right_gray_dist; + + return upper_lower_sum < left_right_sum; + } + + static void compute_etc1_hints(etc_block& best_etc1_blk, uint32_t& best_etc1_bias, const uastc_encode_results& best_results, const color_rgba block[4][4], const color_rgba decoded_uastc_block[4][4], int level, uint32_t flags) + { + best_etc1_bias = 0; + + if (best_results.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + pack_etc1_block_solid_color(best_etc1_blk, &best_results.m_solid_color.m_comps[0]); + return; + } + + const bool faster_etc1 = (flags 
& cPackUASTCETC1FasterHints) != 0; + const bool fastest_etc1 = (flags & cPackUASTCETC1FastestHints) != 0; + + const bool has_bias = g_uastc_mode_has_etc1_bias[best_results.m_uastc_mode]; + + // 0 should be at the top, but we need 13 first because it represents bias (0,0,0). + const uint8_t s_sorted_bias_modes[32] = { 13, 0, 22, 29, 27, 12, 26, 9, 30, 31, 8, 10, 25, 2, 23, 5, 15, 7, 3, 11, 6, 17, 28, 18, 1, 19, 20, 21, 24, 4, 14, 16 }; + + uint32_t last_bias = 1; + bool use_faster_bias_mode_table = false; + const bool flip_estimate = (level <= cPackUASTCLevelFaster) || (faster_etc1) || (fastest_etc1); + if (has_bias) + { + switch (level) + { + case cPackUASTCLevelFastest: + { + last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 1 : 2); + use_faster_bias_mode_table = true; + break; + } + case cPackUASTCLevelFaster: + { + last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 3 : 5); + use_faster_bias_mode_table = true; + break; + } + case cPackUASTCLevelDefault: + { + last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 10 : 20); + use_faster_bias_mode_table = true; + break; + } + case cPackUASTCLevelSlower: + { + last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 
16 : 32); + use_faster_bias_mode_table = true; + break; + } + default: + { + last_bias = 32; + break; + } + } + } + + memset(&best_etc1_blk, 0, sizeof(best_etc1_blk)); + uint64_t best_err = UINT64_MAX; + + etc_block trial_block; + memset(&trial_block, 0, sizeof(trial_block)); + + ycbcr block_ycbcr[4][4], decoded_uastc_block_ycbcr[4][4]; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + rgb_to_y_cb_cr(block[y][x], block_ycbcr[y][x]); + rgb_to_y_cb_cr(decoded_uastc_block[y][x], decoded_uastc_block_ycbcr[y][x]); + } + } + + uint32_t first_flip = 0, last_flip = 2; + uint32_t first_individ = 0, last_individ = 2; + + if (flags & cPackUASTCETC1DisableFlipAndIndividual) + { + last_flip = 1; + last_individ = 1; + } + else if (flip_estimate) + { + if (pack_etc1_estimate_flipped(&decoded_uastc_block[0][0])) + first_flip = 1; + last_flip = first_flip + 1; + } + + for (uint32_t flip = first_flip; flip < last_flip; flip++) + { + trial_block.set_flip_bit(flip != 0); + + for (uint32_t individ = first_individ; individ < last_individ; individ++) + { + const uint32_t mul = individ ? 
15 : 31; + + trial_block.set_diff_bit(individ == 0); + + color_rgba unbiased_block_colors[2]; + + int min_r[2] = { 255, 255 }, min_g[2] = { 255, 255 }, min_b[2] = { 255, 255 }, max_r[2] = { 0, 0 }, max_g[2] = { 0, 0 }, max_b[2] = { 0, 0 }; + + for (uint32_t subset = 0; subset < 2; subset++) + { + uint32_t avg_color[3]; + memset(avg_color, 0, sizeof(avg_color)); + + for (uint32_t j = 0; j < 8; j++) + { + const etc_coord2 &c = g_etc1_pixel_coords[flip][subset][j]; + const color_rgba& p = decoded_uastc_block[c.m_y][c.m_x]; + + avg_color[0] += p.r; + avg_color[1] += p.g; + avg_color[2] += p.b; + + min_r[subset] = basisu::minimum(min_r[subset], p.r); + min_g[subset] = basisu::minimum(min_g[subset], p.g); + min_b[subset] = basisu::minimum(min_b[subset], p.b); + + max_r[subset] = basisu::maximum(max_r[subset], p.r); + max_g[subset] = basisu::maximum(max_g[subset], p.g); + max_b[subset] = basisu::maximum(max_b[subset], p.b); + } // j + + unbiased_block_colors[subset][0] = (uint8_t)((avg_color[0] * mul + 1020) / (8 * 255)); + unbiased_block_colors[subset][1] = (uint8_t)((avg_color[1] * mul + 1020) / (8 * 255)); + unbiased_block_colors[subset][2] = (uint8_t)((avg_color[2] * mul + 1020) / (8 * 255)); + unbiased_block_colors[subset][3] = 0; + + } // subset + + for (uint32_t bias_iter = 0; bias_iter < last_bias; bias_iter++) + { + const uint32_t bias = use_faster_bias_mode_table ? s_sorted_bias_modes[bias_iter] : bias_iter; + + color_rgba block_colors[2]; + for (uint32_t subset = 0; subset < 2; subset++) + block_colors[subset] = has_bias ? 
apply_etc1_bias((color32&)unbiased_block_colors[subset], bias, mul, subset) : unbiased_block_colors[subset]; + + if (individ) + trial_block.set_block_color4(block_colors[0], block_colors[1]); + else + trial_block.set_block_color5_clamp(block_colors[0], block_colors[1]); + + uint32_t range[2]; + for (uint32_t subset = 0; subset < 2; subset++) + { + const color_rgba base_c(trial_block.get_block_color(subset, true)); + + const int pos_r = iabs(max_r[subset] - base_c.r); + const int neg_r = iabs(base_c.r - min_r[subset]); + + const int pos_g = iabs(max_g[subset] - base_c.g); + const int neg_g = iabs(base_c.g - min_g[subset]); + + const int pos_b = iabs(max_b[subset] - base_c.b); + const int neg_b = iabs(base_c.b - min_b[subset]); + + range[subset] = maximum(maximum(pos_r, neg_r, pos_g, neg_g), pos_b, neg_b); + } + + uint32_t best_inten_table[2] = { 0, 0 }; + + for (uint32_t subset = 0; subset < 2; subset++) + { + uint64_t best_subset_err = UINT64_MAX; + + const uint32_t inten_table_limit = (level == cPackUASTCLevelVerySlow) ? 8 : ((range[subset] > 51) ? 8 : (range[subset] >= 7 ? 
4 : 2)); + + for (uint32_t inten_table = 0; inten_table < inten_table_limit; inten_table++) + { + trial_block.set_inten_table(subset, inten_table); + + color_rgba color_table[4]; + trial_block.get_block_colors(color_table, subset); + + ycbcr color_table_ycbcr[4]; + for (uint32_t i = 0; i < 4; i++) + rgb_to_y_cb_cr(color_table[i], color_table_ycbcr[i]); + + uint64_t total_error = 0; + if (flip) + { + for (uint32_t y = 0; y < 2; y++) + { + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][0]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][1]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][2]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][3]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + if (total_error >= best_subset_err) + break; + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + { + const ycbcr& c = decoded_uastc_block_ycbcr[y][subset * 2 + 0]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + { + const ycbcr& c = decoded_uastc_block_ycbcr[y][subset * 2 + 1]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), 
color_diff(color_table_ycbcr[3], c)); + } + } + if (total_error >= best_subset_err) + break; + } + + if (total_error < best_subset_err) + { + best_subset_err = total_error; + best_inten_table[subset] = inten_table; + } + + } // inten_table + + } // subset + + trial_block.set_inten_table(0, best_inten_table[0]); + trial_block.set_inten_table(1, best_inten_table[1]); + + // Compute error against the ORIGINAL block. + uint64_t err = 0; + + for (uint32_t subset = 0; subset < 2; subset++) + { + color_rgba color_table[4]; + trial_block.get_block_colors(color_table, subset); + + ycbcr color_table_ycbcr[4]; + for (uint32_t i = 0; i < 4; i++) + rgb_to_y_cb_cr(color_table[i], color_table_ycbcr[i]); + + if (flip) + { + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][x]; + const uint64_t best_index_err = minimum(color_diff(color_table_ycbcr[0], c) << 2, (color_diff(color_table_ycbcr[1], c) << 2) + 1, (color_diff(color_table_ycbcr[2], c) << 2) + 2, (color_diff(color_table_ycbcr[3], c) << 2) + 3); + + const uint32_t best_index = (uint32_t)best_index_err & 3; + err += color_diff(block_ycbcr[subset * 2 + y][x], color_table_ycbcr[best_index]); + } + if (err >= best_err) + break; + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const ycbcr& c = decoded_uastc_block_ycbcr[y][subset * 2 + x]; + const uint64_t best_index_err = minimum(color_diff(color_table_ycbcr[0], c) << 2, (color_diff(color_table_ycbcr[1], c) << 2) + 1, (color_diff(color_table_ycbcr[2], c) << 2) + 2, (color_diff(color_table_ycbcr[3], c) << 2) + 3); + + const uint32_t best_index = (uint32_t)best_index_err & 3; + err += color_diff(block_ycbcr[y][subset * 2 + x], color_table_ycbcr[best_index]); + } + if (err >= best_err) + break; + } + } + + } // subset + + if (err < best_err) + { + best_err = err; + + best_etc1_blk = trial_block; + best_etc1_bias = bias; + } + + } // bias_iter + + } 
// individ + + } // flip + } + + struct uastc_pack_eac_a8_results + { + uint32_t m_base; + uint32_t m_table; + uint32_t m_multiplier; + }; + + static uint64_t uastc_pack_eac_a8(uastc_pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask) + { + assert(num_pixels <= 16); + + uint32_t min_alpha = 255, max_alpha = 0; + for (uint32_t i = 0; i < num_pixels; i++) + { + const uint32_t a = pPixels[i]; + if (a < min_alpha) min_alpha = a; + if (a > max_alpha) max_alpha = a; + } + + if (min_alpha == max_alpha) + { + results.m_base = min_alpha; + results.m_table = 13; + results.m_multiplier = 1; + return 0; + } + + const uint32_t alpha_range = max_alpha - min_alpha; + + uint64_t best_err = UINT64_MAX; + + for (uint32_t table = 0; table < 16; table++) + { + if ((table_mask & (1U << table)) == 0) + continue; + + const float range = (float)(g_etc2_eac_tables[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]); + const int center = (int)roundf(lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)); + + const int base_min = clamp255(center - base_search_rad); + const int base_max = clamp255(center + base_search_rad); + + const int mul = (int)roundf(alpha_range / range); + const int mul_low = clamp(mul - mul_search_rad, 1, 15); + const int mul_high = clamp(mul + mul_search_rad, 1, 15); + + for (int base = base_min; base <= base_max; base++) + { + for (int multiplier = mul_low; multiplier <= mul_high; multiplier++) + { + uint64_t total_err = 0; + + for (uint32_t i = 0; i < num_pixels; i++) + { + const int a = pPixels[i]; + + uint32_t best_s_err = UINT32_MAX; + //uint32_t best_s = 0; + for (uint32_t s = 0; s < 8; s++) + { + const int v = clamp255((int)multiplier * g_etc2_eac_tables[table][s] + (int)base); + + uint32_t err = iabs(a - v); + if (err < best_s_err) + { + best_s_err = err; + 
//best_s = s; + } + } + + total_err += best_s_err * best_s_err; + if (total_err >= best_err) + break; + } + + if (total_err < best_err) + { + best_err = total_err; + results.m_base = base; + results.m_multiplier = multiplier; + results.m_table = table; + if (!best_err) + return best_err; + } + + } // table + + } // multiplier + + } // base + + return best_err; + } + + const int32_t DEFAULT_BC7_ERROR_WEIGHT = 50; + const float UASTC_ERROR_THRESH = 1.3f; + + // TODO: This is a quick hack to favor certain modes when we know we'll be followed up with an RDO postprocess. + static inline float get_uastc_mode_weight(uint32_t mode) + { + const float FAVORED_MODE_WEIGHT = .8f; + + switch (mode) + { + case 0: + case 10: + return FAVORED_MODE_WEIGHT; + default: + break; + } + + return 1.0f; + } + + void encode_uastc(const uint8_t* pRGBAPixels, uastc_block& output_block, uint32_t flags) + { +// printf("encode_uastc: \n"); +// for (int i = 0; i < 16; i++) +// printf("[%u %u %u %u] ", pRGBAPixels[i * 4 + 0], pRGBAPixels[i * 4 + 1], pRGBAPixels[i * 4 + 2], pRGBAPixels[i * 4 + 3]); +// printf("\n"); + + const color_rgba(*block)[4] = reinterpret_cast(pRGBAPixels); + + bool solid_color = true, has_alpha = false, is_la = true; + + const color_rgba first_color(block[0][0]); + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + if (block[y][x].a < 255) + has_alpha = true; + + if (block[y][x] != first_color) + solid_color = false; + + if ((block[y][x].r != block[y][x].g) || (block[y][x].r != block[y][x].b)) + is_la = false; + } + } + + if (solid_color) + { + // Solid color blocks are so common that we handle them specially and as quickly as we can. 
+ uastc_encode_results solid_results; + solid_results.m_uastc_mode = UASTC_MODE_INDEX_SOLID_COLOR; + solid_results.m_astc_err = 0; + solid_results.m_common_pattern = 0; + solid_results.m_solid_color = first_color; + memset(&solid_results.m_astc, 0, sizeof(solid_results.m_astc)); + + etc_block etc1_blk; + uint32_t etc1_bias = 0; + + pack_etc1_block_solid_color(etc1_blk, &first_color.m_comps[0]); + + eac_a8_block eac_a8_blk; + eac_a8_blk.m_table = 0; + eac_a8_blk.m_multiplier = 1; + + pack_uastc(output_block, solid_results, etc1_blk, etc1_bias, eac_a8_blk, false, false); + +// printf(" Solid\n"); + + return; + } + + int level = flags & 7; + const bool favor_uastc_error = (flags & cPackUASTCFavorUASTCError) != 0; + const bool favor_bc7_error = !favor_uastc_error && ((flags & cPackUASTCFavorBC7Error) != 0); + //const bool etc1_perceptual = true; + + // TODO: This uses 64KB of stack space! + uastc_encode_results results[MAX_ENCODE_RESULTS]; + + level = clampi(level, cPackUASTCLevelFastest, cPackUASTCLevelVerySlow); + + // Set all options to slowest, then configure from there depending on the selected level. 
+ uint32_t mode_mask = UINT32_MAX; + uint32_t uber_level = 6; + bool estimate_partition = false; + bool always_try_alpha_modes = true; + uint32_t eac_a8_mul_search_rad = 3; + uint32_t eac_a8_table_mask = UINT32_MAX; + uint32_t least_squares_passes = 2; + bool bc1_hints = true; + bool only_use_la_on_transparent_blocks = false; + + switch (level) + { + case cPackUASTCLevelFastest: + { + mode_mask = (1 << 0) | (1 << 8) | + (1 << 11) | (1 << 12) | + (1 << 15); + always_try_alpha_modes = false; + eac_a8_mul_search_rad = 0; + eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13); + uber_level = 0; + least_squares_passes = 1; + bc1_hints = false; + estimate_partition = true; + only_use_la_on_transparent_blocks = true; + break; + } + case cPackUASTCLevelFaster: + { + mode_mask = (1 << 0) | (1 << 4) | (1 << 6) | (1 << 8) | + (1 << 9) | (1 << 11) | (1 << 12) | + (1 << 15) | (1 << 17); + always_try_alpha_modes = false; + eac_a8_mul_search_rad = 0; + eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13); + uber_level = 0; + least_squares_passes = 1; + estimate_partition = true; + break; + } + case cPackUASTCLevelDefault: + { + mode_mask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 6) | (1 << 8) | + (1 << 9) | (1 << 10) | (1 << 11) | (1 << 12) | (1 << 13) | + (1 << 15) | (1 << 16) | (1 << 17); + always_try_alpha_modes = false; + eac_a8_mul_search_rad = 1; + eac_a8_table_mask = (1 << 0) | (1 << 2) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 10) | (1 << 11) | (1 << 13); + uber_level = 1; + least_squares_passes = 1; + estimate_partition = true; + break; + } + case cPackUASTCLevelSlower: + { + always_try_alpha_modes = false; + eac_a8_mul_search_rad = 2; + uber_level = 3; + estimate_partition = true; + break; + } + case cPackUASTCLevelVerySlow: + { + break; + } + } + +#if BASISU_SUPPORT_FORCE_MODE + static int force_mode = -1; + force_mode = (force_mode + 1) % TOTAL_UASTC_MODES; + mode_mask = UINT32_MAX; + always_try_alpha_modes = true; + 
only_use_la_on_transparent_blocks = false; +#endif + + // HACK HACK + //mode_mask &= ~(1 << 18); + //mode_mask = (1 << 18)| (1 << 10); + + uint32_t total_results = 0; + + if (only_use_la_on_transparent_blocks) + { + if ((is_la) && (!has_alpha)) + is_la = false; + } + + const bool try_alpha_modes = has_alpha || always_try_alpha_modes; + + bc7enc_compress_block_params comp_params; + memset(&comp_params, 0, sizeof(comp_params)); + comp_params.m_max_partitions_mode1 = 64; + comp_params.m_least_squares_passes = least_squares_passes; + comp_params.m_weights[0] = 1; + comp_params.m_weights[1] = 1; + comp_params.m_weights[2] = 1; + comp_params.m_weights[3] = 1; + comp_params.m_uber_level = uber_level; + + if (is_la) + { + if (mode_mask & (1U << 15)) + astc_mode15(block, results, total_results, comp_params); + + if (mode_mask & (1U << 16)) + astc_mode9_or_16(16, block, results, total_results, comp_params, estimate_partition ? 4 : 0); + + if (mode_mask & (1U << 17)) + astc_mode11_or_17(17, block, results, total_results, comp_params); + } + + if (!has_alpha) + { + if (mode_mask & (1U << 0)) + astc_mode0_or_18(0, block, results, total_results, comp_params); + + if (mode_mask & (1U << 1)) + astc_mode1(block, results, total_results, comp_params); + + if (mode_mask & (1U << 2)) + astc_mode2(block, results, total_results, comp_params, estimate_partition); + + if (mode_mask & (1U << 3)) + astc_mode3(block, results, total_results, comp_params, estimate_partition); + + if (mode_mask & (1U << 4)) + astc_mode4(block, results, total_results, comp_params, estimate_partition); + + if (mode_mask & (1U << 5)) + astc_mode5(block, results, total_results, comp_params); + + if (mode_mask & (1U << 6)) + astc_mode6(block, results, total_results, comp_params); + + if (mode_mask & (1U << 7)) + astc_mode7(block, results, total_results, comp_params, estimate_partition); + + if (mode_mask & (1U << 18)) + astc_mode0_or_18(18, block, results, total_results, comp_params); + } + + if (try_alpha_modes) + { 
+ if (mode_mask & (1U << 9)) + astc_mode9_or_16(9, block, results, total_results, comp_params, estimate_partition ? 4 : 0); + + if (mode_mask & (1U << 10)) + astc_mode10(block, results, total_results, comp_params); + + if (mode_mask & (1U << 11)) + astc_mode11_or_17(11, block, results, total_results, comp_params); + + if (mode_mask & (1U << 12)) + astc_mode12(block, results, total_results, comp_params); + + if (mode_mask & (1U << 13)) + astc_mode13(block, results, total_results, comp_params); + + if (mode_mask & (1U << 14)) + astc_mode14(block, results, total_results, comp_params); + } + + assert(total_results); + + // Fix up the errors so we consistently have LA, RGB, or RGBA error. + for (uint32_t i = 0; i < total_results; i++) + { + uastc_encode_results& r = results[i]; + if (!is_la) + { + if (g_uastc_mode_is_la[r.m_uastc_mode]) + { + color_rgba unpacked_block[16]; + unpack_uastc(r.m_uastc_mode, r.m_common_pattern, r.m_solid_color.get_color32(), r.m_astc, (basist::color32 *)unpacked_block, false); + + uint64_t total_err = 0; + for (uint32_t j = 0; j < 16; j++) + total_err += color_distance(unpacked_block[j], ((const color_rgba*)block)[j], true); + + r.m_astc_err = total_err; + } + } + else + { + if (!g_uastc_mode_is_la[r.m_uastc_mode]) + { + color_rgba unpacked_block[16]; + unpack_uastc(r.m_uastc_mode, r.m_common_pattern, r.m_solid_color.get_color32(), r.m_astc, (basist::color32 *)unpacked_block, false); + + uint64_t total_err = 0; + for (uint32_t j = 0; j < 16; j++) + total_err += color_distance_la(unpacked_block[j], ((const color_rgba*)block)[j]); + + r.m_astc_err = total_err; + } + } + } + + unpacked_uastc_block unpacked_ublock; + memset(&unpacked_ublock, 0, sizeof(unpacked_ublock)); + + uint64_t total_overall_err[MAX_ENCODE_RESULTS]; + float uastc_err_f[MAX_ENCODE_RESULTS]; + double best_uastc_err_f = 1e+20f; + + int best_index = -1; + + if (total_results == 1) + { + best_index = 0; + } + else + { + const uint32_t bc7_err_weight = favor_bc7_error ? 
100 : ((favor_uastc_error ? 0 : DEFAULT_BC7_ERROR_WEIGHT)); + const uint32_t uastc_err_weight = favor_bc7_error ? 0 : 100; + + // Find best overall results, balancing UASTC and UASTC->BC7 error. + // We purposely allow UASTC error to increase a little, if doing so lowers the BC7 error. + for (uint32_t i = 0; i < total_results; i++) + { +#if BASISU_SUPPORT_FORCE_MODE + if (results[i].m_uastc_mode == force_mode) + { + best_index = i; + break; + } +#endif + + unpacked_ublock.m_mode = results[i].m_uastc_mode; + unpacked_ublock.m_astc = results[i].m_astc; + unpacked_ublock.m_common_pattern = results[i].m_common_pattern; + unpacked_ublock.m_solid_color = results[i].m_solid_color.get_color32(); + + color_rgba decoded_uastc_block[4][4]; + bool success = unpack_uastc(results[i].m_uastc_mode, results[i].m_common_pattern, results[i].m_solid_color.get_color32(), results[i].m_astc, (basist::color32 *)&decoded_uastc_block[0][0], false); + (void)success; + VALIDATE(success); + + uint64_t total_uastc_rgb_err, total_uastc_rgba_err, total_uastc_la_err; + compute_block_error(block, decoded_uastc_block, total_uastc_rgb_err, total_uastc_rgba_err, total_uastc_la_err); + + // Validate the computed error, or we're go mad if it's inaccurate. 
+ if (results[i].m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + VALIDATE(total_uastc_rgba_err == 0); + } + else if (is_la) + { + VALIDATE(total_uastc_la_err == results[i].m_astc_err); + } + else if (g_uastc_mode_has_alpha[results[i].m_uastc_mode]) + { + VALIDATE(total_uastc_rgba_err == results[i].m_astc_err); + } + else + { + VALIDATE(total_uastc_rgb_err == results[i].m_astc_err); + } + + // Transcode to BC7 + bc7_optimization_results bc7_results; + transcode_uastc_to_bc7(unpacked_ublock, bc7_results); + + bc7_block bc7_data; + encode_bc7_block(&bc7_data, &bc7_results); + + color_rgba decoded_bc7_block[4][4]; + unpack_block(texture_format::cBC7, &bc7_data, &decoded_bc7_block[0][0]); + + // Compute BC7 error + uint64_t total_bc7_la_err, total_bc7_rgb_err, total_bc7_rgba_err; + compute_block_error(block, decoded_bc7_block, total_bc7_rgb_err, total_bc7_rgba_err, total_bc7_la_err); + + if (results[i].m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + VALIDATE(total_bc7_rgba_err == 0); + + best_index = i; + break; + } + + uint64_t total_uastc_err = 0, total_bc7_err = 0; + if (is_la) + { + total_bc7_err = total_bc7_la_err; + total_uastc_err = total_uastc_la_err; + } + else if (has_alpha) + { + total_bc7_err = total_bc7_rgba_err; + total_uastc_err = total_uastc_rgba_err; + } + else + { + total_bc7_err = total_bc7_rgb_err; + total_uastc_err = total_uastc_rgb_err; + } + + total_overall_err[i] = ((total_bc7_err * bc7_err_weight) / 100) + ((total_uastc_err * uastc_err_weight) / 100); + if (!total_overall_err[i]) + { + best_index = i; + break; + } + + uastc_err_f[i] = sqrtf((float)total_uastc_err); + + if (uastc_err_f[i] < best_uastc_err_f) + { + best_uastc_err_f = uastc_err_f[i]; + } + + } // total_results + + if (best_index < 0) + { + uint64_t best_err = UINT64_MAX; + + if ((best_uastc_err_f == 0.0f) || (favor_bc7_error)) + { + for (uint32_t i = 0; i < total_results; i++) + { + // TODO: This is a quick hack to favor modes 0 or 10 for better RDO compression. 
+ const float err_weight = (flags & cPackUASTCFavorSimplerModes) ? get_uastc_mode_weight(results[i].m_uastc_mode) : 1.0f; + + const uint64_t w = (uint64_t)(total_overall_err[i] * err_weight); + if (w < best_err) + { + best_err = w; + best_index = i; + if (!best_err) + break; + } + } // i + } + else + { + // Scan the UASTC results, and consider all results within a window that has the best UASTC+BC7 error. + for (uint32_t i = 0; i < total_results; i++) + { + double err_delta = uastc_err_f[i] / best_uastc_err_f; + + if (err_delta <= UASTC_ERROR_THRESH) + { + // TODO: This is a quick hack to favor modes 0 or 10 for better RDO compression. + const float err_weight = (flags & cPackUASTCFavorSimplerModes) ? get_uastc_mode_weight(results[i].m_uastc_mode) : 1.0f; + + const uint64_t w = (uint64_t)(total_overall_err[i] * err_weight); + if (w < best_err) + { + best_err = w; + best_index = i; + if (!best_err) + break; + } + } + } // i + } + } + } + + const uastc_encode_results& best_results = results[best_index]; + const uint32_t best_mode = best_results.m_uastc_mode; + const astc_block_desc& best_astc_results = best_results.m_astc; + + color_rgba decoded_uastc_block[4][4]; + bool success = unpack_uastc(best_mode, best_results.m_common_pattern, best_results.m_solid_color.get_color32(), best_astc_results, (basist::color32 *)&decoded_uastc_block[0][0], false); + (void)success; + VALIDATE(success); + +#if BASISU_VALIDATE_UASTC_ENC + // Make sure that the UASTC block unpacks to the same exact pixels as the ASTC block does, using two different decoders. + { + // Round trip to packed UASTC and back, then decode to pixels. 
+ etc_block etc1_blk; + memset(&etc1_blk, 0, sizeof(etc1_blk)); + eac_a8_block etc_eac_a8_blk; + memset(&etc_eac_a8_blk, 0, sizeof(etc_eac_a8_blk)); + etc_eac_a8_blk.m_multiplier = 1; + + basist::uastc_block temp_block; + pack_uastc(temp_block, best_results, etc1_blk, 0, etc_eac_a8_blk, false, false); + + basist::color32 temp_block_unpacked[4][4]; + success = basist::unpack_uastc(temp_block, (basist::color32 *)temp_block_unpacked, false); + VALIDATE(success); + + // Now round trip to packed ASTC and back, then decode to pixels. + uint32_t astc_data[4]; + + if (best_results.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + pack_astc_solid_block(astc_data, (color32 &)best_results.m_solid_color); + else + { + success = pack_astc_block(astc_data, &best_astc_results, best_results.m_uastc_mode); + VALIDATE(success); + } + + color_rgba decoded_astc_block[4][4]; + success = basisu_astc::astc::decompress_ldr((uint8_t*)decoded_astc_block, (uint8_t*)&astc_data, false, 4, 4); + VALIDATE(success); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + VALIDATE(decoded_astc_block[y][x] == decoded_uastc_block[y][x]); + + VALIDATE(temp_block_unpacked[y][x].c[0] == decoded_uastc_block[y][x].r); + VALIDATE(temp_block_unpacked[y][x].c[1] == decoded_uastc_block[y][x].g); + VALIDATE(temp_block_unpacked[y][x].c[2] == decoded_uastc_block[y][x].b); + VALIDATE(temp_block_unpacked[y][x].c[3] == decoded_uastc_block[y][x].a); + } + } + } +#endif + + // Compute BC1 hints + bool bc1_hint0 = false, bc1_hint1 = false; + if (bc1_hints) + compute_bc1_hints(bc1_hint0, bc1_hint1, best_results, block, decoded_uastc_block); + + eac_a8_block eac_a8_blk; + if ((g_uastc_mode_has_alpha[best_mode]) && (best_mode != UASTC_MODE_INDEX_SOLID_COLOR)) + { + // Compute ETC2 hints + uint8_t decoded_uastc_block_alpha[16]; + for (uint32_t i = 0; i < 16; i++) + decoded_uastc_block_alpha[i] = decoded_uastc_block[i >> 2][i & 3].a; + + uastc_pack_eac_a8_results eac8_a8_results; + 
memset(&eac8_a8_results, 0, sizeof(eac8_a8_results)); + uastc_pack_eac_a8(eac8_a8_results, decoded_uastc_block_alpha, 16, 0, eac_a8_mul_search_rad, eac_a8_table_mask); + + // All we care about for hinting is the table and multiplier. + eac_a8_blk.m_table = eac8_a8_results.m_table; + eac_a8_blk.m_multiplier = eac8_a8_results.m_multiplier; + } + else + { + memset(&eac_a8_blk, 0, sizeof(eac_a8_blk)); + } + + // Compute ETC1 hints + etc_block etc1_blk; + uint32_t etc1_bias = 0; + compute_etc1_hints(etc1_blk, etc1_bias, best_results, block, decoded_uastc_block, level, flags); + + // Finally, pack the UASTC block with its hints and we're done. + pack_uastc(output_block, best_results, etc1_blk, etc1_bias, eac_a8_blk, bc1_hint0, bc1_hint1); + +// printf(" Packed: "); +// for (int i = 0; i < 16; i++) +// printf("%X ", output_block.m_bytes[i]); +// printf("\n"); + } + + static bool uastc_recompute_hints(basist::uastc_block* pBlock, const color_rgba* pBlock_pixels, uint32_t flags, const unpacked_uastc_block *pUnpacked_blk) + { + unpacked_uastc_block unpacked_blk; + + if (pUnpacked_blk) + unpacked_blk = *pUnpacked_blk; + else + { + if (!unpack_uastc(*pBlock, unpacked_blk, false, true)) + return false; + } + color_rgba decoded_uastc_block[4][4]; + if (!unpack_uastc(unpacked_blk, (basist::color32 *)decoded_uastc_block, false)) + return false; + uastc_encode_results results; + results.m_uastc_mode = unpacked_blk.m_mode; + results.m_common_pattern = unpacked_blk.m_common_pattern; + results.m_astc = unpacked_blk.m_astc; + results.m_solid_color = unpacked_blk.m_solid_color; + results.m_astc_err = 0; + bool bc1_hints = true; + uint32_t eac_a8_mul_search_rad = 3; + uint32_t eac_a8_table_mask = UINT32_MAX; + const uint32_t level = flags & cPackUASTCLevelMask; + switch (level) + { + case cPackUASTCLevelFastest: + { + eac_a8_mul_search_rad = 0; + eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13); + bc1_hints = false; + break; + } + case cPackUASTCLevelFaster: + { + 
eac_a8_mul_search_rad = 0; + eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13); + break; + } + case cPackUASTCLevelDefault: + { + eac_a8_mul_search_rad = 1; + eac_a8_table_mask = (1 << 0) | (1 << 2) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 10) | (1 << 11) | (1 << 13); + break; + } + case cPackUASTCLevelSlower: + { + eac_a8_mul_search_rad = 2; + break; + } + case cPackUASTCLevelVerySlow: + { + break; + } + } + bool bc1_hint0 = false, bc1_hint1 = false; + if (bc1_hints) + compute_bc1_hints(bc1_hint0, bc1_hint1, results, (color_rgba (*)[4])pBlock_pixels, decoded_uastc_block); + const uint32_t best_mode = unpacked_blk.m_mode; + eac_a8_block eac_a8_blk; + if ((g_uastc_mode_has_alpha[best_mode]) && (best_mode != UASTC_MODE_INDEX_SOLID_COLOR)) + { + uint8_t decoded_uastc_block_alpha[16]; + for (uint32_t i = 0; i < 16; i++) + decoded_uastc_block_alpha[i] = decoded_uastc_block[i >> 2][i & 3].a; + uastc_pack_eac_a8_results eac8_a8_results; + memset(&eac8_a8_results, 0, sizeof(eac8_a8_results)); + uastc_pack_eac_a8(eac8_a8_results, decoded_uastc_block_alpha, 16, 0, eac_a8_mul_search_rad, eac_a8_table_mask); + eac_a8_blk.m_table = eac8_a8_results.m_table; + eac_a8_blk.m_multiplier = eac8_a8_results.m_multiplier; + } + else + { + memset(&eac_a8_blk, 0, sizeof(eac_a8_blk)); + } + etc_block etc1_blk; + uint32_t etc1_bias = 0; + compute_etc1_hints(etc1_blk, etc1_bias, results, (color_rgba (*)[4])pBlock_pixels, decoded_uastc_block, level, flags); + pack_uastc(*pBlock, results, etc1_blk, etc1_bias, eac_a8_blk, bc1_hint0, bc1_hint1); + return true; + } + + static const uint8_t g_uastc_mode_selector_bits[TOTAL_UASTC_MODES][2] = + { + { 65, 63 }, { 69, 31 }, { 73, 46 }, { 89, 29 }, + { 89, 30 }, { 68, 47 }, { 66, 62 }, { 89, 30 }, + { 0, 0 }, { 97, 30 }, { 65, 63 }, { 66, 62 }, + { 81, 47 }, { 94, 30 }, { 92, 31 }, { 62, 63 }, + { 98, 30 }, { 61, 62 }, { 49, 79 } + }; + + static inline uint32_t set_block_bits(uint8_t* pBytes, uint64_t val, uint32_t num_bits, uint32_t 
cur_ofs) + { + assert(num_bits <= 64); + assert((num_bits == 64) || (val < (1ULL << num_bits))); + uint64_t mask = (num_bits == 64) ? UINT64_MAX : ((1ULL << num_bits) - 1); + while (num_bits) + { + const uint32_t n = basisu::minimum(8U - (cur_ofs & 7U), num_bits); + pBytes[cur_ofs >> 3] &= ~static_cast(mask << (cur_ofs & 7U)); + pBytes[cur_ofs >> 3] |= static_cast(val << (cur_ofs & 7U)); + val >>= n; + mask >>= n; + num_bits -= n; + cur_ofs += n; + } + return cur_ofs; + } + + static const uint8_t g_tdefl_small_dist_extra[512] = + { + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 + }; + + static const uint8_t g_tdefl_large_dist_extra[128] = + { + 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 + }; + + static inline uint32_t compute_match_cost_estimate(uint32_t dist) + { + uint32_t len_cost = 7; + uint32_t dist_cost = 5; + if (dist < 512) + dist_cost += g_tdefl_small_dist_extra[dist & 511]; + else + { + dist_cost += g_tdefl_large_dist_extra[basisu::minimum(dist, 32767) >> 8]; + while (dist >= 32768) + { + dist_cost++; + dist >>= 1; + } + } + return len_cost + dist_cost; + } + + struct selector_bitsequence + { + uint64_t m_sel; + uint32_t m_ofs; + uint32_t m_pad; // avoid implicit padding for selector_bitsequence_hash + selector_bitsequence() { } + selector_bitsequence(uint32_t bit_ofs, uint64_t sel) : m_sel(sel), m_ofs(bit_ofs), m_pad(0) { } + bool operator== (const selector_bitsequence& other) const + { + return (m_ofs == other.m_ofs) && (m_sel == other.m_sel); + } + + bool operator< (const selector_bitsequence& other) const + { + if (m_ofs < other.m_ofs) + return true; + else if (m_ofs == other.m_ofs) + return m_sel < other.m_sel; + + return false; + } + }; + + struct selector_bitsequence_hash + { + std::size_t operator()(selector_bitsequence const& s) const noexcept + { + return hash_hsieh((const uint8_t*)&s, sizeof(s)); + } + }; + + static bool uastc_rdo_blocks(uint32_t first_index, uint32_t last_index, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, 
uint32_t flags, + uint32_t &total_skipped, uint32_t &total_refined, uint32_t &total_modified, uint32_t &total_smooth) + { + debug_printf("uastc_rdo_blocks: Processing blocks %u to %u\n", first_index, last_index); + + const int total_blocks_to_check = basisu::maximum(1U, params.m_lz_dict_size / sizeof(basist::uastc_block)); + const bool perceptual = false; + + std::unordered_map<selector_bitsequence, uint32_t, selector_bitsequence_hash> selector_history; + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const basist::uastc_block& blk = pBlocks[block_index]; + const color_rgba* pPixels = &pBlock_pixels[16 * block_index]; + + unpacked_uastc_block unpacked_blk; + if (!unpack_uastc(blk, unpacked_blk, false, true)) + return false; + + const uint32_t block_mode = unpacked_blk.m_mode; + if (block_mode == UASTC_MODE_INDEX_SOLID_COLOR) + continue; + + tracked_stat r_stats, g_stats, b_stats, a_stats; + + for (uint32_t i = 0; i < 16; i++) + { + r_stats.update(pPixels[i].r); + g_stats.update(pPixels[i].g); + b_stats.update(pPixels[i].b); + a_stats.update(pPixels[i].a); + } + + const float max_std_dev = basisu::maximum(basisu::maximum(basisu::maximum(r_stats.get_std_dev(), g_stats.get_std_dev()), b_stats.get_std_dev()), a_stats.get_std_dev()); + + float yl = clamp(max_std_dev / params.m_max_smooth_block_std_dev, 0.0f, 1.0f); + yl = yl * yl; + const float smooth_block_error_scale = lerp(params.m_smooth_block_max_error_scale, 1.0f, yl); + if (smooth_block_error_scale > 1.0f) + total_smooth++; + + color_rgba decoded_uastc_block[4][4]; + if (!unpack_uastc(unpacked_blk, (basist::color32*)decoded_uastc_block, false)) + return false; + + uint64_t uastc_err = 0; + for (uint32_t i = 0; i < 16; i++) + uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_uastc_block)[i], true); + + // Transcode to BC7 + bc7_optimization_results b7_results; + if (!transcode_uastc_to_bc7(unpacked_blk, b7_results)) + return false; + + basist::bc7_block b7_block; + basist::encode_bc7_block(&b7_block,
&b7_results); + + color_rgba decoded_b7_blk[4][4]; + unpack_block(texture_format::cBC7, &b7_block, &decoded_b7_blk[0][0]); + + uint64_t bc7_err = 0; + for (uint32_t i = 0; i < 16; i++) + bc7_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_b7_blk)[i], true); + + uint64_t cur_err = (uastc_err + bc7_err) / 2; + + // Divide by 16*4 to compute RMS error + const float cur_ms_err = (float)cur_err * (1.0f / 64.0f); + const float cur_rms_err = sqrt(cur_ms_err); + + const uint32_t first_sel_bit = g_uastc_mode_selector_bits[block_mode][0]; + const uint32_t total_sel_bits = g_uastc_mode_selector_bits[block_mode][1]; + assert(first_sel_bit + total_sel_bits <= 128); + assert(total_sel_bits > 0); + + uint32_t cur_bit_offset = first_sel_bit; + uint64_t cur_sel_bits = read_bits((const uint8_t*)&blk, cur_bit_offset, basisu::minimum(64U, total_sel_bits)); + + if (cur_rms_err >= params.m_skip_block_rms_thresh) + { + auto cur_search_res = selector_history.insert(std::make_pair(selector_bitsequence(first_sel_bit, cur_sel_bits), block_index)); + + // Block already has too much error, so don't mess with it. 
+ if (!cur_search_res.second) + (*cur_search_res.first).second = block_index; + + total_skipped++; + continue; + } + + int cur_bits; + auto cur_find_res = selector_history.find(selector_bitsequence(first_sel_bit, cur_sel_bits)); + if (cur_find_res == selector_history.end()) + { + // Wasn't found - wildly estimate literal cost + //cur_bits = (total_sel_bits * 5) / 4; + cur_bits = (total_sel_bits * params.m_lz_literal_cost) / 100; + } + else + { + // Was found - wildly estimate match cost + uint32_t match_block_index = cur_find_res->second; + const int block_dist_in_bytes = (block_index - match_block_index) * 16; + cur_bits = compute_match_cost_estimate(block_dist_in_bytes); + } + + int first_block_to_check = basisu::maximum(first_index, block_index - total_blocks_to_check); + int last_block_to_check = block_index - 1; + + basist::uastc_block best_block(blk); + uint32_t best_block_index = block_index; + + float best_t = cur_ms_err * smooth_block_error_scale + cur_bits * params.m_lambda; + + // Now scan through previous blocks, insert their selector bit patterns into the current block, and find + // selector bit patterns which don't increase the overall block error too much. + for (int prev_block_index = last_block_to_check; prev_block_index >= first_block_to_check; --prev_block_index) + { + const basist::uastc_block& prev_blk = pBlocks[prev_block_index]; + + uint32_t bit_offset = first_sel_bit; + uint64_t sel_bits = read_bits((const uint8_t*)&prev_blk, bit_offset, basisu::minimum(64U, total_sel_bits)); + + int match_block_index = prev_block_index; + auto res = selector_history.find(selector_bitsequence(first_sel_bit, sel_bits)); + if (res != selector_history.end()) + match_block_index = res->second; + // Have we already checked this bit pattern? If so then skip this block. 
+ if (match_block_index > prev_block_index) + continue; + + unpacked_uastc_block unpacked_prev_blk; + if (!unpack_uastc(prev_blk, unpacked_prev_blk, false, true)) + return false; + + basist::uastc_block trial_blk(blk); + + set_block_bits((uint8_t*)&trial_blk, sel_bits, basisu::minimum(64U, total_sel_bits), first_sel_bit); + + if (total_sel_bits > 64) + { + sel_bits = read_bits((const uint8_t*)&prev_blk, bit_offset, total_sel_bits - 64U); + + set_block_bits((uint8_t*)&trial_blk, sel_bits, total_sel_bits - 64U, first_sel_bit + basisu::minimum(64U, total_sel_bits)); + } + + unpacked_uastc_block unpacked_trial_blk; + if (!unpack_uastc(trial_blk, unpacked_trial_blk, false, true)) + continue; + + color_rgba decoded_trial_uastc_block[4][4]; + if (!unpack_uastc(unpacked_trial_blk, (basist::color32*)decoded_trial_uastc_block, false)) + continue; + + uint64_t trial_uastc_err = 0; + for (uint32_t i = 0; i < 16; i++) + trial_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_uastc_block)[i], true); + + // Transcode trial to BC7, compute error + bc7_optimization_results trial_b7_results; + if (!transcode_uastc_to_bc7(unpacked_trial_blk, trial_b7_results)) + return false; + + basist::bc7_block trial_b7_block; + basist::encode_bc7_block(&trial_b7_block, &trial_b7_results); + + color_rgba decoded_trial_b7_blk[4][4]; + unpack_block(texture_format::cBC7, &trial_b7_block, &decoded_trial_b7_blk[0][0]); + + uint64_t trial_bc7_err = 0; + for (uint32_t i = 0; i < 16; i++) + trial_bc7_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_b7_blk)[i], true); + + uint64_t trial_err = (trial_uastc_err + trial_bc7_err) / 2; + + const float trial_ms_err = (float)trial_err * (1.0f / 64.0f); + const float trial_rms_err = sqrtf(trial_ms_err); + + if (trial_rms_err > cur_rms_err * params.m_max_allowed_rms_increase_ratio) + continue; + + const int block_dist_in_bytes = (block_index - match_block_index) * 16; + const int match_bits = 
compute_match_cost_estimate(block_dist_in_bytes); + + float t = trial_ms_err * smooth_block_error_scale + match_bits * params.m_lambda; + if (t < best_t) + { + best_t = t; + best_block_index = prev_block_index; + + best_block = trial_blk; + } + + } // prev_block_index + + if (best_block_index != block_index) + { + total_modified++; + + unpacked_uastc_block unpacked_best_blk; + if (!unpack_uastc(best_block, unpacked_best_blk, false, false)) + return false; + + if ((params.m_endpoint_refinement) && (block_mode == 0)) + { + // Attempt to refine mode 0 block's endpoints, using the new selectors. This doesn't help much, but it does help. + // TODO: We could do this with the other modes too. + color_rgba decoded_best_uastc_block[4][4]; + if (!unpack_uastc(unpacked_best_blk, (basist::color32*)decoded_best_uastc_block, false)) + return false; + + // Compute the block's current error (with the modified selectors). + uint64_t best_uastc_err = 0; + for (uint32_t i = 0; i < 16; i++) + best_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_best_uastc_block)[i], true); + + bc7enc_compress_block_params comp_params; + memset(&comp_params, 0, sizeof(comp_params)); + comp_params.m_max_partitions_mode1 = 64; + comp_params.m_least_squares_passes = 1; + comp_params.m_weights[0] = 1; + comp_params.m_weights[1] = 1; + comp_params.m_weights[2] = 1; + comp_params.m_weights[3] = 1; + comp_params.m_uber_level = 0; + + uastc_encode_results results; + uint32_t total_results = 0; + astc_mode0_or_18(0, (color_rgba(*)[4])pPixels, &results, total_results, comp_params, unpacked_best_blk.m_astc.m_weights); + assert(total_results == 1); + + // See if the overall error has actually gone done. 
+ + color_rgba decoded_trial_uastc_block[4][4]; + bool success = unpack_uastc(results.m_uastc_mode, results.m_common_pattern, results.m_solid_color.get_color32(), results.m_astc, (basist::color32*) & decoded_trial_uastc_block[0][0], false); + assert(success); + + BASISU_NOTE_UNUSED(success); + + uint64_t trial_uastc_err = 0; + for (uint32_t i = 0; i < 16; i++) + trial_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_uastc_block)[i], true); + + if (trial_uastc_err < best_uastc_err) + { + // The error went down, so accept the new endpoints. + + // Ensure the selectors haven't changed, otherwise we'll invalidate the LZ matches. + for (uint32_t i = 0; i < 16; i++) + assert(unpacked_best_blk.m_astc.m_weights[i] == results.m_astc.m_weights[i]); + + unpacked_best_blk.m_astc = results.m_astc; + + total_refined++; + } + } // if ((params.m_endpoint_refinement) && (block_mode == 0)) + + // The selectors have changed, so go recompute the block hints. + if (!uastc_recompute_hints(&best_block, pPixels, flags, &unpacked_best_blk)) + return false; + + // Write the modified block + pBlocks[block_index] = best_block; + + } // if (best_block_index != block_index) + + { + uint32_t bit_offset = first_sel_bit; + uint64_t sel_bits = read_bits((const uint8_t*)&best_block, bit_offset, basisu::minimum(64U, total_sel_bits)); + + auto res = selector_history.insert(std::make_pair(selector_bitsequence(first_sel_bit, sel_bits), block_index)); + if (!res.second) + (*res.first).second = block_index; + } + + } // block_index + + return true; + } + + // This function implements a basic form of rate distortion optimization (RDO) for UASTC. + // It only changes selectors and then updates the hints. It uses very approximate LZ bitprice estimation. + // There's A LOT that can be done better in here, but it's a start. + // One nice advantage of the method used here is that it works for any input, no matter which or how many modes it uses. 
+ bool uastc_rdo(uint32_t num_blocks, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, job_pool* pJob_pool, uint32_t total_jobs) + { + assert(params.m_max_allowed_rms_increase_ratio > 1.0f); + assert(params.m_lz_dict_size > 0); + assert(params.m_lambda > 0.0f); + + uint32_t total_skipped = 0, total_modified = 0, total_refined = 0, total_smooth = 0; + + uint32_t blocks_per_job = total_jobs ? (num_blocks / total_jobs) : 0; + + std::mutex stat_mutex; + + bool status = false; + + if ((!pJob_pool) || (total_jobs <= 1) || (blocks_per_job <= 8)) + { + status = uastc_rdo_blocks(0, num_blocks, pBlocks, pBlock_pixels, params, flags, total_skipped, total_refined, total_modified, total_smooth); + } + else + { + bool all_succeeded = true; + + for (uint32_t block_index_iter = 0; block_index_iter < num_blocks; block_index_iter += blocks_per_job) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(num_blocks, block_index_iter + blocks_per_job); + + pJob_pool->add_job([first_index, last_index, pBlocks, pBlock_pixels, &params, flags, &total_skipped, &total_modified, &total_refined, &total_smooth, &all_succeeded, &stat_mutex] { + + uint32_t job_skipped = 0, job_modified = 0, job_refined = 0, job_smooth = 0; + + bool status = uastc_rdo_blocks(first_index, last_index, pBlocks, pBlock_pixels, params, flags, job_skipped, job_refined, job_modified, job_smooth); + + { + std::lock_guard<std::mutex> lck(stat_mutex); + + all_succeeded = all_succeeded && status; + total_skipped += job_skipped; + total_modified += job_modified; + total_refined += job_refined; + total_smooth += job_smooth; + } + + } + ); + + } // block_index_iter + + pJob_pool->wait_for_all(); + + status = all_succeeded; + } + + debug_printf("uastc_rdo: Total modified: %3.2f%%, total skipped: %3.2f%%, total refined: %3.2f%%, total smooth: %3.2f%%\n", total_modified * 100.0f / num_blocks, total_skipped * 100.0f / num_blocks, total_refined *
100.0f / num_blocks, total_smooth * 100.0f / num_blocks); + + return status; + } +} // namespace basisu + + + + + diff --git a/thirdparty/basisu/encoder/basisu_uastc_enc.h b/thirdparty/basisu/encoder/basisu_uastc_enc.h new file mode 100644 index 000000000..54d39380e --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_uastc_enc.h @@ -0,0 +1,140 @@ +// basisu_uastc_enc.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "basisu_etc.h" + +#include "../transcoder/basisu_transcoder_uastc.h" + +namespace basisu +{ + const uint32_t TOTAL_PACK_UASTC_LEVELS = 5; + + enum + { + // Fastest is the lowest quality, although it's stil substantially higher quality vs. BC1/ETC1. It supports 5 modes. + // The output may be somewhat blocky because this setting doesn't support 2/3-subset UASTC modes, but it should be less blocky vs. BC1/ETC1. + // This setting doesn't write BC1 hints, so BC1 transcoding will be slower. + // Transcoded ETC1 quality will be lower because it only considers 2 hints out of 32. + // Avg. 43.45 dB + cPackUASTCLevelFastest = 0, + + // Faster is ~3x slower than fastest. It supports 9 modes. + // Avg. 46.49 dB + cPackUASTCLevelFaster = 1, + + // Default is ~5.5x slower than fastest. It supports 14 modes. + // Avg. 47.47 dB + cPackUASTCLevelDefault = 2, + + // Slower is ~14.5x slower than fastest. It supports all 18 modes. + // Avg. 
48.01 dB + cPackUASTCLevelSlower = 3, + + // VerySlow is ~200x slower than fastest. + // The best quality the codec is capable of, but you'll need to be patient or have a lot of cores. + // Avg. 48.24 dB + cPackUASTCLevelVerySlow = 4, + + cPackUASTCLevelMask = 0xF, + + // By default the encoder tries to strike a balance between UASTC and transcoded BC7 quality. + // These flags allow you to favor only optimizing for lowest UASTC error, or lowest BC7 error. + cPackUASTCFavorUASTCError = 8, + cPackUASTCFavorBC7Error = 16, + + cPackUASTCETC1FasterHints = 64, + cPackUASTCETC1FastestHints = 128, + cPackUASTCETC1DisableFlipAndIndividual = 256, + + // Favor UASTC modes 0 and 10 more than the others (this is experimental, it's useful for RDO compression) + cPackUASTCFavorSimplerModes = 512, + }; + + // pRGBAPixels: Pointer to source 4x4 block of RGBA pixels (R first in memory). + // block: Reference to destination UASTC block. + // level: Controls compression speed vs. performance tradeoff. + void encode_uastc(const uint8_t* pRGBAPixels, basist::uastc_block& output_block, uint32_t flags = cPackUASTCLevelDefault); + + struct uastc_encode_results + { + uint32_t m_uastc_mode; + uint32_t m_common_pattern; + basist::astc_block_desc m_astc; + color_rgba m_solid_color; + uint64_t m_astc_err; + }; + + void pack_uastc(basist::uastc_block& blk, const uastc_encode_results& result, const etc_block& etc1_blk, uint32_t etc1_bias, const eac_a8_block& etc_eac_a8_blk, bool bc1_hint0, bool bc1_hint1); + + const uint32_t UASCT_RDO_DEFAULT_LZ_DICT_SIZE = 4096; + + const float UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO = 10.0f; + const float UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH = 8.0f; + + // The RDO encoder computes a smoothness factor, from [0,1], for each block. To do this it computes each block's maximum component variance, then it divides this by this factor and clamps the result. + // Larger values will result in more blocks being protected from too much distortion. 
+ const float UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV = 18.0f; + + // The RDO encoder can artifically boost the error of smooth blocks, in order to suppress distortions on smooth areas of the texture. + // The encoder will use this value as the maximum error scale to use on smooth blocks. The larger this value, the better smooth bocks will look. Set to 1.0 to disable this completely. + const float UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE = 10.0f; + + struct uastc_rdo_params + { + uastc_rdo_params() + { + clear(); + } + + void clear() + { + m_lz_dict_size = UASCT_RDO_DEFAULT_LZ_DICT_SIZE; + m_lambda = 0.5f; + m_max_allowed_rms_increase_ratio = UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO; + m_skip_block_rms_thresh = UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH; + m_endpoint_refinement = true; + m_lz_literal_cost = 100; + + m_max_smooth_block_std_dev = UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV; + m_smooth_block_max_error_scale = UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE; + } + + // m_lz_dict_size: Size of LZ dictionary to simulate in bytes. The larger this value, the slower the encoder but the higher the quality per LZ compressed bit. + uint32_t m_lz_dict_size; + + // m_lambda: The post-processor tries to reduce distortion+rate*lambda (rate is approximate LZ bits and distortion is scaled MS error). + // Larger values push the postprocessor towards optimizing more for lower rate, and smaller values more for distortion. 0=minimal distortion. + float m_lambda; + + // m_max_allowed_rms_increase_ratio: How much the RMS error of a block is allowed to increase before a trial is rejected. 1.0=no increase allowed, 1.05=5% increase allowed, etc. + float m_max_allowed_rms_increase_ratio; + + // m_skip_block_rms_thresh: Blocks with this much RMS error or more are completely skipped by the RDO encoder. + float m_skip_block_rms_thresh; + + // m_endpoint_refinement: If true, the post-process will attempt to refine the endpoints of blocks with modified selectors. 
+ bool m_endpoint_refinement; + + float m_max_smooth_block_std_dev; + float m_smooth_block_max_error_scale; + + uint32_t m_lz_literal_cost; + }; + + // num_blocks, pBlocks: Number of blocks and pointer to UASTC blocks to process. + // pBlock_pixels: Pointer to an array of 4x4 blocks containing the original texture pixels. This is NOT a raster image, but a pointer to individual 4x4 blocks. + // flags: Pass in the same flags used to encode the UASTC blocks. The flags are used to reencode the transcode hints in the same way. + bool uastc_rdo(uint32_t num_blocks, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params &params, uint32_t flags = cPackUASTCLevelDefault, job_pool* pJob_pool = nullptr, uint32_t total_jobs = 0); +} // namespace basisu diff --git a/thirdparty/basisu/encoder/basisu_uastc_hdr_4x4_enc.cpp b/thirdparty/basisu/encoder/basisu_uastc_hdr_4x4_enc.cpp new file mode 100644 index 000000000..dd9d6fbb3 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_uastc_hdr_4x4_enc.cpp @@ -0,0 +1,1277 @@ +// basisu_uastc_hdr_4x4_enc.cpp +#include "basisu_uastc_hdr_4x4_enc.h" +#include "../transcoder/basisu_transcoder.h" + +using namespace basist; + +namespace basisu +{ + +const uint32_t UHDR_MODE11_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, UHDR_MODE11_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS; +const uint32_t UHDR_MODE7_PART1_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, UHDR_MODE7_PART1_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS; +const uint32_t UHDR_MODE7_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, UHDR_MODE7_PART2_LAST_ISE_RANGE = astc_helpers::BISE_8_LEVELS; +const uint32_t UHDR_MODE11_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, UHDR_MODE11_PART2_LAST_ISE_RANGE = astc_helpers::BISE_4_LEVELS; + +uastc_hdr_4x4_codec_options::uastc_hdr_4x4_codec_options() : + astc_hdr_codec_base_options() +{ + init(); +} + +void uastc_hdr_4x4_codec_options::init() +{ + astc_hdr_codec_base_options::init(); + + // This was the
log bias we used on the initial release. It's too low. + //m_q_log_bias = Q_LOG_BIAS_4x4; + + m_q_log_bias = Q_LOG_BIAS_6x6; + + m_bc6h_err_weight = .85f; + +#if 0 + // HACK HACK + m_disable_weight_plane_optimization = true; + m_take_first_non_clamping_mode11_submode = false; + m_take_first_non_clamping_mode7_submode = false; +#endif + + // Must set the quality level at least once to reset this struct. + set_quality_level(cDefaultLevel); +} + +void uastc_hdr_4x4_codec_options::set_quality_best() +{ + // highest achievable quality + m_mode11_direct_only = false; + + m_use_solid = true; + + m_use_mode11_part1 = true; + m_mode11_uber_mode = true; + m_first_mode11_weight_ise_range = UHDR_MODE11_FIRST_ISE_RANGE; + m_last_mode11_weight_ise_range = UHDR_MODE11_LAST_ISE_RANGE; + m_first_mode11_submode = -1; + m_last_mode11_submode = 7; + + m_use_mode7_part1 = true; + m_first_mode7_part1_weight_ise_range = UHDR_MODE7_PART1_FIRST_ISE_RANGE; + m_last_mode7_part1_weight_ise_range = UHDR_MODE7_PART1_LAST_ISE_RANGE; + m_mode7_full_s_optimization = true; + + m_use_mode7_part2 = true; + m_mode7_part2_part_masks = UINT32_MAX; + m_first_mode7_part2_weight_ise_range = UHDR_MODE7_PART2_FIRST_ISE_RANGE; + m_last_mode7_part2_weight_ise_range = UHDR_MODE7_PART2_LAST_ISE_RANGE; + + m_use_mode11_part2 = true; + m_mode11_part2_part_masks = UINT32_MAX; + m_first_mode11_part2_weight_ise_range = UHDR_MODE11_PART2_FIRST_ISE_RANGE; + m_last_mode11_part2_weight_ise_range = UHDR_MODE11_PART2_LAST_ISE_RANGE; + + m_refine_weights = true; + + m_use_estimated_partitions = false; + m_max_estimated_partitions = 0; +} + +void uastc_hdr_4x4_codec_options::set_quality_normal() +{ + m_use_solid = true; + + // We'll allow uber mode in normal if the user allows it. 
+ m_use_mode11_part1 = true; + m_mode11_uber_mode = true; + m_first_mode11_weight_ise_range = 6; + m_last_mode11_weight_ise_range = UHDR_MODE11_LAST_ISE_RANGE; + + m_use_mode7_part1 = true; + m_first_mode7_part1_weight_ise_range = UHDR_MODE7_PART1_LAST_ISE_RANGE; + m_last_mode7_part1_weight_ise_range = UHDR_MODE7_PART1_LAST_ISE_RANGE; + + m_use_mode7_part2 = true; + m_mode7_part2_part_masks = UINT32_MAX; + m_first_mode7_part2_weight_ise_range = UHDR_MODE7_PART2_LAST_ISE_RANGE; + m_last_mode7_part2_weight_ise_range = UHDR_MODE7_PART2_LAST_ISE_RANGE; + + m_use_mode11_part2 = true; + m_mode11_part2_part_masks = UINT32_MAX; + m_first_mode11_part2_weight_ise_range = UHDR_MODE11_PART2_LAST_ISE_RANGE; + m_last_mode11_part2_weight_ise_range = UHDR_MODE11_PART2_LAST_ISE_RANGE; + + m_refine_weights = true; +} + +void uastc_hdr_4x4_codec_options::set_quality_fastest() +{ + m_use_solid = true; + + m_use_mode11_part1 = true; + m_mode11_uber_mode = false; + m_first_mode11_weight_ise_range = UHDR_MODE11_LAST_ISE_RANGE; + m_last_mode11_weight_ise_range = UHDR_MODE11_LAST_ISE_RANGE; + + m_use_mode7_part1 = false; + m_mode7_full_s_optimization = false; + + m_use_mode7_part2 = false; + m_use_mode11_part2 = false; + + m_refine_weights = false; +} + +void uastc_hdr_4x4_codec_options::set_quality_level(int level) +{ + level = clamp(level, cMinLevel, cMaxLevel); + + m_level = level; + + // First ensure all options are set to best. 
+ set_quality_best(); + + switch (level) + { + case 0: + { + set_quality_fastest(); + break; + } + case 1: + { + set_quality_normal(); + + m_first_mode11_weight_ise_range = UHDR_MODE11_LAST_ISE_RANGE - 1; + m_last_mode11_weight_ise_range = UHDR_MODE11_LAST_ISE_RANGE; + + m_use_mode7_part1 = false; + m_mode7_full_s_optimization = false; + m_use_mode7_part2 = false; + + m_use_estimated_partitions = true; + m_max_estimated_partitions = 1; + + m_mode11_part2_part_masks = 1 | 2; + m_mode7_part2_part_masks = 1 | 2; + + // TODO: Disabling this hurts BC6H quality, but significantly speeds up compression. + //m_refine_weights = false; + break; + } + case 2: + { + set_quality_normal(); + + m_use_estimated_partitions = true; + m_max_estimated_partitions = 2; + + m_mode11_part2_part_masks = 1 | 2; + m_mode7_part2_part_masks = 1 | 2; + + break; + } + case 3: + { + m_use_estimated_partitions = true; + m_max_estimated_partitions = 2; + + m_mode11_part2_part_masks = 1 | 2 | 4 | 8; + m_mode7_part2_part_masks = 1 | 2 | 4 | 8; + + break; + } + default: + { + // best options already set + break; + } + } +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static bool pack_solid(const vec4F* pBlock_linear_colors, basisu::vector& all_results, const uastc_hdr_4x4_codec_options& coptions) +{ + float r = 0.0f, g = 0.0f, b = 0.0f; + + const float LOG_BIAS = .125f; + + bool solid_block = true; + for (uint32_t i = 0; i < 16; i++) + { + if ((pBlock_linear_colors[0][0] != pBlock_linear_colors[i][0]) || + (pBlock_linear_colors[0][1] != pBlock_linear_colors[i][1]) || + (pBlock_linear_colors[0][2] != pBlock_linear_colors[i][2])) + { + solid_block = false; + } + + r += log2f(pBlock_linear_colors[i][0] + LOG_BIAS); + g += log2f(pBlock_linear_colors[i][1] + LOG_BIAS); + b += log2f(pBlock_linear_colors[i][2] + LOG_BIAS); + } + + if (solid_block) + { + r = pBlock_linear_colors[0][0]; + g = pBlock_linear_colors[0][1]; + b = 
pBlock_linear_colors[0][2]; + } + else + { + r = maximum(0.0f, powf(2.0f, r * (1.0f / 16.0f)) - LOG_BIAS); + g = maximum(0.0f, powf(2.0f, g * (1.0f / 16.0f)) - LOG_BIAS); + b = maximum(0.0f, powf(2.0f, b * (1.0f / 16.0f)) - LOG_BIAS); + + // for safety + r = minimum(r, MAX_HALF_FLOAT); + g = minimum(g, MAX_HALF_FLOAT); + b = minimum(b, MAX_HALF_FLOAT); + } + + half_float rh = float_to_half_non_neg_no_nan_inf(r), gh = float_to_half_non_neg_no_nan_inf(g), bh = float_to_half_non_neg_no_nan_inf(b), ah = float_to_half_non_neg_no_nan_inf(1.0f); + + astc_hdr_4x4_pack_results results; + results.clear(); + + uint8_t* packed_blk = (uint8_t*)&results.m_solid_blk; + results.m_is_solid = true; + + packed_blk[0] = 0b11111100; + packed_blk[1] = 255; + packed_blk[2] = 255; + packed_blk[3] = 255; + packed_blk[4] = 255; + packed_blk[5] = 255; + packed_blk[6] = 255; + packed_blk[7] = 255; + + packed_blk[8] = (uint8_t)rh; + packed_blk[9] = (uint8_t)(rh >> 8); + packed_blk[10] = (uint8_t)gh; + packed_blk[11] = (uint8_t)(gh >> 8); + packed_blk[12] = (uint8_t)bh; + packed_blk[13] = (uint8_t)(bh >> 8); + packed_blk[14] = (uint8_t)ah; + packed_blk[15] = (uint8_t)(ah >> 8); + + results.m_best_block_error = 0; + + if (!solid_block) + { + const float R_WEIGHT = coptions.m_r_err_scale; + const float G_WEIGHT = coptions.m_g_err_scale; + + // This MUST match how errors are computed in eval_selectors(). 
+ for (uint32_t i = 0; i < 16; i++) + { + half_float dr = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][0]), dg = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][1]), db = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][2]); + double rd = q(rh, Q_LOG_BIAS_4x4) - q(dr, Q_LOG_BIAS_4x4); + double gd = q(gh, Q_LOG_BIAS_4x4) - q(dg, Q_LOG_BIAS_4x4); + double bd = q(bh, Q_LOG_BIAS_4x4) - q(db, Q_LOG_BIAS_4x4); + + double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd; + + results.m_best_block_error += e; + } + } + + const half_float hc[3] = { rh, gh, bh }; + + bc6h_enc_block_solid_color(&results.m_bc6h_block, hc); + + all_results.push_back(results); + + return solid_block; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static void pack_mode11( + const vec4F* pBlock_linear_colors, const half_float pBlock_pixels_half[16][3], const vec4F pBlock_pixels_q16[16], + basisu::vector& all_results, + const uastc_hdr_4x4_codec_options& coptions, + uint32_t first_weight_ise_range, uint32_t last_weight_ise_range, bool constrain_ise_weight_selectors) +{ + BASISU_NOTE_UNUSED(pBlock_linear_colors); + assert(first_weight_ise_range <= last_weight_ise_range); + + uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS], trial_weights[16]; + uint32_t trial_submode11 = 0; + + clear_obj(trial_endpoints); + clear_obj(trial_weights); + + for (uint32_t weight_ise_range = first_weight_ise_range; weight_ise_range <= last_weight_ise_range; weight_ise_range++) + { + const bool direct_only = coptions.m_mode11_direct_only; + + uint32_t endpoint_ise_range = astc_helpers::BISE_256_LEVELS; + if (weight_ise_range == astc_helpers::BISE_16_LEVELS) + endpoint_ise_range = astc_helpers::BISE_192_LEVELS; + else + { + assert(weight_ise_range < astc_helpers::BISE_16_LEVELS); + } + + double trial_error = encode_astc_hdr_block_mode_11(16, pBlock_pixels_half, pBlock_pixels_q16, weight_ise_range, 
trial_submode11, BIG_FLOAT_VAL, trial_endpoints, trial_weights, coptions, direct_only, + endpoint_ise_range, coptions.m_mode11_uber_mode && (weight_ise_range >= astc_helpers::BISE_4_LEVELS) && coptions.m_allow_uber_mode, constrain_ise_weight_selectors, coptions.m_first_mode11_submode, coptions.m_last_mode11_submode, false, cOrdinaryLeastSquares); + + if (trial_error < BIG_FLOAT_VAL) + { + astc_hdr_4x4_pack_results results; + results.clear(); + + results.m_best_block_error = trial_error; + + results.m_best_submodes[0] = trial_submode11; + results.m_constrained_weights = constrain_ise_weight_selectors; + + results.m_best_blk.m_num_partitions = 1; + results.m_best_blk.m_color_endpoint_modes[0] = 11; + results.m_best_blk.m_weight_ise_range = (uint8_t)weight_ise_range; + results.m_best_blk.m_endpoint_ise_range = (uint8_t)endpoint_ise_range; + + memcpy(results.m_best_blk.m_endpoints, trial_endpoints, NUM_MODE11_ENDPOINTS); + memcpy(results.m_best_blk.m_weights, trial_weights, 16); + +#ifdef _DEBUG + // Sanity checking + { + half_float block_pixels_half[16][3]; + + for (uint32_t i = 0; i < 16; i++) + { + block_pixels_half[i][0] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][0]); + block_pixels_half[i][1] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][1]); + block_pixels_half[i][2] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][2]); + } + + half_float unpacked_astc_blk_rgba[4][4][4]; + bool res = astc_helpers::decode_block(results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16); + assert(res); + + half_float unpacked_astc_blk_rgb[4][4][3]; + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + for (uint32_t c = 0; c < 3; c++) + unpacked_astc_blk_rgb[y][x][c] = unpacked_astc_blk_rgba[y][x][c]; + + double cmp_err = compute_block_error(16, &block_pixels_half[0][0], &unpacked_astc_blk_rgb[0][0][0], coptions); + assert(results.m_best_block_error == cmp_err); + } +#endif + + // transcode to BC6H + 
assert(results.m_best_blk.m_color_endpoint_modes[0] == 11); + + // Get qlog12 endpoints + int e[2][3]; + bool success = decode_mode11_to_qlog12(results.m_best_blk.m_endpoints, e, results.m_best_blk.m_endpoint_ise_range); + assert(success); + BASISU_NOTE_UNUSED(success); + + // Transform endpoints to half float + half_float h_e[3][2] = + { + { qlog_to_half(e[0][0], 12), qlog_to_half(e[1][0], 12) }, + { qlog_to_half(e[0][1], 12), qlog_to_half(e[1][1], 12) }, + { qlog_to_half(e[0][2], 12), qlog_to_half(e[1][2], 12) } + }; + + // Transcode to bc6h + success = transcode_bc6h_1subset(h_e, results.m_best_blk, results.m_bc6h_block); + assert(success); + + all_results.push_back(results); + } + } +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static void pack_mode7_single_part( + const half_float pBlock_pixels_half[16][3], const vec4F pBlock_pixels_q16[16], + basisu::vector& all_results, const uastc_hdr_4x4_codec_options& coptions, + uint32_t first_mode7_part1_weight_ise_range, uint32_t last_mode7_part1_weight_ise_range) +{ + assert(first_mode7_part1_weight_ise_range <= last_mode7_part1_weight_ise_range); + + uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS], trial_weights[16]; + uint32_t trial_submode7 = 0; + + clear_obj(trial_endpoints); + clear_obj(trial_weights); + + for (uint32_t weight_ise_range = first_mode7_part1_weight_ise_range; weight_ise_range <= last_mode7_part1_weight_ise_range; weight_ise_range++) + { + const uint32_t ise_endpoint_range = astc_helpers::BISE_256_LEVELS; + + double trial_error = encode_astc_hdr_block_mode_7(16, pBlock_pixels_half, pBlock_pixels_q16, weight_ise_range, trial_submode7, BIG_FLOAT_VAL, trial_endpoints, trial_weights, coptions, ise_endpoint_range); + + if (trial_error < BIG_FLOAT_VAL) + { + astc_hdr_4x4_pack_results results; + results.clear(); + + results.m_best_block_error = trial_error; + + results.m_best_submodes[0] = trial_submode7; + + 
results.m_best_blk.m_num_partitions = 1; + results.m_best_blk.m_color_endpoint_modes[0] = 7; + results.m_best_blk.m_weight_ise_range = (uint8_t)weight_ise_range; + results.m_best_blk.m_endpoint_ise_range = (uint8_t)ise_endpoint_range; + + memcpy(results.m_best_blk.m_endpoints, trial_endpoints, NUM_MODE7_ENDPOINTS); + memcpy(results.m_best_blk.m_weights, trial_weights, 16); + + // transcode to BC6H + assert(results.m_best_blk.m_color_endpoint_modes[0] == 7); + + // Get qlog12 endpoints + int e[2][3]; + if (!decode_mode7_to_qlog12(results.m_best_blk.m_endpoints, e, nullptr, results.m_best_blk.m_endpoint_ise_range)) + continue; + + // Transform endpoints to half float + half_float h_e[3][2] = + { + { qlog_to_half(e[0][0], 12), qlog_to_half(e[1][0], 12) }, + { qlog_to_half(e[0][1], 12), qlog_to_half(e[1][1], 12) }, + { qlog_to_half(e[0][2], 12), qlog_to_half(e[1][2], 12) } + }; + + // Transcode to bc6h + bool status = transcode_bc6h_1subset(h_e, results.m_best_blk, results.m_bc6h_block); + assert(status); + (void)status; + + all_results.push_back(results); + } + } +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static bool estimate_partition( + const half_float pBlock_pixels_half[16][3], + int* pBest_parts, uint32_t num_best_parts) +{ + assert(num_best_parts <= basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); + + vec3F training_vecs[16], mean(0.0f); + + for (uint32_t i = 0; i < 16; i++) + { + vec3F& v = training_vecs[i]; + + v[0] = (float)pBlock_pixels_half[i][0]; + v[1] = (float)pBlock_pixels_half[i][1]; + v[2] = (float)pBlock_pixels_half[i][2]; + + mean += v; + } + mean *= (1.0f / 16.0f); + + vec3F cluster_centroids[2] = { mean - vec3F(.1f), mean + vec3F(.1f) }; + + uint32_t cluster_pixels[2][16]; + uint32_t num_cluster_pixels[2]; + vec3F new_cluster_means[2]; + + for (uint32_t s = 0; s < 4; s++) + { + num_cluster_pixels[0] = 0; + num_cluster_pixels[1] = 0; + + 
new_cluster_means[0].clear(); + new_cluster_means[1].clear(); + + for (uint32_t i = 0; i < 16; i++) + { + float d0 = training_vecs[i].squared_distance(cluster_centroids[0]); + float d1 = training_vecs[i].squared_distance(cluster_centroids[1]); + + if (d0 < d1) + { + cluster_pixels[0][num_cluster_pixels[0]] = i; + new_cluster_means[0] += training_vecs[i]; + num_cluster_pixels[0]++; + } + else + { + cluster_pixels[1][num_cluster_pixels[1]] = i; + new_cluster_means[1] += training_vecs[i]; + num_cluster_pixels[1]++; + } + } + + if (!num_cluster_pixels[0] || !num_cluster_pixels[1]) + return false; + + cluster_centroids[0] = new_cluster_means[0] / (float)num_cluster_pixels[0]; + cluster_centroids[1] = new_cluster_means[1] / (float)num_cluster_pixels[1]; + } + + int desired_parts[4][4]; // [y][x] + for (uint32_t p = 0; p < 2; p++) + { + for (uint32_t i = 0; i < num_cluster_pixels[p]; i++) + { + const uint32_t pix_index = cluster_pixels[p][i]; + + desired_parts[pix_index >> 2][pix_index & 3] = p; + } + } + + uint32_t part_similarity[basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2]; + + for (uint32_t part_index = 0; part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; part_index++) + { + const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7; + + int total_sim_non_inv = 0; + int total_sim_inv = 0; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + int part = basist::g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + + if (part == desired_parts[y][x]) + total_sim_non_inv++; + + if ((part ^ 1) == desired_parts[y][x]) + total_sim_inv++; + } + } + + int total_sim = maximum(total_sim_non_inv, total_sim_inv); + + part_similarity[part_index] = (total_sim << 8) | part_index; + + } // part_index; + + std::sort(part_similarity, part_similarity + basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); + + for (uint32_t i = 0; i < num_best_parts; i++) + pBest_parts[i] = part_similarity[(basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 - 1) - i] 
& 0xFF; + + return true; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static void pack_mode7_2part( + const half_float pBlock_pixels_half[16][3], const vec4F pBlock_pixels_q16[16], + basisu::vector& all_results, const uastc_hdr_4x4_codec_options& coptions, + int num_estimated_partitions, const int *pEstimated_partitions, + uint32_t first_weight_ise_range, uint32_t last_weight_ise_range) +{ + assert(coptions.m_mode7_part2_part_masks); + + astc_helpers::log_astc_block trial_blk; + clear_obj(trial_blk); + trial_blk.m_grid_width = 4; + trial_blk.m_grid_height = 4; + + trial_blk.m_num_partitions = 2; + trial_blk.m_color_endpoint_modes[0] = 7; + trial_blk.m_color_endpoint_modes[1] = 7; + + uint32_t first_part_index = 0, last_part_index = basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; + + if (num_estimated_partitions) + { + first_part_index = 0; + last_part_index = num_estimated_partitions; + } + + for (uint32_t part_index_iter = first_part_index; part_index_iter < last_part_index; ++part_index_iter) + { + uint32_t part_index; + if (num_estimated_partitions) + { + part_index = pEstimated_partitions[part_index_iter]; + assert(part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); + } + else + { + part_index = part_index_iter; + if (((1U << part_index) & coptions.m_mode7_part2_part_masks) == 0) + continue; + } + + const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc; + const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7; + const bool invert_flag = basist::g_astc_bc7_common_partitions2[part_index].m_invert; + + half_float part_pixels_half[2][16][3]; + vec4F part_pixels_q16[2][16]; + + uint32_t pixel_part_index[4][4]; // [y][x] + uint32_t num_part_pixels[2] = { 0, 0 }; + + // Extract each subset's texels for this partition pattern + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t part 
= basist::g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + if (invert_flag) + part = 1 - part; + + pixel_part_index[y][x] = part; + + const uint32_t n = num_part_pixels[part]; + + part_pixels_half[part][n][0] = pBlock_pixels_half[x + y * 4][0]; + part_pixels_half[part][n][1] = pBlock_pixels_half[x + y * 4][1]; + part_pixels_half[part][n][2] = pBlock_pixels_half[x + y * 4][2]; + part_pixels_q16[part][n] = pBlock_pixels_q16[x + y * 4]; + + num_part_pixels[part] = n + 1; + } + } + + trial_blk.m_partition_id = (uint16_t)astc_pattern; + + for (uint32_t weight_ise_range = first_weight_ise_range; weight_ise_range <= last_weight_ise_range; weight_ise_range++) + { + assert(weight_ise_range <= astc_helpers::BISE_8_LEVELS); + + uint32_t ise_endpoint_range = astc_helpers::BISE_256_LEVELS; + if (weight_ise_range == astc_helpers::BISE_5_LEVELS) + ise_endpoint_range = astc_helpers::BISE_192_LEVELS; + else if (weight_ise_range == astc_helpers::BISE_6_LEVELS) + ise_endpoint_range = astc_helpers::BISE_128_LEVELS; + else if (weight_ise_range == astc_helpers::BISE_8_LEVELS) + ise_endpoint_range = astc_helpers::BISE_80_LEVELS; + + uint8_t trial_endpoints[2][NUM_MODE7_ENDPOINTS], trial_weights[2][16]; + uint32_t trial_submode7[2]; + + clear_obj(trial_endpoints); + clear_obj(trial_weights); + clear_obj(trial_submode7); + + double total_trial_err = 0; + for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++) + { + total_trial_err += encode_astc_hdr_block_mode_7( + num_part_pixels[pack_part_index], part_pixels_half[pack_part_index], part_pixels_q16[pack_part_index], + weight_ise_range, trial_submode7[pack_part_index], BIG_FLOAT_VAL, + &trial_endpoints[pack_part_index][0], &trial_weights[pack_part_index][0], coptions, ise_endpoint_range); + + } // pack_part_index + + if (total_trial_err < BIG_FLOAT_VAL) + { + trial_blk.m_weight_ise_range = (uint8_t)weight_ise_range; + trial_blk.m_endpoint_ise_range = (uint8_t)ise_endpoint_range; + + for (uint32_t pack_part_index = 0; 
pack_part_index < 2; pack_part_index++) + memcpy(&trial_blk.m_endpoints[pack_part_index * NUM_MODE7_ENDPOINTS], &trial_endpoints[pack_part_index][0], NUM_MODE7_ENDPOINTS); + + uint32_t src_pixel_index[2] = { 0, 0 }; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t p = pixel_part_index[y][x]; + trial_blk.m_weights[x + y * 4] = trial_weights[p][src_pixel_index[p]++]; + } + } + + astc_hdr_4x4_pack_results results; + results.clear(); + + results.m_best_block_error = total_trial_err; + results.m_best_submodes[0] = trial_submode7[0]; + results.m_best_submodes[1] = trial_submode7[1]; + results.m_best_pat_index = part_index; + + results.m_best_blk = trial_blk; + + bool status = transcode_bc6h_2subsets(part_index, results.m_best_blk, results.m_bc6h_block); + assert(status); + BASISU_NOTE_UNUSED(status); + + all_results.push_back(results); + } + + } // weight_ise_range + + } // part_index +} + +//-------------------------------------------------------------------------------------------------------------------------- + +static void pack_mode11_2part( + const half_float pBlock_pixels_half[16][3], const vec4F pBlock_pixels_q16[16], + basisu::vector& all_results, const uastc_hdr_4x4_codec_options& coptions, + int num_estimated_partitions, const int* pEstimated_partitions) +{ + assert(coptions.m_mode11_part2_part_masks); + + astc_helpers::log_astc_block trial_blk; + clear_obj(trial_blk); + trial_blk.m_grid_width = 4; + trial_blk.m_grid_height = 4; + + trial_blk.m_num_partitions = 2; + trial_blk.m_color_endpoint_modes[0] = 11; + trial_blk.m_color_endpoint_modes[1] = 11; + + uint32_t first_part_index = 0, last_part_index = basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; + + if (num_estimated_partitions) + { + first_part_index = 0; + last_part_index = num_estimated_partitions; + } + + for (uint32_t part_index_iter = first_part_index; part_index_iter < last_part_index; ++part_index_iter) + { + uint32_t part_index; + if (num_estimated_partitions) + 
{ + part_index = pEstimated_partitions[part_index_iter]; + assert(part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); + } + else + { + part_index = part_index_iter; + if (((1U << part_index) & coptions.m_mode11_part2_part_masks) == 0) + continue; + } + + const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc; + const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7; + const bool invert_flag = basist::g_astc_bc7_common_partitions2[part_index].m_invert; + + half_float part_pixels_half[2][16][3]; + vec4F part_pixels_q16[2][16]; + + uint32_t pixel_part_index[4][4]; // [y][x] + uint32_t num_part_pixels[2] = { 0, 0 }; + + // Extract each subset's texels for this partition pattern + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t part = basist::g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + if (invert_flag) + part = 1 - part; + + pixel_part_index[y][x] = part; + + const uint32_t n = num_part_pixels[part]; + + part_pixels_half[part][n][0] = pBlock_pixels_half[x + y * 4][0]; + part_pixels_half[part][n][1] = pBlock_pixels_half[x + y * 4][1]; + part_pixels_half[part][n][2] = pBlock_pixels_half[x + y * 4][2]; + part_pixels_q16[part][n] = pBlock_pixels_q16[x + y * 4]; + + num_part_pixels[part] = n + 1; + } + } + + trial_blk.m_partition_id = (uint16_t)astc_pattern; + + for (uint32_t weight_ise_range = coptions.m_first_mode11_part2_weight_ise_range; weight_ise_range <= coptions.m_last_mode11_part2_weight_ise_range; weight_ise_range++) + { + bool direct_only = false; + uint32_t ise_endpoint_range = astc_helpers::BISE_64_LEVELS; + if (weight_ise_range == astc_helpers::BISE_4_LEVELS) + ise_endpoint_range = astc_helpers::BISE_40_LEVELS; + + uint8_t trial_endpoints[2][NUM_MODE11_ENDPOINTS], trial_weights[2][16]; + uint32_t trial_submode11[2]; + + clear_obj(trial_endpoints); + clear_obj(trial_weights); + clear_obj(trial_submode11); + + double total_trial_err = 0; + for 
(uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++) + { + total_trial_err += encode_astc_hdr_block_mode_11( + num_part_pixels[pack_part_index], part_pixels_half[pack_part_index], part_pixels_q16[pack_part_index], + weight_ise_range, trial_submode11[pack_part_index], BIG_FLOAT_VAL, + &trial_endpoints[pack_part_index][0], &trial_weights[pack_part_index][0], coptions, + direct_only, ise_endpoint_range, coptions.m_mode11_uber_mode && (weight_ise_range >= astc_helpers::BISE_4_LEVELS) && coptions.m_allow_uber_mode, false, + coptions.m_first_mode11_submode, coptions.m_last_mode11_submode, false, cOrdinaryLeastSquares); + + } // pack_part_index + + if (total_trial_err < BIG_FLOAT_VAL) + { + trial_blk.m_weight_ise_range = (uint8_t)weight_ise_range; + trial_blk.m_endpoint_ise_range = (uint8_t)ise_endpoint_range; + + for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++) + memcpy(&trial_blk.m_endpoints[pack_part_index * NUM_MODE11_ENDPOINTS], &trial_endpoints[pack_part_index][0], NUM_MODE11_ENDPOINTS); + + uint32_t src_pixel_index[2] = { 0, 0 }; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t p = pixel_part_index[y][x]; + trial_blk.m_weights[x + y * 4] = trial_weights[p][src_pixel_index[p]++]; + } + } + + astc_hdr_4x4_pack_results results; + results.clear(); + + results.m_best_block_error = total_trial_err; + results.m_best_submodes[0] = trial_submode11[0]; + results.m_best_submodes[1] = trial_submode11[1]; + results.m_best_pat_index = part_index; + + results.m_best_blk = trial_blk; + + bool status = transcode_bc6h_2subsets(part_index, results.m_best_blk, results.m_bc6h_block); + assert(status); + BASISU_NOTE_UNUSED(status); + + all_results.push_back(results); + } + + } // weight_ise_range + + } // part_index +} + +bool astc_hdr_4x4_enc_block( + const float* pRGBPixels, const basist::half_float *pRGBPixelsHalf, + const uastc_hdr_4x4_codec_options& coptions, + basisu::vector& all_results) +{ + 
assert(g_astc_hdr_enc_initialized); + if (!g_astc_hdr_enc_initialized) + { + // astc_hdr_enc_init() MUST be called first. + assert(0); + return false; + } + + assert(coptions.m_use_solid || coptions.m_use_mode11_part1 || coptions.m_use_mode7_part2 || coptions.m_use_mode7_part1 || coptions.m_use_mode11_part2); + + all_results.resize(0); + + const half_float (*pBlock_pixels_half)[16][3] = reinterpret_cast(pRGBPixelsHalf); + + vec4F block_linear_colors[16]; + vec4F block_pixels_q16[16]; + + bool is_greyscale = true; + + for (uint32_t i = 0; i < 16; i++) + { + const float fr = pRGBPixels[i * 3 + 0], fg = pRGBPixels[i * 3 + 1], fb = pRGBPixels[i * 3 + 2]; + + // Sanity check the input block. + assert((fr >= 0) && (fr <= MAX_HALF_FLOAT) && (!std::isinf(fr)) && (!std::isnan(fr))); + assert((fg >= 0) && (fg <= MAX_HALF_FLOAT) && (!std::isinf(fg)) && (!std::isnan(fg))); + assert((fb >= 0) && (fb <= MAX_HALF_FLOAT) && (!std::isinf(fb)) && (!std::isnan(fb))); + + block_linear_colors[i].set(fr, fg, fb, 1.0f); + + const half_float hr = (*pBlock_pixels_half)[i][0]; + assert(hr == basist::float_to_half(fr)); + block_pixels_q16[i][0] = (float)half_to_qlog16(hr); + + const half_float hg = (*pBlock_pixels_half)[i][1]; + assert(hg == basist::float_to_half(fg)); + block_pixels_q16[i][1] = (float)half_to_qlog16(hg); + + const half_float hb = (*pBlock_pixels_half)[i][2]; + assert(hb == basist::float_to_half(fb)); + block_pixels_q16[i][2] = (float)half_to_qlog16(hb); + + block_pixels_q16[i][3] = 0.0f; + + if ((hr != hg) || (hr != hb)) + is_greyscale = false; + } // i + + bool is_solid = false; + if (coptions.m_use_solid) + is_solid = pack_solid(block_linear_colors, all_results, coptions); + + if (!is_solid) + { + if ((is_greyscale) && (coptions.m_level == 0)) + { + // Special case if it's a pure grayscale block - just try mode 7. 
+ pack_mode7_single_part(*pBlock_pixels_half, block_pixels_q16, all_results, coptions, 1, 1); + pack_mode7_single_part(*pBlock_pixels_half, block_pixels_q16, all_results, coptions, UHDR_MODE7_PART1_LAST_ISE_RANGE, UHDR_MODE7_PART1_LAST_ISE_RANGE); + } + else + { + if (coptions.m_use_mode11_part1) + { + const size_t cur_num_results = all_results.size(); + + pack_mode11(block_linear_colors, *pBlock_pixels_half, block_pixels_q16, all_results, coptions, coptions.m_first_mode11_weight_ise_range, coptions.m_last_mode11_weight_ise_range, false); + + if (coptions.m_last_mode11_weight_ise_range >= astc_helpers::BISE_12_LEVELS) + { + // Try constrained weights if we're allowed to use 12/16 level ISE weight modes + pack_mode11(block_linear_colors, *pBlock_pixels_half, block_pixels_q16, all_results, coptions, maximum(coptions.m_first_mode11_weight_ise_range, astc_helpers::BISE_12_LEVELS), coptions.m_last_mode11_weight_ise_range, true); + } + + // If we couldn't get any mode 11 results at all, and we were restricted to just trying weight ISE range 8 (which required endpoint quantization) then + // fall back to weight ISE range 7 (which doesn't need any endpoint quantization). + // This is to guarantee we always get at least 1 non-solid result. + if (all_results.size() == cur_num_results) + { + if (coptions.m_first_mode11_weight_ise_range == astc_helpers::BISE_16_LEVELS) + { + pack_mode11(block_linear_colors, *pBlock_pixels_half, block_pixels_q16, all_results, coptions, astc_helpers::BISE_12_LEVELS, astc_helpers::BISE_12_LEVELS, false); + } + } + } + + if (coptions.m_use_mode7_part1) + { + // Mode 7 1-subset never requires endpoint quantization, so it cannot fail to find at least one usable solution. 
+ pack_mode7_single_part(*pBlock_pixels_half, block_pixels_q16, all_results, coptions, coptions.m_first_mode7_part1_weight_ise_range, coptions.m_last_mode7_part1_weight_ise_range); + } + else if (is_greyscale) + { + // Special case if it's a pure grayscale block and mode 7 was disabled - try it anyway, because mode 11 has worse B channel quantization. + pack_mode7_single_part(*pBlock_pixels_half, block_pixels_q16, all_results, coptions, 1, 1); + pack_mode7_single_part(*pBlock_pixels_half, block_pixels_q16, all_results, coptions, UHDR_MODE7_PART1_LAST_ISE_RANGE, UHDR_MODE7_PART1_LAST_ISE_RANGE); + } + } + + bool have_est = false; + int best_parts[basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2]; + + if ((coptions.m_use_mode7_part2) || (coptions.m_use_mode11_part2)) + { + if (coptions.m_use_estimated_partitions) + have_est = estimate_partition(*pBlock_pixels_half, best_parts, coptions.m_max_estimated_partitions); + } + + if (coptions.m_use_mode7_part2) + { + const size_t cur_num_results = all_results.size(); + + pack_mode7_2part(*pBlock_pixels_half, block_pixels_q16, + all_results, coptions, have_est ? coptions.m_max_estimated_partitions : 0, best_parts, + coptions.m_first_mode7_part2_weight_ise_range, coptions.m_last_mode7_part2_weight_ise_range); + + // If we couldn't find any packable 2-subset mode 7 results at weight levels >= 5 levels (which always requires endpoint quant), then try falling back to + // 5 levels which doesn't require endpoint quantization. + if (all_results.size() == cur_num_results) + { + if (coptions.m_first_mode7_part2_weight_ise_range >= astc_helpers::BISE_5_LEVELS) + { + pack_mode7_2part(*pBlock_pixels_half, block_pixels_q16, + all_results, coptions, have_est ? coptions.m_max_estimated_partitions : 0, best_parts, + astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_4_LEVELS); + } + } + } + + if (coptions.m_use_mode11_part2) + { + // This always requires endpoint quant, so it could fail to find any usable solutions. 
+ pack_mode11_2part(*pBlock_pixels_half, block_pixels_q16, all_results, coptions, have_est ? coptions.m_max_estimated_partitions : 0, best_parts); + } + + if (coptions.m_refine_weights) + { + // TODO: This is quite slow. + for (uint32_t i = 0; i < all_results.size(); i++) + { + bool status = astc_hdr_4x4_refine_weights(pRGBPixelsHalf, all_results[i], coptions, coptions.m_bc6h_err_weight, &all_results[i].m_improved_via_refinement_flag); + assert(status); + BASISU_NOTE_UNUSED(status); + } + } + + } // !is_solid + + return true; +} + +bool astc_hdr_4x4_pack_results_to_block(astc_blk& dst_blk, const astc_hdr_4x4_pack_results& results) +{ + assert(g_astc_hdr_enc_initialized); + if (!g_astc_hdr_enc_initialized) + return false; + + if (results.m_is_solid) + { + memcpy(&dst_blk, &results.m_solid_blk, sizeof(results.m_solid_blk)); + } + else + { + bool status = astc_helpers::pack_astc_block((astc_helpers::astc_block&)dst_blk, results.m_best_blk); + if (!status) + { + assert(0); + return false; + } + } + + return true; +} + +// Refines a block's chosen weight indices, balancing BC6H and ASTC HDR error. 
+bool astc_hdr_4x4_refine_weights(const half_float *pSource_block, + astc_hdr_4x4_pack_results& cur_results, const uastc_hdr_4x4_codec_options& coptions, float bc6h_weight, bool *pImproved_flag) +{ + if (pImproved_flag) + *pImproved_flag = false; + + if (cur_results.m_is_solid) + return true; + + const uint32_t total_weights = astc_helpers::get_ise_levels(cur_results.m_best_blk.m_weight_ise_range); + assert((total_weights >= MIN_SUPPORTED_WEIGHT_LEVELS) && (total_weights <= MAX_SUPPORTED_WEIGHT_LEVELS)); + + double best_err[4][4]; + uint8_t best_weight[4][4]; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + best_err[y][x] = BIG_FLOAT_VAL; + best_weight[y][x] = 0; + } + } + + astc_hdr_4x4_pack_results temp_results; + + const float c_weights[3] = { coptions.m_r_err_scale, coptions.m_g_err_scale, 1.0f }; + + for (uint32_t weight_index = 0; weight_index < total_weights; weight_index++) + { + temp_results = cur_results; + for (uint32_t i = 0; i < 16; i++) + temp_results.m_best_blk.m_weights[i] = (uint8_t)weight_index; + + half_float unpacked_astc_blk_rgba[4][4][4]; + bool res = astc_helpers::decode_block(temp_results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16); + assert(res); + + basist::bc6h_block trial_bc6h_blk; + res = basist::astc_hdr_transcode_to_bc6h(temp_results.m_best_blk, trial_bc6h_blk); + assert(res); + + half_float unpacked_bc6h_blk[4][4][3]; + res = unpack_bc6h(&trial_bc6h_blk, unpacked_bc6h_blk, false); + assert(res); + BASISU_NOTE_UNUSED(res); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + double total_err = 0.0f; + + for (uint32_t c = 0; c < 3; c++) + { + const half_float orig_c = pSource_block[(x + y * 4) * 3 + c]; + const double orig_c_q = q(orig_c, Q_LOG_BIAS_4x4); + + const half_float astc_c = unpacked_astc_blk_rgba[y][x][c]; + const double astc_c_q = q(astc_c, Q_LOG_BIAS_4x4); + const double astc_e = square(astc_c_q - orig_c_q) * c_weights[c]; + + const 
half_float bc6h_c = unpacked_bc6h_blk[y][x][c]; + const double bc6h_c_q = q(bc6h_c, Q_LOG_BIAS_4x4); + const double bc6h_e = square(bc6h_c_q - orig_c_q) * c_weights[c]; + + const double overall_err = astc_e * (1.0f - bc6h_weight) + bc6h_e * bc6h_weight; + + total_err += overall_err; + + } // c + + if (total_err < best_err[y][x]) + { + best_err[y][x] = total_err; + best_weight[y][x] = (uint8_t)weight_index; + } + + } // x + } // y + + } // weight_index + + bool any_changed = false; + for (uint32_t i = 0; i < 16; i++) + { + if (cur_results.m_best_blk.m_weights[i] != best_weight[i >> 2][i & 3]) + { + any_changed = true; + break; + } + } + + if (any_changed) + { + memcpy(cur_results.m_best_blk.m_weights, best_weight, 16); + + { + bool res = basist::astc_hdr_transcode_to_bc6h(cur_results.m_best_blk, cur_results.m_bc6h_block); + assert(res); + BASISU_NOTE_UNUSED(res); + + half_float unpacked_astc_blk_rgba[4][4][4]; + res = astc_helpers::decode_block(cur_results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16); + assert(res); + + half_float unpacked_astc_blk_rgb[4][4][3]; + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + for (uint32_t c = 0; c < 3; c++) + unpacked_astc_blk_rgb[y][x][c] = unpacked_astc_blk_rgba[y][x][c]; + + cur_results.m_best_block_error = compute_block_error(16, pSource_block, &unpacked_astc_blk_rgb[0][0][0], coptions); + } + + if (pImproved_flag) + *pImproved_flag = true; + } + + return true; +} + +void astc_hdr_4x4_block_stats::update(const astc_hdr_4x4_pack_results& log_blk) +{ + std::lock_guard lck(m_mutex); + + m_total_blocks++; + + if (log_blk.m_improved_via_refinement_flag) + m_total_refined++; + + if (log_blk.m_is_solid) + { + m_total_solid++; + } + else + { + int best_weight_range = log_blk.m_best_blk.m_weight_ise_range; + + if (log_blk.m_best_blk.m_color_endpoint_modes[0] == 7) + { + m_mode7_submode_hist[bounds_check(log_blk.m_best_submodes[0], 0U, 6U)]++; + + if 
(log_blk.m_best_blk.m_num_partitions == 2) + { + m_total_mode7_2part++; + + m_mode7_submode_hist[bounds_check(log_blk.m_best_submodes[1], 0U, 6U)]++; + m_total_2part++; + + m_weight_range_hist_7_2part[bounds_check(best_weight_range, 0, 11)]++; + + m_part_hist[bounds_check(log_blk.m_best_pat_index, 0U, 32U)]++; + } + else + { + m_total_mode7_1part++; + + m_weight_range_hist_7[bounds_check(best_weight_range, 0, 11)]++; + } + } + else + { + m_mode11_submode_hist[bounds_check(log_blk.m_best_submodes[0], 0U, 9U)]++; + if (log_blk.m_constrained_weights) + m_total_mode11_1part_constrained_weights++; + + if (log_blk.m_best_blk.m_num_partitions == 2) + { + m_total_mode11_2part++; + + m_mode11_submode_hist[bounds_check(log_blk.m_best_submodes[1], 0U, 9U)]++; + m_total_2part++; + + m_weight_range_hist_11_2part[bounds_check(best_weight_range, 0, 11)]++; + + m_part_hist[bounds_check(log_blk.m_best_pat_index, 0U, 32U)]++; + } + else + { + m_total_mode11_1part++; + + m_weight_range_hist_11[bounds_check(best_weight_range, 0, 11)]++; + } + } + } +} + +void astc_hdr_4x4_block_stats::print() +{ + std::lock_guard lck(m_mutex); + + assert(m_total_blocks); + if (!m_total_blocks) + return; + + printf("\nLow-level ASTC Encoder Statistics:\n"); + printf("Total blocks: %u\n", m_total_blocks); + printf("Total solid: %u %3.2f%%\n", m_total_solid, (m_total_solid * 100.0f) / m_total_blocks); + printf("Total refined: %u %3.2f%%\n", m_total_refined, (m_total_refined * 100.0f) / m_total_blocks); + + printf("Total mode 11, 1 partition: %u %3.2f%%\n", m_total_mode11_1part, (m_total_mode11_1part * 100.0f) / m_total_blocks); + printf("Total mode 11, 1 partition, constrained weights: %u %3.2f%%\n", m_total_mode11_1part_constrained_weights, (m_total_mode11_1part_constrained_weights * 100.0f) / m_total_blocks); + printf("Total mode 11, 2 partition: %u %3.2f%%\n", m_total_mode11_2part, (m_total_mode11_2part * 100.0f) / m_total_blocks); + + printf("Total mode 7, 1 partition: %u %3.2f%%\n", 
m_total_mode7_1part, (m_total_mode7_1part * 100.0f) / m_total_blocks); + printf("Total mode 7, 2 partition: %u %3.2f%%\n", m_total_mode7_2part, (m_total_mode7_2part * 100.0f) / m_total_blocks); + + printf("Total 2 partitions: %u %3.2f%%\n", m_total_2part, (m_total_2part * 100.0f) / m_total_blocks); + printf("\n"); + + printf("ISE texel weight range histogram mode 11:\n"); + for (uint32_t i = 1; i <= UHDR_MODE11_LAST_ISE_RANGE; i++) + printf("%u %u\n", i, m_weight_range_hist_11[i]); + printf("\n"); + + printf("ISE texel weight range histogram mode 11, 2 partition:\n"); + for (uint32_t i = 1; i <= UHDR_MODE11_PART2_LAST_ISE_RANGE; i++) + printf("%u %u\n", i, m_weight_range_hist_11_2part[i]); + printf("\n"); + + printf("ISE texel weight range histogram mode 7:\n"); + for (uint32_t i = 1; i <= UHDR_MODE7_PART1_LAST_ISE_RANGE; i++) + printf("%u %u\n", i, m_weight_range_hist_7[i]); + printf("\n"); + + printf("ISE texel weight range histogram mode 7, 2 partition:\n"); + for (uint32_t i = 1; i <= UHDR_MODE7_PART2_LAST_ISE_RANGE; i++) + printf("%u %u\n", i, m_weight_range_hist_7_2part[i]); + printf("\n"); + + printf("Mode 11 submode histogram:\n"); + for (uint32_t i = 0; i <= MODE11_TOTAL_SUBMODES; i++) // +1 because of the extra direct encoding + printf("%u %u\n", i, m_mode11_submode_hist[i]); + printf("\n"); + + printf("Mode 7 submode histogram:\n"); + for (uint32_t i = 0; i < MODE7_TOTAL_SUBMODES; i++) + printf("%u %u\n", i, m_mode7_submode_hist[i]); + printf("\n"); + + printf("Partition pattern table usage histogram:\n"); + for (uint32_t i = 0; i < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2; i++) + printf("%u:%u ", i, m_part_hist[i]); + printf("\n\n"); +} + +} // namespace basisu + diff --git a/thirdparty/basisu/encoder/basisu_uastc_hdr_4x4_enc.h b/thirdparty/basisu/encoder/basisu_uastc_hdr_4x4_enc.h new file mode 100644 index 000000000..390520a80 --- /dev/null +++ b/thirdparty/basisu/encoder/basisu_uastc_hdr_4x4_enc.h @@ -0,0 +1,181 @@ +// basisu_uastc_hdr_4x4_enc.h 
+#pragma once +#include "basisu_enc.h" +#include "basisu_gpu_texture.h" +#include "../transcoder/basisu_astc_helpers.h" +#include "../transcoder/basisu_astc_hdr_core.h" +#include "basisu_astc_hdr_common.h" + +namespace basisu +{ + struct uastc_hdr_4x4_codec_options : astc_hdr_codec_base_options + { + float m_bc6h_err_weight; + + bool m_use_solid; + + bool m_use_mode11_part1; + bool m_mode11_uber_mode; + uint32_t m_first_mode11_weight_ise_range; + uint32_t m_last_mode11_weight_ise_range; + bool m_mode11_direct_only; + int32_t m_first_mode11_submode; + int32_t m_last_mode11_submode; + + bool m_use_mode7_part1; + uint32_t m_first_mode7_part1_weight_ise_range; + uint32_t m_last_mode7_part1_weight_ise_range; + + bool m_use_mode7_part2; + uint32_t m_mode7_part2_part_masks; + uint32_t m_first_mode7_part2_weight_ise_range; + uint32_t m_last_mode7_part2_weight_ise_range; + + bool m_use_mode11_part2; + uint32_t m_mode11_part2_part_masks; + uint32_t m_first_mode11_part2_weight_ise_range; + uint32_t m_last_mode11_part2_weight_ise_range; + + bool m_refine_weights; + + uint32_t m_level; + + bool m_use_estimated_partitions; + uint32_t m_max_estimated_partitions; + + uastc_hdr_4x4_codec_options(); + + void init(); + + // TODO: set_quality_level() is preferred to configure the codec for transcoding purposes. 
+ static const int cMinLevel = 0; + static const int cMaxLevel = 4; + static const int cDefaultLevel = 1; + void set_quality_level(int level); + + private: + void set_quality_best(); + void set_quality_normal(); + void set_quality_fastest(); + }; + + struct astc_hdr_4x4_pack_results + { + double m_best_block_error; + double m_bc6h_block_error; // note this is not used/set by the encoder, here for convienance + + // Encoder results (logical ASTC block) + astc_helpers::log_astc_block m_best_blk; + + // For statistical use + uint32_t m_best_submodes[2]; + uint32_t m_best_pat_index; + bool m_constrained_weights; + + bool m_improved_via_refinement_flag; + + // Only valid if the block is solid + basist::astc_blk m_solid_blk; + + // The BC6H transcoded block + basist::bc6h_block m_bc6h_block; + + // Solid color/void extent flag + bool m_is_solid; + + void clear() + { + m_best_block_error = 1e+30f; + m_bc6h_block_error = 1e+30f; + + m_best_blk.clear(); + m_best_blk.m_grid_width = 4; + m_best_blk.m_grid_height = 4; + m_best_blk.m_endpoint_ise_range = 20; // 0-255 + + clear_obj(m_best_submodes); + + m_best_pat_index = 0; + m_constrained_weights = false; + + clear_obj(m_bc6h_block); + + m_is_solid = false; + m_improved_via_refinement_flag = false; + } + }; + + // Encodes a 4x4 ASTC HDR block given a 4x4 array of source block pixels/texels. + // Supports solid color blocks, mode 11 (all submodes), mode 7/1 partition (all submodes), + // and mode 7/2 partitions (all submodes) - 30 patterns, only the ones also in common with the BC6H format. + // The packed ASTC weight grid dimensions are currently always 4x4 texels, but may be also 3x3 in the future. + // This function is thread safe, i.e. it may be called from multiple encoding threads simultanously with different blocks. 
+ // + // Parameters: + // pRGBPixels - An array of 48 (16 RGB) floats: the 4x4 block to pack + // pPacked_block - A pointer to the packed ASTC HDR block + // coptions - Codec options + // pInternal_results - An optional pointer to details about how the block was packed, for statistics/debugging purposes. May be nullptr. + // + // Requirements: + // astc_hdr_enc_init() MUST have been called first to initialized the codec. + // Input pixels are checked and cannot be NaN's, Inf's, signed, or too large (greater than MAX_HALF_FLOAT, or 65504). + // Normal values and denormals are okay. + bool astc_hdr_4x4_enc_block( + const float* pRGBPixels, const basist::half_float *pRGBPixelsHalf, + const uastc_hdr_4x4_codec_options& coptions, + basisu::vector &all_results); + + bool astc_hdr_4x4_pack_results_to_block(basist::astc_blk& dst_blk, const astc_hdr_4x4_pack_results& results); + + bool astc_hdr_4x4_refine_weights(const basist::half_float* pSource_block, astc_hdr_4x4_pack_results& cur_results, const uastc_hdr_4x4_codec_options& coptions, float bc6h_weight, bool* pImproved_flag); + + struct astc_hdr_4x4_block_stats + { + std::mutex m_mutex; + + uint32_t m_total_blocks; + uint32_t m_total_2part, m_total_solid; + uint32_t m_total_mode7_1part, m_total_mode7_2part; + uint32_t m_total_mode11_1part, m_total_mode11_2part; + uint32_t m_total_mode11_1part_constrained_weights; + + uint32_t m_weight_range_hist_7[11]; + uint32_t m_weight_range_hist_7_2part[11]; + uint32_t m_mode7_submode_hist[6]; + + uint32_t m_weight_range_hist_11[11]; + uint32_t m_weight_range_hist_11_2part[11]; + uint32_t m_mode11_submode_hist[9]; + + uint32_t m_part_hist[32]; + + uint32_t m_total_refined; + + astc_hdr_4x4_block_stats() { clear(); } + + void clear() + { + std::lock_guard lck(m_mutex); + + m_total_blocks = 0; + m_total_mode7_1part = 0, m_total_mode7_2part = 0, m_total_mode11_1part = 0, m_total_2part = 0, m_total_solid = 0, m_total_mode11_2part = 0; + m_total_mode11_1part_constrained_weights = 0; + 
m_total_refined = 0; + + clear_obj(m_weight_range_hist_11); + clear_obj(m_weight_range_hist_11_2part); + clear_obj(m_weight_range_hist_7); + clear_obj(m_weight_range_hist_7_2part); + clear_obj(m_mode7_submode_hist); + clear_obj(m_mode11_submode_hist); + clear_obj(m_part_hist); + } + + void update(const astc_hdr_4x4_pack_results& log_blk); + + void print(); + }; + +} // namespace basisu + diff --git a/thirdparty/basisu/encoder/cppspmd_flow.h b/thirdparty/basisu/encoder/cppspmd_flow.h new file mode 100644 index 000000000..93934173c --- /dev/null +++ b/thirdparty/basisu/encoder/cppspmd_flow.h @@ -0,0 +1,590 @@ +// Do not include this header directly. +// Control flow functionality in common between all the headers. +// +// Copyright 2020-2024 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifdef _DEBUG +CPPSPMD_FORCE_INLINE void spmd_kernel::check_masks() +{ + assert(!any(andnot(m_kernel_exec, m_exec))); +} +#endif + +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_break() +{ +#ifdef _DEBUG + assert(m_in_loop); +#endif + + m_exec = exec_mask::all_off(); +} + +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_continue() +{ +#ifdef _DEBUG + assert(m_in_loop); +#endif + + // Kill any active lanes, and remember which lanes were active so we can re-enable them at the end of the loop body. 
+ m_continue_mask = m_continue_mask | m_exec; + m_exec = exec_mask::all_off(); +} + +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_return() +{ + // Permenantly kill all active lanes + m_kernel_exec = andnot(m_exec, m_kernel_exec); + m_exec = exec_mask::all_off(); +} + +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_unmasked(const UnmaskedBody& unmaskedBody) +{ + exec_mask orig_exec = m_exec, orig_kernel_exec = m_kernel_exec; + + m_kernel_exec = exec_mask::all_on(); + m_exec = exec_mask::all_on(); + + unmaskedBody(); + + m_kernel_exec = m_kernel_exec & orig_kernel_exec; + m_exec = m_exec & orig_exec; + + check_masks(); +} + +struct scoped_unmasked_restorer +{ + spmd_kernel *m_pKernel; + exec_mask m_orig_exec, m_orig_kernel_exec; + + CPPSPMD_FORCE_INLINE scoped_unmasked_restorer(spmd_kernel *pKernel) : + m_pKernel(pKernel), + m_orig_exec(pKernel->m_exec), + m_orig_kernel_exec(pKernel->m_kernel_exec) + { + pKernel->m_kernel_exec = exec_mask::all_on(); + pKernel->m_exec = exec_mask::all_on(); + } + + CPPSPMD_FORCE_INLINE ~scoped_unmasked_restorer() + { + m_pKernel->m_kernel_exec = m_pKernel->m_kernel_exec & m_orig_kernel_exec; + m_pKernel->m_exec = m_pKernel->m_exec & m_orig_exec; + m_pKernel->check_masks(); + } +}; + +#define SPMD_UNMASKED_BEGIN { scoped_unmasked_restorer _unmasked_restorer(this); +#define SPMD_UNMASKED_END } + +#if 0 +template +CPPSPMD_FORCE_INLINE decltype(auto) spmd_kernel::spmd_call(Args&&... args) +{ + SPMDKernel kernel; + kernel.init(m_exec); + return kernel._call(std::forward(args)...); +} +#else +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_call(Args&&... args) +{ + SPMDKernel kernel; + kernel.init(m_exec); + kernel._call(std::forward(args)...); +} +#endif + +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if_break(const vbool& cond) +{ +#ifdef _DEBUG + assert(m_in_loop); +#endif + + exec_mask cond_exec(cond); + + m_exec = andnot(m_exec & cond_exec, m_exec); + + check_masks(); +} + +// No SPMD breaks, continues, etc. 
allowed +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_sif(const vbool& cond, const IfBody& ifBody) +{ + exec_mask im = m_exec & exec_mask(cond); + + if (any(im)) + { + const exec_mask orig_exec = m_exec; + m_exec = im; + ifBody(); + m_exec = orig_exec; + } +} + +// No SPMD breaks, continues, etc. allowed +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_sifelse(const vbool& cond, const IfBody& ifBody, const ElseBody &elseBody) +{ + const exec_mask orig_exec = m_exec; + + exec_mask im = m_exec & exec_mask(cond); + + if (any(im)) + { + m_exec = im; + ifBody(); + } + + exec_mask em = orig_exec & exec_mask(!cond); + + if (any(em)) + { + m_exec = em; + elseBody(); + } + + m_exec = orig_exec; +} + +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if(const vbool& cond, const IfBody& ifBody) +{ + exec_mask cond_exec(cond); + + exec_mask pre_if_exec = cond_exec & m_exec; + + if (any(pre_if_exec)) + { + exec_mask unexecuted_lanes = andnot(cond_exec, m_exec); + m_exec = pre_if_exec; + + ifBody(); + + // Propagate any lanes that got disabled inside the if body into the exec mask outside the if body, but turn on any lanes that didn't execute inside the if body. + m_exec = m_exec | unexecuted_lanes; + + check_masks(); + } +} + +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_ifelse(const vbool& cond, const IfBody& ifBody, const ElseBody& elseBody) +{ + bool all_flag = false; + + exec_mask cond_exec(cond); + + { + exec_mask pre_if_exec = cond_exec & m_exec; + + int mask = pre_if_exec.get_movemask(); + if (mask != 0) + { + all_flag = ((uint32_t)mask == m_exec.get_movemask()); + + exec_mask unexecuted_lanes = andnot(cond_exec, m_exec); + m_exec = pre_if_exec; + + ifBody(); + + // Propagate any lanes that got disabled inside the if body into the exec mask outside the if body, but turn on any lanes that didn't execute inside the if body. 
+ m_exec = m_exec | unexecuted_lanes; + + check_masks(); + } + } + + if (!all_flag) + { + exec_mask pre_if_exec = andnot(cond_exec, m_exec); + + if (any(pre_if_exec)) + { + exec_mask unexecuted_lanes = cond_exec & m_exec; + m_exec = pre_if_exec; + + ifBody(); + + // Propagate any lanes that got disabled inside the if body into the exec mask outside the if body, but turn on any lanes that didn't execute inside the if body. + m_exec = m_exec | unexecuted_lanes; + + check_masks(); + } + } +} + +struct scoped_exec_restorer +{ + exec_mask *m_pMask; + exec_mask m_prev_mask; + CPPSPMD_FORCE_INLINE scoped_exec_restorer(exec_mask *pExec_mask) : m_pMask(pExec_mask), m_prev_mask(*pExec_mask) { } + CPPSPMD_FORCE_INLINE ~scoped_exec_restorer() { *m_pMask = m_prev_mask; } +}; + +// Cannot use SPMD break, continue, or return inside "simple" if/else +#define SPMD_SIF(cond) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(vbool(cond))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SELSE(cond) } exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(!vbool(cond))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SENDIF } + +// Same as SPMD_SIF, except doesn't use a scoped object +#define SPMD_SIF2(cond) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(vbool(cond))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { exec_mask _orig_exec = m_exec; m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SELSE2(cond) m_exec = _orig_exec; } exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(!vbool(cond))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { exec_mask _orig_exec = m_exec; m_exec = CPPSPMD_GLUER2(_exec_temp, 
__LINE__); + +#define SPMD_SEND_IF2 m_exec = _orig_exec; } + +// Same as SPMD_SIF(), except the if/else blocks are always executed +#define SPMD_SAIF(cond) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(vbool(cond))); { CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); \ + m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SAELSE(cond) } exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(!vbool(cond))); { CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); \ + m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SAENDIF } + +// Cannot use SPMD break, continue, or return inside sselect +#define SPMD_SSELECT(var) do { vint_t _select_var = var; scoped_exec_restorer _orig_exec(&m_exec); exec_mask _select_executed(exec_mask::all_off()); +#define SPMD_SCASE(value) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(_orig_exec.m_prev_mask & exec_mask(vbool(_select_var == (value)))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); _select_executed = _select_executed | m_exec; + +//#define SPMD_SCASE_END if (_select_executed.get_movemask() == _orig_exec.m_prev_mask.get_movemask()) break; } +#define SPMD_SCASE_END if (!any(_select_executed ^ _orig_exec.m_prev_mask)) break; } +#define SPMD_SDEFAULT exec_mask _all_other_lanes(andnot(_select_executed, _orig_exec.m_prev_mask)); if (any(_all_other_lanes)) { m_exec = _all_other_lanes; +#define SPMD_SDEFAULT_END } +#define SPMD_SSELECT_END } while(0); + +// Same as SPMD_SSELECT, except all cases are executed. 
+// Cannot use SPMD break, continue, or return inside sselect +#define SPMD_SASELECT(var) do { vint_t _select_var = var; scoped_exec_restorer _orig_exec(&m_exec); exec_mask _select_executed(exec_mask::all_off()); + +#define SPMD_SACASE(value) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(_orig_exec.m_prev_mask & exec_mask(vbool(_select_var == (value)))); { m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); \ + _select_executed = _select_executed | m_exec; + +#define SPMD_SACASE_END } +#define SPMD_SADEFAULT exec_mask _all_other_lanes(andnot(_select_executed, _orig_exec.m_prev_mask)); { m_exec = _all_other_lanes; +#define SPMD_SADEFAULT_END } +#define SPMD_SASELECT_END } while(0); + +struct scoped_exec_restorer2 +{ + spmd_kernel *m_pKernel; + exec_mask m_unexecuted_lanes; + + CPPSPMD_FORCE_INLINE scoped_exec_restorer2(spmd_kernel *pKernel, const vbool &cond) : + m_pKernel(pKernel) + { + exec_mask cond_exec(cond); + m_unexecuted_lanes = andnot(cond_exec, pKernel->m_exec); + pKernel->m_exec = cond_exec & pKernel->m_exec; + } + + CPPSPMD_FORCE_INLINE ~scoped_exec_restorer2() + { + m_pKernel->m_exec = m_pKernel->m_exec | m_unexecuted_lanes; + m_pKernel->check_masks(); + } +}; + +#define SPMD_IF(cond) { CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, vbool(cond)); if (any(m_exec)) { +#define SPMD_ELSE(cond) } } { CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, !vbool(cond)); if (any(m_exec)) { +#define SPMD_END_IF } } + +// Same as SPMD_IF, except the conditional block is always executed. 
+#define SPMD_AIF(cond) { CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, vbool(cond)); { +#define SPMD_AELSE(cond) } } { CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, !vbool(cond)); { +#define SPMD_AEND_IF } } + +class scoped_exec_saver +{ + exec_mask m_exec, m_kernel_exec, m_continue_mask; + spmd_kernel *m_pKernel; +#ifdef _DEBUG + bool m_in_loop; +#endif + +public: + inline scoped_exec_saver(spmd_kernel *pKernel) : + m_exec(pKernel->m_exec), m_kernel_exec(pKernel->m_kernel_exec), m_continue_mask(pKernel->m_continue_mask), + m_pKernel(pKernel) + { +#ifdef _DEBUG + m_in_loop = pKernel->m_in_loop; +#endif + } + + inline ~scoped_exec_saver() + { + m_pKernel->m_exec = m_exec; + m_pKernel->m_continue_mask = m_continue_mask; + m_pKernel->m_kernel_exec = m_kernel_exec; +#ifdef _DEBUG + m_pKernel->m_in_loop = m_in_loop; + m_pKernel->check_masks(); +#endif + } +}; + +#define SPMD_BEGIN_CALL scoped_exec_saver CPPSPMD_GLUER2(_begin_call_scoped_exec_saver, __LINE__)(this); m_continue_mask = exec_mask::all_off(); +#define SPMD_BEGIN_CALL_ALL_LANES scoped_exec_saver CPPSPMD_GLUER2(_begin_call_scoped_exec_saver, __LINE__)(this); m_exec = exec_mask::all_on(); m_continue_mask = exec_mask::all_off(); + +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_foreach(int begin, int end, const ForeachBody& foreachBody) +{ + if (begin == end) + return; + + if (!any(m_exec)) + return; + + // We don't support iterating backwards. + if (begin > end) + std::swap(begin, end); + + exec_mask prev_continue_mask = m_continue_mask, prev_exec = m_exec; + + int total_full = (end - begin) / PROGRAM_COUNT; + int total_partial = (end - begin) % PROGRAM_COUNT; + + lint_t loop_index = begin + program_index; + + const int total_loops = total_full + (total_partial ? 
1 : 0); + + m_continue_mask = exec_mask::all_off(); + + for (int i = 0; i < total_loops; i++) + { + int n = PROGRAM_COUNT; + if ((i == (total_loops - 1)) && (total_partial)) + { + exec_mask partial_mask = exec_mask(vint_t(total_partial) > vint_t(program_index)); + m_exec = m_exec & partial_mask; + n = total_partial; + } + + foreachBody(loop_index, n); + + m_exec = m_exec | m_continue_mask; + if (!any(m_exec)) + break; + + m_continue_mask = exec_mask::all_off(); + check_masks(); + + store_all(loop_index, loop_index + PROGRAM_COUNT); + } + + m_exec = prev_exec & m_kernel_exec; + m_continue_mask = prev_continue_mask; + check_masks(); +} + +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_while(const WhileCondBody& whileCondBody, const WhileBody& whileBody) +{ + exec_mask orig_exec = m_exec; + + exec_mask orig_continue_mask = m_continue_mask; + m_continue_mask = exec_mask::all_off(); + +#ifdef _DEBUG + const bool prev_in_loop = m_in_loop; + m_in_loop = true; +#endif + + while(true) + { + exec_mask cond_exec = exec_mask(whileCondBody()); + m_exec = m_exec & cond_exec; + + if (!any(m_exec)) + break; + + whileBody(); + + m_exec = m_exec | m_continue_mask; + m_continue_mask = exec_mask::all_off(); + check_masks(); + } + +#ifdef _DEBUG + m_in_loop = prev_in_loop; +#endif + + m_exec = orig_exec & m_kernel_exec; + m_continue_mask = orig_continue_mask; + check_masks(); +} + +struct scoped_while_restorer +{ + spmd_kernel *m_pKernel; + exec_mask m_orig_exec, m_orig_continue_mask; +#ifdef _DEBUG + bool m_prev_in_loop; +#endif + + CPPSPMD_FORCE_INLINE scoped_while_restorer(spmd_kernel *pKernel) : + m_pKernel(pKernel), + m_orig_exec(pKernel->m_exec), + m_orig_continue_mask(pKernel->m_continue_mask) + { + pKernel->m_continue_mask.all_off(); + +#ifdef _DEBUG + m_prev_in_loop = pKernel->m_in_loop; + pKernel->m_in_loop = true; +#endif + } + + CPPSPMD_FORCE_INLINE ~scoped_while_restorer() + { + m_pKernel->m_exec = m_orig_exec & m_pKernel->m_kernel_exec; + m_pKernel->m_continue_mask 
= m_orig_continue_mask; +#ifdef _DEBUG + m_pKernel->m_in_loop = m_prev_in_loop; + m_pKernel->check_masks(); +#endif + } +}; + +#undef SPMD_WHILE +#undef SPMD_WEND +#define SPMD_WHILE(cond) { scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); \ + m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; + +#define SPMD_WEND m_exec = m_exec | m_continue_mask; m_continue_mask = exec_mask::all_off(); check_masks(); } } + +// Nesting is not supported (although it will compile, but the results won't make much sense). +#define SPMD_FOREACH(loop_var, bi, ei) if (((bi) != (ei)) && (any(m_exec))) { \ + scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ + uint32_t b = (uint32_t)(bi), e = (uint32_t)(ei); if ((b) > (e)) { std::swap(b, e); } const uint32_t total_full = ((e) - (b)) >> PROGRAM_COUNT_SHIFT, total_partial = ((e) - (b)) & (PROGRAM_COUNT - 1); \ + lint_t loop_var = program_index + (int)b; const uint32_t total_loops = total_full + (total_partial ? 
1U : 0U); \ + for (uint32_t CPPSPMD_GLUER2(_foreach_counter, __LINE__) = 0; CPPSPMD_GLUER2(_foreach_counter, __LINE__) < total_loops; ++CPPSPMD_GLUER2(_foreach_counter, __LINE__)) { \ + if ((CPPSPMD_GLUER2(_foreach_counter, __LINE__) == (total_loops - 1)) && (total_partial)) { exec_mask partial_mask = exec_mask(vint_t((int)total_partial) > vint_t(program_index)); m_exec = m_exec & partial_mask; } + +#define SPMD_FOREACH_END(loop_var) m_exec = m_exec | m_continue_mask; if (!any(m_exec)) break; m_continue_mask = exec_mask::all_off(); check_masks(); store_all(loop_var, loop_var + PROGRAM_COUNT); } } + +// Okay to use spmd_continue or spmd_return, but not spmd_break +#define SPMD_FOREACH_ACTIVE(index_var) int64_t index_var; { uint64_t _movemask = m_exec.get_movemask(); if (_movemask) { scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ + for (uint32_t _i = 0; _i < PROGRAM_COUNT; ++_i) { \ + if (_movemask & (1U << _i)) { \ + m_exec.enable_lane(_i); m_exec = m_exec & m_kernel_exec; \ + (index_var) = _i; \ + +#define SPMD_FOREACH_ACTIVE_END } } } } + +// Okay to use spmd_continue, but not spmd_break/spmd_continue +#define SPMD_FOREACH_UNIQUE_INT(index_var, var) { scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ + CPPSPMD_DECL(int_t, _vals[PROGRAM_COUNT]); store_linear_all(_vals, var); std::sort(_vals, _vals + PROGRAM_COUNT); \ + const int _n = (int)(std::unique(_vals, _vals + PROGRAM_COUNT) - _vals); \ + for (int _i = 0; _i < _n; ++_i) { int index_var = _vals[_i]; vbool cond = (vint_t(var) == vint_t(index_var)); m_exec = exec_mask(cond); + +#define SPMD_FOREACH_UNIQUE_INT_END } } + +struct scoped_simple_while_restorer +{ + spmd_kernel* m_pKernel; + exec_mask m_orig_exec; +#ifdef _DEBUG + bool m_prev_in_loop; +#endif + + CPPSPMD_FORCE_INLINE scoped_simple_while_restorer(spmd_kernel* pKernel) : + m_pKernel(pKernel), + m_orig_exec(pKernel->m_exec) + { + +#ifdef _DEBUG + m_prev_in_loop = pKernel->m_in_loop; + pKernel->m_in_loop = 
true; +#endif + } + + CPPSPMD_FORCE_INLINE ~scoped_simple_while_restorer() + { + m_pKernel->m_exec = m_orig_exec; +#ifdef _DEBUG + m_pKernel->m_in_loop = m_prev_in_loop; + m_pKernel->check_masks(); +#endif + } +}; + +// Cannot use SPMD break, continue, or return inside simple while + +#define SPMD_SWHILE(cond) { scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ + while(true) { \ + exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; +#define SPMD_SWEND } } + +// Cannot use SPMD break, continue, or return inside simple do +#define SPMD_SDO { scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { +#define SPMD_SEND_DO(cond) exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; } } + +#undef SPMD_FOR +#undef SPMD_END_FOR +#define SPMD_FOR(for_init, for_cond) { for_init; scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(for_cond)); \ + m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; +#define SPMD_END_FOR(for_inc) m_exec = m_exec | m_continue_mask; m_continue_mask = exec_mask::all_off(); check_masks(); for_inc; } } + +template +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_for(const ForInitBody& forInitBody, const ForCondBody& forCondBody, const ForIncrBody& forIncrBody, const ForBody& forBody) +{ + exec_mask orig_exec = m_exec; + + forInitBody(); + + exec_mask orig_continue_mask = m_continue_mask; + m_continue_mask = exec_mask::all_off(); + +#ifdef _DEBUG + const bool prev_in_loop = m_in_loop; + m_in_loop = true; +#endif + + while(true) + { + exec_mask cond_exec = exec_mask(forCondBody()); + m_exec = m_exec & cond_exec; + + if (!any(m_exec)) + break; + + forBody(); + + m_exec = 
m_exec | m_continue_mask; + m_continue_mask = exec_mask::all_off(); + check_masks(); + + forIncrBody(); + } + + m_exec = orig_exec & m_kernel_exec; + m_continue_mask = orig_continue_mask; + +#ifdef _DEBUG + m_in_loop = prev_in_loop; + check_masks(); +#endif +} diff --git a/thirdparty/basisu/encoder/cppspmd_math.h b/thirdparty/basisu/encoder/cppspmd_math.h new file mode 100644 index 000000000..3032df865 --- /dev/null +++ b/thirdparty/basisu/encoder/cppspmd_math.h @@ -0,0 +1,725 @@ +// Do not include this header directly. +// +// Copyright 2020-2024 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The general goal of these vectorized estimated math functions is scalability/performance. +// There are explictly no checks NaN's/Inf's on the input arguments. There are no assertions either. +// These are fast estimate functions - if you need more than that, use stdlib. Please do a proper +// engineering analysis before relying on them. +// I have chosen functions written by others, ported them to CppSPMD, then measured their abs/rel errors. +// I compared each to the ones in DirectXMath and stdlib's for accuracy/performance. 
+ +CPPSPMD_FORCE_INLINE vfloat fmod_inv(const vfloat& a, const vfloat& b, const vfloat& b_inv) +{ + vfloat c = frac(abs(a * b_inv)) * abs(b); + return spmd_ternaryf(a < 0, -c, c); +} + +CPPSPMD_FORCE_INLINE vfloat fmod_inv_p(const vfloat& a, const vfloat& b, const vfloat& b_inv) +{ + return frac(a * b_inv) * b; +} + +// Avoids dividing by zero or very small values. +CPPSPMD_FORCE_INLINE vfloat safe_div(vfloat a, vfloat b, float fDivThresh = 1e-7f) +{ + return a / spmd_ternaryf( abs(b) > fDivThresh, b, spmd_ternaryf(b < 0.0f, -fDivThresh, fDivThresh) ); +} + +/* + clang 9.0.0 for win /fp:precise release + f range: 0.0000000000001250 10000000000.0000000000000000, vals: 1073741824 + + log2_est(): + max abs err: 0.0000023076808731 + max rel err: 0.0000000756678881 + avg abs err: 0.0000007535452724 + avg rel err: 0.0000000235117843 + + XMVectorLog2(): + max abs err: 0.0000023329709933 + max rel err: 0.0000000826961046 + avg abs err: 0.0000007564889684 + avg rel err: 0.0000000236051899 + + std::log2f(): + max abs err: 0.0000020265979401 + max rel err: 0.0000000626647654 + avg abs err: 0.0000007494445227 + avg rel err: 0.0000000233800985 +*/ + +// See https://tech.ebayinc.com/engineering/fast-approximate-logarithms-part-iii-the-formulas/ +inline vfloat spmd_kernel::log2_est(vfloat v) +{ + vfloat signif, fexp; + + // Just clamp to a very small value, instead of checking for invalid inputs. + vfloat x = max(v, 2.2e-38f); + + /* + * Assume IEEE representation, which is sgn(1):exp(8):frac(23) + * representing (1+frac)*2^(exp-127). Call 1+frac the significand + */ + + // get exponent + vint ux1_i = cast_vfloat_to_vint(x); + + vint exp = VUINT_SHIFT_RIGHT(ux1_i & 0x7F800000, 23); + + // actual exponent is exp-127, will subtract 127 later + + vint ux2_i; + vfloat ux2_f; + + vint greater = ux1_i & 0x00400000; // true if signif > 1.5 + SPMD_SIF(greater != 0) + { + // signif >= 1.5 so need to divide by 2. 
Accomplish this by stuffing exp = 126 which corresponds to an exponent of -1 + store_all(ux2_i, (ux1_i & 0x007FFFFF) | 0x3f000000); + + store_all(ux2_f, cast_vint_to_vfloat(ux2_i)); + + // 126 instead of 127 compensates for division by 2 + store_all(fexp, vfloat(exp - 126)); + } + SPMD_SELSE(greater != 0) + { + // get signif by stuffing exp = 127 which corresponds to an exponent of 0 + store(ux2_i, (ux1_i & 0x007FFFFF) | 0x3f800000); + + store(ux2_f, cast_vint_to_vfloat(ux2_i)); + + store(fexp, vfloat(exp - 127)); + } + SPMD_SENDIF + + store_all(signif, ux2_f); + store_all(signif, signif - 1.0f); + + const float a = 0.1501692f, b = 3.4226132f, c = 5.0225057f, d = 4.1130283f, e = 3.4813372f; + + vfloat xm1 = signif; + vfloat xm1sqr = xm1 * xm1; + + return fexp + ((a * (xm1sqr * xm1) + b * xm1sqr + c * xm1) / (xm1sqr + d * xm1 + e)); + + // fma lowers accuracy for SSE4.1 - no idea why (compiler reordering?) + //return fexp + ((vfma(a, (xm1sqr * xm1), vfma(b, xm1sqr, c * xm1))) / (xm1sqr + vfma(d, xm1, e))); +} + +// Uses log2_est(), so this function must be <= the precision of that. +inline vfloat spmd_kernel::log_est(vfloat v) +{ + return log2_est(v) * 0.693147181f; +} + +CPPSPMD_FORCE_INLINE void spmd_kernel::reduce_expb(vfloat& arg, vfloat& two_int_a, vint& adjustment) +{ + // Assume we're using equation (2) + store_all(adjustment, 0); + + // integer part of the input argument + vint int_arg = (vint)arg; + + // if frac(arg) is in [0.5, 1.0]... + SPMD_SIF((arg - int_arg) > 0.5f) + { + store(adjustment, 1); + + // then change it to [0.0, 0.5] + store(arg, arg - 0.5f); + } + SPMD_SENDIF + + // arg == just the fractional part + store_all(arg, arg - (vfloat)int_arg); + + // Now compute 2** (int) arg. 
+ store_all(int_arg, min(int_arg + 127, 254)); + + store_all(two_int_a, cast_vint_to_vfloat(VINT_SHIFT_LEFT(int_arg, 23))); +} + +/* + clang 9.0.0 for win /fp:precise release + f range : -50.0000000000000000 49.9999940395355225, vals : 16777216 + + exp2_est(): + Total passed near - zero check : 16777216 + Total sign diffs : 0 + max abs err: 1668910609.7500000000000000 + max rel err: 0.0000015642030031 + avg abs err: 10793794.4007573910057545 + avg rel err: 0.0000003890893282 + + XMVectorExp2(): + Total passed near-zero check: 16777216 + Total sign diffs: 0 + max abs err: 1665552836.8750000000000000 + max rel err: 0.0000114674862370 + avg abs err: 10771868.2627860084176064 + avg rel err: 0.0000011218880770 + + std::exp2f(): + Total passed near-zero check: 16777216 + Total sign diffs: 0 + max abs err: 1591636585.6250000000000000 + max rel err: 0.0000014849731018 + avg abs err: 10775800.3204844966530800 + avg rel err: 0.0000003851496422 +*/ + +// http://www.ganssle.com/item/approximations-c-code-exponentiation-log.htm +inline vfloat spmd_kernel::exp2_est(vfloat arg) +{ + SPMD_BEGIN_CALL + + const vfloat P00 = +7.2152891521493f; + const vfloat P01 = +0.0576900723731f; + const vfloat Q00 = +20.8189237930062f; + const vfloat Q01 = +1.0f; + const vfloat sqrt2 = 1.4142135623730950488f; // sqrt(2) for scaling + + vfloat result = 0.0f; + + // Return 0 if arg is too large. + // We're not introducing inf/nan's into calculations, or risk doing so by returning huge default values. + SPMD_IF(abs(arg) > 126.0f) + { + spmd_return(); + } + SPMD_END_IF + + // 2**(int(a)) + vfloat two_int_a; + + // set to 1 by reduce_expb + vint adjustment; + + // 0 if arg is +; 1 if negative + vint negative = 0; + + // If the input is negative, invert it. At the end we'll take the reciprocal, since n**(-1) = 1/(n**x). 
+ SPMD_SIF(arg < 0.0f) + { + store(arg, -arg); + store(negative, 1); + } + SPMD_SENDIF + + store_all(arg, min(arg, 126.0f)); + + // reduce to [0.0, 0.5] + reduce_expb(arg, two_int_a, adjustment); + + // The format of the polynomial is: + // answer=(Q(x**2) + x*P(x**2))/(Q(x**2) - x*P(x**2)) + // + // The following computes the polynomial in several steps: + + // Q(x**2) + vfloat Q = vfma(Q01, (arg * arg), Q00); + + // x*P(x**2) + vfloat x_P = arg * (vfma(P01, arg * arg, P00)); + + vfloat answer = (Q + x_P) / (Q - x_P); + + // Now correct for the scaling factor of 2**(int(a)) + store_all(answer, answer * two_int_a); + + // If the result had a fractional part > 0.5, correct for that + store_all(answer, spmd_ternaryf(adjustment != 0, answer * sqrt2, answer)); + + // Correct for a negative input + SPMD_SIF(negative != 0) + { + store(answer, 1.0f / answer); + } + SPMD_SENDIF + + store(result, answer); + + return result; +} + +inline vfloat spmd_kernel::exp_est(vfloat arg) +{ + // e^x = exp2(x / log_base_e(2)) + // constant is 1.0/(log(2)/log(e)) or 1/log(2) + return exp2_est(arg * 1.44269504f); +} + +inline vfloat spmd_kernel::pow_est(vfloat arg1, vfloat arg2) +{ + return exp_est(log_est(arg1) * arg2); +} + +/* + clang 9.0.0 for win /fp:precise release + Total near-zero: 144, output above near-zero tresh: 30 + Total near-zero avg: 0.0000067941016621 max: 0.0000134706497192 + Total near-zero sign diffs: 5 + Total passed near-zero check: 16777072 + Total sign diffs: 5 + max abs err: 0.0000031375306036 + max rel err: 0.1140846017075028 + avg abs err: 0.0000003026226621 + avg rel err: 0.0000033564977623 +*/ + +// Math from this web page: http://developer.download.nvidia.com/cg/sin.html +// This is ~2x slower than sin_est() or cos_est(), and less accurate, but I'm keeping it here for comparison purposes to help validate/sanity check sin_est() and cos_est(). 
// SIMD sine/cosine approximation over all lanes.
// Adapted from nVidia's Cg standard-library reference implementation (see the
// sin.html link in the comment above this function). Works in "turns": the
// constant c1_w = 0.159154943091f is 1/(2*pi), so the input angle is first
// scaled to revolutions and range-reduced with frac().
// sin_flag selects sine vs. cosine: for sine a quarter-turn phase offset
// (c1_x = 0.25f) is subtracted via vfms; for cosine the scaled angle is used
// directly.
// NOTE(review): per the comment block above, this is ~2x slower and less
// accurate than sin_est()/cos_est() and is kept for validation purposes.
inline vfloat spmd_kernel::sincos_est_a(vfloat a, bool sin_flag)
{
    // Constant "vectors" from the original Cg code, unpacked into scalars.
    const float c0_x = 0.0f, c0_y = 0.5f, c0_z = 1.0f;
    const float c1_x = 0.25f, c1_y = -9.0f, c1_z = 0.75f, c1_w = 0.159154943091f;
    const float c2_x = 24.9808039603f, c2_y = -24.9808039603f, c2_z = -60.1458091736f, c2_w = 60.1458091736f;
    const float c3_x = 85.4537887573f, c3_y = -85.4537887573f, c3_z = -64.9393539429f, c3_w = 64.9393539429f;
    const float c4_x = 19.7392082214f, c4_y = -19.7392082214f, c4_z = -1.0f, c4_w = 1.0f;

    vfloat r0_x, r0_y, r0_z, r1_x, r1_y, r1_z, r2_x, r2_y, r2_z;

    // Scale input to turns; subtract 0.25 turn (90 degrees) for sine.
    store_all(r1_x, sin_flag ? vfms(c1_w, a, c1_x) : c1_w * a);

    // Range reduction: keep only the fractional part of the turn count.
    store_all(r1_y, frac(r1_x));

    // Per-lane interval selection masks, converted to 0.0/-?? float form by the
    // (vfloat) casts of the comparison results.
    store_all(r2_x, (vfloat)(r1_y < c1_x));

    store_all(r2_y, (vfloat)(r1_y >= c1_y));
    store_all(r2_z, (vfloat)(r1_y >= c1_z));

    // Combine the three masks into a single +/-1 sign-selection value.
    store_all(r2_y, vfma(r2_x, c4_z, vfma(r2_y, c4_w, r2_z * c4_z)));

    // Distances of the reduced angle from 0.0, 0.5 and 1.0 turns...
    store_all(r0_x, c0_x - r1_y);
    store_all(r0_y, c0_y - r1_y);
    store_all(r0_z, c0_z - r1_y);

    // ...squared, as the polynomial below is in the squared distance.
    store_all(r0_x, r0_x * r0_x);
    store_all(r0_y, r0_y * r0_y);
    store_all(r0_z, r0_z * r0_z);

    // Horner evaluation of the approximation polynomial for all three
    // candidate intervals in parallel (x/y/z triples), using fused ops.
    store_all(r1_x, vfma(c2_x, r0_x, c2_z));
    store_all(r1_y, vfma(c2_y, r0_y, c2_w));
    store_all(r1_z, vfma(c2_x, r0_z, c2_z));

    store_all(r1_x, vfma(r1_x, r0_x, c3_x));
    store_all(r1_y, vfma(r1_y, r0_y, c3_y));
    store_all(r1_z, vfma(r1_z, r0_z, c3_x));

    store_all(r1_x, vfma(r1_x, r0_x, c3_z));
    store_all(r1_y, vfma(r1_y, r0_y, c3_w));
    store_all(r1_z, vfma(r1_z, r0_z, c3_z));

    store_all(r1_x, vfma(r1_x, r0_x, c4_x));
    store_all(r1_y, vfma(r1_y, r0_y, c4_y));
    store_all(r1_z, vfma(r1_z, r0_z, c4_x));

    store_all(r1_x, vfma(r1_x, r0_x, c4_z));
    store_all(r1_y, vfma(r1_y, r0_y, c4_w));
    store_all(r1_z, vfma(r1_z, r0_z, c4_z));

    // Select/blend the per-interval results through the masks computed above.
    store_all(r0_x, vfnma(r1_x, r2_x, vfnma(r1_y, r2_y, r1_z * -r2_z)));

    return r0_x;
}

// positive values only
// Approximate reciprocal (1/q) via an integer bit-trick initial guess followed
// by one Newton-Raphson refinement step: vfnma(rcp_l, q, 2.0f) computes
// (2 - rcp_l*q), the standard NR iteration factor for the reciprocal.
CPPSPMD_FORCE_INLINE vfloat spmd_kernel::recip_est1(const vfloat& q)
{
    //const int mag = 0x7EF312AC; // 2 NR iters, 3 is 0x7EEEEBB3
    // Magic constant tuned for a single NR iteration.
    const int mag = 0x7EF311C3;
    // Inputs below this threshold are replaced by the magic constant's float
    // bit pattern, avoiding a huge/inf initial estimate for tiny q.
    const float fMinThresh = .0000125f;

    vfloat l = spmd_ternaryf(q >= fMinThresh, q, cast_vint_to_vfloat(vint(mag)));

    // Initial estimate: subtract the float's bit pattern from the magic value.
    vint x_l = vint(mag) - cast_vfloat_to_vint(l);

    vfloat rcp_l = cast_vint_to_vfloat(x_l);

    // One Newton-Raphson step: rcp * (2 - rcp*q).
    return rcp_l * vfnma(rcp_l, q, 2.0f);
}

// Signed-input variant of recip_est1(): computes the estimate on abs(t) and
// reapplies the original sign at the end.
CPPSPMD_FORCE_INLINE vfloat spmd_kernel::recip_est1_pn(const vfloat& t)
{
    //const int mag = 0x7EF312AC; // 2 NR iters, 3 is 0x7EEEEBB3
    const int mag = 0x7EF311C3;
    const float fMinThresh = .0000125f;

    // Split into sign and magnitude so the bit-trick sees a positive value.
    vfloat s = sign(t);
    vfloat q = abs(t);

    vfloat l = spmd_ternaryf(q >= fMinThresh, q, cast_vint_to_vfloat(vint(mag)));

    vint x_l = vint(mag) - cast_vfloat_to_vint(l);

    vfloat rcp_l = cast_vint_to_vfloat(x_l);

    // NR refinement as in recip_est1(), then restore the sign.
    return rcp_l * vfnma(rcp_l, q, 2.0f) * s;
}

// https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf
// https://github.com/hcs0/Hackers-Delight/blob/master/rsqrt.c.txt
// Fast approximate 1/sqrt(x0): Quake-style integer magic-constant guess plus
// one Newton-Raphson step. The 1.5008909f constant (instead of the classic
// 1.5) is a tuned value per the references above.
CPPSPMD_FORCE_INLINE vfloat spmd_kernel::rsqrt_est1(vfloat x0)
{
    vfloat xhalf = 0.5f * x0;
    vfloat x = cast_vint_to_vfloat(vint(0x5F375A82) - (VINT_SHIFT_RIGHT(cast_vfloat_to_vint(x0), 1)));
    return x * vfnma(xhalf * x, x, 1.5008909f);
}

// Higher-accuracy variant: different magic constant and two classic
// Newton-Raphson steps (x * (1.5 - xhalf*x*x)).
CPPSPMD_FORCE_INLINE vfloat spmd_kernel::rsqrt_est2(vfloat x0)
{
    vfloat xhalf = 0.5f * x0;
    vfloat x = cast_vint_to_vfloat(vint(0x5F37599E) - (VINT_SHIFT_RIGHT(cast_vfloat_to_vint(x0), 1)));
    vfloat x1 = x * vfnma(xhalf * x, x, 1.5);
    vfloat x2 = x1 * vfnma(xhalf * x1, x1, 1.5);
    return x2;
}

// Math from: http://developer.download.nvidia.com/cg/atan2.html
// TODO: Needs more validation, parameter checking.
+CPPSPMD_FORCE_INLINE vfloat spmd_kernel::atan2_est(vfloat y, vfloat x) +{ + vfloat t1 = abs(y); + vfloat t3 = abs(x); + + vfloat t0 = max(t3, t1); + store_all(t1, min(t3, t1)); + + store_all(t3, t1 / t0); + + vfloat t4 = t3 * t3; + store_all(t0, vfma(-0.013480470f, t4, 0.057477314f)); + store_all(t0, vfms(t0, t4, 0.121239071f)); + store_all(t0, vfma(t0, t4, 0.195635925f)); + store_all(t0, vfms(t0, t4, 0.332994597f)); + store_all(t0, vfma(t0, t4, 0.999995630f)); + store_all(t3, t0 * t3); + + store_all(t3, spmd_ternaryf(abs(y) > abs(x), vfloat(1.570796327f) - t3, t3)); + + store_all(t3, spmd_ternaryf(x < 0.0f, vfloat(3.141592654f) - t3, t3)); + store_all(t3, spmd_ternaryf(y < 0.0f, -t3, t3)); + + return t3; +} + +/* + clang 9.0.0 for win /fp:precise release + Tested range: -25.1327412287183449 25.1327382326621169, vals : 16777216 + Skipped angles near 90/270 within +- .001 radians. + Near-zero threshold: .0000125f + Near-zero output above check threshold: 1e-6f + + Total near-zero: 144, output above near-zero tresh: 20 + Total near-zero avg: 0.0000067510751968 max: 0.0000133514404297 + Total near-zero sign diffs: 5 + Total passed near-zero check: 16766400 + Total sign diffs: 5 + max abs err: 1.4982600811139264 + max rel err: 0.1459155900188041 + avg rel err: 0.0000054659502568 + + XMVectorTan() precise: + Total near-zero: 144, output above near-zero tresh: 18 + Total near-zero avg: 0.0000067641216186 max: 0.0000133524126795 + Total near-zero sign diffs: 0 + Total passed near-zero check: 16766400 + Total sign diffs: 0 + max abs err: 1.9883573246424930 + max rel err: 0.1459724171926864 + avg rel err: 0.0000054965766843 + + std::tanf(): + Total near-zero: 144, output above near-zero tresh: 0 + Total near-zero avg: 0.0000067116930779 max: 0.0000127713074107 + Total near-zero sign diffs: 11 + Total passed near-zero check: 16766400 + Total sign diffs: 11 + max abs err: 0.8989131818294709 + max rel err: 0.0573181403173166 + avg rel err: 0.0000030791301203 + + Originally 
from: + http://www.ganssle.com/approx.htm +*/ + +CPPSPMD_FORCE_INLINE vfloat spmd_kernel::tan82(vfloat x) +{ + // Original double version was 8.2 digits + //double c1 = 211.849369664121f, c2 = -12.5288887278448f, c3 = 269.7350131214121f, c4 = -71.4145309347748f; + // Tuned float constants for lower avg rel error (without using FMA3): + const float c1 = 211.849350f, c2 = -12.5288887f, c3 = 269.734985f, c4 = -71.4145203f; + vfloat x2 = x * x; + return (x * (vfma(c2, x2, c1)) / (vfma(x2, (c4 + x2), c3))); +} + +// Don't call this for angles close to 90/270!. +inline vfloat spmd_kernel::tan_est(vfloat x) +{ + const float fPi = 3.141592653589793f, fOneOverPi = 0.3183098861837907f; + CPPSPMD_DECL(const uint8_t, s_table0[16]) = { 128 + 0, 128 + 2, 128 + -2, 128 + 4, 128 + 0, 128 + 2, 128 + -2, 128 + 4, 128 + 0, 128 + 2, 128 + -2, 128 + 4, 128 + 0, 128 + 2, 128 + -2, 128 + 4 }; + + vint table = init_lookup4(s_table0); // a load + vint sgn = cast_vfloat_to_vint(x) & 0x80000000; + + store_all(x, abs(x)); + vfloat orig_x = x; + + vfloat q = x * fOneOverPi; + store_all(x, q - floor(q)); + + vfloat x4 = x * 4.0f; + vint octant = (vint)(x4); + + vfloat x0 = spmd_ternaryf((octant & 1) != 0, -x4, x4); + + vint k = table_lookup4_8(octant, table) & 0xFF; // a shuffle + + vfloat bias = (vfloat)k + -128.0f; + vfloat y = x0 + bias; + + vfloat z = tan82(y); + + vfloat r; + + vbool octant_one_or_two = (octant == 1) || (octant == 2); + + // SPMD optimization - skip costly divide if we can + if (spmd_any(octant_one_or_two)) + { + const float fDivThresh = .4371e-7f; + vfloat one_over_z = 1.0f / spmd_ternaryf(abs(z) > fDivThresh, z, spmd_ternaryf(z < 0.0f, -fDivThresh, fDivThresh)); + + vfloat b = spmd_ternaryf(octant_one_or_two, one_over_z, z); + store_all(r, spmd_ternaryf((octant & 2) != 0, -b, b)); + } + else + { + store_all(r, spmd_ternaryf(octant == 0, z, -z)); + } + + // Small angle approximation, to decrease the max rel error near Pi. 
+ SPMD_SIF(x >= (1.0f - .0003125f*4.0f)) + { + store(r, vfnma(floor(q) + 1.0f, fPi, orig_x)); + } + SPMD_SENDIF + + return cast_vint_to_vfloat(cast_vfloat_to_vint(r) ^ sgn); +} + +inline void spmd_kernel::seed_rand(rand_context& x, vint seed) +{ + store(x.a, 0xf1ea5eed); + store(x.b, seed ^ 0xd8487b1f); + store(x.c, seed ^ 0xdbadef9a); + store(x.d, seed); + for (int i = 0; i < 20; ++i) + (void)get_randu(x); +} + +// https://burtleburtle.net/bob/rand/smallprng.html +// Returns 32-bit unsigned random numbers. +inline vint spmd_kernel::get_randu(rand_context& x) +{ + vint e = x.a - VINT_ROT(x.b, 27); + store(x.a, x.b ^ VINT_ROT(x.c, 17)); + store(x.b, x.c + x.d); + store(x.c, x.d + e); + store(x.d, e + x.a); + return x.d; +} + +// Returns random numbers between [low, high), or low if low >= high +inline vint spmd_kernel::get_randi(rand_context& x, vint low, vint high) +{ + vint rnd = get_randu(x); + + vint range = high - low; + + vint rnd_range = mulhiu(rnd, range); + + return spmd_ternaryi(low < high, low + rnd_range, low); +} + +// Returns random numbers between [low, high), or low if low >= high +inline vfloat spmd_kernel::get_randf(rand_context& x, vfloat low, vfloat high) +{ + vint rndi = get_randu(x) & 0x7fffff; + + vfloat rnd = (vfloat)(rndi) * (1.0f / 8388608.0f); + + return spmd_ternaryf(low < high, vfma(high - low, rnd, low), low); +} + +CPPSPMD_FORCE_INLINE void spmd_kernel::init_reverse_bits(vint& tab1, vint& tab2) +{ + const uint8_t tab1_bytes[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; + const uint8_t tab2_bytes[16] = { 0, 8 << 4, 4 << 4, 12 << 4, 2 << 4, 10 << 4, 6 << 4, 14 << 4, 1 << 4, 9 << 4, 5 << 4, 13 << 4, 3 << 4, 11 << 4, 7 << 4, 15 << 4 }; + store_all(tab1, init_lookup4(tab1_bytes)); + store_all(tab2, init_lookup4(tab2_bytes)); +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::reverse_bits(vint k, vint tab1, vint tab2) +{ + vint r0 = table_lookup4_8(k & 0x7F7F7F7F, tab2); + vint r1 = table_lookup4_8(VUINT_SHIFT_RIGHT(k, 4) & 
0x7F7F7F7F, tab1); + vint r3 = r0 | r1; + return byteswap(r3); +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::count_leading_zeros(vint x) +{ + CPPSPMD_DECL(const uint8_t, s_tab[16]) = { 0, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; + + vint tab = init_lookup4(s_tab); + + //x <= 0x0000ffff + vbool c0 = (x & 0xFFFF0000) == 0; + vint n0 = spmd_ternaryi(c0, 16, 0); + vint x0 = spmd_ternaryi(c0, VINT_SHIFT_LEFT(x, 16), x); + + //x <= 0x00ffffff + vbool c1 = (x0 & 0xFF000000) == 0; + vint n1 = spmd_ternaryi(c1, n0 + 8, n0); + vint x1 = spmd_ternaryi(c1, VINT_SHIFT_LEFT(x0, 8), x0); + + //x <= 0x0fffffff + vbool c2 = (x1 & 0xF0000000) == 0; + vint n2 = spmd_ternaryi(c2, n1 + 4, n1); + vint x2 = spmd_ternaryi(c2, VINT_SHIFT_LEFT(x1, 4), x1); + + return table_lookup4_8(VUINT_SHIFT_RIGHT(x2, 28), tab) + n2; +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::count_leading_zeros_alt(vint x) +{ + //x <= 0x0000ffff + vbool c0 = (x & 0xFFFF0000) == 0; + vint n0 = spmd_ternaryi(c0, 16, 0); + vint x0 = spmd_ternaryi(c0, VINT_SHIFT_LEFT(x, 16), x); + + //x <= 0x00ffffff + vbool c1 = (x0 & 0xFF000000) == 0; + vint n1 = spmd_ternaryi(c1, n0 + 8, n0); + vint x1 = spmd_ternaryi(c1, VINT_SHIFT_LEFT(x0, 8), x0); + + //x <= 0x0fffffff + vbool c2 = (x1 & 0xF0000000) == 0; + vint n2 = spmd_ternaryi(c2, n1 + 4, n1); + vint x2 = spmd_ternaryi(c2, VINT_SHIFT_LEFT(x1, 4), x1); + + // x <= 0x3fffffff + vbool c3 = (x2 & 0xC0000000) == 0; + vint n3 = spmd_ternaryi(c3, n2 + 2, n2); + vint x3 = spmd_ternaryi(c3, VINT_SHIFT_LEFT(x2, 2), x2); + + // x <= 0x7fffffff + vbool c4 = (x3 & 0x80000000) == 0; + return spmd_ternaryi(c4, n3 + 1, n3); +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::count_trailing_zeros(vint x) +{ + // cast the least significant bit in v to a float + vfloat f = (vfloat)(x & -x); + + // extract exponent and adjust + return VUINT_SHIFT_RIGHT(cast_vfloat_to_vint(f), 23) - 0x7F; +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::count_set_bits(vint x) +{ + vint v = x - (VUINT_SHIFT_RIGHT(x, 1) & 
0x55555555); + vint v1 = (v & 0x33333333) + (VUINT_SHIFT_RIGHT(v, 2) & 0x33333333); + return VUINT_SHIFT_RIGHT(((v1 + (VUINT_SHIFT_RIGHT(v1, 4) & 0xF0F0F0F)) * 0x1010101), 24); +} + +CPPSPMD_FORCE_INLINE vint cmple_epu16(const vint &a, const vint &b) +{ + return cmpeq_epi16(subs_epu16(a, b), vint(0)); +} + +CPPSPMD_FORCE_INLINE vint cmpge_epu16(const vint &a, const vint &b) +{ + return cmple_epu16(b, a); +} + +CPPSPMD_FORCE_INLINE vint cmpgt_epu16(const vint &a, const vint &b) +{ + return andnot(cmpeq_epi16(a, b), cmple_epu16(b, a)); +} + +CPPSPMD_FORCE_INLINE vint cmplt_epu16(const vint &a, const vint &b) +{ + return cmpgt_epu16(b, a); +} + +CPPSPMD_FORCE_INLINE vint cmpge_epi16(const vint &a, const vint &b) +{ + return cmpeq_epi16(a, b) | cmpgt_epi16(a, b); +} + +CPPSPMD_FORCE_INLINE vint cmple_epi16(const vint &a, const vint &b) +{ + return cmpge_epi16(b, a); +} + +void spmd_kernel::print_vint(vint v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%i ", extract(v, i)); + printf("\n"); +} + +void spmd_kernel::print_vbool(vbool v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%i ", extract(v, i) ? 
1 : 0); + printf("\n"); +} + +void spmd_kernel::print_vint_hex(vint v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("0x%X ", extract(v, i)); + printf("\n"); +} + +void spmd_kernel::print_active_lanes(const char *pPrefix) +{ + CPPSPMD_DECL(int, flags[PROGRAM_COUNT]); + memset(flags, 0, sizeof(flags)); + storeu_linear(flags, vint(1)); + + if (pPrefix) + printf("%s", pPrefix); + + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + { + if (flags[i]) + printf("%u ", i); + } + printf("\n"); +} + +void spmd_kernel::print_vfloat(vfloat v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%f ", extract(v, i)); + printf("\n"); +} diff --git a/thirdparty/basisu/encoder/cppspmd_math_declares.h b/thirdparty/basisu/encoder/cppspmd_math_declares.h new file mode 100644 index 000000000..f76c9b7e3 --- /dev/null +++ b/thirdparty/basisu/encoder/cppspmd_math_declares.h @@ -0,0 +1,89 @@ +// Do not include this header directly. +// This header defines shared struct spmd_kernel helpers. +// +// Copyright 2020-2024 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// See cppspmd_math.h for detailed error statistics. 

// --- Internal helpers -------------------------------------------------------

// Argument reduction helper used by exp2_est() (see cppspmd_math.h).
CPPSPMD_FORCE_INLINE void reduce_expb(vfloat& arg, vfloat& two_int_a, vint& adjustment);
// NOTE(review): tan56 is declared here but no definition is visible in the
// accompanying cppspmd_math.h chunk (only tan82 is) — confirm it exists.
CPPSPMD_FORCE_INLINE vfloat tan56(vfloat x);
// Rational tangent core (~8.2 digits originally); used by tan_est().
CPPSPMD_FORCE_INLINE vfloat tan82(vfloat x);

// --- Logarithm / exponential approximations ---------------------------------

inline vfloat log2_est(vfloat v);

inline vfloat log_est(vfloat v);

inline vfloat exp2_est(vfloat arg);

inline vfloat exp_est(vfloat arg);

inline vfloat pow_est(vfloat arg1, vfloat arg2);

// --- Reciprocal estimates ----------------------------------------------------

// Positive inputs only.
CPPSPMD_FORCE_INLINE vfloat recip_est1(const vfloat& q);
// Handles positive and negative inputs.
// NOTE(review): the definition names this parameter 't' — cosmetic mismatch.
CPPSPMD_FORCE_INLINE vfloat recip_est1_pn(const vfloat& q);

// --- Trigonometric approximations --------------------------------------------

inline vfloat mod_angles(vfloat a);

// Shared sin/cos core; sin_flag selects which function is evaluated.
inline vfloat sincos_est_a(vfloat a, bool sin_flag);
CPPSPMD_FORCE_INLINE vfloat sin_est_a(vfloat a) { return sincos_est_a(a, true); }
CPPSPMD_FORCE_INLINE vfloat cos_est_a(vfloat a) { return sincos_est_a(a, false); }

inline vfloat sin_est(vfloat a);

inline vfloat cos_est(vfloat a);

// Don't call with values <= 0.
CPPSPMD_FORCE_INLINE vfloat rsqrt_est1(vfloat x0);

// Don't call with values <= 0.
CPPSPMD_FORCE_INLINE vfloat rsqrt_est2(vfloat x0);

CPPSPMD_FORCE_INLINE vfloat atan2_est(vfloat y, vfloat x);

CPPSPMD_FORCE_INLINE vfloat atan_est(vfloat x) { return atan2_est(x, vfloat(1.0f)); }

// Don't call this for angles close to 90/270!
inline vfloat tan_est(vfloat x);

// --- Per-lane PRNG -----------------------------------------------------------

// https://burtleburtle.net/bob/rand/smallprng.html
struct rand_context { vint a, b, c, d; };

inline void seed_rand(rand_context& x, vint seed);

// Returns 32-bit unsigned random numbers.
+inline vint get_randu(rand_context& x); + +// Returns random numbers between [low, high), or low if low >= high +inline vint get_randi(rand_context& x, vint low, vint high); + +// Returns random numbers between [low, high), or low if low >= high +inline vfloat get_randf(rand_context& x, vfloat low, vfloat high); + +CPPSPMD_FORCE_INLINE void init_reverse_bits(vint& tab1, vint& tab2); +CPPSPMD_FORCE_INLINE vint reverse_bits(vint k, vint tab1, vint tab2); + +CPPSPMD_FORCE_INLINE vint count_leading_zeros(vint x); +CPPSPMD_FORCE_INLINE vint count_leading_zeros_alt(vint x); + +CPPSPMD_FORCE_INLINE vint count_trailing_zeros(vint x); + +CPPSPMD_FORCE_INLINE vint count_set_bits(vint x); + +void print_vint(vint v); +void print_vbool(vbool v); +void print_vint_hex(vint v); +void print_active_lanes(const char *pPrefix); +void print_vfloat(vfloat v); + diff --git a/thirdparty/basisu/encoder/cppspmd_sse.h b/thirdparty/basisu/encoder/cppspmd_sse.h new file mode 100644 index 000000000..79dfa1561 --- /dev/null +++ b/thirdparty/basisu/encoder/cppspmd_sse.h @@ -0,0 +1,2105 @@ +// cppspmd_sse.h +// Copyright 2020-2022 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Notes for Basis Universal: +// All of the "cppspmd" code and headers are OPTIONAL to Basis Universal. if BASISU_SUPPORT_SSE is 0, it will never be included and does not impact compilation. 
+// The techniques used in this code were originally demonstrated for AVX2 by Nicolas Guillemot, Jefferson Amstutz in their "CppSPMD" project. +// This is new code for use in Basis Universal, although it uses the same general SPMD techniques in SSE 2/4. + +#include +#include +#include +#include +#include +#include + +#if CPPSPMD_SSE2 +#include // SSE +#include // SSE2 +#else +#include // SSE +#include // SSE2 +#include // SSE3 +#include // SSSE3 +#include // SSE4.1 +//#include // SSE4.2 +#endif + +#undef CPPSPMD_SSE +#undef CPPSPMD_AVX1 +#undef CPPSPMD_AVX2 +#undef CPPSPMD_AVX +#undef CPPSPMD_FLOAT4 +#undef CPPSPMD_INT16 + +#define CPPSPMD_SSE 1 +#define CPPSPMD_AVX 0 +#define CPPSPMD_AVX1 0 +#define CPPSPMD_AVX2 0 +#define CPPSPMD_FLOAT4 0 +#define CPPSPMD_INT16 0 + +#ifdef _MSC_VER + #ifndef CPPSPMD_DECL + #define CPPSPMD_DECL(type, name) __declspec(align(16)) type name + #endif + + #ifndef CPPSPMD_ALIGN + #define CPPSPMD_ALIGN(v) __declspec(align(v)) + #endif + + #define _mm_undefined_si128 _mm_setzero_si128 + #define _mm_undefined_ps _mm_setzero_ps +#else + #ifndef CPPSPMD_DECL + #define CPPSPMD_DECL(type, name) type name __attribute__((aligned(32))) + #endif + + #ifndef CPPSPMD_ALIGN + #define CPPSPMD_ALIGN(v) __attribute__((aligned(v))) + #endif +#endif + +#ifndef CPPSPMD_FORCE_INLINE +#ifdef _DEBUG +#define CPPSPMD_FORCE_INLINE inline +#else + #ifdef _MSC_VER + #define CPPSPMD_FORCE_INLINE __forceinline + #else + #define CPPSPMD_FORCE_INLINE inline + #endif +#endif +#endif + +#undef CPPSPMD +#undef CPPSPMD_ARCH + +#if CPPSPMD_SSE2 + #define CPPSPMD_SSE41 0 + #define CPPSPMD cppspmd_sse2 + #define CPPSPMD_ARCH _sse2 +#else + #define CPPSPMD_SSE41 1 + #define CPPSPMD cppspmd_sse41 + #define CPPSPMD_ARCH _sse41 +#endif + +#ifndef CPPSPMD_GLUER + #define CPPSPMD_GLUER(a, b) a##b +#endif + +#ifndef CPPSPMD_GLUER2 + #define CPPSPMD_GLUER2(a, b) CPPSPMD_GLUER(a, b) +#endif + +#ifndef CPPSPMD_NAME +#define CPPSPMD_NAME(a) CPPSPMD_GLUER2(a, CPPSPMD_ARCH) +#endif + 
+#undef VASSERT +#define VCOND(cond) ((exec_mask(vbool(cond)) & m_exec).get_movemask() == m_exec.get_movemask()) +#define VASSERT(cond) assert( VCOND(cond) ) + +#define CPPSPMD_ALIGNMENT (16) + +#define storeu_si32(p, a) (void)(*(int*)(p) = _mm_cvtsi128_si32((a))) + +namespace CPPSPMD +{ + +const int PROGRAM_COUNT_SHIFT = 2; +const int PROGRAM_COUNT = 1 << PROGRAM_COUNT_SHIFT; + +template inline N* aligned_new() { void* p = _mm_malloc(sizeof(N), 64); new (p) N; return static_cast(p); } +template void aligned_delete(N* p) { if (p) { p->~N(); _mm_free(p); } } + +CPPSPMD_DECL(const uint32_t, g_allones_128[4]) = { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX }; +CPPSPMD_DECL(const uint32_t, g_x_128[4]) = { UINT32_MAX, 0, 0, 0 }; +CPPSPMD_DECL(const float, g_onef_128[4]) = { 1.0f, 1.0f, 1.0f, 1.0f }; +CPPSPMD_DECL(const uint32_t, g_oneu_128[4]) = { 1, 1, 1, 1 }; + +CPPSPMD_DECL(const uint32_t, g_lane_masks_128[4][4]) = +{ + { UINT32_MAX, 0, 0, 0 }, + { 0, UINT32_MAX, 0, 0 }, + { 0, 0, UINT32_MAX, 0 }, + { 0, 0, 0, UINT32_MAX }, +}; + +#if CPPSPMD_SSE41 +CPPSPMD_FORCE_INLINE __m128i _mm_blendv_epi32(__m128i a, __m128i b, __m128i c) { return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(c))); } +#endif + +CPPSPMD_FORCE_INLINE __m128i blendv_epi8(__m128i a, __m128i b, __m128i mask) +{ +#if CPPSPMD_SSE2 + return _mm_castps_si128(_mm_or_ps(_mm_and_ps(_mm_castsi128_ps(mask), _mm_castsi128_ps(b)), _mm_andnot_ps(_mm_castsi128_ps(mask), _mm_castsi128_ps(a)))); +#else + return _mm_blendv_epi8(a, b, mask); +#endif +} + +CPPSPMD_FORCE_INLINE __m128 blendv_mask_ps(__m128 a, __m128 b, __m128 mask) +{ +#if CPPSPMD_SSE2 + // We know it's a mask, so we can just emulate the blend. 
+ return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a)); +#else + return _mm_blendv_ps(a, b, mask); +#endif +} + +CPPSPMD_FORCE_INLINE __m128 blendv_ps(__m128 a, __m128 b, __m128 mask) +{ +#if CPPSPMD_SSE2 + // Input is not a mask, but MSB bits - so emulate _mm_blendv_ps() by replicating bit 31. + mask = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(mask), 31)); + return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a)); +#else + return _mm_blendv_ps(a, b, mask); +#endif +} + +CPPSPMD_FORCE_INLINE __m128i blendv_mask_epi32(__m128i a, __m128i b, __m128i mask) +{ + return _mm_castps_si128(blendv_mask_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); +} + +CPPSPMD_FORCE_INLINE __m128i blendv_epi32(__m128i a, __m128i b, __m128i mask) +{ + return _mm_castps_si128(blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); +} + +#if CPPSPMD_SSE2 +CPPSPMD_FORCE_INLINE int extract_x(const __m128i& vec) { return _mm_cvtsi128_si32(vec); } +CPPSPMD_FORCE_INLINE int extract_y(const __m128i& vec) { return _mm_cvtsi128_si32(_mm_shuffle_epi32(vec, 0x55)); } +CPPSPMD_FORCE_INLINE int extract_z(const __m128i& vec) { return _mm_cvtsi128_si32(_mm_shuffle_epi32(vec, 0xAA)); } +CPPSPMD_FORCE_INLINE int extract_w(const __m128i& vec) { return _mm_cvtsi128_si32(_mm_shuffle_epi32(vec, 0xFF)); } + +// Returns float bits as int, to emulate _mm_extract_ps() +CPPSPMD_FORCE_INLINE int extract_ps_x(const __m128& vec) { float f = _mm_cvtss_f32(vec); return *(const int*)&f; } +CPPSPMD_FORCE_INLINE int extract_ps_y(const __m128& vec) { float f = _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0x55)); return *(const int*)&f; } +CPPSPMD_FORCE_INLINE int extract_ps_z(const __m128& vec) { float f = _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0xAA)); return *(const int*)&f; } +CPPSPMD_FORCE_INLINE int extract_ps_w(const __m128& vec) { float f = _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0xFF)); return *(const int*)&f; } + +// Returns floats +CPPSPMD_FORCE_INLINE 
float extractf_ps_x(const __m128& vec) { return _mm_cvtss_f32(vec); } +CPPSPMD_FORCE_INLINE float extractf_ps_y(const __m128& vec) { return _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0x55)); } +CPPSPMD_FORCE_INLINE float extractf_ps_z(const __m128& vec) { return _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0xAA)); } +CPPSPMD_FORCE_INLINE float extractf_ps_w(const __m128& vec) { return _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0xFF)); } +#else +CPPSPMD_FORCE_INLINE int extract_x(const __m128i& vec) { return _mm_extract_epi32(vec, 0); } +CPPSPMD_FORCE_INLINE int extract_y(const __m128i& vec) { return _mm_extract_epi32(vec, 1); } +CPPSPMD_FORCE_INLINE int extract_z(const __m128i& vec) { return _mm_extract_epi32(vec, 2); } +CPPSPMD_FORCE_INLINE int extract_w(const __m128i& vec) { return _mm_extract_epi32(vec, 3); } + +// Returns float bits as int +CPPSPMD_FORCE_INLINE int extract_ps_x(const __m128& vec) { return _mm_extract_ps(vec, 0); } +CPPSPMD_FORCE_INLINE int extract_ps_y(const __m128& vec) { return _mm_extract_ps(vec, 1); } +CPPSPMD_FORCE_INLINE int extract_ps_z(const __m128& vec) { return _mm_extract_ps(vec, 2); } +CPPSPMD_FORCE_INLINE int extract_ps_w(const __m128& vec) { return _mm_extract_ps(vec, 3); } + +// Returns floats +CPPSPMD_FORCE_INLINE float extractf_ps_x(const __m128& vec) { int v = extract_ps_x(vec); return *(const float*)&v; } +CPPSPMD_FORCE_INLINE float extractf_ps_y(const __m128& vec) { int v = extract_ps_y(vec); return *(const float*)&v; } +CPPSPMD_FORCE_INLINE float extractf_ps_z(const __m128& vec) { int v = extract_ps_z(vec); return *(const float*)&v; } +CPPSPMD_FORCE_INLINE float extractf_ps_w(const __m128& vec) { int v = extract_ps_w(vec); return *(const float*)&v; } +#endif + +#if CPPSPMD_SSE2 +CPPSPMD_FORCE_INLINE __m128i insert_x(const __m128i& vec, int v) { return _mm_insert_epi16(_mm_insert_epi16(vec, v, 0), (uint32_t)v >> 16U, 1); } +CPPSPMD_FORCE_INLINE __m128i insert_y(const __m128i& vec, int v) { return _mm_insert_epi16(_mm_insert_epi16(vec, v, 
2), (uint32_t)v >> 16U, 3); } +CPPSPMD_FORCE_INLINE __m128i insert_z(const __m128i& vec, int v) { return _mm_insert_epi16(_mm_insert_epi16(vec, v, 4), (uint32_t)v >> 16U, 5); } +CPPSPMD_FORCE_INLINE __m128i insert_w(const __m128i& vec, int v) { return _mm_insert_epi16(_mm_insert_epi16(vec, v, 6), (uint32_t)v >> 16U, 7); } +#else +CPPSPMD_FORCE_INLINE __m128i insert_x(const __m128i& vec, int v) { return _mm_insert_epi32(vec, v, 0); } +CPPSPMD_FORCE_INLINE __m128i insert_y(const __m128i& vec, int v) { return _mm_insert_epi32(vec, v, 1); } +CPPSPMD_FORCE_INLINE __m128i insert_z(const __m128i& vec, int v) { return _mm_insert_epi32(vec, v, 2); } +CPPSPMD_FORCE_INLINE __m128i insert_w(const __m128i& vec, int v) { return _mm_insert_epi32(vec, v, 3); } +#endif + +#if CPPSPMD_SSE2 +inline __m128i shuffle_epi8(const __m128i& a, const __m128i& b) +{ + // Just emulate _mm_shuffle_epi8. This is very slow, but what else can we do? + CPPSPMD_ALIGN(16) uint8_t av[16]; + _mm_store_si128((__m128i*)av, a); + + CPPSPMD_ALIGN(16) uint8_t bvi[16]; + _mm_store_ps((float*)bvi, _mm_and_ps(_mm_castsi128_ps(b), _mm_castsi128_ps(_mm_set1_epi32(0x0F0F0F0F)))); + + CPPSPMD_ALIGN(16) uint8_t result[16]; + + result[0] = av[bvi[0]]; + result[1] = av[bvi[1]]; + result[2] = av[bvi[2]]; + result[3] = av[bvi[3]]; + + result[4] = av[bvi[4]]; + result[5] = av[bvi[5]]; + result[6] = av[bvi[6]]; + result[7] = av[bvi[7]]; + + result[8] = av[bvi[8]]; + result[9] = av[bvi[9]]; + result[10] = av[bvi[10]]; + result[11] = av[bvi[11]]; + + result[12] = av[bvi[12]]; + result[13] = av[bvi[13]]; + result[14] = av[bvi[14]]; + result[15] = av[bvi[15]]; + + return _mm_andnot_si128(_mm_cmplt_epi8(b, _mm_setzero_si128()), _mm_load_si128((__m128i*)result)); +} +#else +CPPSPMD_FORCE_INLINE __m128i shuffle_epi8(const __m128i& a, const __m128i& b) +{ + return _mm_shuffle_epi8(a, b); +} +#endif + +#if CPPSPMD_SSE2 +CPPSPMD_FORCE_INLINE __m128i min_epi32(__m128i a, __m128i b) +{ + return blendv_mask_epi32(b, a, 
_mm_cmplt_epi32(a, b)); +} +CPPSPMD_FORCE_INLINE __m128i max_epi32(__m128i a, __m128i b) +{ + return blendv_mask_epi32(b, a, _mm_cmpgt_epi32(a, b)); +} +CPPSPMD_FORCE_INLINE __m128i min_epu32(__m128i a, __m128i b) +{ + __m128i n = _mm_set1_epi32(0x80000000); + __m128i ac = _mm_add_epi32(a, n); + __m128i bc = _mm_add_epi32(b, n); + return blendv_mask_epi32(b, a, _mm_cmplt_epi32(ac, bc)); +} +CPPSPMD_FORCE_INLINE __m128i max_epu32(__m128i a, __m128i b) +{ + __m128i n = _mm_set1_epi32(0x80000000); + __m128i ac = _mm_add_epi32(a, n); + __m128i bc = _mm_add_epi32(b, n); + return blendv_mask_epi32(b, a, _mm_cmpgt_epi32(ac, bc)); +} +#else +CPPSPMD_FORCE_INLINE __m128i min_epi32(__m128i a, __m128i b) +{ + return _mm_min_epi32(a, b); +} +CPPSPMD_FORCE_INLINE __m128i max_epi32(__m128i a, __m128i b) +{ + return _mm_max_epi32(a, b); +} +CPPSPMD_FORCE_INLINE __m128i min_epu32(__m128i a, __m128i b) +{ + return _mm_min_epu32(a, b); +} +CPPSPMD_FORCE_INLINE __m128i max_epu32(__m128i a, __m128i b) +{ + return _mm_max_epu32(a, b); +} +#endif + +#if CPPSPMD_SSE2 +CPPSPMD_FORCE_INLINE __m128i abs_epi32(__m128i a) +{ + __m128i sign_mask = _mm_srai_epi32(a, 31); + return _mm_sub_epi32(_mm_castps_si128(_mm_xor_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(sign_mask))), sign_mask); +} +#else +CPPSPMD_FORCE_INLINE __m128i abs_epi32(__m128i a) +{ + return _mm_abs_epi32(a); +} +#endif + +#if CPPSPMD_SSE2 +CPPSPMD_FORCE_INLINE __m128i mullo_epi32(__m128i a, __m128i b) +{ + __m128i tmp1 = _mm_mul_epu32(a, b); + __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)); + return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); +} +#else +CPPSPMD_FORCE_INLINE __m128i mullo_epi32(__m128i a, __m128i b) +{ + return _mm_mullo_epi32(a, b); +} +#endif + +CPPSPMD_FORCE_INLINE __m128i mulhi_epu32(__m128i a, __m128i b) +{ + __m128i tmp1 = _mm_mul_epu32(a, b); + __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(a, 4), 
_mm_srli_si128(b, 4)); + return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 3, 1)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 3, 1))); +} + +#if CPPSPMD_SSE2 +inline __m128i load_rgba32(const void* p) +{ + __m128i xmm = _mm_cvtsi32_si128(*(const int*)p); + xmm = _mm_unpacklo_epi8(xmm, _mm_setzero_si128()); + xmm = _mm_unpacklo_epi16(xmm, _mm_setzero_si128()); + return xmm; +} +#else +inline __m128i load_rgba32(const void* p) +{ + return _mm_cvtepu8_epi32(_mm_castps_si128(_mm_load_ss((const float*)p))); +} +#endif + +inline void transpose4x4(__m128i& x, __m128i& y, __m128i& z, __m128i& w, const __m128i& r0, const __m128i& r1, const __m128i& r2, const __m128i& r3) +{ + __m128i t0 = _mm_unpacklo_epi32(r0, r1); + __m128i t1 = _mm_unpacklo_epi32(r2, r3); + __m128i t2 = _mm_unpackhi_epi32(r0, r1); + __m128i t3 = _mm_unpackhi_epi32(r2, r3); + x = _mm_unpacklo_epi64(t0, t1); + y = _mm_unpackhi_epi64(t0, t1); + z = _mm_unpacklo_epi64(t2, t3); + w = _mm_unpackhi_epi64(t2, t3); +} + +const uint32_t ALL_ON_MOVEMASK = 0xF; + +struct spmd_kernel +{ + struct vint; + struct lint; + struct vbool; + struct vfloat; + + typedef int int_t; + typedef vint vint_t; + typedef lint lint_t; + + // Exec mask + struct exec_mask + { + __m128i m_mask; + + exec_mask() = default; + + CPPSPMD_FORCE_INLINE explicit exec_mask(const vbool& b); + CPPSPMD_FORCE_INLINE explicit exec_mask(const __m128i& mask) : m_mask(mask) { } + + CPPSPMD_FORCE_INLINE void enable_lane(uint32_t lane) { m_mask = _mm_load_si128((const __m128i *)&g_lane_masks_128[lane][0]); } + + static CPPSPMD_FORCE_INLINE exec_mask all_on() { return exec_mask{ _mm_load_si128((const __m128i*)g_allones_128) }; } + static CPPSPMD_FORCE_INLINE exec_mask all_off() { return exec_mask{ _mm_setzero_si128() }; } + + CPPSPMD_FORCE_INLINE uint32_t get_movemask() const { return _mm_movemask_ps(_mm_castsi128_ps(m_mask)); } + }; + + friend CPPSPMD_FORCE_INLINE bool all(const exec_mask& e); + friend CPPSPMD_FORCE_INLINE bool any(const 
exec_mask& e); + + CPPSPMD_FORCE_INLINE bool spmd_all() const { return all(m_exec); } + CPPSPMD_FORCE_INLINE bool spmd_any() const { return any(m_exec); } + CPPSPMD_FORCE_INLINE bool spmd_none() { return !any(m_exec); } + + // true if cond is true for all active lanes - false if no active lanes + CPPSPMD_FORCE_INLINE bool spmd_all(const vbool& e) { uint32_t m = m_exec.get_movemask(); return (m != 0) && ((exec_mask(e) & m_exec).get_movemask() == m); } + // true if cond is true for any active lanes + CPPSPMD_FORCE_INLINE bool spmd_any(const vbool& e) { return (exec_mask(e) & m_exec).get_movemask() != 0; } + CPPSPMD_FORCE_INLINE bool spmd_none(const vbool& e) { return !spmd_any(e); } + + friend CPPSPMD_FORCE_INLINE exec_mask operator^ (const exec_mask& a, const exec_mask& b); + friend CPPSPMD_FORCE_INLINE exec_mask operator& (const exec_mask& a, const exec_mask& b); + friend CPPSPMD_FORCE_INLINE exec_mask operator| (const exec_mask& a, const exec_mask& b); + + exec_mask m_exec; + exec_mask m_kernel_exec; + exec_mask m_continue_mask; +#ifdef _DEBUG + bool m_in_loop; +#endif + + CPPSPMD_FORCE_INLINE uint32_t get_movemask() const { return m_exec.get_movemask(); } + + void init(const exec_mask& kernel_exec); + + // Varying bool + + struct vbool + { + __m128i m_value; + + vbool() = default; + + CPPSPMD_FORCE_INLINE vbool(bool value) : m_value(_mm_set1_epi32(value ? 
UINT32_MAX : 0)) { } + + CPPSPMD_FORCE_INLINE explicit vbool(const __m128i& value) : m_value(value) { } + + CPPSPMD_FORCE_INLINE explicit operator vfloat() const; + CPPSPMD_FORCE_INLINE explicit operator vint() const; + + private: + //vbool& operator=(const vbool&); + }; + + friend vbool operator!(const vbool& v); + + CPPSPMD_FORCE_INLINE vbool& store(vbool& dst, const vbool& src) + { + dst.m_value = blendv_mask_epi32(dst.m_value, src.m_value, m_exec.m_mask); + return dst; + } + + CPPSPMD_FORCE_INLINE vbool& store_all(vbool& dst, const vbool& src) + { + dst.m_value = src.m_value; + return dst; + } + + // Varying float + struct vfloat + { + __m128 m_value; + + vfloat() = default; + + CPPSPMD_FORCE_INLINE explicit vfloat(const __m128& v) : m_value(v) { } + + CPPSPMD_FORCE_INLINE vfloat(float value) : m_value(_mm_set1_ps(value)) { } + + CPPSPMD_FORCE_INLINE explicit vfloat(int value) : m_value(_mm_set1_ps((float)value)) { } + + private: + //vfloat& operator=(const vfloat&); + }; + + CPPSPMD_FORCE_INLINE vfloat& store(vfloat& dst, const vfloat& src) + { + dst.m_value = blendv_mask_ps(dst.m_value, src.m_value, _mm_castsi128_ps(m_exec.m_mask)); + return dst; + } + + CPPSPMD_FORCE_INLINE vfloat& store(vfloat&& dst, const vfloat& src) + { + dst.m_value = blendv_mask_ps(dst.m_value, src.m_value, _mm_castsi128_ps(m_exec.m_mask)); + return dst; + } + + CPPSPMD_FORCE_INLINE vfloat& store_all(vfloat& dst, const vfloat& src) + { + dst.m_value = src.m_value; + return dst; + } + + CPPSPMD_FORCE_INLINE vfloat& store_all(vfloat&& dst, const vfloat& src) + { + dst.m_value = src.m_value; + return dst; + } + + // Linear ref to floats + struct float_lref + { + float* m_pValue; + + private: + //float_lref& operator=(const float_lref&); + }; + + CPPSPMD_FORCE_INLINE const float_lref& store(const float_lref& dst, const vfloat& src) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + if (mask == ALL_ON_MOVEMASK) + _mm_storeu_ps(dst.m_pValue, src.m_value); + else + 
_mm_storeu_ps(dst.m_pValue, blendv_mask_ps(_mm_loadu_ps(dst.m_pValue), src.m_value, _mm_castsi128_ps(m_exec.m_mask))); + return dst; + } + + CPPSPMD_FORCE_INLINE const float_lref& store(const float_lref&& dst, const vfloat& src) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + if (mask == ALL_ON_MOVEMASK) + _mm_storeu_ps(dst.m_pValue, src.m_value); + else + _mm_storeu_ps(dst.m_pValue, blendv_mask_ps(_mm_loadu_ps(dst.m_pValue), src.m_value, _mm_castsi128_ps(m_exec.m_mask))); + return dst; + } + + CPPSPMD_FORCE_INLINE const float_lref& store_all(const float_lref& dst, const vfloat& src) + { + _mm_storeu_ps(dst.m_pValue, src.m_value); + return dst; + } + + CPPSPMD_FORCE_INLINE const float_lref& store_all(const float_lref&& dst, const vfloat& src) + { + _mm_storeu_ps(dst.m_pValue, src.m_value); + return dst; + } + + CPPSPMD_FORCE_INLINE vfloat load(const float_lref& src) + { + return vfloat{ _mm_and_ps(_mm_loadu_ps(src.m_pValue), _mm_castsi128_ps(m_exec.m_mask)) }; + } + + // Varying ref to floats + struct float_vref + { + __m128i m_vindex; + float* m_pValue; + + private: + //float_vref& operator=(const float_vref&); + }; + + // Varying ref to varying float + struct vfloat_vref + { + __m128i m_vindex; + vfloat* m_pValue; + + private: + //vfloat_vref& operator=(const vfloat_vref&); + }; + + // Varying ref to varying int + struct vint_vref + { + __m128i m_vindex; + vint* m_pValue; + + private: + //vint_vref& operator=(const vint_vref&); + }; + + CPPSPMD_FORCE_INLINE const float_vref& store(const float_vref& dst, const vfloat& src); + CPPSPMD_FORCE_INLINE const float_vref& store(const float_vref&& dst, const vfloat& src); + + CPPSPMD_FORCE_INLINE const float_vref& store_all(const float_vref& dst, const vfloat& src); + CPPSPMD_FORCE_INLINE const float_vref& store_all(const float_vref&& dst, const vfloat& src); + + CPPSPMD_FORCE_INLINE vfloat load(const float_vref& src) + { + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i *)vindex, 
src.m_vindex); + + CPPSPMD_ALIGN(16) float loaded[4]; + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + loaded[i] = src.m_pValue[vindex[i]]; + } + return vfloat{ _mm_and_ps(_mm_castsi128_ps(m_exec.m_mask), _mm_load_ps((const float*)loaded)) }; + } + + CPPSPMD_FORCE_INLINE vfloat load_all(const float_vref& src) + { + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i *)vindex, src.m_vindex); + + CPPSPMD_ALIGN(16) float loaded[4]; + + for (int i = 0; i < 4; i++) + loaded[i] = src.m_pValue[vindex[i]]; + return vfloat{ _mm_load_ps((const float*)loaded) }; + } + + // Linear ref to ints + struct int_lref + { + int* m_pValue; + + private: + //int_lref& operator=(const int_lref&); + }; + + CPPSPMD_FORCE_INLINE const int_lref& store(const int_lref& dst, const vint& src) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + if (mask == ALL_ON_MOVEMASK) + { + _mm_storeu_si128((__m128i *)dst.m_pValue, src.m_value); + } + else + { + CPPSPMD_ALIGN(16) int stored[4]; + _mm_store_si128((__m128i *)stored, src.m_value); + + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + dst.m_pValue[i] = stored[i]; + } + } + return dst; + } + + CPPSPMD_FORCE_INLINE vint load(const int_lref& src) + { + __m128i v = _mm_loadu_si128((const __m128i*)src.m_pValue); + + v = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(v), _mm_castsi128_ps(m_exec.m_mask))); + + return vint{ v }; + } + + // Linear ref to int16's + struct int16_lref + { + int16_t* m_pValue; + + private: + //int16_lref& operator=(const int16_lref&); + }; + + CPPSPMD_FORCE_INLINE const int16_lref& store(const int16_lref& dst, const vint& src) + { + CPPSPMD_ALIGN(16) int stored[4]; + _mm_store_si128((__m128i *)stored, src.m_value); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + dst.m_pValue[i] = static_cast(stored[i]); + } + return dst; + } + + 
CPPSPMD_FORCE_INLINE const int16_lref& store_all(const int16_lref& dst, const vint& src) + { + CPPSPMD_ALIGN(16) int stored[4]; + _mm_store_si128((__m128i *)stored, src.m_value); + + for (int i = 0; i < 4; i++) + dst.m_pValue[i] = static_cast(stored[i]); + return dst; + } + + CPPSPMD_FORCE_INLINE vint load(const int16_lref& src) + { + CPPSPMD_ALIGN(16) int values[4]; + + for (int i = 0; i < 4; i++) + values[i] = static_cast(src.m_pValue[i]); + + __m128i t = _mm_load_si128( (const __m128i *)values ); + + return vint{ _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps( t ), _mm_castsi128_ps(m_exec.m_mask))) }; + } + + CPPSPMD_FORCE_INLINE vint load_all(const int16_lref& src) + { + CPPSPMD_ALIGN(16) int values[4]; + + for (int i = 0; i < 4; i++) + values[i] = static_cast(src.m_pValue[i]); + + __m128i t = _mm_load_si128( (const __m128i *)values ); + + return vint{ t }; + } + + // Linear ref to constant ints + struct cint_lref + { + const int* m_pValue; + + private: + //cint_lref& operator=(const cint_lref&); + }; + + CPPSPMD_FORCE_INLINE vint load(const cint_lref& src) + { + __m128i v = _mm_loadu_si128((const __m128i *)src.m_pValue); + v = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(v), _mm_castsi128_ps(m_exec.m_mask))); + return vint{ v }; + } + + CPPSPMD_FORCE_INLINE vint load_all(const cint_lref& src) + { + return vint{ _mm_loadu_si128((const __m128i *)src.m_pValue) }; + } + + // Varying ref to ints + struct int_vref + { + __m128i m_vindex; + int* m_pValue; + + private: + //int_vref& operator=(const int_vref&); + }; + + // Varying ref to constant ints + struct cint_vref + { + __m128i m_vindex; + const int* m_pValue; + + private: + //cint_vref& operator=(const cint_vref&); + }; + + // Varying int + struct vint + { + __m128i m_value; + + vint() = default; + + CPPSPMD_FORCE_INLINE explicit vint(const __m128i& value) : m_value(value) { } + + CPPSPMD_FORCE_INLINE explicit vint(const lint &other) : m_value(other.m_value) { } + + CPPSPMD_FORCE_INLINE vint& operator=(const lint& 
other) { m_value = other.m_value; return *this; } + + CPPSPMD_FORCE_INLINE vint(int value) : m_value(_mm_set1_epi32(value)) { } + + CPPSPMD_FORCE_INLINE explicit vint(float value) : m_value(_mm_set1_epi32((int)value)) { } + + CPPSPMD_FORCE_INLINE explicit vint(const vfloat& other) : m_value(_mm_cvttps_epi32(other.m_value)) { } + + CPPSPMD_FORCE_INLINE explicit operator vbool() const + { + return vbool{ _mm_xor_si128( _mm_load_si128((const __m128i*)g_allones_128), _mm_cmpeq_epi32(m_value, _mm_setzero_si128())) }; + } + + CPPSPMD_FORCE_INLINE explicit operator vfloat() const + { + return vfloat{ _mm_cvtepi32_ps(m_value) }; + } + + CPPSPMD_FORCE_INLINE int_vref operator[](int* ptr) const + { + return int_vref{ m_value, ptr }; + } + + CPPSPMD_FORCE_INLINE cint_vref operator[](const int* ptr) const + { + return cint_vref{ m_value, ptr }; + } + + CPPSPMD_FORCE_INLINE float_vref operator[](float* ptr) const + { + return float_vref{ m_value, ptr }; + } + + CPPSPMD_FORCE_INLINE vfloat_vref operator[](vfloat* ptr) const + { + return vfloat_vref{ m_value, ptr }; + } + + CPPSPMD_FORCE_INLINE vint_vref operator[](vint* ptr) const + { + return vint_vref{ m_value, ptr }; + } + + private: + //vint& operator=(const vint&); + }; + + // Load/store linear int + CPPSPMD_FORCE_INLINE void storeu_linear(int *pDst, const vint& src) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + if (mask == ALL_ON_MOVEMASK) + _mm_storeu_si128((__m128i *)pDst, src.m_value); + else + { + if (mask & 1) pDst[0] = extract_x(src.m_value); + if (mask & 2) pDst[1] = extract_y(src.m_value); + if (mask & 4) pDst[2] = extract_z(src.m_value); + if (mask & 8) pDst[3] = extract_w(src.m_value); + } + } + + CPPSPMD_FORCE_INLINE void storeu_linear_all(int *pDst, const vint& src) + { + _mm_storeu_si128((__m128i*)pDst, src.m_value); + } + + CPPSPMD_FORCE_INLINE void store_linear_all(int *pDst, const vint& src) + { + _mm_store_si128((__m128i*)pDst, src.m_value); + } + + CPPSPMD_FORCE_INLINE vint 
loadu_linear(const int *pSrc) + { + __m128i v = _mm_loadu_si128((const __m128i*)pSrc); + + v = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(v), _mm_castsi128_ps(m_exec.m_mask))); + + return vint{ v }; + } + + CPPSPMD_FORCE_INLINE vint loadu_linear_all(const int *pSrc) + { + return vint{ _mm_loadu_si128((__m128i*)pSrc) }; + } + + CPPSPMD_FORCE_INLINE vint load_linear_all(const int *pSrc) + { + return vint{ _mm_load_si128((__m128i*)pSrc) }; + } + + // Load/store linear float + CPPSPMD_FORCE_INLINE void storeu_linear(float *pDst, const vfloat& src) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + if (mask == ALL_ON_MOVEMASK) + _mm_storeu_ps((float*)pDst, src.m_value); + else + { + int *pDstI = (int *)pDst; + if (mask & 1) pDstI[0] = extract_ps_x(src.m_value); + if (mask & 2) pDstI[1] = extract_ps_y(src.m_value); + if (mask & 4) pDstI[2] = extract_ps_z(src.m_value); + if (mask & 8) pDstI[3] = extract_ps_w(src.m_value); + } + } + + CPPSPMD_FORCE_INLINE void storeu_linear_all(float *pDst, const vfloat& src) + { + _mm_storeu_ps((float*)pDst, src.m_value); + } + + CPPSPMD_FORCE_INLINE void store_linear_all(float *pDst, const vfloat& src) + { + _mm_store_ps((float*)pDst, src.m_value); + } + + CPPSPMD_FORCE_INLINE vfloat loadu_linear(const float *pSrc) + { + __m128 v = _mm_loadu_ps((const float*)pSrc); + + v = _mm_and_ps(v, _mm_castsi128_ps(m_exec.m_mask)); + + return vfloat{ v }; + } + + CPPSPMD_FORCE_INLINE vfloat loadu_linear_all(const float *pSrc) + { + return vfloat{ _mm_loadu_ps((float*)pSrc) }; + } + + CPPSPMD_FORCE_INLINE vfloat load_linear_all(const float *pSrc) + { + return vfloat{ _mm_load_ps((float*)pSrc) }; + } + + CPPSPMD_FORCE_INLINE vint& store(vint& dst, const vint& src) + { + dst.m_value = blendv_mask_epi32(dst.m_value, src.m_value, m_exec.m_mask); + return dst; + } + + CPPSPMD_FORCE_INLINE const int_vref& store(const int_vref& dst, const vint& src) + { + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); 
+ + CPPSPMD_ALIGN(16) int stored[4]; + _mm_store_si128((__m128i*)stored, src.m_value); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + dst.m_pValue[vindex[i]] = stored[i]; + } + return dst; + } + + CPPSPMD_FORCE_INLINE vint& store_all(vint& dst, const vint& src) + { + dst.m_value = src.m_value; + return dst; + } + + CPPSPMD_FORCE_INLINE const int_vref& store_all(const int_vref& dst, const vint& src) + { + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) int stored[4]; + _mm_store_si128((__m128i*)stored, src.m_value); + + for (int i = 0; i < 4; i++) + dst.m_pValue[vindex[i]] = stored[i]; + + return dst; + } + + CPPSPMD_FORCE_INLINE vint load(const int_vref& src) + { + CPPSPMD_ALIGN(16) int values[4]; + + CPPSPMD_ALIGN(16) int indices[4]; + _mm_store_si128((__m128i *)indices, src.m_vindex); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + values[i] = src.m_pValue[indices[i]]; + } + + return vint{ _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(m_exec.m_mask), _mm_load_ps((const float*)values))) }; + } + + CPPSPMD_FORCE_INLINE vint load_all(const int_vref& src) + { + CPPSPMD_ALIGN(16) int values[4]; + + CPPSPMD_ALIGN(16) int indices[4]; + _mm_store_si128((__m128i *)indices, src.m_vindex); + + for (int i = 0; i < 4; i++) + values[i] = src.m_pValue[indices[i]]; + + return vint{ _mm_castps_si128( _mm_load_ps((const float*)values)) }; + } + + CPPSPMD_FORCE_INLINE vint load(const cint_vref& src) + { + CPPSPMD_ALIGN(16) int values[4]; + + CPPSPMD_ALIGN(16) int indices[4]; + _mm_store_si128((__m128i *)indices, src.m_vindex); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + values[i] = src.m_pValue[indices[i]]; + } + + return vint{ _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(m_exec.m_mask), 
_mm_load_ps((const float*)values))) }; + } + + CPPSPMD_FORCE_INLINE vint load_all(const cint_vref& src) + { + CPPSPMD_ALIGN(16) int values[4]; + + CPPSPMD_ALIGN(16) int indices[4]; + _mm_store_si128((__m128i *)indices, src.m_vindex); + + for (int i = 0; i < 4; i++) + values[i] = src.m_pValue[indices[i]]; + + return vint{ _mm_castps_si128( _mm_load_ps((const float*)values)) }; + } + + CPPSPMD_FORCE_INLINE vint load_bytes_all(const cint_vref& src) + { + __m128i v0_l; + + const uint8_t* pSrc = (const uint8_t*)src.m_pValue; + v0_l = insert_x(_mm_undefined_si128(), ((int*)(pSrc + extract_x(src.m_vindex)))[0]); + v0_l = insert_y(v0_l, ((int*)(pSrc + extract_y(src.m_vindex)))[0]); + v0_l = insert_z(v0_l, ((int*)(pSrc + extract_z(src.m_vindex)))[0]); + v0_l = insert_w(v0_l, ((int*)(pSrc + extract_w(src.m_vindex)))[0]); + + return vint{ v0_l }; + } + + CPPSPMD_FORCE_INLINE vint load_words_all(const cint_vref& src) + { + __m128i v0_l; + + const uint8_t* pSrc = (const uint8_t*)src.m_pValue; + v0_l = insert_x(_mm_undefined_si128(), ((int16_t*)(pSrc + 2 * extract_x(src.m_vindex)))[0]); + v0_l = insert_y(v0_l, ((int16_t*)(pSrc + 2 * extract_y(src.m_vindex)))[0]); + v0_l = insert_z(v0_l, ((int16_t*)(pSrc + 2 * extract_z(src.m_vindex)))[0]); + v0_l = insert_w(v0_l, ((int16_t*)(pSrc + 2 * extract_w(src.m_vindex)))[0]); + + return vint{ v0_l }; + } + + CPPSPMD_FORCE_INLINE void store_strided(int *pDst, uint32_t stride, const vint &v) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + + if (mask & 1) pDst[0] = extract_x(v.m_value); + if (mask & 2) pDst[stride] = extract_y(v.m_value); + if (mask & 4) pDst[stride*2] = extract_z(v.m_value); + if (mask & 8) pDst[stride*3] = extract_w(v.m_value); + } + + CPPSPMD_FORCE_INLINE void store_strided(float *pDstF, uint32_t stride, const vfloat &v) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + + if (mask & 1) ((int *)pDstF)[0] = extract_ps_x(v.m_value); + if (mask & 2) ((int *)pDstF)[stride] = 
extract_ps_y(v.m_value); + if (mask & 4) ((int *)pDstF)[stride*2] = extract_ps_z(v.m_value); + if (mask & 8) ((int *)pDstF)[stride*3] = extract_ps_w(v.m_value); + } + + CPPSPMD_FORCE_INLINE void store_all_strided(int *pDst, uint32_t stride, const vint &v) + { + pDst[0] = extract_x(v.m_value); + pDst[stride] = extract_y(v.m_value); + pDst[stride*2] = extract_z(v.m_value); + pDst[stride*3] = extract_w(v.m_value); + } + + CPPSPMD_FORCE_INLINE void store_all_strided(float *pDstF, uint32_t stride, const vfloat &v) + { + ((int *)pDstF)[0] = extract_ps_x(v.m_value); + ((int *)pDstF)[stride] = extract_ps_y(v.m_value); + ((int *)pDstF)[stride*2] = extract_ps_z(v.m_value); + ((int *)pDstF)[stride*3] = extract_ps_w(v.m_value); + } + + CPPSPMD_FORCE_INLINE vint load_strided(const int *pSrc, uint32_t stride) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + +#if CPPSPMD_SSE2 + CPPSPMD_ALIGN(16) int vals[4] = { 0, 0, 0, 0 }; + if (mask & 1) vals[0] = pSrc[0]; + if (mask & 2) vals[1] = pSrc[stride]; + if (mask & 4) vals[2] = pSrc[stride * 2]; + if (mask & 8) vals[3] = pSrc[stride * 3]; + return vint{ _mm_load_si128((__m128i*)vals) }; +#else + const float* pSrcF = (const float*)pSrc; + __m128 v = _mm_setzero_ps(); + if (mask & 1) v = _mm_load_ss(pSrcF); + if (mask & 2) v = _mm_insert_ps(v, _mm_load_ss(pSrcF + stride), 0x10); + if (mask & 4) v = _mm_insert_ps(v, _mm_load_ss(pSrcF + 2 * stride), 0x20); + if (mask & 8) v = _mm_insert_ps(v, _mm_load_ss(pSrcF + 3 * stride), 0x30); + return vint{ _mm_castps_si128(v) }; +#endif + } + + CPPSPMD_FORCE_INLINE vfloat load_strided(const float *pSrc, uint32_t stride) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + +#if CPPSPMD_SSE2 + CPPSPMD_ALIGN(16) float vals[4] = { 0, 0, 0, 0 }; + if (mask & 1) vals[0] = pSrc[0]; + if (mask & 2) vals[1] = pSrc[stride]; + if (mask & 4) vals[2] = pSrc[stride * 2]; + if (mask & 8) vals[3] = pSrc[stride * 3]; + return vfloat{ _mm_load_ps(vals) }; +#else + __m128 v = 
_mm_setzero_ps(); + if (mask & 1) v = _mm_load_ss(pSrc); + if (mask & 2) v = _mm_insert_ps(v, _mm_load_ss(pSrc + stride), 0x10); + if (mask & 4) v = _mm_insert_ps(v, _mm_load_ss(pSrc + 2 * stride), 0x20); + if (mask & 8) v = _mm_insert_ps(v, _mm_load_ss(pSrc + 3 * stride), 0x30); + return vfloat{ v }; +#endif + } + + CPPSPMD_FORCE_INLINE vint load_all_strided(const int *pSrc, uint32_t stride) + { +#if CPPSPMD_SSE2 + CPPSPMD_ALIGN(16) int vals[4]; + vals[0] = pSrc[0]; + vals[1] = pSrc[stride]; + vals[2] = pSrc[stride * 2]; + vals[3] = pSrc[stride * 3]; + return vint{ _mm_load_si128((__m128i*)vals) }; +#else + const float* pSrcF = (const float*)pSrc; + __m128 v = _mm_load_ss(pSrcF); + v = _mm_insert_ps(v, _mm_load_ss(pSrcF + stride), 0x10); + v = _mm_insert_ps(v, _mm_load_ss(pSrcF + 2 * stride), 0x20); + v = _mm_insert_ps(v, _mm_load_ss(pSrcF + 3 * stride), 0x30); + return vint{ _mm_castps_si128(v) }; +#endif + } + + CPPSPMD_FORCE_INLINE vfloat load_all_strided(const float *pSrc, uint32_t stride) + { +#if CPPSPMD_SSE2 + CPPSPMD_ALIGN(16) float vals[4]; + vals[0] = pSrc[0]; + vals[1] = pSrc[stride]; + vals[2] = pSrc[stride * 2]; + vals[3] = pSrc[stride * 3]; + return vfloat{ _mm_load_ps(vals) }; +#else + __m128 v = _mm_load_ss(pSrc); + v = _mm_insert_ps(v, _mm_load_ss(pSrc + stride), 0x10); + v = _mm_insert_ps(v, _mm_load_ss(pSrc + 2 * stride), 0x20); + v = _mm_insert_ps(v, _mm_load_ss(pSrc + 3 * stride), 0x30); + return vfloat{ v }; +#endif + } + + CPPSPMD_FORCE_INLINE const vfloat_vref& store(const vfloat_vref& dst, const vfloat& src) + { + // TODO: There's surely a better way + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + + if (mask & 1) ((int *)(&dst.m_pValue[extract_x(dst.m_vindex)]))[0] = extract_x(_mm_castps_si128(src.m_value)); + if (mask & 2) ((int *)(&dst.m_pValue[extract_y(dst.m_vindex)]))[1] = extract_y(_mm_castps_si128(src.m_value)); + if (mask & 4) ((int *)(&dst.m_pValue[extract_z(dst.m_vindex)]))[2] = 
extract_z(_mm_castps_si128(src.m_value)); + if (mask & 8) ((int *)(&dst.m_pValue[extract_w(dst.m_vindex)]))[3] = extract_w(_mm_castps_si128(src.m_value)); + + return dst; + } + + CPPSPMD_FORCE_INLINE vfloat load(const vfloat_vref& src) + { + // TODO: There's surely a better way + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + + __m128i k = _mm_setzero_si128(); + + if (mask & 1) k = insert_x(k, ((int *)(&src.m_pValue[extract_x(src.m_vindex)]))[0]); + if (mask & 2) k = insert_y(k, ((int *)(&src.m_pValue[extract_y(src.m_vindex)]))[1]); + if (mask & 4) k = insert_z(k, ((int *)(&src.m_pValue[extract_z(src.m_vindex)]))[2]); + if (mask & 8) k = insert_w(k, ((int *)(&src.m_pValue[extract_w(src.m_vindex)]))[3]); + + return vfloat{ _mm_castsi128_ps(k) }; + } + + CPPSPMD_FORCE_INLINE const vint_vref& store(const vint_vref& dst, const vint& src) + { + // TODO: There's surely a better way + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + + if (mask & 1) ((int *)(&dst.m_pValue[extract_x(dst.m_vindex)]))[0] = extract_x(src.m_value); + if (mask & 2) ((int *)(&dst.m_pValue[extract_y(dst.m_vindex)]))[1] = extract_y(src.m_value); + if (mask & 4) ((int *)(&dst.m_pValue[extract_z(dst.m_vindex)]))[2] = extract_z(src.m_value); + if (mask & 8) ((int *)(&dst.m_pValue[extract_w(dst.m_vindex)]))[3] = extract_w(src.m_value); + + return dst; + } + + CPPSPMD_FORCE_INLINE vint load(const vint_vref& src) + { + // TODO: There's surely a better way + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + + __m128i k = _mm_setzero_si128(); + + if (mask & 1) k = insert_x(k, ((int *)(&src.m_pValue[extract_x(src.m_vindex)]))[0]); + if (mask & 2) k = insert_y(k, ((int *)(&src.m_pValue[extract_y(src.m_vindex)]))[1]); + if (mask & 4) k = insert_z(k, ((int *)(&src.m_pValue[extract_z(src.m_vindex)]))[2]); + if (mask & 8) k = insert_w(k, ((int *)(&src.m_pValue[extract_w(src.m_vindex)]))[3]); + + return vint{ k }; + } + + CPPSPMD_FORCE_INLINE vint load_all(const 
vint_vref& src) + { + // TODO: There's surely a better way + __m128i k = _mm_setzero_si128(); + + k = insert_x(k, ((int*)(&src.m_pValue[extract_x(src.m_vindex)]))[0]); + k = insert_y(k, ((int*)(&src.m_pValue[extract_y(src.m_vindex)]))[1]); + k = insert_z(k, ((int*)(&src.m_pValue[extract_z(src.m_vindex)]))[2]); + k = insert_w(k, ((int*)(&src.m_pValue[extract_w(src.m_vindex)]))[3]); + + return vint{ k }; + } + + // Linear integer + struct lint + { + __m128i m_value; + + CPPSPMD_FORCE_INLINE explicit lint(__m128i value) + : m_value(value) + { } + + CPPSPMD_FORCE_INLINE explicit operator vfloat() const + { + return vfloat{ _mm_cvtepi32_ps(m_value) }; + } + + CPPSPMD_FORCE_INLINE explicit operator vint() const + { + return vint{ m_value }; + } + + CPPSPMD_FORCE_INLINE int get_first_value() const + { + return _mm_cvtsi128_si32(m_value); + } + + CPPSPMD_FORCE_INLINE float_lref operator[](float* ptr) const + { + return float_lref{ ptr + get_first_value() }; + } + + CPPSPMD_FORCE_INLINE int_lref operator[](int* ptr) const + { + return int_lref{ ptr + get_first_value() }; + } + + CPPSPMD_FORCE_INLINE int16_lref operator[](int16_t* ptr) const + { + return int16_lref{ ptr + get_first_value() }; + } + + CPPSPMD_FORCE_INLINE cint_lref operator[](const int* ptr) const + { + return cint_lref{ ptr + get_first_value() }; + } + + private: + //lint& operator=(const lint&); + }; + + CPPSPMD_FORCE_INLINE lint& store_all(lint& dst, const lint& src) + { + dst.m_value = src.m_value; + return dst; + } + + const lint program_index = lint{ _mm_set_epi32( 3, 2, 1, 0 ) }; + + // SPMD condition helpers + + template + CPPSPMD_FORCE_INLINE void spmd_if(const vbool& cond, const IfBody& ifBody); + + CPPSPMD_FORCE_INLINE void spmd_if_break(const vbool& cond); + + // No breaks, continues, etc. allowed + template + CPPSPMD_FORCE_INLINE void spmd_sif(const vbool& cond, const IfBody& ifBody); + + // No breaks, continues, etc. 
allowed + template + CPPSPMD_FORCE_INLINE void spmd_sifelse(const vbool& cond, const IfBody& ifBody, const ElseBody &elseBody); + + template + CPPSPMD_FORCE_INLINE void spmd_ifelse(const vbool& cond, const IfBody& ifBody, const ElseBody& elseBody); + + template + CPPSPMD_FORCE_INLINE void spmd_while(const WhileCondBody& whileCondBody, const WhileBody& whileBody); + + template + CPPSPMD_FORCE_INLINE void spmd_for(const ForInitBody& forInitBody, const ForCondBody& forCondBody, const ForIncrBody& forIncrBody, const ForBody& forBody); + + template + CPPSPMD_FORCE_INLINE void spmd_foreach(int begin, int end, const ForeachBody& foreachBody); + +#ifdef _DEBUG + CPPSPMD_FORCE_INLINE void check_masks(); +#else + CPPSPMD_FORCE_INLINE void check_masks() { } +#endif + + CPPSPMD_FORCE_INLINE void spmd_break(); + CPPSPMD_FORCE_INLINE void spmd_continue(); + + CPPSPMD_FORCE_INLINE void spmd_return(); + + template + CPPSPMD_FORCE_INLINE void spmd_unmasked(const UnmaskedBody& unmaskedBody); + + template + //CPPSPMD_FORCE_INLINE decltype(auto) spmd_call(Args&&... args); + CPPSPMD_FORCE_INLINE void spmd_call(Args&&... 
args); + + CPPSPMD_FORCE_INLINE void swap(vint &a, vint &b) { vint temp = a; store(a, b); store(b, temp); } + CPPSPMD_FORCE_INLINE void swap(vfloat &a, vfloat &b) { vfloat temp = a; store(a, b); store(b, temp); } + CPPSPMD_FORCE_INLINE void swap(vbool &a, vbool &b) { vbool temp = a; store(a, b); store(b, temp); } + + CPPSPMD_FORCE_INLINE float reduce_add(vfloat v) + { + __m128 k3210 = _mm_castsi128_ps(blendv_mask_epi32(_mm_setzero_si128(), _mm_castps_si128(v.m_value), m_exec.m_mask)); + __m128 temp = _mm_add_ps(_mm_shuffle_ps(k3210, k3210, _MM_SHUFFLE(0, 1, 2, 3)), k3210); + return _mm_cvtss_f32(_mm_add_ss(_mm_movehl_ps(temp, temp), temp)); + } + + CPPSPMD_FORCE_INLINE int reduce_add(vint v) + { + __m128i k3210 = blendv_mask_epi32(_mm_setzero_si128(), v.m_value, m_exec.m_mask); + __m128i temp = _mm_add_epi32(_mm_shuffle_epi32(k3210, _MM_SHUFFLE(0, 1, 2, 3)), k3210); + return extract_x(_mm_add_epi32(_mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(temp), _mm_castsi128_ps(temp))), temp)); + } + + #include "cppspmd_math_declares.h" + +}; // struct spmd_kernel + +using exec_mask = spmd_kernel::exec_mask; +using vint = spmd_kernel::vint; +using int_lref = spmd_kernel::int_lref; +using cint_vref = spmd_kernel::cint_vref; +using cint_lref = spmd_kernel::cint_lref; +using int_vref = spmd_kernel::int_vref; +using lint = spmd_kernel::lint; +using vbool = spmd_kernel::vbool; +using vfloat = spmd_kernel::vfloat; +using float_lref = spmd_kernel::float_lref; +using float_vref = spmd_kernel::float_vref; +using vfloat_vref = spmd_kernel::vfloat_vref; +using vint_vref = spmd_kernel::vint_vref; + +CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vfloat() const +{ + return vfloat { _mm_and_ps( _mm_castsi128_ps(m_value), *(const __m128 *)g_onef_128 ) }; +} + +// Returns UINT32_MAX's for true, 0 for false. (Should it return 1's?) 
+CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vint() const +{ + return vint { m_value }; +} + +CPPSPMD_FORCE_INLINE vbool operator!(const vbool& v) +{ + return vbool{ _mm_castps_si128(_mm_xor_ps(_mm_load_ps((const float*)g_allones_128), _mm_castsi128_ps(v.m_value))) }; +} + +CPPSPMD_FORCE_INLINE exec_mask::exec_mask(const vbool& b) { m_mask = b.m_value; } + +CPPSPMD_FORCE_INLINE exec_mask operator^(const exec_mask& a, const exec_mask& b) { return exec_mask{ _mm_xor_si128(a.m_mask, b.m_mask) }; } +CPPSPMD_FORCE_INLINE exec_mask operator&(const exec_mask& a, const exec_mask& b) { return exec_mask{ _mm_and_si128(a.m_mask, b.m_mask) }; } +CPPSPMD_FORCE_INLINE exec_mask operator|(const exec_mask& a, const exec_mask& b) { return exec_mask{ _mm_or_si128(a.m_mask, b.m_mask) }; } + +CPPSPMD_FORCE_INLINE bool all(const exec_mask& e) { return _mm_movemask_ps(_mm_castsi128_ps(e.m_mask)) == ALL_ON_MOVEMASK; } +CPPSPMD_FORCE_INLINE bool any(const exec_mask& e) { return _mm_movemask_ps(_mm_castsi128_ps(e.m_mask)) != 0; } + +// Bad pattern - doesn't factor in the current exec mask. Prefer spmd_any() instead. 
+CPPSPMD_FORCE_INLINE bool all(const vbool& e) { return _mm_movemask_ps(_mm_castsi128_ps(e.m_value)) == ALL_ON_MOVEMASK; } +CPPSPMD_FORCE_INLINE bool any(const vbool& e) { return _mm_movemask_ps(_mm_castsi128_ps(e.m_value)) != 0; } + +CPPSPMD_FORCE_INLINE exec_mask andnot(const exec_mask& a, const exec_mask& b) { return exec_mask{ _mm_andnot_si128(a.m_mask, b.m_mask) }; } +CPPSPMD_FORCE_INLINE vbool operator||(const vbool& a, const vbool& b) { return vbool{ _mm_or_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator&&(const vbool& a, const vbool& b) { return vbool{ _mm_and_si128(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vfloat operator+(const vfloat& a, const vfloat& b) { return vfloat{ _mm_add_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& a, const vfloat& b) { return vfloat{ _mm_sub_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat operator+(float a, const vfloat& b) { return vfloat(a) + b; } +CPPSPMD_FORCE_INLINE vfloat operator+(const vfloat& a, float b) { return a + vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& a, const vint& b) { return a - vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator-(const vint& a, const vfloat& b) { return vfloat(a) - b; } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& a, int b) { return a - vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator-(int a, const vfloat& b) { return vfloat(a) - b; } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& a, float b) { return a - vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator-(float a, const vfloat& b) { return vfloat(a) - b; } + +CPPSPMD_FORCE_INLINE vfloat operator*(const vfloat& a, const vfloat& b) { return vfloat{ _mm_mul_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat operator*(const vfloat& a, float b) { return a * vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator*(float a, const vfloat& b) { return vfloat(a) * b; } +CPPSPMD_FORCE_INLINE vfloat operator*(const vfloat& a, int b) 
{ return a * vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator*(int a, const vfloat& b) { return vfloat(a) * b; } + +CPPSPMD_FORCE_INLINE vfloat operator/(const vfloat& a, const vfloat& b) { return vfloat{ _mm_div_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat operator/(const vfloat& a, int b) { return a / vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator/(int a, const vfloat& b) { return vfloat(a) / b; } +CPPSPMD_FORCE_INLINE vfloat operator/(const vfloat& a, float b) { return a / vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator/(float a, const vfloat& b) { return vfloat(a) / b; } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& v) { return vfloat{ _mm_sub_ps(_mm_xor_ps(v.m_value, v.m_value), v.m_value) }; } + +CPPSPMD_FORCE_INLINE vbool operator==(const vfloat& a, const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmpeq_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator==(const vfloat& a, float b) { return a == vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator!=(const vfloat& a, const vfloat& b) { return !vbool{ _mm_castps_si128(_mm_cmpeq_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator!=(const vfloat& a, float b) { return a != vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator<(const vfloat& a, const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmplt_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator<(const vfloat& a, float b) { return a < vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator>(const vfloat& a, const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmpgt_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator>(const vfloat& a, float b) { return a > vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator<=(const vfloat& a, const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmple_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator<=(const vfloat& a, float b) { return a <= vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator>=(const vfloat& a, 
const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmpge_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator>=(const vfloat& a, float b) { return a >= vfloat(b); } + +CPPSPMD_FORCE_INLINE vfloat spmd_ternaryf(const vbool& cond, const vfloat& a, const vfloat& b) { return vfloat{ blendv_mask_ps(b.m_value, a.m_value, _mm_castsi128_ps(cond.m_value)) }; } +CPPSPMD_FORCE_INLINE vint spmd_ternaryi(const vbool& cond, const vint& a, const vint& b) { return vint{ blendv_mask_epi32(b.m_value, a.m_value, cond.m_value) }; } + +CPPSPMD_FORCE_INLINE vfloat sqrt(const vfloat& v) { return vfloat{ _mm_sqrt_ps(v.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat abs(const vfloat& v) { return vfloat{ _mm_andnot_ps(_mm_set1_ps(-0.0f), v.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat max(const vfloat& a, const vfloat& b) { return vfloat{ _mm_max_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat min(const vfloat& a, const vfloat& b) { return vfloat{ _mm_min_ps(a.m_value, b.m_value) }; } + +#if CPPSPMD_SSE2 +CPPSPMD_FORCE_INLINE vfloat round_truncate(const vfloat& a) +{ + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU) ); + __m128i has_fractional = _mm_cmplt_epi32(abs_a, _mm_castps_si128(_mm_set1_ps(8388608.0f))); + + __m128i ai = _mm_cvttps_epi32(a.m_value); + + __m128 af = _mm_cvtepi32_ps(ai); + return vfloat{ blendv_mask_ps(a.m_value, af, _mm_castsi128_ps(has_fractional)) }; +} + +CPPSPMD_FORCE_INLINE vfloat floor(const vfloat& a) +{ + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU)); + __m128i has_fractional = _mm_cmplt_epi32(abs_a, _mm_castps_si128(_mm_set1_ps(8388608.0f))); + + __m128i ai = _mm_cvtps_epi32(a.m_value); + __m128 af = _mm_cvtepi32_ps(ai); + __m128 changed = _mm_cvtepi32_ps(_mm_castps_si128(_mm_cmpgt_ps(af, a.m_value))); + + af = _mm_add_ps(af, changed); + + return vfloat{ blendv_mask_ps(a.m_value, af, _mm_castsi128_ps(has_fractional)) }; +} + +CPPSPMD_FORCE_INLINE vfloat 
ceil(const vfloat& a) +{ + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU)); + __m128i has_fractional = _mm_cmplt_epi32(abs_a, _mm_castps_si128(_mm_set1_ps(8388608.0f))); + + __m128i ai = _mm_cvtps_epi32(a.m_value); + __m128 af = _mm_cvtepi32_ps(ai); + __m128 changed = _mm_cvtepi32_ps(_mm_castps_si128(_mm_cmplt_ps(af, a.m_value))); + + af = _mm_sub_ps(af, changed); + + return vfloat{ blendv_mask_ps(a.m_value, af, _mm_castsi128_ps(has_fractional)) }; +} + +// We need to disable unsafe math optimizations for the key operations used for rounding to nearest. +// I wish there was a better way. +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__) +inline __m128 add_sub(__m128 a, __m128 b) __attribute__((optimize("-fno-unsafe-math-optimizations"))) +#elif defined(__clang__) +inline __m128 add_sub(__m128 a, __m128 b) __attribute__((optnone)) +#elif defined (_MSC_VER) +#pragma float_control(push) +#pragma float_control(precise, on) +inline __m128 add_sub(__m128 a, __m128 b) +#else +inline __m128 add_sub(__m128 a, __m128 b) +#endif +{ + return _mm_sub_ps(_mm_add_ps(a, b), b); +} + +#if defined (_MSC_VER) +#pragma float_control(pop) +#endif + +CPPSPMD_FORCE_INLINE vfloat round_nearest(const vfloat& a) +{ + __m128i no_fract_fp_bits = _mm_castps_si128(_mm_set1_ps(8388608.0f)); + + __m128i sign_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x80000000U)); + __m128 force_int = _mm_castsi128_ps(_mm_or_si128(no_fract_fp_bits, sign_a)); + + // Can't use individual _mm_add_ps/_mm_sub_ps - this will be optimized out with /fp:fast by clang and probably other compilers. 
+ //__m128 temp1 = _mm_add_ps(a.m_value, force_int); + //__m128 temp2 = _mm_sub_ps(temp1, force_int); + __m128 temp2 = add_sub(a.m_value, force_int); + + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU)); + __m128i has_fractional = _mm_cmplt_epi32(abs_a, no_fract_fp_bits); + return vfloat{ blendv_mask_ps(a.m_value, temp2, _mm_castsi128_ps(has_fractional)) }; +} + +#else +CPPSPMD_FORCE_INLINE vfloat floor(const vfloat& v) { return vfloat{ _mm_floor_ps(v.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat ceil(const vfloat& a) { return vfloat{ _mm_ceil_ps(a.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat round_nearest(const vfloat &a) { return vfloat{ _mm_round_ps(a.m_value, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC ) }; } +CPPSPMD_FORCE_INLINE vfloat round_truncate(const vfloat &a) { return vfloat{ _mm_round_ps(a.m_value, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC ) }; } +#endif + +CPPSPMD_FORCE_INLINE vfloat frac(const vfloat& a) { return a - floor(a); } +CPPSPMD_FORCE_INLINE vfloat fmod(vfloat a, vfloat b) { vfloat c = frac(abs(a / b)) * abs(b); return spmd_ternaryf(a < 0, -c, c); } +CPPSPMD_FORCE_INLINE vfloat sign(const vfloat& a) { return spmd_ternaryf(a < 0.0f, 1.0f, 1.0f); } + +CPPSPMD_FORCE_INLINE vint max(const vint& a, const vint& b) { return vint{ max_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint min(const vint& a, const vint& b) { return vint{ min_epi32(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint maxu(const vint& a, const vint& b) { return vint{ max_epu32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint minu(const vint& a, const vint& b) { return vint{ min_epu32(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint abs(const vint& v) { return vint{ abs_epi32(v.m_value) }; } + +CPPSPMD_FORCE_INLINE vint byteswap(const vint& v) { return vint{ shuffle_epi8(v.m_value, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)) }; } + +CPPSPMD_FORCE_INLINE vint cast_vfloat_to_vint(const vfloat& 
v) { return vint{ _mm_castps_si128(v.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat cast_vint_to_vfloat(const vint& v) { return vfloat{ _mm_castsi128_ps(v.m_value) }; } + +CPPSPMD_FORCE_INLINE vfloat clamp(const vfloat& v, const vfloat& a, const vfloat& b) +{ + return vfloat{ _mm_min_ps(b.m_value, _mm_max_ps(v.m_value, a.m_value) ) }; +} + +CPPSPMD_FORCE_INLINE vint clamp(const vint& v, const vint& a, const vint& b) +{ + return vint{ min_epi32(b.m_value, max_epi32(v.m_value, a.m_value) ) }; +} + +CPPSPMD_FORCE_INLINE vfloat vfma(const vfloat& a, const vfloat& b, const vfloat& c) +{ + return vfloat{ _mm_add_ps(_mm_mul_ps(a.m_value, b.m_value), c.m_value) }; +} + +CPPSPMD_FORCE_INLINE vfloat vfms(const vfloat& a, const vfloat& b, const vfloat& c) +{ + return vfloat{ _mm_sub_ps(_mm_mul_ps(a.m_value, b.m_value), c.m_value) }; +} + +CPPSPMD_FORCE_INLINE vfloat vfnma(const vfloat& a, const vfloat& b, const vfloat& c) +{ + return vfloat{ _mm_sub_ps(c.m_value, _mm_mul_ps(a.m_value, b.m_value)) }; +} + +CPPSPMD_FORCE_INLINE vfloat vfnms(const vfloat& a, const vfloat& b, const vfloat& c) +{ + return vfloat{ _mm_sub_ps(_mm_sub_ps(_mm_xor_ps(a.m_value, a.m_value), _mm_mul_ps(a.m_value, b.m_value)), c.m_value) }; +} + +CPPSPMD_FORCE_INLINE vfloat lerp(const vfloat &x, const vfloat &y, const vfloat &s) { return vfma(y - x, s, x); } + +CPPSPMD_FORCE_INLINE lint operator+(int a, const lint& b) { return lint{ _mm_add_epi32(_mm_set1_epi32(a), b.m_value) }; } +CPPSPMD_FORCE_INLINE lint operator+(const lint& a, int b) { return lint{ _mm_add_epi32(a.m_value, _mm_set1_epi32(b)) }; } +CPPSPMD_FORCE_INLINE vfloat operator+(float a, const lint& b) { return vfloat(a) + vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator+(const lint& a, float b) { return vfloat(a) + vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator*(const lint& a, float b) { return vfloat(a) * vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator*(float b, const lint& a) { return vfloat(a) * vfloat(b); } + +CPPSPMD_FORCE_INLINE 
vint operator&(const vint& a, const vint& b) { return vint{ _mm_and_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator&(const vint& a, int b) { return a & vint(b); } +CPPSPMD_FORCE_INLINE vint andnot(const vint& a, const vint& b) { return vint{ _mm_andnot_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator|(const vint& a, const vint& b) { return vint{ _mm_or_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator|(const vint& a, int b) { return a | vint(b); } +CPPSPMD_FORCE_INLINE vint operator^(const vint& a, const vint& b) { return vint{ _mm_xor_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator^(const vint& a, int b) { return a ^ vint(b); } +CPPSPMD_FORCE_INLINE vbool operator==(const vint& a, const vint& b) { return vbool{ _mm_cmpeq_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator!=(const vint& a, const vint& b) { return !vbool{ _mm_cmpeq_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator<(const vint& a, const vint& b) { return vbool{ _mm_cmpgt_epi32(b.m_value, a.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator<=(const vint& a, const vint& b) { return !vbool{ _mm_cmpgt_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator>=(const vint& a, const vint& b) { return !vbool{ _mm_cmpgt_epi32(b.m_value, a.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator>(const vint& a, const vint& b) { return vbool{ _mm_cmpgt_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator+(const vint& a, const vint& b) { return vint{ _mm_add_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator-(const vint& a, const vint& b) { return vint{ _mm_sub_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator+(const vint& a, int b) { return a + vint(b); } +CPPSPMD_FORCE_INLINE vint operator-(const vint& a, int b) { return a - vint(b); } +CPPSPMD_FORCE_INLINE vint operator+(int a, const vint& b) { return vint(a) + b; } 
+CPPSPMD_FORCE_INLINE vint operator-(int a, const vint& b) { return vint(a) - b; } +CPPSPMD_FORCE_INLINE vint operator*(const vint& a, const vint& b) { return vint{ mullo_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator*(const vint& a, int b) { return a * vint(b); } +CPPSPMD_FORCE_INLINE vint operator*(int a, const vint& b) { return vint(a) * b; } + +CPPSPMD_FORCE_INLINE vint mulhiu(const vint& a, const vint& b) { return vint{ mulhi_epu32(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint operator-(const vint& v) { return vint{ _mm_sub_epi32(_mm_setzero_si128(), v.m_value) }; } + +CPPSPMD_FORCE_INLINE vint operator~(const vint& a) { return vint{ -a - 1 }; } + +// A few of these break the lane-based abstraction model. They are supported in SSE2, so it makes sense to support them and let the user figure it out. +CPPSPMD_FORCE_INLINE vint adds_epu8(const vint& a, const vint& b) { return vint{ _mm_adds_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint subs_epu8(const vint& a, const vint& b) { return vint{ _mm_subs_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint avg_epu8(const vint & a, const vint & b) { return vint{ _mm_avg_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint max_epu8(const vint& a, const vint& b) { return vint{ _mm_max_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint min_epu8(const vint& a, const vint& b) { return vint{ _mm_min_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint sad_epu8(const vint& a, const vint& b) { return vint{ _mm_sad_epu8(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint add_epi8(const vint& a, const vint& b) { return vint{ _mm_add_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint adds_epi8(const vint& a, const vint& b) { return vint{ _mm_adds_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint sub_epi8(const vint& a, const vint& b) { return vint{ _mm_sub_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint subs_epi8(const vint& a, 
const vint& b) { return vint{ _mm_subs_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpeq_epi8(const vint& a, const vint& b) { return vint{ _mm_cmpeq_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpgt_epi8(const vint& a, const vint& b) { return vint{ _mm_cmpgt_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmplt_epi8(const vint& a, const vint& b) { return vint{ _mm_cmplt_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint unpacklo_epi8(const vint& a, const vint& b) { return vint{ _mm_unpacklo_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint unpackhi_epi8(const vint& a, const vint& b) { return vint{ _mm_unpackhi_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE int movemask_epi8(const vint& a) { return _mm_movemask_epi8(a.m_value); } +CPPSPMD_FORCE_INLINE int movemask_epi32(const vint& a) { return _mm_movemask_ps(_mm_castsi128_ps(a.m_value)); } + +CPPSPMD_FORCE_INLINE vint cmple_epu8(const vint& a, const vint& b) { return vint{ _mm_cmpeq_epi8(_mm_min_epu8(a.m_value, b.m_value), a.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpge_epu8(const vint& a, const vint& b) { return vint{ cmple_epu8(b, a) }; } +CPPSPMD_FORCE_INLINE vint cmpgt_epu8(const vint& a, const vint& b) { return vint{ _mm_andnot_si128(_mm_cmpeq_epi8(a.m_value, b.m_value), _mm_cmpeq_epi8(_mm_max_epu8(a.m_value, b.m_value), a.m_value)) }; } +CPPSPMD_FORCE_INLINE vint cmplt_epu8(const vint& a, const vint& b) { return vint{ cmpgt_epu8(b, a) }; } +CPPSPMD_FORCE_INLINE vint absdiff_epu8(const vint& a, const vint& b) { return vint{ _mm_or_si128(_mm_subs_epu8(a.m_value, b.m_value), _mm_subs_epu8(b.m_value, a.m_value)) }; } + +CPPSPMD_FORCE_INLINE vint blendv_epi8(const vint& a, const vint& b, const vint &mask) { return vint{ blendv_epi8(a.m_value, b.m_value, _mm_cmplt_epi8(mask.m_value, _mm_setzero_si128())) }; } +CPPSPMD_FORCE_INLINE vint blendv_epi32(const vint& a, const vint& b, const vint &mask) { return vint{ blendv_epi32(a.m_value, b.m_value, 
mask.m_value) }; } + +CPPSPMD_FORCE_INLINE vint add_epi16(const vint& a, const vint& b) { return vint{ _mm_add_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint adds_epi16(const vint& a, const vint& b) { return vint{ _mm_adds_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint adds_epu16(const vint& a, const vint& b) { return vint{ _mm_adds_epu16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint avg_epu16(const vint& a, const vint& b) { return vint{ _mm_avg_epu16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint sub_epi16(const vint& a, const vint& b) { return vint{ _mm_sub_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint subs_epi16(const vint& a, const vint& b) { return vint{ _mm_subs_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint subs_epu16(const vint& a, const vint& b) { return vint{ _mm_subs_epu16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint mullo_epi16(const vint& a, const vint& b) { return vint{ _mm_mullo_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint mulhi_epi16(const vint& a, const vint& b) { return vint{ _mm_mulhi_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint mulhi_epu16(const vint& a, const vint& b) { return vint{ _mm_mulhi_epu16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint min_epi16(const vint& a, const vint& b) { return vint{ _mm_min_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint max_epi16(const vint& a, const vint& b) { return vint{ _mm_max_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint madd_epi16(const vint& a, const vint& b) { return vint{ _mm_madd_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpeq_epi16(const vint& a, const vint& b) { return vint{ _mm_cmpeq_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpgt_epi16(const vint& a, const vint& b) { return vint{ _mm_cmpgt_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmplt_epi16(const vint& a, const vint& b) { return vint{ 
_mm_cmplt_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint packs_epi16(const vint& a, const vint& b) { return vint{ _mm_packs_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint packus_epi16(const vint& a, const vint& b) { return vint{ _mm_packus_epi16(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint uniform_shift_left_epi16(const vint& a, const vint& b) { return vint{ _mm_sll_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint uniform_arith_shift_right_epi16(const vint& a, const vint& b) { return vint{ _mm_sra_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint uniform_shift_right_epi16(const vint& a, const vint& b) { return vint{ _mm_srl_epi16(a.m_value, b.m_value) }; } + +#define VINT_SHIFT_LEFT_EPI16(a, b) vint(_mm_slli_epi16((a).m_value, b)) +#define VINT_SHIFT_RIGHT_EPI16(a, b) vint(_mm_srai_epi16((a).m_value, b)) +#define VUINT_SHIFT_RIGHT_EPI16(a, b) vint(_mm_srli_epi16((a).m_value, b)) + +CPPSPMD_FORCE_INLINE vint undefined_vint() { return vint{ _mm_undefined_si128() }; } +CPPSPMD_FORCE_INLINE vfloat undefined_vfloat() { return vfloat{ _mm_undefined_ps() }; } + +CPPSPMD_FORCE_INLINE vint zero_vint() { return vint{ _mm_setzero_si128() }; } +CPPSPMD_FORCE_INLINE vfloat zero_vfloat() { return vfloat{ _mm_setzero_ps() }; } + +CPPSPMD_FORCE_INLINE vint vint_lane_set(int v0, int v1, int v2, int v3) { return vint{ _mm_set_epi32(v3, v2, v1, v0) }; } +CPPSPMD_FORCE_INLINE vfloat vfloat_lane_set(float v0, float v1, float v2, float v3) { return vfloat{ _mm_set_ps(v3, v2, v1, v0) }; } +CPPSPMD_FORCE_INLINE vint vint_lane_set_r(int v3, int v2, int v1, int v0) { return vint{ _mm_set_epi32(v3, v2, v1, v0) }; } +CPPSPMD_FORCE_INLINE vfloat vfloat_lane_set_r(float v3, float v2, float v1, float v0) { return vfloat{ _mm_set_ps(v3, v2, v1, v0) }; } +// control is an 8-bit immediate value containing 4 2-bit indices which shuffles the int32's in each 128-bit lane. 
+#define VINT_LANE_SHUFFLE_EPI32(a, control) vint(_mm_shuffle_epi32((a).m_value, control)) +#define VFLOAT_LANE_SHUFFLE_PS(a, b, control) vfloat(_mm_shuffle_ps((a).m_value, (b).m_value, control)) + +// control is an 8-bit immediate value containing 4 2-bit indices which shuffles the int16's in either the high or low 64-bit lane. +#define VINT_LANE_SHUFFLELO_EPI16(a, control) vint(_mm_shufflelo_epi16((a).m_value, control)) +#define VINT_LANE_SHUFFLEHI_EPI16(a, control) vint(_mm_shufflehi_epi16((a).m_value, control)) + +#define VINT_LANE_SHUFFLE_MASK(a, b, c, d) ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6)) +#define VINT_LANE_SHUFFLE_MASK_R(d, c, b, a) ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6)) + +#define VINT_LANE_SHIFT_LEFT_BYTES(a, l) vint(_mm_slli_si128((a).m_value, l)) +#define VINT_LANE_SHIFT_RIGHT_BYTES(a, l) vint(_mm_srli_si128((a).m_value, l)) + +// Unpack and interleave 8-bit integers from the low or high half of a and b +CPPSPMD_FORCE_INLINE vint vint_lane_unpacklo_epi8(const vint& a, const vint& b) { return vint(_mm_unpacklo_epi8(a.m_value, b.m_value)); } +CPPSPMD_FORCE_INLINE vint vint_lane_unpackhi_epi8(const vint& a, const vint& b) { return vint(_mm_unpackhi_epi8(a.m_value, b.m_value)); } + +// Unpack and interleave 16-bit integers from the low or high half of a and b +CPPSPMD_FORCE_INLINE vint vint_lane_unpacklo_epi16(const vint& a, const vint& b) { return vint(_mm_unpacklo_epi16(a.m_value, b.m_value)); } +CPPSPMD_FORCE_INLINE vint vint_lane_unpackhi_epi16(const vint& a, const vint& b) { return vint(_mm_unpackhi_epi16(a.m_value, b.m_value)); } + +// Unpack and interleave 32-bit integers from the low or high half of a and b +CPPSPMD_FORCE_INLINE vint vint_lane_unpacklo_epi32(const vint& a, const vint& b) { return vint(_mm_unpacklo_epi32(a.m_value, b.m_value)); } +CPPSPMD_FORCE_INLINE vint vint_lane_unpackhi_epi32(const vint& a, const vint& b) { return vint(_mm_unpackhi_epi32(a.m_value, b.m_value)); } + +// Unpack and interleave 64-bit integers from 
the low or high half of a and b +CPPSPMD_FORCE_INLINE vint vint_lane_unpacklo_epi64(const vint& a, const vint& b) { return vint(_mm_unpacklo_epi64(a.m_value, b.m_value)); } +CPPSPMD_FORCE_INLINE vint vint_lane_unpackhi_epi64(const vint& a, const vint& b) { return vint(_mm_unpackhi_epi64(a.m_value, b.m_value)); } + +CPPSPMD_FORCE_INLINE vint vint_set1_epi8(int8_t a) { return vint(_mm_set1_epi8(a)); } +CPPSPMD_FORCE_INLINE vint vint_set1_epi16(int16_t a) { return vint(_mm_set1_epi16(a)); } +CPPSPMD_FORCE_INLINE vint vint_set1_epi32(int32_t a) { return vint(_mm_set1_epi32(a)); } +CPPSPMD_FORCE_INLINE vint vint_set1_epi64(int64_t a) { return vint(_mm_set1_epi64x(a)); } + +CPPSPMD_FORCE_INLINE vint mul_epu32(const vint &a, const vint& b) { return vint(_mm_mul_epu32(a.m_value, b.m_value)); } + +CPPSPMD_FORCE_INLINE vint div_epi32(const vint &a, const vint& b) +{ + __m128d al = _mm_cvtepi32_pd(a.m_value); + __m128d ah = _mm_cvtepi32_pd(_mm_unpackhi_epi64(a.m_value, a.m_value)); + + __m128d bl = _mm_cvtepi32_pd(b.m_value); + __m128d bh = _mm_cvtepi32_pd(_mm_unpackhi_epi64(b.m_value, b.m_value)); + + __m128d rl = _mm_div_pd(al, bl); + __m128d rh = _mm_div_pd(ah, bh); + + __m128i rli = _mm_cvttpd_epi32(rl); + __m128i rhi = _mm_cvttpd_epi32(rh); + + return vint(_mm_unpacklo_epi64(rli, rhi)); +} + +CPPSPMD_FORCE_INLINE vint mod_epi32(const vint &a, const vint& b) +{ + vint aa = abs(a), ab = abs(b); + vint q = div_epi32(aa, ab); + vint r = aa - q * ab; + return spmd_ternaryi(a < 0, -r, r); +} + +CPPSPMD_FORCE_INLINE vint operator/ (const vint& a, const vint& b) +{ + return div_epi32(a, b); +} + +CPPSPMD_FORCE_INLINE vint operator/ (const vint& a, int b) +{ + return div_epi32(a, vint(b)); +} + +CPPSPMD_FORCE_INLINE vint operator% (const vint& a, const vint& b) +{ + return mod_epi32(a, b); +} + +CPPSPMD_FORCE_INLINE vint operator% (const vint& a, int b) +{ + return mod_epi32(a, vint(b)); +} + +CPPSPMD_FORCE_INLINE vint operator<< (const vint& a, const vint& b) +{ +#if 0 + 
CPPSPMD_ALIGN(32) int result[4]; + result[0] = extract_x(a.m_value) << extract_x(b.m_value); + result[1] = extract_y(a.m_value) << extract_y(b.m_value); + result[2] = extract_z(a.m_value) << extract_z(b.m_value); + result[3] = extract_w(a.m_value) << extract_w(b.m_value); + + return vint{ _mm_load_si128((__m128i*)result) }; +#elif 0 + int x = extract_x(a.m_value) << extract_x(b.m_value); + int y = extract_y(a.m_value) << extract_y(b.m_value); + int z = extract_z(a.m_value) << extract_z(b.m_value); + int w = extract_w(a.m_value) << extract_w(b.m_value); + + __m128i v = insert_x(_mm_undefined_si128(), x); + v = insert_y(v, y); + v = insert_z(v, z); + return vint{ insert_w(v, w) }; +#else + // What this does: shift left each b lane by 23 bits (to move the shift amount into the FP exponent position), then epi32 add to the integer rep of 1.0f, then cast that to float, then convert that to int to get fast 2^x. + return a * vint(cast_vint_to_vfloat(vint(_mm_slli_epi32(b.m_value, 23)) + cast_vfloat_to_vint(vfloat(1.0f)))); +#endif +} + +// uniform shift left +CPPSPMD_FORCE_INLINE vint operator<< (const vint& a, int b) +{ + __m128i bv = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(b)), _mm_castsi128_ps(_mm_load_si128((const __m128i *)g_x_128)))); + return vint{ _mm_sll_epi32(a.m_value, bv) }; +} + +// uniform arithmetic shift right +CPPSPMD_FORCE_INLINE vint operator>> (const vint& a, int b) +{ + __m128i bv = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(b)), _mm_castsi128_ps(_mm_load_si128((const __m128i *)g_x_128)))); + return vint{ _mm_sra_epi32(a.m_value, bv) }; +} + +// uniform shift right +CPPSPMD_FORCE_INLINE vint vuint_shift_right(const vint& a, int b) +{ + __m128i bv = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(b)), _mm_castsi128_ps(_mm_load_si128((const __m128i *)g_x_128)))); + return vint{ _mm_srl_epi32(a.m_value, bv) }; +} + +CPPSPMD_FORCE_INLINE vint vuint_shift_right(const vint& a, const vint& b) +{ +#if 0 + 
CPPSPMD_ALIGN(32) int result[4]; + result[0] = ((uint32_t)extract_x(a.m_value)) >> extract_x(b.m_value); + result[1] = ((uint32_t)extract_y(a.m_value)) >> extract_y(b.m_value); + result[2] = ((uint32_t)extract_z(a.m_value)) >> extract_z(b.m_value); + result[3] = ((uint32_t)extract_w(a.m_value)) >> extract_w(b.m_value); + + return vint{ _mm_load_si128((__m128i*)result) }; +#elif 0 + uint32_t x = ((uint32_t)extract_x(a.m_value)) >> ((uint32_t)extract_x(b.m_value)); + uint32_t y = ((uint32_t)extract_y(a.m_value)) >> ((uint32_t)extract_y(b.m_value)); + uint32_t z = ((uint32_t)extract_z(a.m_value)) >> ((uint32_t)extract_z(b.m_value)); + uint32_t w = ((uint32_t)extract_w(a.m_value)) >> ((uint32_t)extract_w(b.m_value)); + + __m128i v = insert_x(_mm_undefined_si128(), x); + v = insert_y(v, y); + v = insert_z(v, z); + return vint{ insert_w(v, w) }; +#else + //vint inv_shift = 32 - b; + //vfloat f = cast_vint_to_vfloat(vint(_mm_slli_epi32(inv_shift.m_value, 23)) + cast_vfloat_to_vint(vfloat(1.0f))); + + // Take float rep of 1.0f (0x3f800000), subtract (32<<23), subtract (shift<<23), cast to float. + vfloat f = cast_vint_to_vfloat(vint(_mm_sub_epi32(_mm_set1_epi32(0x4f800000), _mm_slli_epi32(b.m_value, 23)))); + + // Now convert scale factor to integer. + vint r = vint(f); + + // mulhi_epu32 (using two _mm_mul_epu32), to emulate varying shift left. + vint q(mulhi_epu32(a.m_value, r.m_value)); + + // Handle shift amounts of 0. + return spmd_ternaryi(b > 0, q, a); +#endif +} + +CPPSPMD_FORCE_INLINE vint vuint_shift_right_not_zero(const vint& a, const vint& b) +{ + //vint inv_shift = 32 - b; + //vfloat f = cast_vint_to_vfloat(vint(_mm_slli_epi32(inv_shift.m_value, 23)) + cast_vfloat_to_vint(vfloat(1.0f))); + + // Take float rep of 1.0f (0x3f800000), subtract (32<<23), subtract (shift<<23), cast to float. + vfloat f = cast_vint_to_vfloat(vint(_mm_sub_epi32(_mm_set1_epi32(0x4f800000), _mm_slli_epi32(b.m_value, 23)))); + + // Now convert scale factor to integer. 
+ vint r = vint(f); + + // mulhi_epu32 (using two _mm_mul_epu32), to emulate varying shift left. + return vint(mulhi_epu32(a.m_value, r.m_value)); +} + +CPPSPMD_FORCE_INLINE vint operator>> (const vint& a, const vint& b) +{ +#if 0 + CPPSPMD_ALIGN(32) int result[4]; + result[0] = extract_x(a.m_value) >> extract_x(b.m_value); + result[1] = extract_y(a.m_value) >> extract_y(b.m_value); + result[2] = extract_z(a.m_value) >> extract_z(b.m_value); + result[3] = extract_w(a.m_value) >> extract_w(b.m_value); + + return vint{ _mm_load_si128((__m128i*)result) }; +#elif 0 + int x = extract_x(a.m_value) >> extract_x(b.m_value); + int y = extract_y(a.m_value) >> extract_y(b.m_value); + int z = extract_z(a.m_value) >> extract_z(b.m_value); + int w = extract_w(a.m_value) >> extract_w(b.m_value); + + __m128i v = insert_x(_mm_undefined_si128(), x); + v = insert_y(v, y); + v = insert_z(v, z); + return vint{ insert_w(v, w) }; +#else + vint sign_mask(_mm_cmplt_epi32(a.m_value, _mm_setzero_si128())); + vint a_shifted = vuint_shift_right(a ^ sign_mask, b) ^ sign_mask; + return a_shifted; +#endif +} + +#undef VINT_SHIFT_LEFT +#undef VINT_SHIFT_RIGHT +#undef VUINT_SHIFT_RIGHT + +// Shift left/right by a uniform immediate constant +#define VINT_SHIFT_LEFT(a, b) vint(_mm_slli_epi32( (a).m_value, (b) ) ) +#define VINT_SHIFT_RIGHT(a, b) vint( _mm_srai_epi32( (a).m_value, (b) ) ) +#define VUINT_SHIFT_RIGHT(a, b) vint( _mm_srli_epi32( (a).m_value, (b) ) ) +#define VINT_ROT(x, k) (VINT_SHIFT_LEFT((x), (k)) | VUINT_SHIFT_RIGHT((x), 32 - (k))) + +CPPSPMD_FORCE_INLINE vbool operator==(const lint& a, const lint& b) { return vbool{ _mm_cmpeq_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator==(const lint& a, int b) { return vint(a) == vint(b); } +CPPSPMD_FORCE_INLINE vbool operator==(int a, const lint& b) { return vint(a) == vint(b); } +CPPSPMD_FORCE_INLINE vbool operator<(const lint& a, const lint& b) { return vbool{ _mm_cmpgt_epi32(b.m_value, a.m_value) }; } +CPPSPMD_FORCE_INLINE 
vbool operator>(const lint& a, const lint& b) { return vbool{ _mm_cmpgt_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator<=(const lint& a, const lint& b) { return !vbool{ _mm_cmpgt_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator>=(const lint& a, const lint& b) { return !vbool{ _mm_cmpgt_epi32(b.m_value, a.m_value) }; } + +CPPSPMD_FORCE_INLINE float extract(const vfloat& v, int instance) { assert(instance < 4); CPPSPMD_ALIGN(16) float values[4]; _mm_store_ps(values, v.m_value); return values[instance]; } +CPPSPMD_FORCE_INLINE int extract(const vint& v, int instance) { assert(instance < 4); CPPSPMD_ALIGN(16) int values[4]; _mm_store_si128((__m128i*)values, v.m_value); return values[instance]; } +CPPSPMD_FORCE_INLINE int extract(const lint& v, int instance) { assert(instance < 4); CPPSPMD_ALIGN(16) int values[4]; _mm_store_si128((__m128i*)values, v.m_value); return values[instance]; } +CPPSPMD_FORCE_INLINE bool extract(const vbool& v, int instance) { assert(instance < 4); CPPSPMD_ALIGN(16) int values[4]; _mm_store_si128((__m128i*)values, v.m_value); return values[instance] != 0; } + +#undef VINT_EXTRACT +#undef VBOOL_EXTRACT +#undef VFLOAT_EXTRACT + +#if CPPSPMD_SSE2 +// Pass in an immediate constant and the compiler will optimize these expressions. +#define VINT_EXTRACT(v, instance) ( ((instance) == 0) ? extract_x((v).m_value) : (((instance) == 1) ? extract_y((v).m_value) : (((instance) == 2) ? extract_z((v).m_value) : extract_w((v).m_value))) ) +#define VBOOL_EXTRACT(v, instance) ( ((instance) == 0) ? extract_x((v).m_value) : (((instance) == 1) ? extract_y((v).m_value) : (((instance) == 2) ? extract_z((v).m_value) : extract_w((v).m_value))) ) +#define VFLOAT_EXTRACT(v, instance) ( ((instance) == 0) ? extractf_ps_x((v).m_value) : (((instance) == 1) ? extractf_ps_y((v).m_value) : (((instance) == 2) ? 
extractf_ps_z((v).m_value) : extractf_ps_w((v).m_value))) ) +#else +CPPSPMD_FORCE_INLINE float cast_int_bits_as_float(int v) { return *(const float*)&v; } + +#define VINT_EXTRACT(v, instance) _mm_extract_epi32((v).m_value, instance) +#define VBOOL_EXTRACT(v, instance) _mm_extract_epi32((v).m_value, instance) +#define VFLOAT_EXTRACT(v, instance) cast_int_bits_as_float(_mm_extract_ps((v).m_value, instance)) +#endif + +CPPSPMD_FORCE_INLINE vfloat &insert(vfloat& v, int instance, float f) +{ + assert(instance < 4); + CPPSPMD_ALIGN(16) float values[4]; + _mm_store_ps(values, v.m_value); + values[instance] = f; + v.m_value = _mm_load_ps(values); + return v; +} + +CPPSPMD_FORCE_INLINE vint &insert(vint& v, int instance, int i) +{ + assert(instance < 4); + CPPSPMD_ALIGN(16) int values[4]; + _mm_store_si128((__m128i *)values, v.m_value); + values[instance] = i; + v.m_value = _mm_load_si128((__m128i *)values); + return v; +} + +CPPSPMD_FORCE_INLINE vint init_lookup4(const uint8_t pTab[16]) +{ + __m128i l = _mm_loadu_si128((const __m128i*)pTab); + return vint{ l }; +} + +CPPSPMD_FORCE_INLINE vint table_lookup4_8(const vint& a, const vint& table) +{ + return vint{ shuffle_epi8(table.m_value, a.m_value) }; +} + +CPPSPMD_FORCE_INLINE void init_lookup5(const uint8_t pTab[32], vint& table_0, vint& table_1) +{ + __m128i l = _mm_loadu_si128((const __m128i*)pTab); + __m128i h = _mm_loadu_si128((const __m128i*)(pTab + 16)); + table_0.m_value = l; + table_1.m_value = h; +} + +CPPSPMD_FORCE_INLINE vint table_lookup5_8(const vint& a, const vint& table_0, const vint& table_1) +{ + __m128i l_0 = shuffle_epi8(table_0.m_value, a.m_value); + __m128i h_0 = shuffle_epi8(table_1.m_value, a.m_value); + + __m128i m_0 = _mm_slli_epi32(a.m_value, 31 - 4); + + __m128 v_0 = blendv_ps(_mm_castsi128_ps(l_0), _mm_castsi128_ps(h_0), _mm_castsi128_ps(m_0)); + + return vint{ _mm_castps_si128(v_0) }; +} + +CPPSPMD_FORCE_INLINE void init_lookup6(const uint8_t pTab[64], vint& table_0, vint& table_1, vint& 
table_2, vint& table_3) +{ + __m128i a = _mm_loadu_si128((const __m128i*)pTab); + __m128i b = _mm_loadu_si128((const __m128i*)(pTab + 16)); + __m128i c = _mm_loadu_si128((const __m128i*)(pTab + 32)); + __m128i d = _mm_loadu_si128((const __m128i*)(pTab + 48)); + + table_0.m_value = a; + table_1.m_value = b; + table_2.m_value = c; + table_3.m_value = d; +} + +CPPSPMD_FORCE_INLINE vint table_lookup6_8(const vint& a, const vint& table_0, const vint& table_1, const vint& table_2, const vint& table_3) +{ + __m128i m_0 = _mm_slli_epi32(a.m_value, 31 - 4); + + __m128 av_0; + { + __m128i al_0 = shuffle_epi8(table_0.m_value, a.m_value); + __m128i ah_0 = shuffle_epi8(table_1.m_value, a.m_value); + av_0 = blendv_ps(_mm_castsi128_ps(al_0), _mm_castsi128_ps(ah_0), _mm_castsi128_ps(m_0)); + } + + __m128 bv_0; + { + __m128i bl_0 = shuffle_epi8(table_2.m_value, a.m_value); + __m128i bh_0 = shuffle_epi8(table_3.m_value, a.m_value); + bv_0 = blendv_ps(_mm_castsi128_ps(bl_0), _mm_castsi128_ps(bh_0), _mm_castsi128_ps(m_0)); + } + + __m128i m2_0 = _mm_slli_epi32(a.m_value, 31 - 5); + __m128 v2_0 = blendv_ps(av_0, bv_0, _mm_castsi128_ps(m2_0)); + + return vint{ _mm_castps_si128(v2_0) }; +} + +#if 0 +template +CPPSPMD_FORCE_INLINE decltype(auto) spmd_call(Args&&... args) +{ + SPMDKernel kernel; + kernel.init(exec_mask::all_on()); + return kernel._call(std::forward(args)...); +} +#else +template +CPPSPMD_FORCE_INLINE void spmd_call(Args&&... 
args) +{ + SPMDKernel kernel; + kernel.init(exec_mask::all_on()); + kernel._call(std::forward(args)...); +} +#endif + +CPPSPMD_FORCE_INLINE void spmd_kernel::init(const spmd_kernel::exec_mask& kernel_exec) +{ + m_exec = kernel_exec; + m_kernel_exec = kernel_exec; + m_continue_mask = exec_mask::all_off(); + +#ifdef _DEBUG + m_in_loop = false; +#endif +} + +CPPSPMD_FORCE_INLINE const float_vref& spmd_kernel::store(const float_vref& dst, const vfloat& src) +{ + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) float stored[4]; + _mm_store_ps(stored, src.m_value); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + dst.m_pValue[vindex[i]] = stored[i]; + } + return dst; +} + +CPPSPMD_FORCE_INLINE const float_vref& spmd_kernel::store_all(const float_vref& dst, const vfloat& src) +{ + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) float stored[4]; + _mm_store_ps(stored, src.m_value); + + for (int i = 0; i < 4; i++) + dst.m_pValue[vindex[i]] = stored[i]; + return dst; +} + +CPPSPMD_FORCE_INLINE const float_vref& spmd_kernel::store(const float_vref&& dst, const vfloat& src) +{ + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) float stored[4]; + _mm_store_ps(stored, src.m_value); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + dst.m_pValue[vindex[i]] = stored[i]; + } + return dst; +} + +CPPSPMD_FORCE_INLINE const float_vref& spmd_kernel::store_all(const float_vref&& dst, const vfloat& src) +{ + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) float stored[4]; + _mm_store_ps(stored, src.m_value); + + for (int i = 0; i < 4; i++) + dst.m_pValue[vindex[i]] = stored[i]; + return dst; +} + +#include 
"cppspmd_flow.h" +#include "cppspmd_math.h" + +} // namespace cppspmd_sse41 + diff --git a/thirdparty/basisu/encoder/cppspmd_type_aliases.h b/thirdparty/basisu/encoder/cppspmd_type_aliases.h new file mode 100644 index 000000000..260048123 --- /dev/null +++ b/thirdparty/basisu/encoder/cppspmd_type_aliases.h @@ -0,0 +1,47 @@ +// cppspmd_type_aliases.h +// Do not include this file directly +// +// Copyright 2020-2024 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#ifndef CPPSPMD_TYPES +#define CPPSPMD_TYPES + +using exec_mask = CPPSPMD::exec_mask; + +#if CPPSPMD_INT16 +using vint16 = CPPSPMD::vint16; +using int16_lref = CPPSPMD::int16_lref; +using cint16_vref = CPPSPMD::cint16_vref; +using int16_vref = CPPSPMD::int16_vref; +using lint16 = CPPSPMD::lint16; +using vint16_vref = CPPSPMD::vint16_vref; +#else +using vint = CPPSPMD::vint; +using int_lref = CPPSPMD::int_lref; +using cint_vref = CPPSPMD::cint_vref; +using int_vref = CPPSPMD::int_vref; +using lint = CPPSPMD::lint; +using vint_vref = CPPSPMD::vint_vref; +#endif + +using vbool = CPPSPMD::vbool; +using vfloat = CPPSPMD::vfloat; +using float_lref = CPPSPMD::float_lref; +using float_vref = CPPSPMD::float_vref; +using vfloat_vref = CPPSPMD::vfloat_vref; + +#endif // CPPSPMD_TYPES diff --git a/thirdparty/basisu/encoder/jpgd.cpp b/thirdparty/basisu/encoder/jpgd.cpp new file mode 100644 index 000000000..f375ba201 --- /dev/null +++ b/thirdparty/basisu/encoder/jpgd.cpp @@ -0,0 
+1,3230 @@ +// jpgd.cpp - C++ class for JPEG decompression. Written by Richard Geldreich between 1994-2020. +// Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2. +// Supports box and linear chroma upsampling. +// +// Released under two licenses. You are free to choose which license you want: +// License 1: +// Public Domain +// +// License 2: +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Alex Evans: Linear memory allocator (taken from jpge.h). +// v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings +// v2.00, March 20, 2020: Fuzzed with zzuf and afl. Fixed several issues, converted most assert()'s to run-time checks. Added chroma upsampling. Removed freq. domain upsampling. gcc/clang warnings. +// + +#include "jpgd.h" +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable +#endif + +#define JPGD_TRUE (1) +#define JPGD_FALSE (0) + +#define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b)) +#define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b)) + +namespace jpgd { + + static inline void* jpgd_malloc(size_t nSize) { return malloc(nSize); } + static inline void jpgd_free(void* p) { free(p); } + + // DCT coefficients are stored in this sequence. 
+ static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 }; + + enum JPEG_MARKER + { + M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8, + M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC, + M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7, + M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF, + M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0 + }; + + enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 }; + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define SCALEDONE ((int32)1) + +#define FIX_0_298631336 ((int32)2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((int32)3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((int32)4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((int32)6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((int32)7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((int32)9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((int32)12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((int32)15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((int32)16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((int32)16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((int32)20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((int32)25172) /* FIX(3.072711026) */ + +#define DESCALE(x,n) (((x) + (SCALEDONE << ((n)-1))) >> (n)) +#define DESCALE_ZEROSHIFT(x,n) (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n)) + +#define MULTIPLY(var, cnst) ((var) * (cnst)) + +#define CLAMP(i) 
((static_cast(i) > 255) ? (((~i) >> 31) & 0xFF) : (i)) + + static inline int left_shifti(int val, uint32_t bits) + { + return static_cast(static_cast(val) << bits); + } + + // Compiler creates a fast path 1D IDCT for X non-zero columns + template + struct Row + { + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + // ACCESS_COL() will be optimized at compile time to either an array access, or 0. Good compilers will then optimize out muls against 0. +#define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0) + + const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6); + + const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + const int tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); + const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); + + const int tmp0 = left_shifti(ACCESS_COL(0) + ACCESS_COL(4), CONST_BITS); + const int tmp1 = left_shifti(ACCESS_COL(0) - ACCESS_COL(4), CONST_BITS); + + const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; + + const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1); + + const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; + const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); + + const int az1 = MULTIPLY(bz1, -FIX_0_899976223); + const int az2 = MULTIPLY(bz2, -FIX_2_562915447); + const int az3 = MULTIPLY(bz3, -FIX_1_961570560) + bz5; + const int az4 = MULTIPLY(bz4, -FIX_0_390180644) + bz5; + + const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; + const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; + const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; + const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; + + pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS - PASS1_BITS); + pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS - PASS1_BITS); + pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS - PASS1_BITS); + pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS - PASS1_BITS); + 
pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS - PASS1_BITS); + pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS - PASS1_BITS); + pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS - PASS1_BITS); + pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS - PASS1_BITS); + } + }; + + template <> + struct Row<0> + { + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + (void)pTemp; + (void)pSrc; + } + }; + + template <> + struct Row<1> + { + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + const int dcval = left_shifti(pSrc[0], PASS1_BITS); + + pTemp[0] = dcval; + pTemp[1] = dcval; + pTemp[2] = dcval; + pTemp[3] = dcval; + pTemp[4] = dcval; + pTemp[5] = dcval; + pTemp[6] = dcval; + pTemp[7] = dcval; + } + }; + + // Compiler creates a fast path 1D IDCT for X non-zero rows + template + struct Col + { + static void idct(uint8* pDst_ptr, const int* pTemp) + { + // ACCESS_ROW() will be optimized at compile time to either an array access, or 0. +#define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0) + + const int z2 = ACCESS_ROW(2); + const int z3 = ACCESS_ROW(6); + + const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + const int tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); + const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); + + const int tmp0 = left_shifti(ACCESS_ROW(0) + ACCESS_ROW(4), CONST_BITS); + const int tmp1 = left_shifti(ACCESS_ROW(0) - ACCESS_ROW(4), CONST_BITS); + + const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; + + const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1); + + const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; + const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); + + const int az1 = MULTIPLY(bz1, -FIX_0_899976223); + const int az2 = MULTIPLY(bz2, -FIX_2_562915447); + const int az3 = MULTIPLY(bz3, -FIX_1_961570560) + bz5; + const int az4 = MULTIPLY(bz4, -FIX_0_390180644) + bz5; + + const int btmp0 = 
MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; + const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; + const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; + const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; + + int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 0] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 7] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 1] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 6] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 2] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 5] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 3] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 4] = (uint8)CLAMP(i); + } + }; + + template <> + struct Col<1> + { + static void idct(uint8* pDst_ptr, const int* pTemp) + { + int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS + 3); + const uint8 dcval_clamped = (uint8)CLAMP(dcval); + pDst_ptr[0 * 8] = dcval_clamped; + pDst_ptr[1 * 8] = dcval_clamped; + pDst_ptr[2 * 8] = dcval_clamped; + pDst_ptr[3 * 8] = dcval_clamped; + pDst_ptr[4 * 8] = dcval_clamped; + pDst_ptr[5 * 8] = dcval_clamped; + pDst_ptr[6 * 8] = dcval_clamped; + pDst_ptr[7 * 8] = dcval_clamped; + } + }; + + static const uint8 s_idct_row_table[] = + { + 1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0, + 4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0, + 6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 
6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0, + 6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0, + 8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2, + 8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2, + 8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4, + 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8, + }; + + static const uint8 s_idct_col_table[] = + { + 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 + }; + + // Scalar "fast pathing" IDCT. 
+ static void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag) + { + assert(block_max_zag >= 1); + assert(block_max_zag <= 64); + + if (block_max_zag <= 1) + { + int k = ((pSrc_ptr[0] + 4) >> 3) + 128; + k = CLAMP(k); + k = k | (k << 8); + k = k | (k << 16); + + for (int i = 8; i > 0; i--) + { + *(int*)&pDst_ptr[0] = k; + *(int*)&pDst_ptr[4] = k; + pDst_ptr += 8; + } + return; + } + + int temp[64]; + + const jpgd_block_t* pSrc = pSrc_ptr; + int* pTemp = temp; + + const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8]; + int i; + for (i = 8; i > 0; i--, pRow_tab++) + { + switch (*pRow_tab) + { + case 0: Row<0>::idct(pTemp, pSrc); break; + case 1: Row<1>::idct(pTemp, pSrc); break; + case 2: Row<2>::idct(pTemp, pSrc); break; + case 3: Row<3>::idct(pTemp, pSrc); break; + case 4: Row<4>::idct(pTemp, pSrc); break; + case 5: Row<5>::idct(pTemp, pSrc); break; + case 6: Row<6>::idct(pTemp, pSrc); break; + case 7: Row<7>::idct(pTemp, pSrc); break; + case 8: Row<8>::idct(pTemp, pSrc); break; + } + + pSrc += 8; + pTemp += 8; + } + + pTemp = temp; + + const int nonzero_rows = s_idct_col_table[block_max_zag - 1]; + for (i = 8; i > 0; i--) + { + switch (nonzero_rows) + { + case 1: Col<1>::idct(pDst_ptr, pTemp); break; + case 2: Col<2>::idct(pDst_ptr, pTemp); break; + case 3: Col<3>::idct(pDst_ptr, pTemp); break; + case 4: Col<4>::idct(pDst_ptr, pTemp); break; + case 5: Col<5>::idct(pDst_ptr, pTemp); break; + case 6: Col<6>::idct(pDst_ptr, pTemp); break; + case 7: Col<7>::idct(pDst_ptr, pTemp); break; + case 8: Col<8>::idct(pDst_ptr, pTemp); break; + } + + pTemp++; + pDst_ptr++; + } + } + + // Retrieve one character from the input stream. + inline uint jpeg_decoder::get_char() + { + // Any bytes remaining in buffer? + if (!m_in_buf_left) + { + // Try to get more bytes. + prep_in_buffer(); + // Still nothing to get? 
+ if (!m_in_buf_left) + { + // Pad the end of the stream with 0xFF 0xD9 (EOI marker) + int t = m_tem_flag; + m_tem_flag ^= 1; + if (t) + return 0xD9; + else + return 0xFF; + } + } + + uint c = *m_pIn_buf_ofs++; + m_in_buf_left--; + + return c; + } + + // Same as previous method, except can indicate if the character is a pad character or not. + inline uint jpeg_decoder::get_char(bool* pPadding_flag) + { + if (!m_in_buf_left) + { + prep_in_buffer(); + if (!m_in_buf_left) + { + *pPadding_flag = true; + int t = m_tem_flag; + m_tem_flag ^= 1; + if (t) + return 0xD9; + else + return 0xFF; + } + } + + *pPadding_flag = false; + + uint c = *m_pIn_buf_ofs++; + m_in_buf_left--; + + return c; + } + + // Inserts a previously retrieved character back into the input buffer. + inline void jpeg_decoder::stuff_char(uint8 q) + { + // This could write before the input buffer, but we've placed another array there. + *(--m_pIn_buf_ofs) = q; + m_in_buf_left++; + } + + // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered. + inline uint8 jpeg_decoder::get_octet() + { + bool padding_flag; + int c = get_char(&padding_flag); + + if (c == 0xFF) + { + if (padding_flag) + return 0xFF; + + c = get_char(&padding_flag); + if (padding_flag) + { + stuff_char(0xFF); + return 0xFF; + } + + if (c == 0x00) + return 0xFF; + else + { + stuff_char(static_cast(c)); + stuff_char(0xFF); + return 0xFF; + } + } + + return static_cast(c); + } + + // Retrieves a variable number of bits from the input stream. Does not recognize markers. 
+ inline uint jpeg_decoder::get_bits(int num_bits) + { + if (!num_bits) + return 0; + + uint i = m_bit_buf >> (32 - num_bits); + + if ((m_bits_left -= num_bits) <= 0) + { + m_bit_buf <<= (num_bits += m_bits_left); + + uint c1 = get_char(); + uint c2 = get_char(); + m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2; + + m_bit_buf <<= -m_bits_left; + + m_bits_left += 16; + + assert(m_bits_left >= 0); + } + else + m_bit_buf <<= num_bits; + + return i; + } + + // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered. + inline uint jpeg_decoder::get_bits_no_markers(int num_bits) + { + if (!num_bits) + return 0; + + assert(num_bits <= 16); + + uint i = m_bit_buf >> (32 - num_bits); + + if ((m_bits_left -= num_bits) <= 0) + { + m_bit_buf <<= (num_bits += m_bits_left); + + if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF)) + { + uint c1 = get_octet(); + uint c2 = get_octet(); + m_bit_buf |= (c1 << 8) | c2; + } + else + { + m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1]; + m_in_buf_left -= 2; + m_pIn_buf_ofs += 2; + } + + m_bit_buf <<= -m_bits_left; + + m_bits_left += 16; + + assert(m_bits_left >= 0); + } + else + m_bit_buf <<= num_bits; + + return i; + } + + // Decodes a Huffman encoded symbol. + inline int jpeg_decoder::huff_decode(huff_tables* pH) + { + if (!pH) + stop_decoding(JPGD_DECODE_ERROR); + + int symbol; + // Check first 8-bits: do we have a complete symbol? + if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0) + { + // Decode more bits, use a tree traversal to find symbol. + int ofs = 23; + do + { + unsigned int idx = -(int)(symbol + ((m_bit_buf >> ofs) & 1)); + + // This should never happen, but to be safe I'm turning these asserts into a run-time check. 
+ if ((idx >= JPGD_HUFF_TREE_MAX_LENGTH) || (ofs < 0)) + stop_decoding(JPGD_DECODE_ERROR); + + symbol = pH->tree[idx]; + ofs--; + } while (symbol < 0); + + get_bits_no_markers(8 + (23 - ofs)); + } + else + { + assert(symbol < JPGD_HUFF_CODE_SIZE_MAX_LENGTH); + get_bits_no_markers(pH->code_size[symbol]); + } + + return symbol; + } + + // Decodes a Huffman encoded symbol. + inline int jpeg_decoder::huff_decode(huff_tables* pH, int& extra_bits) + { + int symbol; + + if (!pH) + stop_decoding(JPGD_DECODE_ERROR); + + // Check first 8-bits: do we have a complete symbol? + if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0) + { + // Use a tree traversal to find symbol. + int ofs = 23; + do + { + unsigned int idx = -(int)(symbol + ((m_bit_buf >> ofs) & 1)); + + // This should never happen, but to be safe I'm turning these asserts into a run-time check. + if ((idx >= JPGD_HUFF_TREE_MAX_LENGTH) || (ofs < 0)) + stop_decoding(JPGD_DECODE_ERROR); + + symbol = pH->tree[idx]; + ofs--; + } while (symbol < 0); + + get_bits_no_markers(8 + (23 - ofs)); + + extra_bits = get_bits_no_markers(symbol & 0xF); + } + else + { + if (symbol & 0x8000) + { + //get_bits_no_markers((symbol >> 8) & 31); + assert(((symbol >> 8) & 31) <= 15); + get_bits_no_markers((symbol >> 8) & 15); + extra_bits = symbol >> 16; + } + else + { + int code_size = (symbol >> 8) & 31; + int num_extra_bits = symbol & 0xF; + int bits = code_size + num_extra_bits; + + if (bits <= 16) + extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1); + else + { + get_bits_no_markers(code_size); + extra_bits = get_bits_no_markers(num_extra_bits); + } + } + + symbol &= 0xFF; + } + + return symbol; + } + + // Tables and macro used to fully decode the DPCM differences. 
+ static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 }; + static const int s_extend_offset[16] = { 0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767 }; + //static const int s_extend_mask[] = { 0, (1 << 0), (1 << 1), (1 << 2), (1 << 3), (1 << 4), (1 << 5), (1 << 6), (1 << 7), (1 << 8), (1 << 9), (1 << 10), (1 << 11), (1 << 12), (1 << 13), (1 << 14), (1 << 15), (1 << 16) }; + +#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x)) + + // Unconditionally frees all allocated m_blocks. + void jpeg_decoder::free_all_blocks() + { + m_pStream = nullptr; + for (mem_block* b = m_pMem_blocks; b; ) + { + mem_block* n = b->m_pNext; + jpgd_free(b); + b = n; + } + m_pMem_blocks = nullptr; + } + + // This method handles all errors. It will never return. + // It could easily be changed to use C++ exceptions. 
+ JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status) + { + m_error_code = status; + free_all_blocks(); + longjmp(m_jmp_state, status); + } + + void* jpeg_decoder::alloc(size_t nSize, bool zero) + { + nSize = (JPGD_MAX(nSize, 1) + 3) & ~3; + char* rv = nullptr; + for (mem_block* b = m_pMem_blocks; b; b = b->m_pNext) + { + if ((b->m_used_count + nSize) <= b->m_size) + { + rv = b->m_data + b->m_used_count; + b->m_used_count += nSize; + break; + } + } + if (!rv) + { + int capacity = JPGD_MAX(32768 - 256, ((int)nSize + 2047) & ~2047); + mem_block* b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity); + if (!b) + { + stop_decoding(JPGD_NOTENOUGHMEM); + } + + b->m_pNext = m_pMem_blocks; + m_pMem_blocks = b; + b->m_used_count = nSize; + b->m_size = capacity; + rv = b->m_data; + } + if (zero) memset(rv, 0, nSize); + return rv; + } + + void jpeg_decoder::word_clear(void* p, uint16 c, uint n) + { + uint8* pD = (uint8*)p; + const uint8 l = c & 0xFF, h = (c >> 8) & 0xFF; + while (n) + { + pD[0] = l; + pD[1] = h; + pD += 2; + n--; + } + } + + // Refill the input buffer. + // This method will sit in a loop until (A) the buffer is full or (B) + // the stream's read() method reports and end of file condition. + void jpeg_decoder::prep_in_buffer() + { + m_in_buf_left = 0; + m_pIn_buf_ofs = m_in_buf; + + if (m_eof_flag) + return; + + do + { + int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag); + if (bytes_read == -1) + stop_decoding(JPGD_STREAM_READ); + + m_in_buf_left += bytes_read; + } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag)); + + m_total_bytes_read += m_in_buf_left; + + // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid). + // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.) 
+		word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
+	}
+
+	// Read a Huffman code table.
+	void jpeg_decoder::read_dht_marker()
+	{
+		int i, index, count;
+		uint8 huff_num[17];
+		uint8 huff_val[256];
+
+		uint num_left = get_bits(16);
+
+		if (num_left < 2)
+			stop_decoding(JPGD_BAD_DHT_MARKER);
+
+		num_left -= 2;
+
+		while (num_left)
+		{
+			index = get_bits(8);
+
+			huff_num[0] = 0;
+
+			count = 0;
+
+			for (i = 1; i <= 16; i++)
+			{
+				huff_num[i] = static_cast<uint8>(get_bits(8));
+				count += huff_num[i];
+			}
+
+			if (count > 255)
+				stop_decoding(JPGD_BAD_DHT_COUNTS);
+
+			bool symbol_present[256];
+			memset(symbol_present, 0, sizeof(symbol_present));
+
+			for (i = 0; i < count; i++)
+			{
+				const int s = get_bits(8);
+
+				// Check for obviously bogus tables.
+				if (symbol_present[s])
+					stop_decoding(JPGD_BAD_DHT_COUNTS);
+
+				huff_val[i] = static_cast<uint8>(s);
+				symbol_present[s] = true;
+			}
+
+			i = 1 + 16 + count;
+
+			if (num_left < (uint)i)
+				stop_decoding(JPGD_BAD_DHT_MARKER);
+
+			num_left -= i;
+
+			if ((index & 0x10) > 0x10)
+				stop_decoding(JPGD_BAD_DHT_INDEX);
+
+			index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
+
+			if (index >= JPGD_MAX_HUFF_TABLES)
+				stop_decoding(JPGD_BAD_DHT_INDEX);
+
+			if (!m_huff_num[index])
+				m_huff_num[index] = (uint8*)alloc(17);
+
+			if (!m_huff_val[index])
+				m_huff_val[index] = (uint8*)alloc(256);
+
+			m_huff_ac[index] = (index & 0x10) != 0;
+			memcpy(m_huff_num[index], huff_num, 17);
+			memcpy(m_huff_val[index], huff_val, 256);
+		}
+	}
+
+	// Read a quantization table.
+	void jpeg_decoder::read_dqt_marker()
+	{
+		int n, i, prec;
+		uint num_left;
+		uint temp;
+
+		num_left = get_bits(16);
+
+		if (num_left < 2)
+			stop_decoding(JPGD_BAD_DQT_MARKER);
+
+		num_left -= 2;
+
+		while (num_left)
+		{
+			n = get_bits(8);
+			prec = n >> 4;
+			n &= 0x0F;
+
+			if (n >= JPGD_MAX_QUANT_TABLES)
+				stop_decoding(JPGD_BAD_DQT_TABLE);
+
+			if (!m_quant[n])
+				m_quant[n] = (jpgd_quant_t*)alloc(64 * sizeof(jpgd_quant_t));
+
+			// read quantization entries, in zag order
+			for (i = 0; i < 64; i++)
+			{
+				temp = get_bits(8);
+
+				if (prec)
+					temp = (temp << 8) + get_bits(8);
+
+				m_quant[n][i] = static_cast<jpgd_quant_t>(temp);
+			}
+
+			i = 64 + 1;
+
+			if (prec)
+				i += 64;
+
+			if (num_left < (uint)i)
+				stop_decoding(JPGD_BAD_DQT_LENGTH);
+
+			num_left -= i;
+		}
+	}
+
+	// Read the start of frame (SOF) marker.
+	void jpeg_decoder::read_sof_marker()
+	{
+		int i;
+		uint num_left;
+
+		num_left = get_bits(16);
+
+		/* precision: sorry, only 8-bit precision is supported */
+		if (get_bits(8) != 8)
+			stop_decoding(JPGD_BAD_PRECISION);
+
+		m_image_y_size = get_bits(16);
+
+		if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
+			stop_decoding(JPGD_BAD_HEIGHT);
+
+		m_image_x_size = get_bits(16);
+
+		if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
+			stop_decoding(JPGD_BAD_WIDTH);
+
+		m_comps_in_frame = get_bits(8);
+
+		if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
+			stop_decoding(JPGD_TOO_MANY_COMPONENTS);
+
+		if (num_left != (uint)(m_comps_in_frame * 3 + 8))
+			stop_decoding(JPGD_BAD_SOF_LENGTH);
+
+		for (i = 0; i < m_comps_in_frame; i++)
+		{
+			m_comp_ident[i] = get_bits(8);
+			m_comp_h_samp[i] = get_bits(4);
+			m_comp_v_samp[i] = get_bits(4);
+
+			if (!m_comp_h_samp[i] || !m_comp_v_samp[i] || (m_comp_h_samp[i] > 2) || (m_comp_v_samp[i] > 2))
+				stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
+
+			m_comp_quant[i] = get_bits(8);
+			if (m_comp_quant[i] >= JPGD_MAX_QUANT_TABLES)
+				stop_decoding(JPGD_DECODE_ERROR);
+		}
+	}
+
+	// Used to skip unrecognized markers.
+ void jpeg_decoder::skip_variable_marker() + { + uint num_left; + + num_left = get_bits(16); + + if (num_left < 2) + stop_decoding(JPGD_BAD_VARIABLE_MARKER); + + num_left -= 2; + + while (num_left) + { + get_bits(8); + num_left--; + } + } + + // Read a define restart interval (DRI) marker. + void jpeg_decoder::read_dri_marker() + { + if (get_bits(16) != 4) + stop_decoding(JPGD_BAD_DRI_LENGTH); + + m_restart_interval = get_bits(16); + } + + // Read a start of scan (SOS) marker. + void jpeg_decoder::read_sos_marker() + { + uint num_left; + int i, ci, n, c, cc; + + num_left = get_bits(16); + + n = get_bits(8); + + m_comps_in_scan = n; + + num_left -= 3; + + if ((num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN)) + stop_decoding(JPGD_BAD_SOS_LENGTH); + + for (i = 0; i < n; i++) + { + cc = get_bits(8); + c = get_bits(8); + num_left -= 2; + + for (ci = 0; ci < m_comps_in_frame; ci++) + if (cc == m_comp_ident[ci]) + break; + + if (ci >= m_comps_in_frame) + stop_decoding(JPGD_BAD_SOS_COMP_ID); + + if (ci >= JPGD_MAX_COMPONENTS) + stop_decoding(JPGD_DECODE_ERROR); + + m_comp_list[i] = ci; + + m_comp_dc_tab[ci] = (c >> 4) & 15; + m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1); + + if (m_comp_dc_tab[ci] >= JPGD_MAX_HUFF_TABLES) + stop_decoding(JPGD_DECODE_ERROR); + + if (m_comp_ac_tab[ci] >= JPGD_MAX_HUFF_TABLES) + stop_decoding(JPGD_DECODE_ERROR); + } + + m_spectral_start = get_bits(8); + m_spectral_end = get_bits(8); + m_successive_high = get_bits(4); + m_successive_low = get_bits(4); + + if (!m_progressive_flag) + { + m_spectral_start = 0; + m_spectral_end = 63; + } + + num_left -= 3; + + /* read past whatever is num_left */ + while (num_left) + { + get_bits(8); + num_left--; + } + } + + // Finds the next marker. 
+ int jpeg_decoder::next_marker() + { + uint c;// , bytes; + + //bytes = 0; + + do + { + do + { + //bytes++; + c = get_bits(8); + } while (c != 0xFF); + + do + { + c = get_bits(8); + } while (c == 0xFF); + + } while (c == 0); + + // If bytes > 0 here, there where extra bytes before the marker (not good). + + return c; + } + + // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is + // encountered. + int jpeg_decoder::process_markers() + { + int c; + + for (; ; ) + { + c = next_marker(); + + switch (c) + { + case M_SOF0: + case M_SOF1: + case M_SOF2: + case M_SOF3: + case M_SOF5: + case M_SOF6: + case M_SOF7: + // case M_JPG: + case M_SOF9: + case M_SOF10: + case M_SOF11: + case M_SOF13: + case M_SOF14: + case M_SOF15: + case M_SOI: + case M_EOI: + case M_SOS: + { + return c; + } + case M_DHT: + { + read_dht_marker(); + break; + } + // No arithmitic support - dumb patents! + case M_DAC: + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + case M_DQT: + { + read_dqt_marker(); + break; + } + case M_DRI: + { + read_dri_marker(); + break; + } + //case M_APP0: /* no need to read the JFIF marker */ + case M_JPG: + case M_RST0: /* no parameters */ + case M_RST1: + case M_RST2: + case M_RST3: + case M_RST4: + case M_RST5: + case M_RST6: + case M_RST7: + case M_TEM: + { + stop_decoding(JPGD_UNEXPECTED_MARKER); + break; + } + default: /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */ + { + skip_variable_marker(); + break; + } + } + } + } + + // Finds the start of image (SOI) marker. 
+ void jpeg_decoder::locate_soi_marker() + { + uint lastchar, thischar; + uint bytesleft; + + lastchar = get_bits(8); + + thischar = get_bits(8); + + /* ok if it's a normal JPEG file without a special header */ + + if ((lastchar == 0xFF) && (thischar == M_SOI)) + return; + + bytesleft = 4096; + + for (; ; ) + { + if (--bytesleft == 0) + stop_decoding(JPGD_NOT_JPEG); + + lastchar = thischar; + + thischar = get_bits(8); + + if (lastchar == 0xFF) + { + if (thischar == M_SOI) + break; + else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end + stop_decoding(JPGD_NOT_JPEG); + } + } + + // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad. + thischar = (m_bit_buf >> 24) & 0xFF; + + if (thischar != 0xFF) + stop_decoding(JPGD_NOT_JPEG); + } + + // Find a start of frame (SOF) marker. + void jpeg_decoder::locate_sof_marker() + { + locate_soi_marker(); + + int c = process_markers(); + + switch (c) + { + case M_SOF2: + { + m_progressive_flag = JPGD_TRUE; + read_sof_marker(); + break; + } + case M_SOF0: /* baseline DCT */ + case M_SOF1: /* extended sequential DCT */ + { + read_sof_marker(); + break; + } + case M_SOF9: /* Arithmitic coding */ + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + default: + { + stop_decoding(JPGD_UNSUPPORTED_MARKER); + break; + } + } + } + + // Find a start of scan (SOS) marker. + int jpeg_decoder::locate_sos_marker() + { + int c; + + c = process_markers(); + + if (c == M_EOI) + return JPGD_FALSE; + else if (c != M_SOS) + stop_decoding(JPGD_UNEXPECTED_MARKER); + + read_sos_marker(); + + return JPGD_TRUE; + } + + // Reset everything to default/uninitialized state. 
+ void jpeg_decoder::init(jpeg_decoder_stream* pStream, uint32_t flags) + { + m_flags = flags; + m_pMem_blocks = nullptr; + m_error_code = JPGD_SUCCESS; + m_ready_flag = false; + m_image_x_size = m_image_y_size = 0; + m_pStream = pStream; + m_progressive_flag = JPGD_FALSE; + + memset(m_huff_ac, 0, sizeof(m_huff_ac)); + memset(m_huff_num, 0, sizeof(m_huff_num)); + memset(m_huff_val, 0, sizeof(m_huff_val)); + memset(m_quant, 0, sizeof(m_quant)); + + m_scan_type = 0; + m_comps_in_frame = 0; + + memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp)); + memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp)); + memset(m_comp_quant, 0, sizeof(m_comp_quant)); + memset(m_comp_ident, 0, sizeof(m_comp_ident)); + memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks)); + memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks)); + + m_comps_in_scan = 0; + memset(m_comp_list, 0, sizeof(m_comp_list)); + memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab)); + memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab)); + + m_spectral_start = 0; + m_spectral_end = 0; + m_successive_low = 0; + m_successive_high = 0; + m_max_mcu_x_size = 0; + m_max_mcu_y_size = 0; + m_blocks_per_mcu = 0; + m_max_blocks_per_row = 0; + m_mcus_per_row = 0; + m_mcus_per_col = 0; + + memset(m_mcu_org, 0, sizeof(m_mcu_org)); + + m_total_lines_left = 0; + m_mcu_lines_left = 0; + m_num_buffered_scanlines = 0; + m_real_dest_bytes_per_scan_line = 0; + m_dest_bytes_per_scan_line = 0; + m_dest_bytes_per_pixel = 0; + + memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs)); + + memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs)); + memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs)); + memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu)); + + m_eob_run = 0; + + m_pIn_buf_ofs = m_in_buf; + m_in_buf_left = 0; + m_eof_flag = false; + m_tem_flag = 0; + + memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start)); + memset(m_in_buf, 0, sizeof(m_in_buf)); + memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end)); + + m_restart_interval = 0; + m_restarts_left = 0; + m_next_restart_num = 0; + 
+		m_max_mcus_per_row = 0;
+		m_max_blocks_per_mcu = 0;
+		m_max_mcus_per_col = 0;
+
+		memset(m_last_dc_val, 0, sizeof(m_last_dc_val));
+		m_pMCU_coefficients = nullptr;
+		m_pSample_buf = nullptr;
+		m_pSample_buf_prev = nullptr;
+		m_sample_buf_prev_valid = false;
+
+		m_total_bytes_read = 0;
+
+		m_pScan_line_0 = nullptr;
+		m_pScan_line_1 = nullptr;
+
+		// Ready the input buffer.
+		prep_in_buffer();
+
+		// Prime the bit buffer.
+		m_bits_left = 16;
+		m_bit_buf = 0;
+
+		get_bits(16);
+		get_bits(16);
+
+		for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
+			m_mcu_block_max_zag[i] = 64;
+	}
+
+#define SCALEBITS 16
+#define ONE_HALF ((int) 1 << (SCALEBITS-1))
+#define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5f))
+
+	// Create a few tables that allow us to quickly convert YCbCr to RGB.
+	void jpeg_decoder::create_look_ups()
+	{
+		for (int i = 0; i <= 255; i++)
+		{
+			int k = i - 128;
+			m_crr[i] = (FIX(1.40200f) * k + ONE_HALF) >> SCALEBITS;
+			m_cbb[i] = (FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS;
+			m_crg[i] = (-FIX(0.71414f)) * k;
+			m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF;
+		}
+	}
+
+	// This method throws back into the stream any bytes that where read
+	// into the bit buffer during initial marker scanning.
+	void jpeg_decoder::fix_in_buffer()
+	{
+		// In case any 0xFF's where pulled into the buffer during marker scanning.
+		assert((m_bits_left & 7) == 0);
+
+		if (m_bits_left == 16)
+			stuff_char((uint8)(m_bit_buf & 0xFF));
+
+		if (m_bits_left >= 8)
+			stuff_char((uint8)((m_bit_buf >> 8) & 0xFF));
+
+		stuff_char((uint8)((m_bit_buf >> 16) & 0xFF));
+		stuff_char((uint8)((m_bit_buf >> 24) & 0xFF));
+
+		m_bits_left = 16;
+		get_bits_no_markers(16);
+		get_bits_no_markers(16);
+	}
+
+	void jpeg_decoder::transform_mcu(int mcu_row)
+	{
+		jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
+		if (mcu_row * m_blocks_per_mcu >= m_max_blocks_per_row)
+			stop_decoding(JPGD_DECODE_ERROR);
+
+		uint8* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
+
+		for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+		{
+			idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
+			pSrc_ptr += 64;
+			pDst_ptr += 64;
+		}
+	}
+
+	// Loads and dequantizes the next row of (already decoded) coefficients.
+	// Progressive images only.
+	void jpeg_decoder::load_next_row()
+	{
+		int i;
+		jpgd_block_t* p;
+		jpgd_quant_t* q;
+		int mcu_row, mcu_block;// , row_block = 0;
+		int component_num, component_id;
+		int block_x_mcu[JPGD_MAX_COMPONENTS];
+
+		memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int));
+
+		for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+		{
+			int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
+
+			for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+			{
+				component_id = m_mcu_org[mcu_block];
+				if (m_comp_quant[component_id] >= JPGD_MAX_QUANT_TABLES)
+					stop_decoding(JPGD_DECODE_ERROR);
+
+				q = m_quant[m_comp_quant[component_id]];
+
+				p = m_pMCU_coefficients + 64 * mcu_block;
+
+				jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
+				jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
+				p[0] = pDC[0];
+				memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t));
+
+				for (i = 63; i > 0; i--)
+					if (p[g_ZAG[i]])
+						break;
+
+				m_mcu_block_max_zag[mcu_block] = i + 1;
+
+				for (; i >= 0; i--)
+					if (p[g_ZAG[i]])
+						p[g_ZAG[i]] = static_cast<jpgd_block_t>(p[g_ZAG[i]] * q[i]);
+
+				//row_block++;
+
+				if (m_comps_in_scan == 1)
+					block_x_mcu[component_id]++;
+				else
+				{
+					if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
+					{
+						block_x_mcu_ofs = 0;
+
+						if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
+						{
+							block_y_mcu_ofs = 0;
+
+							block_x_mcu[component_id] += m_comp_h_samp[component_id];
+						}
+					}
+				}
+			}
+
+			transform_mcu(mcu_row);
+		}
+
+		if (m_comps_in_scan == 1)
+			m_block_y_mcu[m_comp_list[0]]++;
+		else
+		{
+			for (component_num = 0; component_num < m_comps_in_scan; component_num++)
+			{
+				component_id = m_comp_list[component_num];
+
+				m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
+			}
+		}
+	}
+
+	// Restart interval processing.
+ void jpeg_decoder::process_restart() + { + int i; + int c = 0; + + // Align to a byte boundry + // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers! + //get_bits_no_markers(m_bits_left & 7); + + // Let's scan a little bit to find the marker, but not _too_ far. + // 1536 is a "fudge factor" that determines how much to scan. + for (i = 1536; i > 0; i--) + if (get_char() == 0xFF) + break; + + if (i == 0) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + for (; i > 0; i--) + if ((c = get_char()) != 0xFF) + break; + + if (i == 0) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + // Is it the expected marker? If not, something bad happened. + if (c != (m_next_restart_num + M_RST0)) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + // Reset each component's DC prediction values. + memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); + + m_eob_run = 0; + + m_restarts_left = m_restart_interval; + + m_next_restart_num = (m_next_restart_num + 1) & 7; + + // Get the bit buffer going again... + + m_bits_left = 16; + get_bits_no_markers(16); + get_bits_no_markers(16); + } + + static inline int dequantize_ac(int c, int q) { c *= q; return c; } + + // Decodes and dequantizes the next row of coefficients. 
+ void jpeg_decoder::decode_next_row() + { + //int row_block = 0; + + for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) + { + if ((m_restart_interval) && (m_restarts_left == 0)) + process_restart(); + + jpgd_block_t* p = m_pMCU_coefficients; + for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64) + { + int component_id = m_mcu_org[mcu_block]; + if (m_comp_quant[component_id] >= JPGD_MAX_QUANT_TABLES) + stop_decoding(JPGD_DECODE_ERROR); + + jpgd_quant_t* q = m_quant[m_comp_quant[component_id]]; + + int r, s; + s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r); + if (s >= 16) + stop_decoding(JPGD_DECODE_ERROR); + + s = JPGD_HUFF_EXTEND(r, s); + + m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]); + + p[0] = static_cast(s * q[0]); + + int prev_num_set = m_mcu_block_max_zag[mcu_block]; + + huff_tables* pH = m_pHuff_tabs[m_comp_ac_tab[component_id]]; + + int k; + for (k = 1; k < 64; k++) + { + int extra_bits; + s = huff_decode(pH, extra_bits); + + r = s >> 4; + s &= 15; + + if (s) + { + if (r) + { + if ((k + r) > 63) + stop_decoding(JPGD_DECODE_ERROR); + + if (k < prev_num_set) + { + int n = JPGD_MIN(r, prev_num_set - k); + int kt = k; + while (n--) + p[g_ZAG[kt++]] = 0; + } + + k += r; + } + + s = JPGD_HUFF_EXTEND(extra_bits, s); + + if (k >= 64) + stop_decoding(JPGD_DECODE_ERROR); + + p[g_ZAG[k]] = static_cast(dequantize_ac(s, q[k])); //s * q[k]; + } + else + { + if (r == 15) + { + if ((k + 16) > 64) + stop_decoding(JPGD_DECODE_ERROR); + + if (k < prev_num_set) + { + int n = JPGD_MIN(16, prev_num_set - k); + int kt = k; + while (n--) + { + if (kt > 63) + stop_decoding(JPGD_DECODE_ERROR); + p[g_ZAG[kt++]] = 0; + } + } + + k += 16 - 1; // - 1 because the loop counter is k + + if (p[g_ZAG[k & 63]] != 0) + stop_decoding(JPGD_DECODE_ERROR); + } + else + break; + } + } + + if (k < prev_num_set) + { + int kt = k; + while (kt < prev_num_set) + p[g_ZAG[kt++]] = 0; + } + + m_mcu_block_max_zag[mcu_block] = k; + + 
//row_block++; + } + + transform_mcu(mcu_row); + + m_restarts_left--; + } + } + + // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB + void jpeg_decoder::H1V1Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d = m_pScan_line_0; + uint8* s = m_pSample_buf + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int j = 0; j < 8; j++) + { + int y = s[j]; + int cb = s[64 + j]; + int cr = s[128 + j]; + + d[0] = clamp(y + m_crr[cr]); + d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16)); + d[2] = clamp(y + m_cbb[cb]); + d[3] = 255; + + d += 4; + } + + s += 64 * 3; + } + } + + // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H2V1Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d0 = m_pScan_line_0; + uint8* y = m_pSample_buf + row * 8; + uint8* c = m_pSample_buf + 2 * 64 + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int l = 0; l < 2; l++) + { + for (int j = 0; j < 4; j++) + { + int cb = c[0]; + int cr = c[64]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j << 1]; + d0[0] = clamp(yy + rc); + d0[1] = clamp(yy + gc); + d0[2] = clamp(yy + bc); + d0[3] = 255; + + yy = y[(j << 1) + 1]; + d0[4] = clamp(yy + rc); + d0[5] = clamp(yy + gc); + d0[6] = clamp(yy + bc); + d0[7] = 255; + + d0 += 8; + + c++; + } + y += 64; + } + + y += 64 * 4 - 64 * 2; + c += 64 * 4 - 8; + } + } + + // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H2V1ConvertFiltered() + { + const uint BLOCKS_PER_MCU = 4; + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d0 = m_pScan_line_0; + + const int half_image_x_size = (m_image_x_size >> 1) - 1; + const int row_x8 = row * 8; + + for (int x = 0; x < m_image_x_size; x++) + { + int y = m_pSample_buf[check_sample_buf_ofs((x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 
64 : 0) + (x & 7) + row_x8)]; + + int c_x0 = (x - 1) >> 1; + int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size); + c_x0 = JPGD_MAX(c_x0, 0); + + int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7) + row_x8 + 128; + int cb0 = m_pSample_buf[check_sample_buf_ofs(a)]; + int cr0 = m_pSample_buf[check_sample_buf_ofs(a + 64)]; + + int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7) + row_x8 + 128; + int cb1 = m_pSample_buf[check_sample_buf_ofs(b)]; + int cr1 = m_pSample_buf[check_sample_buf_ofs(b + 64)]; + + int w0 = (x & 1) ? 3 : 1; + int w1 = (x & 1) ? 1 : 3; + + int cb = (cb0 * w0 + cb1 * w1 + 2) >> 2; + int cr = (cr0 * w0 + cr1 * w1 + 2) >> 2; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y + rc); + d0[1] = clamp(y + gc); + d0[2] = clamp(y + bc); + d0[3] = 255; + + d0 += 4; + } + } + + // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H1V2Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d0 = m_pScan_line_0; + uint8* d1 = m_pScan_line_1; + uint8* y; + uint8* c; + + if (row < 8) + y = m_pSample_buf + row * 8; + else + y = m_pSample_buf + 64 * 1 + (row & 7) * 8; + + c = m_pSample_buf + 64 * 2 + (row >> 1) * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int j = 0; j < 8; j++) + { + int cb = c[0 + j]; + int cr = c[64 + j]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j]; + d0[0] = clamp(yy + rc); + d0[1] = clamp(yy + gc); + d0[2] = clamp(yy + bc); + d0[3] = 255; + + yy = y[8 + j]; + d1[0] = clamp(yy + rc); + d1[1] = clamp(yy + gc); + d1[2] = clamp(yy + bc); + d1[3] = 255; + + d0 += 4; + d1 += 4; + } + + y += 64 * 4; + c += 64 * 4; + } + } + + // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H1V2ConvertFiltered() + { + const uint BLOCKS_PER_MCU = 4; + int y = m_image_y_size - m_total_lines_left; + int row = y & 15; + + const int half_image_y_size = (m_image_y_size >> 
1) - 1; + + uint8* d0 = m_pScan_line_0; + + const int w0 = (row & 1) ? 3 : 1; + const int w1 = (row & 1) ? 1 : 3; + + int c_y0 = (y - 1) >> 1; + int c_y1 = JPGD_MIN(c_y0 + 1, half_image_y_size); + + const uint8_t* p_YSamples = m_pSample_buf; + const uint8_t* p_C0Samples = m_pSample_buf; + if ((c_y0 >= 0) && (((row & 15) == 0) || ((row & 15) == 15)) && (m_total_lines_left > 1)) + { + assert(y > 0); + assert(m_sample_buf_prev_valid); + + if ((row & 15) == 15) + p_YSamples = m_pSample_buf_prev; + + p_C0Samples = m_pSample_buf_prev; + } + + const int y_sample_base_ofs = ((row & 8) ? 64 : 0) + (row & 7) * 8; + const int y0_base = (c_y0 & 7) * 8 + 128; + const int y1_base = (c_y1 & 7) * 8 + 128; + + for (int x = 0; x < m_image_x_size; x++) + { + const int base_ofs = (x >> 3) * BLOCKS_PER_MCU * 64 + (x & 7); + + int y_sample = p_YSamples[check_sample_buf_ofs(base_ofs + y_sample_base_ofs)]; + + int a = base_ofs + y0_base; + int cb0_sample = p_C0Samples[check_sample_buf_ofs(a)]; + int cr0_sample = p_C0Samples[check_sample_buf_ofs(a + 64)]; + + int b = base_ofs + y1_base; + int cb1_sample = m_pSample_buf[check_sample_buf_ofs(b)]; + int cr1_sample = m_pSample_buf[check_sample_buf_ofs(b + 64)]; + + int cb = (cb0_sample * w0 + cb1_sample * w1 + 2) >> 2; + int cr = (cr0_sample * w0 + cr1_sample * w1 + 2) >> 2; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y_sample + rc); + d0[1] = clamp(y_sample + gc); + d0[2] = clamp(y_sample + bc); + d0[3] = 255; + + d0 += 4; + } + } + + // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB + void jpeg_decoder::H2V2Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d0 = m_pScan_line_0; + uint8* d1 = m_pScan_line_1; + uint8* y; + uint8* c; + + if (row < 8) + y = m_pSample_buf + row * 8; + else + y = m_pSample_buf + 64 * 2 + (row & 7) * 8; + + c = m_pSample_buf + 64 * 4 + (row >> 1) * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int l = 0; l < 2; 
l++) + { + for (int j = 0; j < 8; j += 2) + { + int cb = c[0]; + int cr = c[64]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j]; + d0[0] = clamp(yy + rc); + d0[1] = clamp(yy + gc); + d0[2] = clamp(yy + bc); + d0[3] = 255; + + yy = y[j + 1]; + d0[4] = clamp(yy + rc); + d0[5] = clamp(yy + gc); + d0[6] = clamp(yy + bc); + d0[7] = 255; + + yy = y[j + 8]; + d1[0] = clamp(yy + rc); + d1[1] = clamp(yy + gc); + d1[2] = clamp(yy + bc); + d1[3] = 255; + + yy = y[j + 8 + 1]; + d1[4] = clamp(yy + rc); + d1[5] = clamp(yy + gc); + d1[6] = clamp(yy + bc); + d1[7] = 255; + + d0 += 8; + d1 += 8; + + c++; + } + y += 64; + } + + y += 64 * 6 - 64 * 2; + c += 64 * 6 - 8; + } + } + + uint32_t jpeg_decoder::H2V2ConvertFiltered() + { + const uint BLOCKS_PER_MCU = 6; + int y = m_image_y_size - m_total_lines_left; + int row = y & 15; + + const int half_image_y_size = (m_image_y_size >> 1) - 1; + + uint8* d0 = m_pScan_line_0; + + int c_y0 = (y - 1) >> 1; + int c_y1 = JPGD_MIN(c_y0 + 1, half_image_y_size); + + const uint8_t* p_YSamples = m_pSample_buf; + const uint8_t* p_C0Samples = m_pSample_buf; + if ((c_y0 >= 0) && (((row & 15) == 0) || ((row & 15) == 15)) && (m_total_lines_left > 1)) + { + assert(y > 0); + assert(m_sample_buf_prev_valid); + + if ((row & 15) == 15) + p_YSamples = m_pSample_buf_prev; + + p_C0Samples = m_pSample_buf_prev; + } + + const int y_sample_base_ofs = ((row & 8) ? 
128 : 0) + (row & 7) * 8; + const int y0_base = (c_y0 & 7) * 8 + 256; + const int y1_base = (c_y1 & 7) * 8 + 256; + + const int half_image_x_size = (m_image_x_size >> 1) - 1; + + static const uint8_t s_muls[2][2][4] = + { + { { 1, 3, 3, 9 }, { 3, 9, 1, 3 }, }, + { { 3, 1, 9, 3 }, { 9, 3, 3, 1 } } + }; + + if (((row & 15) >= 1) && ((row & 15) <= 14)) + { + assert((row & 1) == 1); + assert(((y + 1 - 1) >> 1) == c_y0); + + assert(p_YSamples == m_pSample_buf); + assert(p_C0Samples == m_pSample_buf); + + uint8* d1 = m_pScan_line_1; + const int y_sample_base_ofs1 = (((row + 1) & 8) ? 128 : 0) + ((row + 1) & 7) * 8; + + for (int x = 0; x < m_image_x_size; x++) + { + int k = (x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 64 : 0) + (x & 7); + int y_sample0 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs)]; + int y_sample1 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs1)]; + + int c_x0 = (x - 1) >> 1; + int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size); + c_x0 = JPGD_MAX(c_x0, 0); + + int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7); + int cb00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base)]; + int cr00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base + 64)]; + + int cb01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base)]; + int cr01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base + 64)]; + + int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7); + int cb10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base)]; + int cr10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base + 64)]; + + int cb11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base)]; + int cr11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base + 64)]; + + { + const uint8_t* pMuls = &s_muls[row & 1][x & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + 
int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y_sample0 + rc); + d0[1] = clamp(y_sample0 + gc); + d0[2] = clamp(y_sample0 + bc); + d0[3] = 255; + + d0 += 4; + } + + { + const uint8_t* pMuls = &s_muls[(row + 1) & 1][x & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d1[0] = clamp(y_sample1 + rc); + d1[1] = clamp(y_sample1 + gc); + d1[2] = clamp(y_sample1 + bc); + d1[3] = 255; + + d1 += 4; + } + + if (((x & 1) == 1) && (x < m_image_x_size - 1)) + { + const int nx = x + 1; + assert(c_x0 == (nx - 1) >> 1); + + k = (nx >> 4) * BLOCKS_PER_MCU * 64 + ((nx & 8) ? 64 : 0) + (nx & 7); + y_sample0 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs)]; + y_sample1 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs1)]; + + { + const uint8_t* pMuls = &s_muls[row & 1][nx & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y_sample0 + rc); + d0[1] = clamp(y_sample0 + gc); + d0[2] = clamp(y_sample0 + bc); + d0[3] = 255; + + d0 += 4; + } + + { + const uint8_t* pMuls = &s_muls[(row + 1) & 1][nx & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = 
m_cbb[cb]; + + d1[0] = clamp(y_sample1 + rc); + d1[1] = clamp(y_sample1 + gc); + d1[2] = clamp(y_sample1 + bc); + d1[3] = 255; + + d1 += 4; + } + + ++x; + } + } + + return 2; + } + else + { + for (int x = 0; x < m_image_x_size; x++) + { + int y_sample = p_YSamples[check_sample_buf_ofs((x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 64 : 0) + (x & 7) + y_sample_base_ofs)]; + + int c_x0 = (x - 1) >> 1; + int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size); + c_x0 = JPGD_MAX(c_x0, 0); + + int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7); + int cb00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base)]; + int cr00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base + 64)]; + + int cb01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base)]; + int cr01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base + 64)]; + + int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7); + int cb10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base)]; + int cr10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base + 64)]; + + int cb11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base)]; + int cr11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base + 64)]; + + const uint8_t* pMuls = &s_muls[row & 1][x & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y_sample + rc); + d0[1] = clamp(y_sample + gc); + d0[2] = clamp(y_sample + bc); + d0[3] = 255; + + d0 += 4; + } + + return 1; + } + } + + // Y (1 block per MCU) to 8-bit grayscale + void jpeg_decoder::gray_convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d = m_pScan_line_0; + uint8* s = m_pSample_buf + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + *(uint*)d = *(uint*)s; + 
*(uint*)(&d[4]) = *(uint*)(&s[4]); + + s += 64; + d += 8; + } + } + + // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream. + void jpeg_decoder::find_eoi() + { + if (!m_progressive_flag) + { + // Attempt to read the EOI marker. + //get_bits_no_markers(m_bits_left & 7); + + // Prime the bit buffer + m_bits_left = 16; + get_bits(16); + get_bits(16); + + // The next marker _should_ be EOI + process_markers(); + } + + m_total_bytes_read -= m_in_buf_left; + } + + int jpeg_decoder::decode_next_mcu_row() + { + if (setjmp(m_jmp_state)) + return JPGD_FAILED; + + const bool chroma_y_filtering = (m_flags & cFlagLinearChromaFiltering) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2)) && (m_image_x_size >= 2) && (m_image_y_size >= 2); + if (chroma_y_filtering) + { + std::swap(m_pSample_buf, m_pSample_buf_prev); + + m_sample_buf_prev_valid = true; + } + + if (m_progressive_flag) + load_next_row(); + else + decode_next_row(); + + // Find the EOI marker if that was the last row. 
+ if (m_total_lines_left <= m_max_mcu_y_size) + find_eoi(); + + m_mcu_lines_left = m_max_mcu_y_size; + return 0; + } + + int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len) + { + if ((m_error_code) || (!m_ready_flag)) + return JPGD_FAILED; + + if (m_total_lines_left == 0) + return JPGD_DONE; + + const bool chroma_y_filtering = (m_flags & cFlagLinearChromaFiltering) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2)) && (m_image_x_size >= 2) && (m_image_y_size >= 2); + + bool get_another_mcu_row = false; + bool got_mcu_early = false; + if (chroma_y_filtering) + { + if (m_total_lines_left == m_image_y_size) + get_another_mcu_row = true; + else if ((m_mcu_lines_left == 1) && (m_total_lines_left > 1)) + { + get_another_mcu_row = true; + got_mcu_early = true; + } + } + else + { + get_another_mcu_row = (m_mcu_lines_left == 0); + } + + if (get_another_mcu_row) + { + int status = decode_next_mcu_row(); + if (status != 0) + return status; + } + + switch (m_scan_type) + { + case JPGD_YH2V2: + { + if ((m_flags & cFlagLinearChromaFiltering) && (m_image_x_size >= 2) && (m_image_y_size >= 2)) + { + if (m_num_buffered_scanlines == 1) + { + *pScan_line = m_pScan_line_1; + } + else if (m_num_buffered_scanlines == 0) + { + m_num_buffered_scanlines = H2V2ConvertFiltered(); + *pScan_line = m_pScan_line_0; + } + + m_num_buffered_scanlines--; + } + else + { + if ((m_mcu_lines_left & 1) == 0) + { + H2V2Convert(); + *pScan_line = m_pScan_line_0; + } + else + *pScan_line = m_pScan_line_1; + } + + break; + } + case JPGD_YH2V1: + { + if ((m_flags & cFlagLinearChromaFiltering) && (m_image_x_size >= 2) && (m_image_y_size >= 2)) + H2V1ConvertFiltered(); + else + H2V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_YH1V2: + { + if (chroma_y_filtering) + { + H1V2ConvertFiltered(); + *pScan_line = m_pScan_line_0; + } + else + { + if ((m_mcu_lines_left & 1) == 0) + { + H1V2Convert(); + *pScan_line = m_pScan_line_0; + } + else + *pScan_line = 
m_pScan_line_1; + } + + break; + } + case JPGD_YH1V1: + { + H1V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_GRAYSCALE: + { + gray_convert(); + *pScan_line = m_pScan_line_0; + + break; + } + } + + *pScan_line_len = m_real_dest_bytes_per_scan_line; + + if (!got_mcu_early) + { + m_mcu_lines_left--; + } + + m_total_lines_left--; + + return JPGD_SUCCESS; + } + + // Creates the tables needed for efficient Huffman decoding. + void jpeg_decoder::make_huff_table(int index, huff_tables* pH) + { + int p, i, l, si; + uint8 huffsize[258]; + uint huffcode[258]; + uint code; + uint subtree; + int code_size; + int lastp; + int nextfreeentry; + int currententry; + + pH->ac_table = m_huff_ac[index] != 0; + + p = 0; + + for (l = 1; l <= 16; l++) + { + for (i = 1; i <= m_huff_num[index][l]; i++) + { + if (p >= 257) + stop_decoding(JPGD_DECODE_ERROR); + huffsize[p++] = static_cast(l); + } + } + + assert(p < 258); + huffsize[p] = 0; + + lastp = p; + + code = 0; + si = huffsize[0]; + p = 0; + + while (huffsize[p]) + { + while (huffsize[p] == si) + { + if (p >= 257) + stop_decoding(JPGD_DECODE_ERROR); + huffcode[p++] = code; + code++; + } + + code <<= 1; + si++; + } + + memset(pH->look_up, 0, sizeof(pH->look_up)); + memset(pH->look_up2, 0, sizeof(pH->look_up2)); + memset(pH->tree, 0, sizeof(pH->tree)); + memset(pH->code_size, 0, sizeof(pH->code_size)); + + nextfreeentry = -1; + + p = 0; + + while (p < lastp) + { + i = m_huff_val[index][p]; + + code = huffcode[p]; + code_size = huffsize[p]; + + assert(i < JPGD_HUFF_CODE_SIZE_MAX_LENGTH); + pH->code_size[i] = static_cast(code_size); + + if (code_size <= 8) + { + code <<= (8 - code_size); + + for (l = 1 << (8 - code_size); l > 0; l--) + { + if (code >= 256) + stop_decoding(JPGD_DECODE_ERROR); + + pH->look_up[code] = i; + + bool has_extrabits = false; + int extra_bits = 0; + int num_extra_bits = i & 15; + + int bits_to_fetch = code_size; + if (num_extra_bits) + { + int total_codesize = code_size + num_extra_bits; + if 
(total_codesize <= 8) + { + has_extrabits = true; + extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize)); + + if (extra_bits > 0x7FFF) + stop_decoding(JPGD_DECODE_ERROR); + + bits_to_fetch += num_extra_bits; + } + } + + if (!has_extrabits) + pH->look_up2[code] = i | (bits_to_fetch << 8); + else + pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8); + + code++; + } + } + else + { + subtree = (code >> (code_size - 8)) & 0xFF; + + currententry = pH->look_up[subtree]; + + if (currententry == 0) + { + pH->look_up[subtree] = currententry = nextfreeentry; + pH->look_up2[subtree] = currententry = nextfreeentry; + + nextfreeentry -= 2; + } + + code <<= (16 - (code_size - 8)); + + for (l = code_size; l > 9; l--) + { + if ((code & 0x8000) == 0) + currententry--; + + unsigned int idx = -currententry - 1; + + if (idx >= JPGD_HUFF_TREE_MAX_LENGTH) + stop_decoding(JPGD_DECODE_ERROR); + + if (pH->tree[idx] == 0) + { + pH->tree[idx] = nextfreeentry; + + currententry = nextfreeentry; + + nextfreeentry -= 2; + } + else + { + currententry = pH->tree[idx]; + } + + code <<= 1; + } + + if ((code & 0x8000) == 0) + currententry--; + + if ((-currententry - 1) >= JPGD_HUFF_TREE_MAX_LENGTH) + stop_decoding(JPGD_DECODE_ERROR); + + pH->tree[-currententry - 1] = i; + } + + p++; + } + } + + // Verifies the quantization tables needed for this scan are available. + void jpeg_decoder::check_quant_tables() + { + for (int i = 0; i < m_comps_in_scan; i++) + if (m_quant[m_comp_quant[m_comp_list[i]]] == nullptr) + stop_decoding(JPGD_UNDEFINED_QUANT_TABLE); + } + + // Verifies that all the Huffman tables needed for this scan are available. 
+ void jpeg_decoder::check_huff_tables() + { + for (int i = 0; i < m_comps_in_scan; i++) + { + if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == nullptr)) + stop_decoding(JPGD_UNDEFINED_HUFF_TABLE); + + if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == nullptr)) + stop_decoding(JPGD_UNDEFINED_HUFF_TABLE); + } + + for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++) + if (m_huff_num[i]) + { + if (!m_pHuff_tabs[i]) + m_pHuff_tabs[i] = (huff_tables*)alloc(sizeof(huff_tables)); + + make_huff_table(i, m_pHuff_tabs[i]); + } + } + + // Determines the component order inside each MCU. + // Also calcs how many MCU's are on each row, etc. + bool jpeg_decoder::calc_mcu_block_order() + { + int component_num, component_id; + int max_h_samp = 0, max_v_samp = 0; + + for (component_id = 0; component_id < m_comps_in_frame; component_id++) + { + if (m_comp_h_samp[component_id] > max_h_samp) + max_h_samp = m_comp_h_samp[component_id]; + + if (m_comp_v_samp[component_id] > max_v_samp) + max_v_samp = m_comp_v_samp[component_id]; + } + + for (component_id = 0; component_id < m_comps_in_frame; component_id++) + { + m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8; + m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8; + } + + if (m_comps_in_scan == 1) + { + m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]]; + m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]]; + } + else + { + m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp; + m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp; + } + + if (m_comps_in_scan == 1) + { + m_mcu_org[0] = m_comp_list[0]; + + m_blocks_per_mcu = 1; + } + else + { + m_blocks_per_mcu = 0; + + for (component_num = 0; component_num < m_comps_in_scan; component_num++) + { + int num_blocks; + + component_id = 
m_comp_list[component_num]; + + num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id]; + + while (num_blocks--) + m_mcu_org[m_blocks_per_mcu++] = component_id; + } + } + + if (m_blocks_per_mcu > m_max_blocks_per_mcu) + return false; + + for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + int comp_id = m_mcu_org[mcu_block]; + if (comp_id >= JPGD_MAX_QUANT_TABLES) + return false; + } + + return true; + } + + // Starts a new scan. + int jpeg_decoder::init_scan() + { + if (!locate_sos_marker()) + return JPGD_FALSE; + + if (!calc_mcu_block_order()) + return JPGD_FALSE; + + check_huff_tables(); + + check_quant_tables(); + + memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); + + m_eob_run = 0; + + if (m_restart_interval) + { + m_restarts_left = m_restart_interval; + m_next_restart_num = 0; + } + + fix_in_buffer(); + + return JPGD_TRUE; + } + + // Starts a frame. Determines if the number of components or sampling factors + // are supported. + void jpeg_decoder::init_frame() + { + int i; + + if (m_comps_in_frame == 1) + { + if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1)) + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + + m_scan_type = JPGD_GRAYSCALE; + m_max_blocks_per_mcu = 1; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 8; + } + else if (m_comps_in_frame == 3) + { + if (((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) || + ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1))) + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + + if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1)) + { + m_scan_type = JPGD_YH1V1; + + m_max_blocks_per_mcu = 3; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 8; + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1)) + { + m_scan_type = JPGD_YH2V1; + m_max_blocks_per_mcu = 4; + m_max_mcu_x_size = 16; + m_max_mcu_y_size = 8; + } + else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2)) + { + m_scan_type = JPGD_YH1V2; + m_max_blocks_per_mcu = 4; + m_max_mcu_x_size = 8; + 
m_max_mcu_y_size = 16; + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2)) + { + m_scan_type = JPGD_YH2V2; + m_max_blocks_per_mcu = 6; + m_max_mcu_x_size = 16; + m_max_mcu_y_size = 16; + } + else + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + } + else + stop_decoding(JPGD_UNSUPPORTED_COLORSPACE); + + m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size; + m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size; + + // These values are for the *destination* pixels: after conversion. + if (m_scan_type == JPGD_GRAYSCALE) + m_dest_bytes_per_pixel = 1; + else + m_dest_bytes_per_pixel = 4; + + m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel; + + m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel); + + // Initialize two scan line buffers. + m_pScan_line_0 = (uint8*)alloc(m_dest_bytes_per_scan_line, true); + if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2)) + m_pScan_line_1 = (uint8*)alloc(m_dest_bytes_per_scan_line, true); + + m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu; + + // Should never happen + if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW) + stop_decoding(JPGD_DECODE_ERROR); + + // Allocate the coefficient buffer, enough for one MCU + m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t)); + + for (i = 0; i < m_max_blocks_per_mcu; i++) + m_mcu_block_max_zag[i] = 64; + + m_pSample_buf = (uint8*)alloc(m_max_blocks_per_row * 64); + m_pSample_buf_prev = (uint8*)alloc(m_max_blocks_per_row * 64); + + m_total_lines_left = m_image_y_size; + + m_mcu_lines_left = 0; + + create_look_ups(); + } + + // The coeff_buf series of methods originally stored the coefficients + // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache + // was used to make this process more efficient. Now, we can store the entire + // thing in RAM. 
+ jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) + { + coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf)); + + cb->block_num_x = block_num_x; + cb->block_num_y = block_num_y; + cb->block_len_x = block_len_x; + cb->block_len_y = block_len_y; + cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t); + cb->pData = (uint8*)alloc(cb->block_size * block_num_x * block_num_y, true); + return cb; + } + + inline jpgd_block_t* jpeg_decoder::coeff_buf_getp(coeff_buf* cb, int block_x, int block_y) + { + if ((block_x >= cb->block_num_x) || (block_y >= cb->block_num_y)) + stop_decoding(JPGD_DECODE_ERROR); + + return (jpgd_block_t*)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x)); + } + + // The following methods decode the various types of m_blocks encountered + // in progressively encoded images. + void jpeg_decoder::decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y) + { + int s, r; + jpgd_block_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y); + + if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0) + { + if (s >= 16) + pD->stop_decoding(JPGD_DECODE_ERROR); + + r = pD->get_bits_no_markers(s); + s = JPGD_HUFF_EXTEND(r, s); + } + + pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]); + + p[0] = static_cast(s << pD->m_successive_low); + } + + void jpeg_decoder::decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y) + { + if (pD->get_bits_no_markers(1)) + { + jpgd_block_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y); + + p[0] |= (1 << pD->m_successive_low); + } + } + + void jpeg_decoder::decode_block_ac_first(jpeg_decoder* pD, int component_id, int block_x, int block_y) + { + int k, s, r; + + if (pD->m_eob_run) + { + pD->m_eob_run--; + return; + } + + jpgd_block_t* p = 
pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y); + + for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++) + { + unsigned int idx = pD->m_comp_ac_tab[component_id]; + if (idx >= JPGD_MAX_HUFF_TABLES) + pD->stop_decoding(JPGD_DECODE_ERROR); + + s = pD->huff_decode(pD->m_pHuff_tabs[idx]); + + r = s >> 4; + s &= 15; + + if (s) + { + if ((k += r) > 63) + pD->stop_decoding(JPGD_DECODE_ERROR); + + r = pD->get_bits_no_markers(s); + s = JPGD_HUFF_EXTEND(r, s); + + p[g_ZAG[k]] = static_cast(s << pD->m_successive_low); + } + else + { + if (r == 15) + { + if ((k += 15) > 63) + pD->stop_decoding(JPGD_DECODE_ERROR); + } + else + { + pD->m_eob_run = 1 << r; + + if (r) + pD->m_eob_run += pD->get_bits_no_markers(r); + + pD->m_eob_run--; + + break; + } + } + } + } + + void jpeg_decoder::decode_block_ac_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y) + { + int s, k, r; + + int p1 = 1 << pD->m_successive_low; + + //int m1 = (-1) << pD->m_successive_low; + int m1 = static_cast((UINT32_MAX << pD->m_successive_low)); + + jpgd_block_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y); + if (pD->m_spectral_end > 63) + pD->stop_decoding(JPGD_DECODE_ERROR); + + k = pD->m_spectral_start; + + if (pD->m_eob_run == 0) + { + for (; k <= pD->m_spectral_end; k++) + { + unsigned int idx = pD->m_comp_ac_tab[component_id]; + if (idx >= JPGD_MAX_HUFF_TABLES) + pD->stop_decoding(JPGD_DECODE_ERROR); + + s = pD->huff_decode(pD->m_pHuff_tabs[idx]); + + r = s >> 4; + s &= 15; + + if (s) + { + if (s != 1) + pD->stop_decoding(JPGD_DECODE_ERROR); + + if (pD->get_bits_no_markers(1)) + s = p1; + else + s = m1; + } + else + { + if (r != 15) + { + pD->m_eob_run = 1 << r; + + if (r) + pD->m_eob_run += pD->get_bits_no_markers(r); + + break; + } + } + + do + { + jpgd_block_t* this_coef = p + g_ZAG[k & 63]; + + if (*this_coef != 0) + { + if (pD->get_bits_no_markers(1)) + { + if ((*this_coef & p1) == 0) + { + if (*this_coef >= 0) + *this_coef = 
static_cast(*this_coef + p1); + else + *this_coef = static_cast(*this_coef + m1); + } + } + } + else + { + if (--r < 0) + break; + } + + k++; + + } while (k <= pD->m_spectral_end); + + if ((s) && (k < 64)) + { + p[g_ZAG[k]] = static_cast(s); + } + } + } + + if (pD->m_eob_run > 0) + { + for (; k <= pD->m_spectral_end; k++) + { + jpgd_block_t* this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis + + if (*this_coef != 0) + { + if (pD->get_bits_no_markers(1)) + { + if ((*this_coef & p1) == 0) + { + if (*this_coef >= 0) + *this_coef = static_cast(*this_coef + p1); + else + *this_coef = static_cast(*this_coef + m1); + } + } + } + } + + pD->m_eob_run--; + } + } + + // Decode a scan in a progressively encoded image. + void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func) + { + int mcu_row, mcu_col, mcu_block; + int block_x_mcu[JPGD_MAX_COMPONENTS], block_y_mcu[JPGD_MAX_COMPONENTS]; + + memset(block_y_mcu, 0, sizeof(block_y_mcu)); + + for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++) + { + int component_num, component_id; + + memset(block_x_mcu, 0, sizeof(block_x_mcu)); + + for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) + { + int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0; + + if ((m_restart_interval) && (m_restarts_left == 0)) + process_restart(); + + for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + component_id = m_mcu_org[mcu_block]; + + decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, block_y_mcu[component_id] + block_y_mcu_ofs); + + if (m_comps_in_scan == 1) + block_x_mcu[component_id]++; + else + { + if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) + { + block_x_mcu_ofs = 0; + + if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) + { + block_y_mcu_ofs = 0; + block_x_mcu[component_id] += m_comp_h_samp[component_id]; + } + } + } + } + + m_restarts_left--; + } + + if (m_comps_in_scan == 1) + block_y_mcu[m_comp_list[0]]++; + else + { + for (component_num = 0; 
component_num < m_comps_in_scan; component_num++) + { + component_id = m_comp_list[component_num]; + block_y_mcu[component_id] += m_comp_v_samp[component_id]; + } + } + } + } + + // Decode a progressively encoded image. + void jpeg_decoder::init_progressive() + { + int i; + + if (m_comps_in_frame == 4) + stop_decoding(JPGD_UNSUPPORTED_COLORSPACE); + + // Allocate the coefficient buffers. + for (i = 0; i < m_comps_in_frame; i++) + { + m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1); + m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8); + } + + // See https://libjpeg-turbo.org/pmwiki/uploads/About/TwoIssueswiththeJPEGStandard.pdf + uint32_t total_scans = 0; + const uint32_t MAX_SCANS_TO_PROCESS = 1000; + + for (; ; ) + { + int dc_only_scan, refinement_scan; + pDecode_block_func decode_block_func; + + if (!init_scan()) + break; + + dc_only_scan = (m_spectral_start == 0); + refinement_scan = (m_successive_high != 0); + + if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63)) + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + + if (dc_only_scan) + { + if (m_spectral_end) + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + } + else if (m_comps_in_scan != 1) /* AC scans can only contain one component */ + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + + if ((refinement_scan) && (m_successive_low != m_successive_high - 1)) + stop_decoding(JPGD_BAD_SOS_SUCCESSIVE); + + if (dc_only_scan) + { + if (refinement_scan) + decode_block_func = decode_block_dc_refine; + else + decode_block_func = decode_block_dc_first; + } + else + { + if (refinement_scan) + decode_block_func = decode_block_ac_refine; + else + decode_block_func = decode_block_ac_first; + } + + decode_scan(decode_block_func); + + m_bits_left = 16; + get_bits(16); + get_bits(16); + + total_scans++; + if (total_scans > MAX_SCANS_TO_PROCESS) + stop_decoding(JPGD_TOO_MANY_SCANS); + } + + m_comps_in_scan = 
m_comps_in_frame; + + for (i = 0; i < m_comps_in_frame; i++) + m_comp_list[i] = i; + + if (!calc_mcu_block_order()) + stop_decoding(JPGD_DECODE_ERROR); + } + + void jpeg_decoder::init_sequential() + { + if (!init_scan()) + stop_decoding(JPGD_UNEXPECTED_MARKER); + } + + void jpeg_decoder::decode_start() + { + init_frame(); + + if (m_progressive_flag) + init_progressive(); + else + init_sequential(); + } + + void jpeg_decoder::decode_init(jpeg_decoder_stream* pStream, uint32_t flags) + { + init(pStream, flags); + locate_sof_marker(); + } + + jpeg_decoder::jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags) + { + if (setjmp(m_jmp_state)) + return; + decode_init(pStream, flags); + } + + int jpeg_decoder::begin_decoding() + { + if (m_ready_flag) + return JPGD_SUCCESS; + + if (m_error_code) + return JPGD_FAILED; + + if (setjmp(m_jmp_state)) + return JPGD_FAILED; + + decode_start(); + + m_ready_flag = true; + + return JPGD_SUCCESS; + } + + jpeg_decoder::~jpeg_decoder() + { + free_all_blocks(); + } + + jpeg_decoder_file_stream::jpeg_decoder_file_stream() + { + m_pFile = nullptr; + m_eof_flag = false; + m_error_flag = false; + } + + void jpeg_decoder_file_stream::close() + { + if (m_pFile) + { + fclose(m_pFile); + m_pFile = nullptr; + } + + m_eof_flag = false; + m_error_flag = false; + } + + jpeg_decoder_file_stream::~jpeg_decoder_file_stream() + { + close(); + } + + bool jpeg_decoder_file_stream::open(const char* Pfilename) + { + close(); + + m_eof_flag = false; + m_error_flag = false; + +#if defined(_MSC_VER) + m_pFile = nullptr; + fopen_s(&m_pFile, Pfilename, "rb"); +#else + m_pFile = fopen(Pfilename, "rb"); +#endif + return m_pFile != nullptr; + } + + int jpeg_decoder_file_stream::read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) + { + if (!m_pFile) + return -1; + + if (m_eof_flag) + { + *pEOF_flag = true; + return 0; + } + + if (m_error_flag) + return -1; + + int bytes_read = static_cast(fread(pBuf, 1, max_bytes_to_read, m_pFile)); + if (bytes_read < 
max_bytes_to_read) + { + if (ferror(m_pFile)) + { + m_error_flag = true; + return -1; + } + + m_eof_flag = true; + *pEOF_flag = true; + } + + return bytes_read; + } + + bool jpeg_decoder_mem_stream::open(const uint8* pSrc_data, uint size) + { + close(); + m_pSrc_data = pSrc_data; + m_ofs = 0; + m_size = size; + return true; + } + + int jpeg_decoder_mem_stream::read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) + { + *pEOF_flag = false; + + if (!m_pSrc_data) + return -1; + + uint bytes_remaining = m_size - m_ofs; + if ((uint)max_bytes_to_read > bytes_remaining) + { + max_bytes_to_read = bytes_remaining; + *pEOF_flag = true; + } + + memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read); + m_ofs += max_bytes_to_read; + + return max_bytes_to_read; + } + + unsigned char* decompress_jpeg_image_from_stream(jpeg_decoder_stream* pStream, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags) + { + if (!actual_comps) + return nullptr; + *actual_comps = 0; + + if ((!pStream) || (!width) || (!height) || (!req_comps)) + return nullptr; + + if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4)) + return nullptr; + + jpeg_decoder decoder(pStream, flags); + if (decoder.get_error_code() != JPGD_SUCCESS) + return nullptr; + + const int image_width = decoder.get_width(), image_height = decoder.get_height(); + *width = image_width; + *height = image_height; + *actual_comps = decoder.get_num_components(); + + if (decoder.begin_decoding() != JPGD_SUCCESS) + return nullptr; + + const int dst_bpl = image_width * req_comps; + + uint8* pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height); + if (!pImage_data) + return nullptr; + + for (int y = 0; y < image_height; y++) + { + const uint8* pScan_line = nullptr; + uint scan_line_len; + if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) + { + jpgd_free(pImage_data); + return nullptr; + } + + uint8* pDst = pImage_data + y * dst_bpl; + + if (((req_comps == 1) && 
(decoder.get_num_components() == 1)) || ((req_comps == 4) && (decoder.get_num_components() == 3))) + memcpy(pDst, pScan_line, dst_bpl); + else if (decoder.get_num_components() == 1) + { + if (req_comps == 3) + { + for (int x = 0; x < image_width; x++) + { + uint8 luma = pScan_line[x]; + pDst[0] = luma; + pDst[1] = luma; + pDst[2] = luma; + pDst += 3; + } + } + else + { + for (int x = 0; x < image_width; x++) + { + uint8 luma = pScan_line[x]; + pDst[0] = luma; + pDst[1] = luma; + pDst[2] = luma; + pDst[3] = 255; + pDst += 4; + } + } + } + else if (decoder.get_num_components() == 3) + { + if (req_comps == 1) + { + const int YR = 19595, YG = 38470, YB = 7471; + for (int x = 0; x < image_width; x++) + { + int r = pScan_line[x * 4 + 0]; + int g = pScan_line[x * 4 + 1]; + int b = pScan_line[x * 4 + 2]; + *pDst++ = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); + } + } + else + { + for (int x = 0; x < image_width; x++) + { + pDst[0] = pScan_line[x * 4 + 0]; + pDst[1] = pScan_line[x * 4 + 1]; + pDst[2] = pScan_line[x * 4 + 2]; + pDst += 3; + } + } + } + } + + return pImage_data; + } + + unsigned char* decompress_jpeg_image_from_memory(const unsigned char* pSrc_data, int src_data_size, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags) + { + jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size); + return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps, flags); + } + + unsigned char* decompress_jpeg_image_from_file(const char* pSrc_filename, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags) + { + jpgd::jpeg_decoder_file_stream file_stream; + if (!file_stream.open(pSrc_filename)) + return nullptr; + return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps, flags); + } + +} // namespace jpgd diff --git a/thirdparty/basisu/encoder/jpgd.h b/thirdparty/basisu/encoder/jpgd.h new file mode 100644 index 000000000..86a7814ca --- /dev/null 
+++ b/thirdparty/basisu/encoder/jpgd.h @@ -0,0 +1,347 @@ +// jpgd.h - C++ class for JPEG decompression. +// Public domain, Rich Geldreich +#ifndef JPEG_DECODER_H +#define JPEG_DECODER_H + +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#define JPGD_NORETURN __declspec(noreturn) +#elif defined(__GNUC__) +#define JPGD_NORETURN __attribute__ ((noreturn)) +#else +#define JPGD_NORETURN +#endif + +#define JPGD_HUFF_TREE_MAX_LENGTH 512 +#define JPGD_HUFF_CODE_SIZE_MAX_LENGTH 256 + +namespace jpgd +{ + typedef unsigned char uint8; + typedef signed short int16; + typedef unsigned short uint16; + typedef unsigned int uint; + typedef signed int int32; + + // Loads a JPEG image from a memory buffer or a file. + // req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA). + // On return, width/height will be set to the image's dimensions, and actual_comps will be set to the either 1 (grayscale) or 3 (RGB). + // Notes: For more control over where and how the source data is read, see the decompress_jpeg_image_from_stream() function below, or call the jpeg_decoder class directly. + // Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp. + unsigned char* decompress_jpeg_image_from_memory(const unsigned char* pSrc_data, int src_data_size, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0); + unsigned char* decompress_jpeg_image_from_file(const char* pSrc_filename, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0); + + // Success/failure error codes. 
+ enum jpgd_status + { + JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1, + JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE, + JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS, + JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH, + JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER, + JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS, + JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE, + JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, + JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM, JPGD_TOO_MANY_SCANS + }; + + // Input stream interface. + // Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available. + // The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set. + // It the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer. + // Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding. + class jpeg_decoder_stream + { + public: + jpeg_decoder_stream() { } + virtual ~jpeg_decoder_stream() { } + + // The read() method is called when the internal input buffer is empty. + // Parameters: + // pBuf - input buffer + // max_bytes_to_read - maximum bytes that can be written to pBuf + // pEOF_flag - set this to true if at end of stream (no more bytes remaining) + // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0). 
+ // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full. + virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) = 0; + }; + + // stdio FILE stream class. + class jpeg_decoder_file_stream : public jpeg_decoder_stream + { + jpeg_decoder_file_stream(const jpeg_decoder_file_stream&); + jpeg_decoder_file_stream& operator =(const jpeg_decoder_file_stream&); + + FILE* m_pFile; + bool m_eof_flag, m_error_flag; + + public: + jpeg_decoder_file_stream(); + virtual ~jpeg_decoder_file_stream(); + + bool open(const char* Pfilename); + void close(); + + virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag); + }; + + // Memory stream class. + class jpeg_decoder_mem_stream : public jpeg_decoder_stream + { + const uint8* m_pSrc_data; + uint m_ofs, m_size; + + public: + jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { } + jpeg_decoder_mem_stream(const uint8* pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { } + + virtual ~jpeg_decoder_mem_stream() { } + + bool open(const uint8* pSrc_data, uint size); + void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; } + + virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag); + }; + + // Loads JPEG file from a jpeg_decoder_stream. + unsigned char* decompress_jpeg_image_from_stream(jpeg_decoder_stream* pStream, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0); + + enum + { + JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4, + JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 16384, JPGD_MAX_HEIGHT = 32768, JPGD_MAX_WIDTH = 32768 + }; + + typedef int16 jpgd_quant_t; + typedef int16 jpgd_block_t; + + class jpeg_decoder + { + public: + enum + { + cFlagLinearChromaFiltering = 1 + }; + + // Call get_error_code() after constructing to determine if the stream is valid or not. 
You may call the get_width(), get_height(), etc. + // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline. + jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags = cFlagLinearChromaFiltering); + + ~jpeg_decoder(); + + // Call this method after constructing the object to begin decompression. + // If JPGD_SUCCESS is returned you may then call decode() on each scanline. + + int begin_decoding(); + + // Returns the next scan line. + // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). + // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4). + // Returns JPGD_SUCCESS if a scan line has been returned. + // Returns JPGD_DONE if all scan lines have been returned. + // Returns JPGD_FAILED if an error occurred. Call get_error_code() for a more info. + int decode(const void** pScan_line, uint* pScan_line_len); + + inline jpgd_status get_error_code() const { return m_error_code; } + + inline int get_width() const { return m_image_x_size; } + inline int get_height() const { return m_image_y_size; } + + inline int get_num_components() const { return m_comps_in_frame; } + + inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; } + inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); } + + // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file). 
+ inline int get_total_bytes_read() const { return m_total_bytes_read; } + + private: + jpeg_decoder(const jpeg_decoder&); + jpeg_decoder& operator =(const jpeg_decoder&); + + typedef void (*pDecode_block_func)(jpeg_decoder*, int, int, int); + + struct huff_tables + { + bool ac_table; + uint look_up[256]; + uint look_up2[256]; + uint8 code_size[JPGD_HUFF_CODE_SIZE_MAX_LENGTH]; + uint tree[JPGD_HUFF_TREE_MAX_LENGTH]; + }; + + struct coeff_buf + { + uint8* pData; + int block_num_x, block_num_y; + int block_len_x, block_len_y; + int block_size; + }; + + struct mem_block + { + mem_block* m_pNext; + size_t m_used_count; + size_t m_size; + char m_data[1]; + }; + + jmp_buf m_jmp_state; + uint32_t m_flags; + mem_block* m_pMem_blocks; + int m_image_x_size; + int m_image_y_size; + jpeg_decoder_stream* m_pStream; + + int m_progressive_flag; + + uint8 m_huff_ac[JPGD_MAX_HUFF_TABLES]; + uint8* m_huff_num[JPGD_MAX_HUFF_TABLES]; // pointer to number of Huffman codes per bit size + uint8* m_huff_val[JPGD_MAX_HUFF_TABLES]; // pointer to Huffman codes per bit size + jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables + int m_scan_type; // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported) + int m_comps_in_frame; // # of components in frame + int m_comp_h_samp[JPGD_MAX_COMPONENTS]; // component's horizontal sampling factor + int m_comp_v_samp[JPGD_MAX_COMPONENTS]; // component's vertical sampling factor + int m_comp_quant[JPGD_MAX_COMPONENTS]; // component's quantization table selector + int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID + int m_comp_h_blocks[JPGD_MAX_COMPONENTS]; + int m_comp_v_blocks[JPGD_MAX_COMPONENTS]; + int m_comps_in_scan; // # of components in scan + int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan + int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector + int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector + int 
m_spectral_start; // spectral selection start + int m_spectral_end; // spectral selection end + int m_successive_low; // successive approximation low + int m_successive_high; // successive approximation high + int m_max_mcu_x_size; // MCU's max. X size in pixels + int m_max_mcu_y_size; // MCU's max. Y size in pixels + int m_blocks_per_mcu; + int m_max_blocks_per_row; + int m_mcus_per_row, m_mcus_per_col; + int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU]; + int m_total_lines_left; // total # lines left in image + int m_mcu_lines_left; // total # lines left in this MCU + int m_num_buffered_scanlines; + int m_real_dest_bytes_per_scan_line; + int m_dest_bytes_per_scan_line; // rounded up + int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y) + huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES]; + coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS]; + coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS]; + int m_eob_run; + int m_block_y_mcu[JPGD_MAX_COMPONENTS]; + uint8* m_pIn_buf_ofs; + int m_in_buf_left; + int m_tem_flag; + + uint8 m_in_buf_pad_start[64]; + uint8 m_in_buf[JPGD_IN_BUF_SIZE + 128]; + uint8 m_in_buf_pad_end[64]; + + int m_bits_left; + uint m_bit_buf; + int m_restart_interval; + int m_restarts_left; + int m_next_restart_num; + int m_max_mcus_per_row; + int m_max_blocks_per_mcu; + + int m_max_mcus_per_col; + uint m_last_dc_val[JPGD_MAX_COMPONENTS]; + jpgd_block_t* m_pMCU_coefficients; + int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU]; + uint8* m_pSample_buf; + uint8* m_pSample_buf_prev; + int m_crr[256]; + int m_cbb[256]; + int m_crg[256]; + int m_cbg[256]; + uint8* m_pScan_line_0; + uint8* m_pScan_line_1; + jpgd_status m_error_code; + int m_total_bytes_read; + + bool m_ready_flag; + bool m_eof_flag; + bool m_sample_buf_prev_valid; + + inline int check_sample_buf_ofs(int ofs) const { assert(ofs >= 0); assert(ofs < m_max_blocks_per_row * 64); return ofs; } + void free_all_blocks(); + JPGD_NORETURN void stop_decoding(jpgd_status status); + void* alloc(size_t n, bool zero = false); + void 
word_clear(void* p, uint16 c, uint n); + void prep_in_buffer(); + void read_dht_marker(); + void read_dqt_marker(); + void read_sof_marker(); + void skip_variable_marker(); + void read_dri_marker(); + void read_sos_marker(); + int next_marker(); + int process_markers(); + void locate_soi_marker(); + void locate_sof_marker(); + int locate_sos_marker(); + void init(jpeg_decoder_stream* pStream, uint32_t flags); + void create_look_ups(); + void fix_in_buffer(); + void transform_mcu(int mcu_row); + coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y); + inline jpgd_block_t* coeff_buf_getp(coeff_buf* cb, int block_x, int block_y); + void load_next_row(); + void decode_next_row(); + void make_huff_table(int index, huff_tables* pH); + void check_quant_tables(); + void check_huff_tables(); + bool calc_mcu_block_order(); + int init_scan(); + void init_frame(); + void process_restart(); + void decode_scan(pDecode_block_func decode_block_func); + void init_progressive(); + void init_sequential(); + void decode_start(); + void decode_init(jpeg_decoder_stream* pStream, uint32_t flags); + void H2V2Convert(); + uint32_t H2V2ConvertFiltered(); + void H2V1Convert(); + void H2V1ConvertFiltered(); + void H1V2Convert(); + void H1V2ConvertFiltered(); + void H1V1Convert(); + void gray_convert(); + void find_eoi(); + inline uint get_char(); + inline uint get_char(bool* pPadding_flag); + inline void stuff_char(uint8 q); + inline uint8 get_octet(); + inline uint get_bits(int num_bits); + inline uint get_bits_no_markers(int numbits); + inline int huff_decode(huff_tables* pH); + inline int huff_decode(huff_tables* pH, int& extrabits); + + // Clamps a value between 0-255. 
+ static inline uint8 clamp(int i) + { + if (static_cast<uint>(i) > 255) + i = (((~i) >> 31) & 0xFF); + return static_cast<uint8>(i); + } + int decode_next_mcu_row(); + + static void decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y); + static void decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y); + static void decode_block_ac_first(jpeg_decoder* pD, int component_id, int block_x, int block_y); + static void decode_block_ac_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y); + }; + +} // namespace jpgd + +#endif // JPEG_DECODER_H diff --git a/thirdparty/basisu/encoder/pvpngreader.cpp b/thirdparty/basisu/encoder/pvpngreader.cpp new file mode 100644 index 000000000..6b32f66cb --- /dev/null +++ b/thirdparty/basisu/encoder/pvpngreader.cpp @@ -0,0 +1,2664 @@ +// pngreader.cpp - Public Domain - see unlicense at bottom of file. +// +// Notes: +// This is ancient code from ~1995 ported to C++. It was originally written for a +// DOS app with very limited memory. It's not as fast as it should be, but it works. +// The low-level PNG reader class was written assuming the PNG file could not fit +// entirely into memory, which dictated how it was written/structured. +// It has been modified to use either zlib or miniz. +// It supports all PNG color types/bit depths/interlacing, however 16-bit/component +// images are converted to 8-bit. +// TRNS chunks are converted to alpha as needed. +// GAMA chunk is read, but not applied. + +#include "../transcoder/basisu.h" + +#define MINIZ_HEADER_FILE_ONLY +#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES +#include "basisu_miniz.h" + +#include "pvpngreader.h" + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <assert.h> +#include <string.h> +#include <vector> + +#define PVPNG_IDAT_CRC_CHECKING (1) +#define PVPNG_ADLER32_CHECKING (1) + +namespace pv_png +{ + +const uint32_t MIN_PNG_SIZE = 8 + 13 + 8 + 1 + 4 + 12; + +template <typename S> inline S maximum(S a, S b) { return (a > b) ?
a : b; } +template <typename S> inline S minimum(S a, S b) { return (a < b) ? a : b; } + +template <typename T> inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); } + +#define MAX_SUPPORTED_RES (32768) +#define FALSE (0) +#define TRUE (1) +#define PNG_MAX_ALLOC_BLOCKS (16) + +enum +{ + PNG_DECERROR = -3, + PNG_ALLDONE = -5, + PNG_READPASTEOF = -11, + PNG_UNKNOWNTYPE = -16, + PNG_FILEREADERROR = -17, + PNG_NOTENOUGHMEM = -108, + PNG_BAD_CHUNK_CRC32 = -13000, + PNG_NO_IHDR = -13001, + PNG_BAD_WIDTH = -13002, + PNG_BAD_HEIGHT = -13003, + PNG_UNS_COMPRESSION = -13004, + PNG_UNS_FILTER = -13005, + PNG_UNS_ILACE = -13006, + PNG_UNS_COLOR_TYPE = -13007, + PNG_BAD_BIT_DEPTH = -13008, + PNG_BAD_CHUNK_SIZE = -13009, + PNG_UNS_CRITICAL_CHUNK = -13010, + PNG_BAD_TRNS_CHUNK = -13011, + PNG_BAD_PLTE_CHUNK = -13012, + PNG_UNS_RESOLUTION = -13013, + PNG_INVALID_DATA_STREAM = -13014, + PNG_MISSING_PALETTE = -13015, + PNG_UNS_PREDICTOR = -13016, + PNG_INCOMPLETE_IMAGE = -13017, + PNG_TOO_MUCH_DATA = -13018 +}; + +#define PNG_COLOR_TYPE_PAL_MASK (1) +#define PNG_COLOR_TYPE_COL_MASK (2) +#define PNG_COLOR_TYPE_ALP_MASK (4) + +#define PNG_INFLATE_SRC_BUF_SIZE (4096) + +struct ihdr_struct +{ + uint32_t m_width; + uint32_t m_height; + uint8_t m_bit_depth; + uint8_t m_color_type; + uint8_t m_comp_type; + uint8_t m_filter_type; + uint8_t m_ilace_type; +}; + +class png_file +{ +public: + png_file() { } + virtual ~png_file() { } + + virtual bool resize(uint64_t new_size) = 0; + virtual uint64_t get_size() = 0; + virtual uint64_t tell() = 0; + virtual bool seek(uint64_t ofs) = 0; + virtual size_t write(const void* pBuf, size_t len) = 0; + virtual size_t read(void* pBuf, size_t len) = 0; +}; + +class png_memory_file : public png_file +{ +public: + std::vector<uint8_t> m_buf; + uint64_t m_ofs; + + png_memory_file() : + png_file(), + m_ofs(0) + { + } + + virtual ~png_memory_file() + { + } + + std::vector<uint8_t>& get_buf() { return m_buf; } + const std::vector<uint8_t>& get_buf() const { return m_buf; } + + void init() + { + m_ofs = 0;
+ m_buf.resize(0); + } + + virtual bool resize(uint64_t new_size) + { + if ((sizeof(size_t) == sizeof(uint32_t)) && (new_size >= 0x7FFFFFFF)) + return false; + + m_buf.resize((size_t)new_size); + m_ofs = m_buf.size(); + + return true; + } + + virtual uint64_t get_size() + { + return m_buf.size(); + } + + virtual uint64_t tell() + { + return m_ofs; + } + + virtual bool seek(uint64_t ofs) + { + m_ofs = ofs; + return true; + } + + virtual size_t write(const void* pBuf, size_t len) + { + uint64_t new_size = m_ofs + len; + if (new_size > m_buf.size()) + { + if ((sizeof(size_t) == sizeof(uint32_t)) && (new_size > 0x7FFFFFFFUL)) + return 0; + m_buf.resize((size_t)new_size); + } + + memcpy(&m_buf[(size_t)m_ofs], pBuf, len); + m_ofs += len; + + return len; + } + + virtual size_t read(void* pBuf, size_t len) + { + if (m_ofs >= m_buf.size()) + return 0; + + uint64_t max_bytes = minimum<uint64_t>(len, m_buf.size() - m_ofs); + memcpy(pBuf, &m_buf[(size_t)m_ofs], (size_t)max_bytes); + + m_ofs += max_bytes; + + return (size_t)max_bytes; + } +}; + +class png_readonly_memory_file : public png_file +{ +public: + const uint8_t* m_pBuf; + size_t m_buf_size; + uint64_t m_ofs; + + png_readonly_memory_file() : + png_file(), + m_pBuf(nullptr), + m_buf_size(0), + m_ofs(0) + { + } + + virtual ~png_readonly_memory_file() + { + } + + void init(const void *pBuf, size_t buf_size) + { + m_pBuf = static_cast<const uint8_t*>(pBuf); + m_buf_size = buf_size; + m_ofs = 0; + } + + virtual bool resize(uint64_t new_size) + { + (void)new_size; + assert(0); + return false; + } + + virtual uint64_t get_size() + { + return m_buf_size; + } + + virtual uint64_t tell() + { + return m_ofs; + } + + virtual bool seek(uint64_t ofs) + { + m_ofs = ofs; + return true; + } + + virtual size_t write(const void* pBuf, size_t len) + { + (void)pBuf; + (void)len; + assert(0); + return 0; + } + + virtual size_t read(void* pBuf, size_t len) + { + if (m_ofs >= m_buf_size) + return 0; + + uint64_t max_bytes = minimum<uint64_t>(len, m_buf_size - m_ofs); +
memcpy(pBuf, &m_pBuf[(size_t)m_ofs], (size_t)max_bytes); + + m_ofs += max_bytes; + + return (size_t)max_bytes; + } +}; + +#ifdef _MSC_VER +#define ftell64 _ftelli64 +#define fseek64 _fseeki64 +#else +#define ftell64 ftello +#define fseek64 fseeko +#endif + +class png_cfile : public png_file +{ +public: + FILE* m_pFile; + + png_cfile() : + png_file(), + m_pFile(nullptr) + { + } + + virtual ~png_cfile() + { + close(); + } + + bool init(const char *pFilename, const char *pMode) + { + close(); + + m_pFile = nullptr; + +#ifdef _MSC_VER + fopen_s(&m_pFile, pFilename, pMode); +#else + m_pFile = fopen(pFilename, pMode); +#endif + + return m_pFile != nullptr; + } + + bool close() + { + bool status = true; + if (m_pFile) + { + if (fclose(m_pFile) == EOF) + status = false; + m_pFile = nullptr; + } + return status; + } + + virtual bool resize(uint64_t new_size) + { + if (new_size) + { + if (!seek(new_size - 1)) + return false; + + int v = 0; + if (write(&v, 1) != 1) + return false; + } + else + { + if (!seek(0)) + return false; + } + + return true; + } + + virtual uint64_t get_size() + { + int64_t cur_ofs = ftell64(m_pFile); + if (cur_ofs < 0) + return 0; + + if (fseek64(m_pFile, 0, SEEK_END) != 0) + return 0; + + const int64_t cur_size = ftell64(m_pFile); + if (cur_size < 0) + return 0; + + if (fseek64(m_pFile, cur_ofs, SEEK_SET) != 0) + return 0; + + return cur_size; + } + + virtual uint64_t tell() + { + int64_t cur_ofs = ftell64(m_pFile); + if (cur_ofs < 0) + return 0; + + return cur_ofs; + } + + virtual bool seek(uint64_t ofs) + { + return fseek64(m_pFile, ofs, SEEK_SET) == 0; + } + + virtual size_t write(const void* pBuf, size_t len) + { + return (size_t)fwrite(pBuf, 1, len, m_pFile); + } + + virtual size_t read(void* pBuf, size_t len) + { + return (size_t)fread(pBuf, 1, len, m_pFile); + } +}; + +// This low-level helper class handles the actual decoding of PNG files. 
+class png_decoder +{ +public: + png_decoder(); + ~png_decoder(); + + // Scans the PNG file, but doesn't decode the IDAT data. + // Returns 0 on success, or an error code. + // If the returned status is non-zero, or m_img_supported_flag==FALSE the image either the image is corrupted/not PNG or is unsupported in some way. + int png_scan(png_file *pFile); + + // Decodes a single scanline of PNG image data. + // Returns a pointer to the scanline's pixel data and its size in bytes. + // This data is only minimally processed from the internal PNG pixel data. + // The caller must use the ihdr, trns_flag and values, and the palette to actually decode the pixel data. + // + // Possible returned pixel formats is somewhat complex due to the history of this code: + // 8-bit RGBA, always 4 bytes/pixel - 24bpp PNG's are converted to 32bpp and TRNS processing is done automatically (8/16bpp RGB or RGBA PNG files) + // 1/2/4/8-bit grayscale, 1 byte per pixel - must convert to [0,255] using the palette or some other means, must optionally use the TRNS chunk for alpha (1/2/4/8 Grayscale PNG files - not 16bpp though!) + // 1/2/4/8-bit palettized, 1 byte per pixel - must convert to RGB using the 24bpp palette and optionally the TRNS chunk for alpha (1/2/4/8bpp palettized PNG files) + // 8-bit grayscale with alpha, 2 bytes per pixel - TRNS processing will be done for you on 16bpp images (there's a special case here for 16bpp Grey files) (8/16bpp Gray-Alpha *or 16bpp Grayscale* PNG files) + // + // Returns 0 on success, a non-zero error code, or PNG_ALLDONE. + int png_decode(void** ppImg_ptr, uint32_t* pImg_len); + + // Starts decoding. Returns 0 on success, otherwise an error code. + int png_decode_start(); + + // Deinitializes the decoder, freeing all allocations. 
+ void png_decode_end(); + + png_file* m_pFile; + + // Image's 24bpp palette - 3 bytes per entry + uint8_t m_plte_flag; + uint8_t m_img_pal[768]; + + int m_img_supported_flag; + + ihdr_struct m_ihdr; + + uint8_t m_chunk_flag; + uint32_t m_chunk_size; + uint32_t m_chunk_left; + uint32_t m_chunk_crc32; + uint8_t m_chunk_name[4]; + + uint8_t m_end_of_idat_chunks; + + void* m_pMalloc_blocks[PNG_MAX_ALLOC_BLOCKS]; + + uint32_t m_dec_bytes_per_pixel; // bytes per pixel decoded from the PNG file (minimum 1 for 1/2/4 bpp), factors in the PNG 8/16 bit/component bit depth, may be up to 8 bytes (2*4) + uint32_t m_dst_bytes_per_pixel; // bytes per pixel returned to the caller (1-4), always has alpha if the PNG has alpha, 16-bit components always converted to 8-bits/component + + uint32_t m_dec_bytes_per_line; // bytes per line decoded from the PNG file (before 1/2/4 expansion), +1 for the filter byte + uint32_t m_src_bytes_per_line; // decoded PNG bytes per line, before 1/2/4 bpp expansion, not counting the filter byte, updated during adam7 deinterlacing + uint32_t m_dst_bytes_per_line; // bytes per line returned to the caller (1-4 times width) + + int (*m_pProcess_func)(uint8_t* src, uint8_t* dst, int pixels, png_decoder* pwi); + + uint8_t* m_pPre_line_buf; + uint8_t* m_pCur_line_buf; + uint8_t* m_pPro_line_buf; + + uint8_t m_bkgd_flag; + uint32_t m_bkgd_value[3]; + + uint8_t m_gama_flag; + uint32_t m_gama_value; + + uint8_t m_trns_flag; + uint32_t m_trns_value[256]; + + buminiz::mz_stream m_inflator; + + uint8_t inflate_src_buf[PNG_INFLATE_SRC_BUF_SIZE]; + + uint32_t m_inflate_src_buf_ofs; + uint32_t m_inflate_src_buf_size; + uint32_t m_inflate_dst_buf_ofs; + + int m_inflate_eof_flag; + + uint8_t m_gamma_table[256]; + + int m_pass_x_size; + int m_pass_y_left; + + int m_adam7_pass_num; + int m_adam7_pass_y; + int m_adam7_pass_size_x[7]; + int m_adam7_pass_size_y[7]; + + std::vector<uint8_t> m_adam7_image_buf; + + int m_adam7_decoded_flag; + + bool m_scanned_flag; + + int
m_terminate_status; + +#define TEMP_BUF_SIZE (384) + uint8_t m_temp_buf[TEMP_BUF_SIZE * 4]; + + void clear(); + void uninitialize(); + int terminate(int status); + void* png_malloc(uint32_t i); + void* png_calloc(uint32_t i); + int block_read(void* buf, uint32_t len); + int64_t block_read_dword(); + int fetch_next_chunk_data(uint8_t* buf, int bytes); + int fetch_next_chunk_byte(); + int fetch_next_chunk_word(); + int64_t fetch_next_chunk_dword(); + int fetch_next_chunk_init(); + int unchunk_data(uint8_t* buf, uint32_t bytes, uint32_t* ptr_bytes_read); + inline void adam7_write_pixel_8(int x, int y, int c); + inline void adam7_write_pixel_16(int x, int y, int r, int g); + inline void adam7_write_pixel_24(int x, int y, int r, int g, int b); + inline void adam7_write_pixel_32(int x, int y, int r, int g, int b, int a); + void unpredict_sub(uint8_t* lst, uint8_t* cur, uint32_t bytes, int bpp); + void unpredict_up(uint8_t* lst, uint8_t* cur, uint32_t bytes, int bpp); + void unpredict_average(uint8_t* lst, uint8_t* cur, uint32_t bytes, int bpp); + inline uint8_t paeth_predictor(int a, int b, int c); + void unpredict_paeth(uint8_t* lst, uint8_t* cur, uint32_t bytes, int bpp); + int adam7_pass_size(int size, int start, int step); + int decompress_line(uint32_t* bytes_decoded); + int find_iend_chunk(); + void calc_gamma_table(); + void create_grey_palette(); + int read_signature(); + int read_ihdr_chunk(); + int read_bkgd_chunk(); + int read_gama_chunk(); + int read_trns_chunk(); + int read_plte_chunk(); + int find_idat_chunk(); +}; + +void png_decoder::uninitialize() +{ + m_pFile = nullptr; + + for (int i = 0; i < PNG_MAX_ALLOC_BLOCKS; i++) + { + free(m_pMalloc_blocks[i]); + m_pMalloc_blocks[i] = nullptr; + } + + mz_inflateEnd(&m_inflator); +} + +int png_decoder::terminate(int status) +{ + if (m_terminate_status == 0) + m_terminate_status = status; + + uninitialize(); + return status; +} + +void* png_decoder::png_malloc(uint32_t len) +{ + if (!len) + len++; + + void* p = 
malloc(len); + + if (!p) + return nullptr; + + int j; + for (j = 0; j < PNG_MAX_ALLOC_BLOCKS; j++) + if (!m_pMalloc_blocks[j]) + break; + + if (j == PNG_MAX_ALLOC_BLOCKS) + return nullptr; + + m_pMalloc_blocks[j] = p; + + return p; +} + +void* png_decoder::png_calloc(uint32_t len) +{ + void* p = png_malloc(len); + if (!p) + return nullptr; + + if (p) + memset(p, 0, len); + + return p; +} + +int png_decoder::block_read(void* buf, uint32_t len) +{ + size_t bytes_read = m_pFile->read(buf, len); + if (bytes_read != len) + return terminate(PNG_READPASTEOF); + return 0; +} + +int64_t png_decoder::block_read_dword() +{ + uint8_t buf[4]; + + int status = block_read(buf, 4); + if (status != 0) + return status; + + uint32_t v = buf[3] + ((uint32_t)buf[2] << 8) + ((uint32_t)buf[1] << 16) + ((uint32_t)buf[0] << 24); + return (int64_t)v; +} + +int png_decoder::fetch_next_chunk_data(uint8_t* buf, int bytes) +{ + if (!m_chunk_flag) + return 0; + + bytes = minimum(bytes, m_chunk_left); + + int status = block_read(buf, bytes); + if (status != 0) + return status; + +#if PVPNG_IDAT_CRC_CHECKING + bool check_crc32 = true; +#else + const bool is_idat = (m_chunk_name[0] == 'I') && (m_chunk_name[1] == 'D') && (m_chunk_name[2] == 'A') && (m_chunk_name[3] == 'T'); + bool check_crc32 = !is_idat; +#endif + + if (check_crc32) + m_chunk_crc32 = buminiz::mz_crc32(m_chunk_crc32, buf, bytes); + + if ((m_chunk_left -= bytes) == 0) + { + int64_t res = block_read_dword(); + if (res < 0) + return (int)res; + + if (check_crc32) + { + if (m_chunk_crc32 != (uint32_t)res) + return terminate(PNG_BAD_CHUNK_CRC32); + } + + m_chunk_flag = FALSE; + } + + return bytes; +} + +int png_decoder::fetch_next_chunk_byte() +{ + uint8_t buf[1]; + + int status = fetch_next_chunk_data(buf, 1); + if (status < 0) + return status; + + if (status != 1) + return terminate(PNG_BAD_CHUNK_SIZE); + + return buf[0]; +} + +int png_decoder::fetch_next_chunk_word() +{ + uint8_t buf[2]; + + int status = fetch_next_chunk_data(buf, 2); 
+ if (status < 0) + return status; + + if (status != 2) + return terminate(PNG_BAD_CHUNK_SIZE); + + return buf[1] + ((uint32_t)buf[0] << 8); +} + +int64_t png_decoder::fetch_next_chunk_dword() +{ + uint8_t buf[4]; + + int status = fetch_next_chunk_data(buf, 4); + if (status < 0) + return status; + + if (status != 4) + terminate(PNG_BAD_CHUNK_SIZE); + + uint32_t v = buf[3] + ((uint32_t)buf[2] << 8) + ((uint32_t)buf[1] << 16) + ((uint32_t)buf[0] << 24); + return (int64_t)v; +} + +int png_decoder::fetch_next_chunk_init() +{ + while (m_chunk_flag) + { + int status = fetch_next_chunk_data(m_temp_buf, TEMP_BUF_SIZE * 4); + if (status != 0) + return status; + } + + int64_t n = block_read_dword(); + if (n < 0) + return (int)n; + + m_chunk_size = (uint32_t)n; + + m_chunk_flag = TRUE; + m_chunk_left = m_chunk_size + 4; + m_chunk_crc32 = 0; + + int status = fetch_next_chunk_data(m_chunk_name, 4); + if (status < 0) + return status; + + return 0; +} + +int png_decoder::unchunk_data(uint8_t* buf, uint32_t bytes, uint32_t* ptr_bytes_read) +{ + uint32_t bytes_read = 0; + + if ((!bytes) || (m_end_of_idat_chunks)) + { + *ptr_bytes_read = 0; + return TRUE; + } + + while (bytes_read != bytes) + { + if (!m_chunk_flag) + { + int res = fetch_next_chunk_init(); + if (res < 0) + return res; + + if ((m_chunk_name[0] != 'I') || + (m_chunk_name[1] != 'D') || + (m_chunk_name[2] != 'A') || + (m_chunk_name[3] != 'T')) + { + *ptr_bytes_read = bytes_read; + m_end_of_idat_chunks = TRUE; + return TRUE; + } + } + + int res = fetch_next_chunk_data(buf + bytes_read, bytes - bytes_read); + if (res < 0) + return res; + + bytes_read += (uint32_t)res; + } + + *ptr_bytes_read = bytes_read; + + return FALSE; +} + +inline void png_decoder::adam7_write_pixel_8(int x, int y, int c) +{ + m_adam7_image_buf[x + y * m_dst_bytes_per_line] = (uint8_t)c; +} + +inline void png_decoder::adam7_write_pixel_16(int x, int y, int r, int g) +{ + uint32_t ofs = x * 2 + y * m_dst_bytes_per_line; + m_adam7_image_buf[ofs + 0] = 
(uint8_t)r; + m_adam7_image_buf[ofs + 1] = (uint8_t)g; +} + +inline void png_decoder::adam7_write_pixel_24(int x, int y, int r, int g, int b) +{ + uint32_t ofs = x * 3 + y * m_dst_bytes_per_line; + m_adam7_image_buf[ofs + 0] = (uint8_t)r; + m_adam7_image_buf[ofs + 1] = (uint8_t)g; + m_adam7_image_buf[ofs + 2] = (uint8_t)b; +} + +inline void png_decoder::adam7_write_pixel_32(int x, int y, int r, int g, int b, int a) +{ + uint32_t ofs = x * 4 + y * m_dst_bytes_per_line; + m_adam7_image_buf[ofs + 0] = (uint8_t)r; + m_adam7_image_buf[ofs + 1] = (uint8_t)g; + m_adam7_image_buf[ofs + 2] = (uint8_t)b; + m_adam7_image_buf[ofs + 3] = (uint8_t)a; +} + +static void PixelDePack2(void* src, void* dst, int numbytes) +{ + uint8_t* src8 = (uint8_t*)src; + uint8_t* dst8 = (uint8_t*)dst; + + while (numbytes) + { + uint8_t v = *src8++; + + for (uint32_t i = 0; i < 8; i++) + dst8[7 - i] = (v >> i) & 1; + + dst8 += 8; + numbytes--; + } +} + +static void PixelDePack16(void* src, void* dst, int numbytes) +{ + uint8_t* src8 = (uint8_t*)src; + uint8_t* dst8 = (uint8_t*)dst; + + while (numbytes) + { + uint8_t v = *src8++; + + dst8[0] = (uint8_t)v >> 4; + dst8[1] = (uint8_t)v & 0xF; + dst8 += 2; + + numbytes--; + } +} + +static int unpack_grey_1(uint8_t* src, uint8_t* dst, int pixels, png_decoder *pwi) +{ + (void)pwi; + PixelDePack2(src, dst, pixels >> 3); + + dst += (pixels & 0xFFF8); + + if ((pixels & 7) != 0) + { + uint8_t c = src[pixels >> 3]; + + pixels &= 7; + + while (pixels--) + { + *dst++ = ((c & 128) >> 7); + + c <<= 1; + } + } + + return TRUE; +} + +static int unpack_grey_2(uint8_t* src, uint8_t* dst, int pixels, png_decoder* pwi) +{ + (void)pwi; + int i = pixels; + uint8_t c; + + while (i >= 4) + { + c = *src++; + + *dst++ = (c >> 6); + *dst++ = (c >> 4) & 3; + *dst++ = (c >> 2) & 3; + *dst++ = (c) & 3; + + i -= 4; + } + + if (i) + { + c = *src; + + while (i--) + { + *dst++ = (c >> 6); + + c <<= 2; + } + } + + return TRUE; +} + +static int unpack_grey_4(uint8_t* src, uint8_t* 
dst, int pixels, png_decoder* pwi) +{ + (void)pwi; + + PixelDePack16(src, dst, pixels >> 1); + + if (pixels & 1) + dst[pixels & 0xFFFE] = (src[pixels >> 1] >> 4); + + return TRUE; +} + +static int unpack_grey_8(uint8_t* src, uint8_t* dst, int pixels, png_decoder* pwi) +{ + (void)src; + (void)dst; + (void)pixels; + (void)pwi; + return FALSE; +} + +static int unpack_grey_16(uint8_t* src, uint8_t* dst, int pixels, png_decoder* pwi) +{ + (void)pwi; + while (pixels--) + { + *dst++ = *src++; + + src++; + } + + return TRUE; +} + +static int unpack_grey_16_2(uint8_t* src, uint8_t* dst, int pixels, png_decoder* pwi) +{ + if (pwi->m_trns_flag) + { + while (pixels--) + { + uint32_t v = (src[0] << 8) + src[1]; + src += 2; + + *dst++ = (uint8_t)(v >> 8); + *dst++ = (v == pwi->m_trns_value[0]) ? 0 : 255; + } + } + else + { + while (pixels--) + { + *dst++ = *src++; + *dst++ = 0xFF; + + src++; + } + } + + return TRUE; +} + +static int unpack_true_8(uint8_t* src, uint8_t* dst, int pixels, png_decoder* pwi) +{ + if (pwi->m_trns_flag) + { + const uint32_t tr = pwi->m_trns_value[0]; + const uint32_t tg = pwi->m_trns_value[1]; + const uint32_t tb = pwi->m_trns_value[2]; + + for (int i = 0; i < pixels; i++) + { + uint8_t r = src[i * 3 + 0]; + uint8_t g = src[i * 3 + 1]; + uint8_t b = src[i * 3 + 2]; + + dst[i * 4 + 0] = r; + dst[i * 4 + 1] = g; + dst[i * 4 + 2] = b; + dst[i * 4 + 3] = ((r == tr) && (g == tg) && (b == tb)) ? 
0 : 255; + } + } + else + { + for (int i = 0; i < pixels; i++) + { + dst[i * 4 + 0] = src[i * 3 + 0]; + dst[i * 4 + 1] = src[i * 3 + 1]; + dst[i * 4 + 2] = src[i * 3 + 2]; + dst[i * 4 + 3] = 255; + } + } + + return TRUE; +} + +static int unpack_true_16(uint8_t* src, uint8_t* dst, int pixels, png_decoder* pwi) +{ + if (pwi->m_trns_flag) + { + const uint32_t tr = pwi->m_trns_value[0]; + const uint32_t tg = pwi->m_trns_value[1]; + const uint32_t tb = pwi->m_trns_value[2]; + + for (int i = 0; i < pixels; i++) + { + uint32_t r = (src[i * 6 + 0] << 8) + src[i * 6 + 1]; + uint32_t g = (src[i * 6 + 2] << 8) + src[i * 6 + 3]; + uint32_t b = (src[i * 6 + 4] << 8) + src[i * 6 + 5]; + + dst[i * 4 + 0] = (uint8_t)(r >> 8); + dst[i * 4 + 1] = (uint8_t)(g >> 8); + dst[i * 4 + 2] = (uint8_t)(b >> 8); + dst[i * 4 + 3] = ((r == tr) && (g == tg) && (b == tb)) ? 0 : 255; + } + } + else + { + while (pixels--) + { + dst[0] = src[0]; + dst[1] = src[2]; + dst[2] = src[4]; + dst[3] = 255; + + dst += 4; + src += 6; + } + } + + return TRUE; +} + +static int unpack_grey_alpha_8(uint8_t* src, uint8_t* dst, int pixels, png_decoder* pwi) +{ + (void)pwi; + while (pixels--) + { + dst[0] = src[0]; + dst[1] = src[1]; + dst += 2; + src += 2; + } + + return TRUE; +} + +static int unpack_grey_alpha_16(uint8_t* src, uint8_t* dst, int pixels, png_decoder* pwi) +{ + (void)pwi; + while (pixels--) + { + dst[0] = src[0]; + dst[1] = src[2]; + dst += 2; + src += 4; + } + + return TRUE; +} + +static int unpack_true_alpha_8(uint8_t* src, uint8_t* dst, int pixels, png_decoder* pwi) +{ + (void)src; + (void)dst; + (void)pixels; + (void)pwi; + return FALSE; +} + +static int unpack_true_alpha_16(uint8_t* src, uint8_t* dst, int pixels, png_decoder* pwi) +{ + (void)pwi; + while (pixels--) + { + dst[0] = src[0]; + dst[1] = src[2]; + dst[2] = src[4]; + dst[3] = src[6]; + dst += 4; + src += 8; + } + + return TRUE; +} + +void png_decoder::unpredict_sub(uint8_t* lst, uint8_t* cur, uint32_t bytes, int bpp) +{ + (void)lst; + 
if (bytes == (uint32_t)bpp) + return; + + cur += bpp; + bytes -= bpp; + + while (bytes--) + { + *cur += *(cur - bpp); + cur++; + } +} + +void png_decoder::unpredict_up(uint8_t* lst, uint8_t* cur, uint32_t bytes, int bpp) +{ + (void)bpp; + while (bytes--) + *cur++ += *lst++; +} + +void png_decoder::unpredict_average(uint8_t* lst, uint8_t* cur, uint32_t bytes, int bpp) +{ + int i; + + for (i = 0; i < bpp; i++) + *cur++ += (*lst++ >> 1); + + if (bytes == (uint32_t)bpp) + return; + + bytes -= bpp; + + while (bytes--) + { + *cur += ((*lst++ + *(cur - bpp)) >> 1); + cur++; + } +} + +inline uint8_t png_decoder::paeth_predictor(int a, int b, int c) +{ + int p, pa, pb, pc; + + /* a = left, b = above, c = upper left */ + + p = a + b - c; + + pa = abs(p - a); + pb = abs(p - b); + pc = abs(p - c); + + if ((pa <= pb) && (pa <= pc)) + return (uint8_t)a; + else if (pb <= pc) + return (uint8_t)b; + else + return (uint8_t)c; +} + +void png_decoder::unpredict_paeth(uint8_t* lst, uint8_t* cur, uint32_t bytes, int bpp) +{ + int i; + + for (i = 0; i < bpp; i++) + *cur++ += paeth_predictor(0, *lst++, 0); + + if (bytes == (uint32_t)bpp) + return; + + bytes -= bpp; + + while (bytes--) + { + int p, a, b, c, pa, pb, pc; + + a = *(cur - bpp); + b = *lst; + c = *(lst - bpp); + + p = a + b - c; + + pa = abs(p - a); + pb = abs(p - b); + pc = abs(p - c); + + if ((pa <= pb) && (pa <= pc)) + *cur++ += (uint8_t)a; + else if (pb <= pc) + *cur++ += (uint8_t)b; + else + *cur++ += (uint8_t)c; + + lst++; + } +} + +int png_decoder::adam7_pass_size(int size, int start, int step) +{ + if (size > start) + return 1 + ((size - 1) - start) / step; + else + return 0; +} + +// TRUE if no more data, negative on error, FALSE if OK +int png_decoder::decompress_line(uint32_t* bytes_decoded) +{ + int status; + uint32_t temp, src_bytes_left, dst_bytes_left; + + m_inflate_dst_buf_ofs = 0; + + for (; ; ) + { + if (m_inflate_src_buf_ofs == PNG_INFLATE_SRC_BUF_SIZE) + { + int res = unchunk_data(inflate_src_buf, 
PNG_INFLATE_SRC_BUF_SIZE, &temp); + if (res < 0) + return res; + m_inflate_eof_flag = res; + + m_inflate_src_buf_size = temp; + + m_inflate_src_buf_ofs = 0; + } + + for (; ; ) + { + src_bytes_left = m_inflate_src_buf_size - m_inflate_src_buf_ofs; + dst_bytes_left = m_dec_bytes_per_line - m_inflate_dst_buf_ofs; + + m_inflator.next_in = inflate_src_buf + m_inflate_src_buf_ofs; + m_inflator.avail_in = src_bytes_left; + + m_inflator.next_out = m_pCur_line_buf + m_inflate_dst_buf_ofs; + m_inflator.avail_out = dst_bytes_left; + + status = buminiz::mz_inflate2(&m_inflator, buminiz::MZ_NO_FLUSH, PVPNG_ADLER32_CHECKING); + + const uint32_t src_bytes_consumed = src_bytes_left - m_inflator.avail_in; + const uint32_t dst_bytes_written = dst_bytes_left - m_inflator.avail_out; + + m_inflate_src_buf_ofs += src_bytes_consumed; + m_inflate_dst_buf_ofs += dst_bytes_written; + + if (status != buminiz::MZ_OK) + { + if (status != buminiz::MZ_STREAM_END) + return terminate(PNG_INVALID_DATA_STREAM); + + if (bytes_decoded) + *bytes_decoded = m_inflate_dst_buf_ofs; + + return TRUE; + } + + if (m_inflate_dst_buf_ofs == m_dec_bytes_per_line) + { + if (bytes_decoded) + *bytes_decoded = m_inflate_dst_buf_ofs; + + return FALSE; + } + + if ((m_inflate_src_buf_ofs == m_inflate_src_buf_size) && + (m_inflate_eof_flag == FALSE)) + break; + } + } +} + +int png_decoder::find_iend_chunk() +{ + uint32_t dummy; + + while (!m_end_of_idat_chunks) + { + int res = unchunk_data(m_temp_buf, TEMP_BUF_SIZE * 4, &dummy); + if (res < 0) + return res; + } + + for (; ; ) + { + if ((m_chunk_name[0] == 'I') && + (m_chunk_name[1] == 'E') && + (m_chunk_name[2] == 'N') && + (m_chunk_name[3] == 'D')) + break; + + int res = fetch_next_chunk_init(); + if (res < 0) + return res; + } + + return 0; +} + +int png_decoder::png_decode(void** ppImg_ptr, uint32_t* pImg_len) +{ + int status; + uint8_t* decoded_line; + uint32_t bytes_decoded; + + if (m_adam7_decoded_flag) + { + if (m_pass_y_left == 0) + return PNG_ALLDONE; + + 
*ppImg_ptr = &m_adam7_image_buf[(m_ihdr.m_height - m_pass_y_left) * m_dst_bytes_per_line]; + *pImg_len = m_dst_bytes_per_line; + + m_pass_y_left--; + + return 0; + } + + if (m_pass_y_left == 0) + { + if (m_ihdr.m_ilace_type == 0) + { + status = find_iend_chunk(); + if (status < 0) + return status; + + return PNG_ALLDONE; + } + + for (; ; ) + { + if (++m_adam7_pass_num == 7) + { + status = find_iend_chunk(); + if (status < 0) + return status; + + return PNG_ALLDONE; + } + + if (((m_pass_y_left = m_adam7_pass_size_y[m_adam7_pass_num]) != 0) && + ((m_pass_x_size = m_adam7_pass_size_x[m_adam7_pass_num]) != 0)) + break; + } + + switch (m_adam7_pass_num) + { + case 0: + case 1: + case 3: + case 5: + m_adam7_pass_y = 0; + break; + case 2: + m_adam7_pass_y = 4; + break; + case 4: + m_adam7_pass_y = 2; + break; + case 6: + m_adam7_pass_y = 1; + break; + } + + switch (m_ihdr.m_color_type) + { + case PNG_COLOR_TYPE_GREYSCALE: + case PNG_COLOR_TYPE_PALETTIZED: + { + m_src_bytes_per_line = (((uint32_t)m_pass_x_size * m_ihdr.m_bit_depth) + 7) / 8; + break; + } + case PNG_COLOR_TYPE_TRUECOLOR: + { + m_src_bytes_per_line = ((uint32_t)m_pass_x_size * m_dec_bytes_per_pixel); + break; + } + case PNG_COLOR_TYPE_GREYSCALE_ALPHA: + { + m_src_bytes_per_line = ((uint32_t)m_pass_x_size * m_dec_bytes_per_pixel); + break; + } + case PNG_COLOR_TYPE_TRUECOLOR_ALPHA: + { + m_src_bytes_per_line = ((uint32_t)m_pass_x_size * m_dec_bytes_per_pixel); + break; + } + } + + m_dec_bytes_per_line = m_src_bytes_per_line + 1; + + memset(m_pPre_line_buf, 0, m_src_bytes_per_line); + } + + int res = decompress_line(&bytes_decoded); + if (res < 0) + return terminate(res); + + if (res) + { + if (m_ihdr.m_ilace_type == 0) + { + if (m_pass_y_left != 1) + return terminate(PNG_INCOMPLETE_IMAGE); + } + else + { + if ((m_pass_y_left != 1) && (m_adam7_pass_num != 6)) + return terminate(PNG_INCOMPLETE_IMAGE); + } + } + + if (bytes_decoded != m_dec_bytes_per_line) + return terminate(PNG_INCOMPLETE_IMAGE); + + 
decoded_line = &m_pCur_line_buf[1]; + + switch (m_pCur_line_buf[0]) + { + case 0: + break; + case 1: + { + unpredict_sub(m_pPre_line_buf, m_pCur_line_buf + 1, m_src_bytes_per_line, m_dec_bytes_per_pixel); + break; + } + case 2: + { + unpredict_up(m_pPre_line_buf, m_pCur_line_buf + 1, m_src_bytes_per_line, m_dec_bytes_per_pixel); + break; + } + case 3: + { + unpredict_average(m_pPre_line_buf, m_pCur_line_buf + 1, m_src_bytes_per_line, m_dec_bytes_per_pixel); + break; + } + case 4: + { + unpredict_paeth(m_pPre_line_buf, m_pCur_line_buf + 1, m_src_bytes_per_line, m_dec_bytes_per_pixel); + break; + } + default: + return terminate(PNG_UNS_PREDICTOR); + } + + memmove(m_pPre_line_buf, m_pCur_line_buf + 1, m_src_bytes_per_line); + + if (m_pProcess_func) + { + if ((*m_pProcess_func)(m_pCur_line_buf + 1, m_pPro_line_buf, m_pass_x_size, this)) + decoded_line = m_pPro_line_buf; + } + + if (m_ihdr.m_ilace_type == 0) + { + *ppImg_ptr = decoded_line; + *pImg_len = m_dst_bytes_per_line; + + if (--m_pass_y_left == 0) + { + res = decompress_line(&bytes_decoded); + if (res < 0) + return terminate(res); + + if (res == FALSE) + return terminate(PNG_TOO_MUCH_DATA); + + if (bytes_decoded) + return terminate(PNG_TOO_MUCH_DATA); + } + } + else + { + int i, x_ofs = 0, y_ofs = 0, x_stp = 0; + uint8_t* p = decoded_line; + + switch (m_adam7_pass_num) + { + case 0: { x_ofs = 0; x_stp = 8; break; } + case 1: { x_ofs = 4; x_stp = 8; break; } + case 2: { x_ofs = 0; x_stp = 4; break; } + case 3: { x_ofs = 2; x_stp = 4; break; } + case 4: { x_ofs = 0; x_stp = 2; break; } + case 5: { x_ofs = 1; x_stp = 2; break; } + case 6: { x_ofs = 0; x_stp = 1; break; } + } + + y_ofs = m_adam7_pass_y; + + assert(x_ofs < (int)m_ihdr.m_width); + assert(y_ofs < (int)m_ihdr.m_height); + + if (m_dst_bytes_per_pixel == 1) + { + for (i = m_pass_x_size; i > 0; i--, x_ofs += x_stp) + adam7_write_pixel_8(x_ofs, y_ofs, *p++); + } + else if (m_dst_bytes_per_pixel == 2) + { + for (i = m_pass_x_size; i > 0; i--, x_ofs += x_stp, 
p += 2) + adam7_write_pixel_16(x_ofs, y_ofs, p[0], p[1]); + } + else if (m_dst_bytes_per_pixel == 3) + { + for (i = m_pass_x_size; i > 0; i--, x_ofs += x_stp, p += 3) + adam7_write_pixel_24(x_ofs, y_ofs, p[0], p[1], p[2]); + } + else if (m_dst_bytes_per_pixel == 4) + { + for (i = m_pass_x_size; i > 0; i--, x_ofs += x_stp, p += 4) + adam7_write_pixel_32(x_ofs, y_ofs, p[0], p[1], p[2], p[3]); + } + else + { + assert(0); + } + + switch (m_adam7_pass_num) + { + case 0: + case 1: + case 2: { m_adam7_pass_y += 8; break; } + case 3: + case 4: { m_adam7_pass_y += 4; break; } + case 5: + case 6: { m_adam7_pass_y += 2; break; } + } + + if ((--m_pass_y_left == 0) && (m_adam7_pass_num == 6)) + { + res = decompress_line(&bytes_decoded); + if (res < 0) + return terminate(res); + + if (res == FALSE) + return terminate(PNG_TOO_MUCH_DATA); + + if (bytes_decoded) + return terminate(PNG_TOO_MUCH_DATA); + } + } + + return 0; +} + +void png_decoder::png_decode_end() +{ + uninitialize(); +} + +int png_decoder::png_decode_start() +{ + int status; + + if (m_img_supported_flag != TRUE) + return terminate(m_img_supported_flag); + + switch (m_ihdr.m_color_type) + { + case PNG_COLOR_TYPE_GREYSCALE: + { + if (m_ihdr.m_bit_depth == 16) + { + // This is a special case. We can't pass back 8-bit samples and let the caller decide on transparency because the PNG is 16-bits. + // So we expand to 8-bit Gray-Alpha and handle transparency during decoding. + // We don't do this with all grayscale cases because that would require more code to deal with 1/2/4bpp expansion. 
+ m_dec_bytes_per_pixel = (m_ihdr.m_bit_depth + 7) / 8; + m_dst_bytes_per_pixel = 2; + + m_src_bytes_per_line = (((uint32_t)m_ihdr.m_width * m_ihdr.m_bit_depth) + 7) / 8; + m_dst_bytes_per_line = 2 * m_ihdr.m_width; + + m_pProcess_func = unpack_grey_16_2; + } + else + { + m_dec_bytes_per_pixel = (m_ihdr.m_bit_depth + 7) / 8; + m_dst_bytes_per_pixel = 1; + + m_src_bytes_per_line = (((uint32_t)m_ihdr.m_width * m_ihdr.m_bit_depth) + 7) / 8; + m_dst_bytes_per_line = m_ihdr.m_width; + + if (m_ihdr.m_bit_depth == 1) + m_pProcess_func = unpack_grey_1; + else if (m_ihdr.m_bit_depth == 2) + m_pProcess_func = unpack_grey_2; + else if (m_ihdr.m_bit_depth == 4) + m_pProcess_func = unpack_grey_4; + else + m_pProcess_func = unpack_grey_8; + } + + break; + } + case PNG_COLOR_TYPE_PALETTIZED: + { + m_dec_bytes_per_pixel = (m_ihdr.m_bit_depth + 7) / 8; + m_dst_bytes_per_pixel = 1; + + m_src_bytes_per_line = (((uint32_t)m_ihdr.m_width * m_ihdr.m_bit_depth) + 7) / 8; + m_dst_bytes_per_line = m_ihdr.m_width; + + if (m_ihdr.m_bit_depth == 1) + m_pProcess_func = unpack_grey_1; + else if (m_ihdr.m_bit_depth == 2) + m_pProcess_func = unpack_grey_2; + else if (m_ihdr.m_bit_depth == 4) + m_pProcess_func = unpack_grey_4; + else if (m_ihdr.m_bit_depth == 8) + m_pProcess_func = unpack_grey_8; + else if (m_ihdr.m_bit_depth == 16) + m_pProcess_func = unpack_grey_16; + + break; + } + case PNG_COLOR_TYPE_TRUECOLOR: + { + // We always pass back alpha with transparency handling. 
+ m_dec_bytes_per_pixel = 3 * (m_ihdr.m_bit_depth / 8); + m_dst_bytes_per_pixel = 4; + + m_src_bytes_per_line = ((uint32_t)m_ihdr.m_width * m_dec_bytes_per_pixel); + m_dst_bytes_per_line = 4 * m_ihdr.m_width; + + if (m_ihdr.m_bit_depth == 8) + m_pProcess_func = unpack_true_8; + else if (m_ihdr.m_bit_depth == 16) + m_pProcess_func = unpack_true_16; + + break; + } + case PNG_COLOR_TYPE_GREYSCALE_ALPHA: + { + m_dec_bytes_per_pixel = 2 * (m_ihdr.m_bit_depth / 8); + m_dst_bytes_per_pixel = 2; + + m_src_bytes_per_line = ((uint32_t)m_ihdr.m_width * m_dec_bytes_per_pixel); + m_dst_bytes_per_line = m_ihdr.m_width * 2; + + if (m_ihdr.m_bit_depth == 8) + m_pProcess_func = unpack_grey_alpha_8; + else if (m_ihdr.m_bit_depth == 16) + m_pProcess_func = unpack_grey_alpha_16; + + break; + } + case PNG_COLOR_TYPE_TRUECOLOR_ALPHA: + { + m_dec_bytes_per_pixel = 4 * (m_ihdr.m_bit_depth / 8); + m_dst_bytes_per_pixel = 4; + + m_src_bytes_per_line = ((uint32_t)m_ihdr.m_width * m_dec_bytes_per_pixel); + m_dst_bytes_per_line = 4 * m_ihdr.m_width; + + if (m_ihdr.m_bit_depth == 8) + m_pProcess_func = unpack_true_alpha_8; + else + m_pProcess_func = unpack_true_alpha_16; + + break; + } + } + + m_dec_bytes_per_line = m_src_bytes_per_line + 1; + + m_pPre_line_buf = (uint8_t*)png_calloc(m_src_bytes_per_line); + m_pCur_line_buf = (uint8_t*)png_calloc(m_dec_bytes_per_line); + m_pPro_line_buf = (uint8_t*)png_calloc(m_dst_bytes_per_line); + + if (!m_pPre_line_buf || !m_pCur_line_buf || !m_pPro_line_buf) + return terminate(PNG_NOTENOUGHMEM); + + m_inflate_src_buf_ofs = PNG_INFLATE_SRC_BUF_SIZE; + + int res = mz_inflateInit(&m_inflator); + if (res != 0) + return terminate(PNG_DECERROR); + + if (m_ihdr.m_ilace_type == 1) + { + //int i; + //uint32_t total_lines, lines_processed; + + m_adam7_pass_size_x[0] = adam7_pass_size(m_ihdr.m_width, 0, 8); + m_adam7_pass_size_x[1] = adam7_pass_size(m_ihdr.m_width, 4, 8); + m_adam7_pass_size_x[2] = adam7_pass_size(m_ihdr.m_width, 0, 4); + m_adam7_pass_size_x[3] = 
adam7_pass_size(m_ihdr.m_width, 2, 4); + m_adam7_pass_size_x[4] = adam7_pass_size(m_ihdr.m_width, 0, 2); + m_adam7_pass_size_x[5] = adam7_pass_size(m_ihdr.m_width, 1, 2); + m_adam7_pass_size_x[6] = adam7_pass_size(m_ihdr.m_width, 0, 1); + + m_adam7_pass_size_y[0] = adam7_pass_size(m_ihdr.m_height, 0, 8); + m_adam7_pass_size_y[1] = adam7_pass_size(m_ihdr.m_height, 0, 8); + m_adam7_pass_size_y[2] = adam7_pass_size(m_ihdr.m_height, 4, 8); + m_adam7_pass_size_y[3] = adam7_pass_size(m_ihdr.m_height, 0, 4); + m_adam7_pass_size_y[4] = adam7_pass_size(m_ihdr.m_height, 2, 4); + m_adam7_pass_size_y[5] = adam7_pass_size(m_ihdr.m_height, 0, 2); + m_adam7_pass_size_y[6] = adam7_pass_size(m_ihdr.m_height, 1, 2); + + m_adam7_image_buf.resize(m_dst_bytes_per_line * m_ihdr.m_height); + + m_adam7_pass_num = -1; + + m_pass_y_left = 0; + +#if 0 + total_lines = lines_processed = 0; + + for (i = 0; i < 7; i++) + total_lines += m_adam7_pass_size_y[i]; +#endif + + for (; ; ) + { + void* dummy_ptr = nullptr; + uint32_t dummy_len = 0; + + status = png_decode(&dummy_ptr, &dummy_len); + + if (status) + { + if (status == PNG_ALLDONE) + break; + else + { + uninitialize(); + + return status; + } + } + + //lines_processed++; + } + + m_adam7_decoded_flag = TRUE; + m_pass_y_left = m_ihdr.m_height; + } + else + { + m_pass_x_size = m_ihdr.m_width; + m_pass_y_left = m_ihdr.m_height; + } + + return 0; +} + +void png_decoder::calc_gamma_table() +{ + if (m_gama_value == 45000) + { + for (int i = 0; i < 256; i++) + m_gamma_table[i] = (uint8_t)i; + return; + } + + float gamma = (float)(m_gama_value) / 100000.0f; + + gamma = 1.0f / (gamma * 2.2f); + + for (int i = 0; i < 256; i++) + { + float temp = powf((float)(i) / 255.0f, gamma) * 255.0f; + + int j = (int)(temp + .5f); + + if (j < 0) + j = 0; + else if (j > 255) + j = 255; + + m_gamma_table[i] = (uint8_t)j; + } +} + +void png_decoder::create_grey_palette() +{ + int i, j; + uint8_t* p = m_img_pal; + + const int img_colors = minimum(256, 1 << 
m_ihdr.m_bit_depth); + for (i = 0; i < img_colors; i++) + { + j = ((uint32_t)255 * (uint32_t)i) / (img_colors - 1); + + *p++ = (uint8_t)j; + *p++ = (uint8_t)j; + *p++ = (uint8_t)j; + } +} + +int png_decoder::read_signature() +{ + if (m_pFile->read(m_temp_buf, 8) != 8) + return terminate(PNG_UNKNOWNTYPE); + + if ((m_temp_buf[0] != 137) || + (m_temp_buf[1] != 80) || + (m_temp_buf[2] != 78) || + (m_temp_buf[3] != 71) || + (m_temp_buf[4] != 13) || + (m_temp_buf[5] != 10) || + (m_temp_buf[6] != 26) || + (m_temp_buf[7] != 10)) + { + return terminate(PNG_UNKNOWNTYPE); + } + + return 0; +} + +int png_decoder::read_ihdr_chunk() +{ + int res = fetch_next_chunk_init(); + if (res < 0) + return res; + + if ((m_chunk_name[0] != 'I') || (m_chunk_name[1] != 'H') || (m_chunk_name[2] != 'D') || (m_chunk_name[3] != 'R') || (m_chunk_size != 13)) + return terminate(PNG_NO_IHDR); + + int64_t v64 = fetch_next_chunk_dword(); + if (v64 < 0) + return (int)v64; + m_ihdr.m_width = (uint32_t)v64; + + v64 = fetch_next_chunk_dword(); + if (v64 < 0) + return (int)v64; + m_ihdr.m_height = (uint32_t)v64; + + if ((m_ihdr.m_width == 0) || (m_ihdr.m_width > MAX_SUPPORTED_RES)) + return terminate(PNG_BAD_WIDTH); + + if ((m_ihdr.m_height == 0) || (m_ihdr.m_height > MAX_SUPPORTED_RES)) + return terminate(PNG_BAD_HEIGHT); + + int v = fetch_next_chunk_byte(); + if (v < 0) + return v; + m_ihdr.m_bit_depth = (uint8_t)v; + + v = fetch_next_chunk_byte(); + if (v < 0) + return v; + m_ihdr.m_color_type = (uint8_t)v; + + v = fetch_next_chunk_byte(); + if (v < 0) + return v; + m_ihdr.m_comp_type = (uint8_t)v; + + v = fetch_next_chunk_byte(); + if (v < 0) + return v; + m_ihdr.m_filter_type = (uint8_t)v; + + v = fetch_next_chunk_byte(); + if (v < 0) + return v; + m_ihdr.m_ilace_type = (uint8_t)v; + + if (m_ihdr.m_comp_type != 0) + m_img_supported_flag = PNG_UNS_COMPRESSION; + + if (m_ihdr.m_filter_type != 0) + m_img_supported_flag = PNG_UNS_FILTER; + + if (m_ihdr.m_ilace_type > 1) + m_img_supported_flag = 
PNG_UNS_ILACE; + + switch (m_ihdr.m_color_type) + { + case PNG_COLOR_TYPE_GREYSCALE: + { + switch (m_ihdr.m_bit_depth) + { + case 1: + case 2: + case 4: + case 8: + case 16: + { + break; + } + default: + return terminate(PNG_BAD_BIT_DEPTH); + } + + break; + } + case PNG_COLOR_TYPE_PALETTIZED: + { + switch (m_ihdr.m_bit_depth) + { + case 1: + case 2: + case 4: + case 8: + { + break; + } + default: + return terminate(PNG_BAD_BIT_DEPTH); + } + + break; + } + case PNG_COLOR_TYPE_TRUECOLOR: + case PNG_COLOR_TYPE_GREYSCALE_ALPHA: + case PNG_COLOR_TYPE_TRUECOLOR_ALPHA: + { + switch (m_ihdr.m_bit_depth) + { + case 8: + case 16: + { + break; + } + default: + return terminate(PNG_BAD_BIT_DEPTH); + } + + break; + } + default: + return terminate(PNG_UNS_COLOR_TYPE); + } + + return 0; +} + +int png_decoder::read_bkgd_chunk() +{ + m_bkgd_flag = TRUE; + + if (m_ihdr.m_color_type == PNG_COLOR_TYPE_PALETTIZED) + { + int v = fetch_next_chunk_byte(); + if (v < 0) + return v; + m_bkgd_value[0] = v; + } + else if ((m_ihdr.m_color_type == PNG_COLOR_TYPE_GREYSCALE) || (m_ihdr.m_color_type == PNG_COLOR_TYPE_GREYSCALE_ALPHA)) + { + int v = fetch_next_chunk_word(); + if (v < 0) + return v; + m_bkgd_value[0] = v; + } + else if ((m_ihdr.m_color_type == PNG_COLOR_TYPE_TRUECOLOR) || (m_ihdr.m_color_type == PNG_COLOR_TYPE_TRUECOLOR_ALPHA)) + { + int v = fetch_next_chunk_word(); + if (v < 0) + return v; + m_bkgd_value[0] = v; + + v = fetch_next_chunk_word(); + if (v < 0) + return v; + m_bkgd_value[1] = v; + + v = fetch_next_chunk_word(); + if (v < 0) + return v; + m_bkgd_value[2] = v; + } + + return 0; +} + +int png_decoder::read_gama_chunk() +{ + m_gama_flag = TRUE; + + int64_t v = fetch_next_chunk_dword(); + if (v < 0) + return (int)v; + + m_gama_value = (uint32_t)v; + + return 0; +} + +int png_decoder::read_trns_chunk() +{ + int i; + + m_trns_flag = TRUE; + + if (m_ihdr.m_color_type == PNG_COLOR_TYPE_PALETTIZED) + { + for (i = 0; i < 256; i++) + m_trns_value[i] = 255; + + const uint32_t 
img_colors = 1 << m_ihdr.m_bit_depth; + if (m_chunk_size > (uint32_t)img_colors) + return terminate(PNG_BAD_TRNS_CHUNK); + + for (i = 0; i < (int)m_chunk_size; i++) + { + int v = fetch_next_chunk_byte(); + if (v < 0) + return v; + m_trns_value[i] = v; + } + } + else if (m_ihdr.m_color_type == PNG_COLOR_TYPE_GREYSCALE) + { + int v = fetch_next_chunk_word(); + if (v < 0) + return v; + m_trns_value[0] = v; + } + else if (m_ihdr.m_color_type == PNG_COLOR_TYPE_TRUECOLOR) + { + int v = fetch_next_chunk_word(); + if (v < 0) + return v; + m_trns_value[0] = v; + + v = fetch_next_chunk_word(); + if (v < 0) + return v; + m_trns_value[1] = v; + + v = fetch_next_chunk_word(); + if (v < 0) + return v; + m_trns_value[2] = v; + } + else + { + return terminate(PNG_BAD_TRNS_CHUNK); + } + return 0; +} + +int png_decoder::read_plte_chunk() +{ + int i, j; + uint8_t* p; + + if (m_plte_flag) + return terminate(PNG_BAD_PLTE_CHUNK); + + m_plte_flag = TRUE; + + memset(m_img_pal, 0, 768); + + if (m_chunk_size % 3) + return terminate(PNG_BAD_PLTE_CHUNK); + + j = m_chunk_size / 3; + + const int img_colors = minimum(256, 1 << m_ihdr.m_bit_depth); + if (j > img_colors) + return terminate(PNG_BAD_PLTE_CHUNK); + + if ((m_ihdr.m_color_type == PNG_COLOR_TYPE_GREYSCALE) || + (m_ihdr.m_color_type == PNG_COLOR_TYPE_GREYSCALE_ALPHA)) + return terminate(PNG_BAD_PLTE_CHUNK); + + p = m_img_pal; + + for (i = 0; i < j; i++) + { + int v = fetch_next_chunk_byte(); + if (v < 0) + return v; + *p++ = (uint8_t)v; + + v = fetch_next_chunk_byte(); + if (v < 0) + return v; + *p++ = (uint8_t)v; + + v = fetch_next_chunk_byte(); + if (v < 0) + return v; + *p++ = (uint8_t)v; + } + + return 0; +} + +int png_decoder::find_idat_chunk() +{ + for (; ; ) + { + int res = fetch_next_chunk_init(); + if (res < 0) + return res; + + if (m_chunk_name[0] & 32) /* ancillary? 
*/ + { + if ((m_chunk_name[0] == 'b') && (m_chunk_name[1] == 'K') && (m_chunk_name[2] == 'G') && (m_chunk_name[3] == 'D')) + { + res = read_bkgd_chunk(); + if (res < 0) + return res; + } + else if ((m_chunk_name[0] == 'g') && (m_chunk_name[1] == 'A') && (m_chunk_name[2] == 'M') && (m_chunk_name[3] == 'A')) + { + res = read_gama_chunk(); + if (res < 0) + return res; + } + else if ((m_chunk_name[0] == 't') && (m_chunk_name[1] == 'R') && (m_chunk_name[2] == 'N') && (m_chunk_name[3] == 'S')) + { + res = read_trns_chunk(); + if (res < 0) + return res; + } + } + else + { + if ((m_chunk_name[0] == 'P') && (m_chunk_name[1] == 'L') && (m_chunk_name[2] == 'T') && (m_chunk_name[3] == 'E')) + { + res = read_plte_chunk(); + if (res < 0) + return res; + } + else if ((m_chunk_name[0] == 'I') && (m_chunk_name[1] == 'D') && (m_chunk_name[2] == 'A') && (m_chunk_name[3] == 'T')) + { + break; + } + else + { + m_img_supported_flag = PNG_UNS_CRITICAL_CHUNK; + } + } + } + + return 0; +} + +png_decoder::png_decoder() +{ + clear(); +} + +png_decoder::~png_decoder() +{ + uninitialize(); +} + +void png_decoder::clear() +{ + clear_obj(m_pMalloc_blocks); + + m_pFile = nullptr; + + clear_obj(m_img_pal); + + m_img_supported_flag = FALSE; + + m_adam7_image_buf.clear(); + + clear_obj(m_ihdr); + + m_chunk_flag = FALSE; + m_chunk_size = 0; + m_chunk_left = 0; + m_chunk_crc32 = 0; + clear_obj(m_chunk_name); + + m_end_of_idat_chunks = 0; + + m_dec_bytes_per_pixel = 0; + m_dst_bytes_per_pixel = 0; + + m_dec_bytes_per_line = 0; + m_src_bytes_per_line = 0; + m_dst_bytes_per_line = 0; + + m_pProcess_func = nullptr; + + m_pPre_line_buf = nullptr; + m_pCur_line_buf = nullptr; + m_pPro_line_buf = nullptr; + + m_bkgd_flag = FALSE; + clear_obj(m_bkgd_value); + + m_gama_flag = FALSE; + m_gama_value = 0; + + m_plte_flag = FALSE; + + m_trns_flag = FALSE; + clear_obj(m_trns_value); + + clear_obj(m_inflator); + + m_inflate_src_buf_ofs = 0; + m_inflate_src_buf_size = 0; + m_inflate_dst_buf_ofs = 0; + + 
m_inflate_eof_flag = FALSE; + + clear_obj(m_trns_value); + + m_pass_x_size = 0; + m_pass_y_left = 0; + + m_adam7_pass_num = 0; + m_adam7_pass_y = 0; + clear_obj(m_adam7_pass_size_x); + clear_obj(m_adam7_pass_size_y); + + m_adam7_decoded_flag = FALSE; + + m_scanned_flag = false; + + m_terminate_status = 0; +} + +int png_decoder::png_scan(png_file *pFile) +{ + m_pFile = pFile; + + m_img_supported_flag = TRUE; + m_terminate_status = 0; + + int res = read_signature(); + if (res != 0) + return res; + + res = read_ihdr_chunk(); + if (res != 0) + return res; + + res = find_idat_chunk(); + if (res != 0) + return res; + + if (m_gama_flag) + calc_gamma_table(); + + if (m_ihdr.m_color_type == PNG_COLOR_TYPE_PALETTIZED) + { + if (!m_plte_flag) + return terminate(PNG_MISSING_PALETTE); + } + else if ((m_ihdr.m_color_type == PNG_COLOR_TYPE_GREYSCALE) || (m_ihdr.m_color_type == PNG_COLOR_TYPE_GREYSCALE_ALPHA)) + { + create_grey_palette(); + } + + m_scanned_flag = true; + + return 0; +} + +static inline uint8_t get_709_luma(uint32_t r, uint32_t g, uint32_t b) +{ + return (uint8_t)((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); +} + +bool get_png_info(const void* pImage_buf, size_t buf_size, png_info &info) +{ + memset(&info, 0, sizeof(info)); + + if ((!pImage_buf) || (buf_size < MIN_PNG_SIZE)) + return false; + + png_readonly_memory_file mf; + mf.init(pImage_buf, buf_size); + + png_decoder dec; + + int status = dec.png_scan(&mf); + if ((status != 0) || (dec.m_img_supported_flag != TRUE)) + return false; + + info.m_width = dec.m_ihdr.m_width; + info.m_height = dec.m_ihdr.m_height; + info.m_bit_depth = dec.m_ihdr.m_bit_depth; + info.m_color_type = dec.m_ihdr.m_color_type; + info.m_has_gamma = dec.m_gama_flag != 0; + info.m_gamma_value = dec.m_gama_value; + info.m_has_trns = dec.m_trns_flag != 0; + + switch (dec.m_ihdr.m_color_type) + { + case PNG_COLOR_TYPE_GREYSCALE: + info.m_num_chans = dec.m_trns_flag ? 
2 : 1; + break; + case PNG_COLOR_TYPE_GREYSCALE_ALPHA: + info.m_num_chans = 2; + break; + case PNG_COLOR_TYPE_PALETTIZED: + case PNG_COLOR_TYPE_TRUECOLOR: + info.m_num_chans = dec.m_trns_flag ? 4 : 3; + break; + case PNG_COLOR_TYPE_TRUECOLOR_ALPHA: + info.m_num_chans = 4; + break; + default: + assert(0); + break; + } + + return true; +} + +void* load_png(const void* pImage_buf, size_t buf_size, uint32_t desired_chans, uint32_t& width, uint32_t& height, uint32_t& num_chans) +{ + width = 0; + height = 0; + num_chans = 0; + + if ((!pImage_buf) || (buf_size < MIN_PNG_SIZE)) + { + assert(0); + return nullptr; + } + + if (desired_chans > 4) + { + assert(0); + return nullptr; + } + + png_readonly_memory_file mf; + mf.init(pImage_buf, buf_size); + + png_decoder dec; + + int status = dec.png_scan(&mf); + if ((status != 0) || (dec.m_img_supported_flag != TRUE)) + return nullptr; + + uint32_t colortype = dec.m_ihdr.m_color_type; + switch (colortype) + { + case PNG_COLOR_TYPE_GREYSCALE: + num_chans = dec.m_trns_flag ? 2 : 1; + break; + case PNG_COLOR_TYPE_GREYSCALE_ALPHA: + num_chans = 2; + break; + case PNG_COLOR_TYPE_PALETTIZED: + case PNG_COLOR_TYPE_TRUECOLOR: + num_chans = dec.m_trns_flag ? 
4 : 3; + break; + case PNG_COLOR_TYPE_TRUECOLOR_ALPHA: + num_chans = 4; + break; + default: + assert(0); + break; + } + + if (!desired_chans) + desired_chans = num_chans; + +#if 0 + printf("lode_png: %ux%u bitdepth: %u colortype: %u trns: %u ilace: %u\n", + dec.m_ihdr.m_width, + dec.m_ihdr.m_height, + dec.m_ihdr.m_bit_depth, + dec.m_ihdr.m_color_type, + dec.m_trns_flag, + dec.m_ihdr.m_ilace_type); +#endif + + width = dec.m_ihdr.m_width; + height = dec.m_ihdr.m_height; + uint32_t bitdepth = dec.m_ihdr.m_bit_depth; + uint32_t pitch = width * desired_chans; + + uint64_t total_size = (uint64_t)pitch * height; + if (total_size > 0x7FFFFFFFULL) + return nullptr; + + uint8_t* pBuf = (uint8_t*)malloc((size_t)total_size); + if (!pBuf) + return nullptr; + + if (dec.png_decode_start() != 0) + { + free(pBuf); + return nullptr; + } + + uint8_t* pDst = pBuf; + + for (uint32_t y = 0; y < height; y++, pDst += pitch) + { + uint8_t* pLine; + uint32_t line_bytes; + if (dec.png_decode((void**)&pLine, &line_bytes) != 0) + { + free(pBuf); + return nullptr; + } + + // This conversion matrix handles converting RGB->Luma, converting grayscale samples to 8-bit samples, converting palettized images, and PNG transparency. 
+ switch (colortype) + { + case PNG_COLOR_TYPE_GREYSCALE: + { + uint32_t trans_value = dec.m_trns_value[0]; + + switch (desired_chans) + { + case 1: + if (bitdepth == 16) + { + assert(line_bytes == width * 2); + + for (uint32_t i = 0; i < width; i++) + pDst[i] = dec.m_img_pal[pLine[i * 2 + 0] * 3]; + } + else if (bitdepth == 8) + { + assert(line_bytes == width); + memcpy(pDst, pLine, pitch); + } + else + { + assert(line_bytes == width); + for (uint32_t i = 0; i < width; i++) + pDst[i] = dec.m_img_pal[pLine[i] * 3]; + } + break; + case 2: + if (bitdepth == 16) + { + assert(line_bytes == width * 2); + for (uint32_t i = 0; i < width; i++) + { + pDst[i * 2 + 0] = dec.m_img_pal[pLine[i * 2 + 0] * 3]; + pDst[i * 2 + 1] = pLine[i * 2 + 1]; + } + } + else if (dec.m_trns_flag) + { + assert(line_bytes == width); + for (uint32_t i = 0; i < width; i++) + { + pDst[i * 2 + 0] = dec.m_img_pal[pLine[i] * 3]; + pDst[i * 2 + 1] = (pLine[i] == trans_value) ? 0 : 255; + } + } + else + { + assert(line_bytes == width); + for (uint32_t i = 0; i < width; i++) + { + pDst[i * 2 + 0] = dec.m_img_pal[pLine[i] * 3]; + pDst[i * 2 + 1] = 255; + } + } + break; + case 3: + if (bitdepth == 16) + { + assert(line_bytes == width * 2); + for (uint32_t i = 0; i < width; i++) + { + uint8_t c = dec.m_img_pal[pLine[i * 2 + 0] * 3]; + pDst[i * 3 + 0] = c; + pDst[i * 3 + 1] = c; + pDst[i * 3 + 2] = c; + } + } + else + { + assert(line_bytes == width); + for (uint32_t i = 0; i < width; i++) + { + uint8_t c = dec.m_img_pal[pLine[i] * 3]; + pDst[i * 3 + 0] = c; + pDst[i * 3 + 1] = c; + pDst[i * 3 + 2] = c; + } + } + break; + case 4: + if (bitdepth == 16) + { + assert(line_bytes == width * 2); + for (uint32_t i = 0; i < width; i++) + { + uint8_t c = dec.m_img_pal[pLine[i * 2 + 0] * 3]; + pDst[i * 4 + 0] = c; + pDst[i * 4 + 1] = c; + pDst[i * 4 + 2] = c; + pDst[i * 4 + 3] = pLine[i * 2 + 1]; + } + } + else if (dec.m_trns_flag) + { + assert(line_bytes == width); + for (uint32_t i = 0; i < width; i++) + { + uint8_t 
c = dec.m_img_pal[pLine[i] * 3]; + pDst[i * 4 + 0] = c; + pDst[i * 4 + 1] = c; + pDst[i * 4 + 2] = c; + pDst[i * 4 + 3] = (pLine[i] == trans_value) ? 0 : 255; + } + } + else + { + assert(line_bytes == width); + for (uint32_t i = 0; i < width; i++) + { + uint8_t c = dec.m_img_pal[pLine[i] * 3]; + pDst[i * 4 + 0] = c; + pDst[i * 4 + 1] = c; + pDst[i * 4 + 2] = c; + pDst[i * 4 + 3] = 255; + } + } + break; + } + + break; + } + case PNG_COLOR_TYPE_GREYSCALE_ALPHA: + { + assert(line_bytes == width * 2); + + switch (desired_chans) + { + case 1: + for (uint32_t i = 0; i < width; i++) + pDst[i] = dec.m_img_pal[pLine[i * 2 + 0] * 3]; + break; + case 2: + assert(line_bytes == pitch); + if (bitdepth >= 8) + memcpy(pDst, pLine, pitch); + else + { + for (uint32_t i = 0; i < width; i++) + { + pDst[i * 2 + 0] = dec.m_img_pal[pLine[i * 2 + 0] * 3]; + pDst[i * 2 + 1] = pLine[i * 2 + 1]; + } + } + break; + case 3: + for (uint32_t i = 0; i < width; i++) + { + uint8_t c = dec.m_img_pal[pLine[i * 2 + 0] * 3]; + pDst[i * 3 + 0] = c; + pDst[i * 3 + 1] = c; + pDst[i * 3 + 2] = c; + } + break; + case 4: + for (uint32_t i = 0; i < width; i++) + { + uint8_t c = dec.m_img_pal[pLine[i * 2 + 0] * 3]; + pDst[i * 4 + 0] = c; + pDst[i * 4 + 1] = c; + pDst[i * 4 + 2] = c; + pDst[i * 4 + 3] = pLine[i * 2 + 1]; + } + break; + } + + break; + } + case PNG_COLOR_TYPE_PALETTIZED: + { + assert(line_bytes == width); + + switch (desired_chans) + { + case 1: + for (uint32_t i = 0; i < width; i++) + { + const uint8_t* p = &dec.m_img_pal[pLine[i] * 3]; + pDst[i] = get_709_luma(p[0], p[1], p[2]); + } + break; + case 2: + if (dec.m_trns_flag) + { + for (uint32_t i = 0; i < width; i++) + { + const uint8_t* p = &dec.m_img_pal[pLine[i] * 3]; + pDst[i * 2 + 0] = get_709_luma(p[0], p[1], p[2]); + pDst[i * 2 + 1] = (uint8_t)dec.m_trns_value[pLine[i]]; + } + } + else + { + for (uint32_t i = 0; i < width; i++) + { + const uint8_t* p = &dec.m_img_pal[pLine[i] * 3]; + pDst[i * 2 + 0] = get_709_luma(p[0], p[1], p[2]); + 
pDst[i * 2 + 1] = 255; + } + } + break; + case 3: + for (uint32_t i = 0; i < width; i++) + { + const uint8_t* p = &dec.m_img_pal[pLine[i] * 3]; + pDst[i * 3 + 0] = p[0]; + pDst[i * 3 + 1] = p[1]; + pDst[i * 3 + 2] = p[2]; + } + break; + case 4: + if (dec.m_trns_flag) + { + for (uint32_t i = 0; i < width; i++) + { + const uint8_t* p = &dec.m_img_pal[pLine[i] * 3]; + pDst[i * 4 + 0] = p[0]; + pDst[i * 4 + 1] = p[1]; + pDst[i * 4 + 2] = p[2]; + pDst[i * 4 + 3] = (uint8_t)dec.m_trns_value[pLine[i]]; + } + } + else + { + for (uint32_t i = 0; i < width; i++) + { + const uint8_t* p = &dec.m_img_pal[pLine[i] * 3]; + pDst[i * 4 + 0] = p[0]; + pDst[i * 4 + 1] = p[1]; + pDst[i * 4 + 2] = p[2]; + pDst[i * 4 + 3] = 255; + } + } + break; + } + + break; + } + case PNG_COLOR_TYPE_TRUECOLOR: + case PNG_COLOR_TYPE_TRUECOLOR_ALPHA: + { + assert(line_bytes == width * 4); + + switch (desired_chans) + { + case 1: + for (uint32_t i = 0; i < width; i++) + { + const uint8_t* p = &pLine[i * 4]; + pDst[i] = get_709_luma(p[0], p[1], p[2]); + } + break; + case 2: + for (uint32_t i = 0; i < width; i++) + { + const uint8_t* p = &pLine[i * 4]; + pDst[i * 2 + 0] = get_709_luma(p[0], p[1], p[2]); + pDst[i * 2 + 1] = p[3]; + } + break; + case 3: + for (uint32_t i = 0; i < width; i++) + { + const uint8_t* p = &pLine[i * 4]; + pDst[i * 3 + 0] = p[0]; + pDst[i * 3 + 1] = p[1]; + pDst[i * 3 + 2] = p[2]; + } + break; + case 4: + memcpy(pDst, pLine, pitch); + break; + } + + break; + } + default: + assert(0); + break; + } + + } // y + + return pBuf; +} + +} // namespace pv_png + +/* + This is free and unencumbered software released into the public domain. + + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. 
+ + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to + + Richard Geldreich, Jr. + 1/20/2022 +*/ diff --git a/thirdparty/basisu/encoder/pvpngreader.h b/thirdparty/basisu/encoder/pvpngreader.h new file mode 100644 index 000000000..4f3fe46b8 --- /dev/null +++ b/thirdparty/basisu/encoder/pvpngreader.h @@ -0,0 +1,48 @@ +// pngreader.h - Public Domain - see unlicense at bottom of pvpngreader.cpp +#pragma once +#include + +namespace pv_png +{ + // PNG color types + enum + { + PNG_COLOR_TYPE_GREYSCALE = 0, + PNG_COLOR_TYPE_TRUECOLOR = 2, + PNG_COLOR_TYPE_PALETTIZED = 3, + PNG_COLOR_TYPE_GREYSCALE_ALPHA = 4, + PNG_COLOR_TYPE_TRUECOLOR_ALPHA = 6 + }; + + // PNG file description + struct png_info + { + uint32_t m_width; + uint32_t m_height; + + uint32_t m_num_chans; // The number of channels, factoring in transparency. Ranges from [1-4]. + + uint32_t m_bit_depth; // PNG ihdr bit depth: 1, 2, 4, 8 or 16 + uint32_t m_color_type; // PNG ihdr color type, PNG_COLOR_TYPE_GRAYSCALE etc. 
+ + bool m_has_gamma; // true if the PNG file had a GAMA chunk + uint32_t m_gamma_value; // PNG GAMA chunk value, scaled by 100000 + + bool m_has_trns; // true if the PNG file used colorkey transparency + }; + + // Retrieved information about the PNG file. + // Returns false on any errors. + bool get_png_info(const void* pImage_buf, size_t buf_size, png_info& info); + + // Input parameters: + // pImage_buf, buf_size - pointer to PNG image data + // desired_chans - desired number of output channels. 0=auto, 1=grayscale, 2=grayscale alpha, 3=24bpp RGB, 4=32bpp RGBA + // + // Output parameters: + // width, height - PNG image resolution + // num_chans - actual number of channels in PNG, from [1,4] (factoring in transparency) + // + // Returns nullptr on any errors. + void* load_png(const void* pImage_buf, size_t buf_size, uint32_t desired_chans, uint32_t &width, uint32_t &height, uint32_t& num_chans); +} diff --git a/thirdparty/basisu/transcoder/basisu.h b/thirdparty/basisu/transcoder/basisu.h new file mode 100644 index 000000000..e1f716114 --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu.h @@ -0,0 +1,632 @@ +// basisu.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#ifdef _MSC_VER + + #pragma warning (disable : 4201) + #pragma warning (disable : 4127) // warning C4127: conditional expression is constant + #pragma warning (disable : 4530) // C++ exception handler used, but unwind semantics are not enabled. + +#endif // _MSC_VER + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "basisu_containers.h" + +#ifdef max +#undef max +#endif + +#ifdef min +#undef min +#endif + +#ifdef _WIN32 +#define strcasecmp _stricmp +#endif + +// Set to one to enable debug printf()'s when any errors occur, for development/debugging. Especially useful for WebGL development. +#ifndef BASISU_FORCE_DEVEL_MESSAGES +#define BASISU_FORCE_DEVEL_MESSAGES 0 +#endif + +#define BASISU_NOTE_UNUSED(x) (void)(x) +#define BASISU_ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#define BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(x) x(const x &) = delete; x& operator= (const x &) = delete; +#define BASISU_ASSUME(x) static_assert(x, #x); +#define BASISU_OFFSETOF(s, m) offsetof(s, m) +#define BASISU_STRINGIZE(x) #x +#define BASISU_STRINGIZE2(x) BASISU_STRINGIZE(x) + +#if BASISU_FORCE_DEVEL_MESSAGES + #define BASISU_DEVEL_ERROR(...) do { basisu::debug_printf(__VA_ARGS__); } while(0) +#else + #define BASISU_DEVEL_ERROR(...) 
+#endif + +namespace basisu +{ + // Types/utilities + +#ifdef _WIN32 + const char BASISU_PATH_SEPERATOR_CHAR = '\\'; +#else + const char BASISU_PATH_SEPERATOR_CHAR = '/'; +#endif + + typedef basisu::vector uint8_vec; + typedef basisu::vector int16_vec; + typedef basisu::vector uint16_vec; + typedef basisu::vector uint_vec; + typedef basisu::vector size_t_vec; + typedef basisu::vector uint64_vec; + typedef basisu::vector int_vec; + typedef basisu::vector bool_vec; + typedef basisu::vector float_vec; + + void enable_debug_printf(bool enabled); + void debug_printf(const char *pFmt, ...); + void debug_puts(const char* p); + + template + inline void fmt_debug_printf(const char* pFmt, Args&&... args) + { + std::string res; + if (!fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward(args))... })) + return; + debug_puts(res.c_str()); + } + +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif +#endif + + template inline void clear_obj(T& obj) { memset((void *)&obj, 0, sizeof(obj)); } + +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif + + constexpr double cPiD = 3.14159265358979323846264338327950288; + constexpr float REALLY_SMALL_FLOAT_VAL = .000000125f; + constexpr float SMALL_FLOAT_VAL = .0000125f; + constexpr float BIG_FLOAT_VAL = 1e+30f; + + template inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; } + + inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } + inline float saturate(float value) { return clampf(value, 0, 1.0f); } + inline uint8_t minimumub(uint8_t a, uint8_t b) { return (a < b) ? a : b; } + inline uint32_t minimumu(uint32_t a, uint32_t b) { return (a < b) ? a : b; } + inline int32_t minimumi(int32_t a, int32_t b) { return (a < b) ? a : b; } + inline float minimumf(float a, float b) { return (a < b) ? 
a : b; } + inline uint8_t maximumub(uint8_t a, uint8_t b) { return (a > b) ? a : b; } + inline uint32_t maximumu(uint32_t a, uint32_t b) { return (a > b) ? a : b; } + inline int32_t maximumi(int32_t a, int32_t b) { return (a > b) ? a : b; } + inline float maximumf(float a, float b) { return (a > b) ? a : b; } + inline int squarei(int i) { return i * i; } + inline float squaref(float i) { return i * i; } + inline double squared(double i) { return i * i; } + template inline T square(T a) { return a * a; } + template inline T sign(T a) { return (a < 0) ? (T)-1 : ((a == 0) ? (T)0 : (T)1); } + + inline bool equal_tol(float a, float b, float t) { return fabsf(a - b) <= ((maximum(fabsf(a), fabsf(b)) + 1.0f) * t); } + inline bool equal_tol(double a, double b, double t) { return fabs(a - b) <= ((maximum(fabs(a), fabs(b)) + 1.0f) * t); } + + template + inline T prev_wrap(T i, T n) + { + T temp = i - 1; + if (temp < 0) + temp = n - 1; + return temp; + } + + template + inline T next_wrap(T i, T n) + { + T temp = i + 1; + if (temp >= n) + temp = 0; + return temp; + } + + inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } + inline uint64_t iabs64(int64_t i) { return (i < 0) ? 
static_cast(-i) : static_cast(i); } + + template inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } + template inline typename T::value_type *enlarge_vector(T &vec, size_t n) { size_t cs = vec.size(); vec.resize(cs + n); return &vec[cs]; } + + inline bool is_pow2(uint32_t x) { return x && ((x & (x - 1U)) == 0U); } + inline bool is_pow2(uint64_t x) { return x && ((x & (x - 1U)) == 0U); } + + template inline T open_range_check(T v, T minv, T maxv) { assert(v >= minv && v < maxv); BASISU_NOTE_UNUSED(minv); BASISU_NOTE_UNUSED(maxv); return v; } + template inline T open_range_check(T v, T maxv) { assert(v < maxv); BASISU_NOTE_UNUSED(maxv); return v; } + + // Open interval + inline bool in_bounds(int v, int l, int h) + { + return (v >= l) && (v < h); + } + + // Closed interval + inline bool in_range(int v, int l, int h) + { + return (v >= l) && (v <= h); + } + + inline uint32_t total_bits(uint32_t v) { uint32_t l = 0; for ( ; v > 0U; ++l) v >>= 1; return l; } + + template inline T saturate(T val) { return clamp(val, 0.0f, 1.0f); } + + inline uint32_t get_bit(uint32_t src, int ndx) + { + assert(in_bounds(ndx, 0, 32)); + return (src >> ndx) & 1; + } + + inline bool is_bit_set(uint32_t src, int ndx) + { + return get_bit(src, ndx) != 0; + } + + inline uint32_t get_bits(uint32_t val, int low, int high) + { + const int num_bits = (high - low) + 1; + assert(in_range(num_bits, 1, 32)); + + val >>= low; + if (num_bits != 32) + val &= ((1u << num_bits) - 1); + + return val; + } + + template inline void append_vector(T &vec, const R *pObjs, size_t n) + { + if (n) + { + if (vec.size()) + { + assert((pObjs + n) <= vec.begin() || (pObjs >= vec.end())); + } + const size_t cur_s = vec.size(); + vec.resize(cur_s + n); + memcpy(&vec[cur_s], pObjs, sizeof(R) * n); + } + } + + template inline void append_vector(T &vec, const T &other_vec) + { + assert(&vec != &other_vec); + if (other_vec.size()) + append_vector(vec, &other_vec[0], other_vec.size()); + } + + template 
inline void vector_ensure_element_is_valid(T &vec, size_t idx) + { + if (idx >= vec.size()) + vec.resize(idx + 1); + } + + template inline void vector_sort(T &vec) + { + if (vec.size()) + std::sort(vec.begin(), vec.end()); + } + + template inline bool unordered_set_contains(T& set, const U&obj) + { + return set.find(obj) != set.end(); + } + + template int vector_find(const T &vec, const typename T::value_type &obj) + { + assert(vec.size() <= INT_MAX); + for (size_t i = 0; i < vec.size(); i++) + if (vec[i] == obj) + return static_cast(i); + return -1; + } + + template void vector_set_all(T &vec, const typename T::value_type &obj) + { + for (size_t i = 0; i < vec.size(); i++) + vec[i] = obj; + } + + inline uint64_t read_be64(const void *p) + { + uint64_t val = 0; + for (uint32_t i = 0; i < 8; i++) + val |= (static_cast(static_cast(p)[7 - i]) << (i * 8)); + return val; + } + + inline void write_be64(void *p, uint64_t x) + { + for (uint32_t i = 0; i < 8; i++) + static_cast(p)[7 - i] = static_cast(x >> (i * 8)); + } + + static inline uint16_t byteswap16(uint16_t x) { return static_cast((x << 8) | (x >> 8)); } + static inline uint32_t byteswap32(uint32_t x) { return ((x << 24) | ((x << 8) & 0x00FF0000) | ((x >> 8) & 0x0000FF00) | (x >> 24)); } + + inline uint32_t floor_log2i(uint32_t v) + { + uint32_t b = 0; + for (; v > 1U; ++b) + v >>= 1; + return b; + } + + inline uint32_t ceil_log2i(uint32_t v) + { + uint32_t b = floor_log2i(v); + if ((b != 32) && (v > (1U << b))) + ++b; + return b; + } + + inline int posmod(int x, int y) + { + if (x >= 0) + return (x < y) ? x : (x % y); + int m = (-x) % y; + return (m != 0) ? 
(y - m) : m; + } + + inline bool do_excl_ranges_overlap(int la, int ha, int lb, int hb) + { + assert(la < ha && lb < hb); + if ((ha <= lb) || (la >= hb)) return false; + return true; + } + + static inline uint32_t read_le_word(const uint8_t* pBytes) + { + return (pBytes[1] << 8U) | (pBytes[0]); + } + + static inline uint32_t read_le_dword(const uint8_t *pBytes) + { + return (pBytes[3] << 24U) | (pBytes[2] << 16U) | (pBytes[1] << 8U) | (pBytes[0]); + } + + static inline void write_le_dword(uint8_t* pBytes, uint32_t val) + { + pBytes[0] = (uint8_t)val; + pBytes[1] = (uint8_t)(val >> 8U); + pBytes[2] = (uint8_t)(val >> 16U); + pBytes[3] = (uint8_t)(val >> 24U); + } + + // Always little endian 1-8 byte unsigned int + template + struct packed_uint + { + uint8_t m_bytes[NumBytes]; + + inline packed_uint() { static_assert(NumBytes <= sizeof(uint64_t), "Invalid NumBytes"); } + inline packed_uint(uint64_t v) { *this = v; } + inline packed_uint(const packed_uint& other) { *this = other; } + + inline packed_uint& operator= (uint64_t v) + { + // TODO: Add assert on truncation? + for (uint32_t i = 0; i < NumBytes; i++) + m_bytes[i] = static_cast(v >> (i * 8)); + return *this; + } + + inline packed_uint& operator= (const packed_uint& rhs) + { + memcpy(m_bytes, rhs.m_bytes, sizeof(m_bytes)); + return *this; + } + + inline uint64_t get_uint64() const + { + // Some compilers may warn about this code. It clearly cannot access beyond the end of the m_bytes struct here. 
+ if constexpr (NumBytes == 1) + { + return m_bytes[0]; + } + else if constexpr (NumBytes == 2) + { + return (m_bytes[1] << 8U) | m_bytes[0]; + } + else if constexpr (NumBytes == 3) + { + return (m_bytes[2] << 16U) | (m_bytes[1] << 8U) | m_bytes[0]; + } + else if constexpr (NumBytes == 4) + { + return read_le_dword(m_bytes); + } + else if constexpr (NumBytes == 5) + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = m_bytes[4]; + return static_cast(l) | (static_cast(h) << 32U); + } + else if constexpr (NumBytes == 6) + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = (m_bytes[5] << 8U) | m_bytes[4]; + return static_cast(l) | (static_cast(h) << 32U); + } + else if constexpr (NumBytes == 7) + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = (m_bytes[6] << 16U) | (m_bytes[5] << 8U) | m_bytes[4]; + return static_cast(l) | (static_cast(h) << 32U); + } + else if constexpr (NumBytes == 8) + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = read_le_dword(m_bytes + 4); + return static_cast(l) | (static_cast(h) << 32U); + } + else + { + static_assert(NumBytes <= 8, "Invalid NumBytes"); + return 0; + } + } + + inline uint32_t get_uint32() const + { + static_assert(NumBytes <= sizeof(uint32_t), "packed_uint too large to use get_uint32"); + return static_cast(get_uint64()); + } + + inline operator uint32_t() const + { + static_assert(NumBytes <= sizeof(uint32_t), "packed_uint too large to use operator uint32_t"); + return static_cast(get_uint64()); + } + }; + + enum eZero { cZero }; + enum eNoClamp { cNoClamp }; + + // Rice/Huffman entropy coding + + // This is basically Deflate-style canonical Huffman, except we allow for a lot more symbols. 
+ enum + { + cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, + cHuffmanFastLookupBits = 10, + cHuffmanMaxSymsLog2 = 14, cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2, + + // Small zero runs + cHuffmanSmallZeroRunSizeMin = 3, cHuffmanSmallZeroRunSizeMax = 10, cHuffmanSmallZeroRunExtraBits = 3, + + // Big zero run + cHuffmanBigZeroRunSizeMin = 11, cHuffmanBigZeroRunSizeMax = 138, cHuffmanBigZeroRunExtraBits = 7, + + // Small non-zero run + cHuffmanSmallRepeatSizeMin = 3, cHuffmanSmallRepeatSizeMax = 6, cHuffmanSmallRepeatExtraBits = 2, + + // Big non-zero run + cHuffmanBigRepeatSizeMin = 7, cHuffmanBigRepeatSizeMax = 134, cHuffmanBigRepeatExtraBits = 7, + + cHuffmanTotalCodelengthCodes = 21, cHuffmanSmallZeroRunCode = 17, cHuffmanBigZeroRunCode = 18, cHuffmanSmallRepeatCode = 19, cHuffmanBigRepeatCode = 20 + }; + + static const uint8_t g_huffman_sorted_codelength_codes[] = { cHuffmanSmallZeroRunCode, cHuffmanBigZeroRunCode, cHuffmanSmallRepeatCode, cHuffmanBigRepeatCode, 0, 8, 7, 9, 6, 0xA, 5, 0xB, 4, 0xC, 3, 0xD, 2, 0xE, 1, 0xF, 0x10 }; + const uint32_t cHuffmanTotalSortedCodelengthCodes = sizeof(g_huffman_sorted_codelength_codes) / sizeof(g_huffman_sorted_codelength_codes[0]); + + // GPU texture formats and various uncompressed texture formats. 
+ + enum class texture_format + { + cInvalidTextureFormat = -1, + + // Block-based formats + cETC1, // ETC1 + cETC1S, // ETC1 (subset: diff colors only, no subblocks) + cETC2_RGB, // ETC2 color block (basisu doesn't support ETC2 planar/T/H modes - just basic ETC1) + cETC2_RGBA, // ETC2 EAC alpha block followed by ETC2 color block + cETC2_ALPHA, // ETC2 EAC alpha block + cBC1, // DXT1 + cBC3, // DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block) + cBC4, // DXT5A + cBC5, // 3DC/DXN (two BC4/DXT5A blocks) + cBC6HSigned, // HDR + cBC6HUnsigned, // HDR + cBC7, + cASTC_LDR_4x4, // ASTC 4x4 LDR only + cASTC_HDR_4x4, // ASTC 4x4 HDR only (but may use LDR ASTC blocks internally, although our encoders don't do this) + cASTC_HDR_6x6, // ASTC 6x6 HDR only (but may use LDR ASTC blocks internally, although our encoders don't do this) + cPVRTC1_4_RGB, + cPVRTC1_4_RGBA, + cATC_RGB, + cATC_RGBA_INTERPOLATED_ALPHA, + cFXT1_RGB, + cPVRTC2_4_RGBA, + cETC2_R11_EAC, + cETC2_RG11_EAC, + cUASTC4x4, + cUASTC_HDR_4x4, + cBC1_NV, + cBC1_AMD, + + // Uncompressed/raw pixels + cRGBA32, + cRGB565, + cBGR565, + cRGBA4444, + cABGR4444, + cRGBA_HALF, + cRGB_HALF, + cRGB_9E5 + }; + + inline bool is_uncompressed_texture_format(texture_format fmt) + { + switch (fmt) + { + case texture_format::cRGBA32: + case texture_format::cRGB565: + case texture_format::cBGR565: + case texture_format::cRGBA4444: + case texture_format::cABGR4444: + case texture_format::cRGBA_HALF: + case texture_format::cRGB_HALF: + case texture_format::cRGB_9E5: + return true; + default: + break; + } + + return false; + } + + inline bool is_block_based_texture_format(texture_format fmt) + { + return !is_uncompressed_texture_format(fmt); + } + + // This is bytes per block for GPU formats, or bytes per texel for uncompressed formats. 
+ inline uint32_t get_bytes_per_block_or_pixel(texture_format fmt) + { + switch (fmt) + { + case texture_format::cETC1: + case texture_format::cETC1S: + case texture_format::cETC2_RGB: + case texture_format::cETC2_ALPHA: + case texture_format::cBC1: + case texture_format::cBC1_NV: + case texture_format::cBC1_AMD: + case texture_format::cBC4: + case texture_format::cPVRTC1_4_RGB: + case texture_format::cPVRTC1_4_RGBA: + case texture_format::cATC_RGB: + case texture_format::cPVRTC2_4_RGBA: + case texture_format::cETC2_R11_EAC: + return 8; + case texture_format::cRGBA32: + case texture_format::cRGB_9E5: + return sizeof(uint32_t); + case texture_format::cRGB_HALF: + return sizeof(uint16_t) * 3; + case texture_format::cRGBA_HALF: + return sizeof(uint16_t) * 4; + case texture_format::cRGB565: + case texture_format::cBGR565: + case texture_format::cRGBA4444: + case texture_format::cABGR4444: + return sizeof(uint16_t); + + default: + break; + } + + // Everything else is 16 bytes/block. + return 16; + } + + // This is qwords per block for GPU formats, or not valid for uncompressed formats. 
+ inline uint32_t get_qwords_per_block(texture_format fmt) + { + assert(is_block_based_texture_format(fmt)); + + const uint32_t bytes_per_block = get_bytes_per_block_or_pixel(fmt); + return bytes_per_block >> 3; + } + + inline uint32_t get_block_width(texture_format fmt) + { + assert(is_block_based_texture_format(fmt)); + + switch (fmt) + { + case texture_format::cFXT1_RGB: + return 8; + case texture_format::cASTC_HDR_6x6: + return 6; + default: + break; + } + return 4; + } + + inline uint32_t get_block_height(texture_format fmt) + { + assert(is_block_based_texture_format(fmt)); + + switch (fmt) + { + case texture_format::cASTC_HDR_6x6: + return 6; + default: + break; + } + return 4; + } + + inline bool is_hdr_texture_format(texture_format fmt) + { + switch (fmt) + { + case texture_format::cASTC_HDR_4x4: + case texture_format::cUASTC_HDR_4x4: + case texture_format::cASTC_HDR_6x6: + case texture_format::cBC6HSigned: + case texture_format::cBC6HUnsigned: + case texture_format::cRGBA_HALF: + case texture_format::cRGB_HALF: + case texture_format::cRGB_9E5: + return true; + default: + break; + } + + return false; + } + + inline bool is_ldr_texture_format(texture_format fmt) + { + return !is_hdr_texture_format(fmt); + } + +} // namespace basisu + diff --git a/thirdparty/basisu/transcoder/basisu_astc_hdr_core.h b/thirdparty/basisu/transcoder/basisu_astc_hdr_core.h new file mode 100644 index 000000000..f17271a71 --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_astc_hdr_core.h @@ -0,0 +1,206 @@ +// File: basisu_astc_hdr_core.h +#pragma once +#include "basisu_astc_helpers.h" + +namespace basist +{ + struct astc_blk + { + uint8_t m_vals[16]; + }; + + // ASTC_HDR_MAX_VAL is the maximum color component value that can be encoded. + // If the input has values higher than this, they need to be linearly scaled so all values are between [0,ASTC_HDR_MAX_VAL], and the linear scaling inverted in the shader. 
+ const float ASTC_HDR_MAX_VAL = 65216.0f; // actually MAX_QLOG12_VAL + + // Maximum usable QLOG encodings, and their floating point equivalent values, that don't result in NaN/Inf's. + const uint32_t MAX_QLOG7 = 123; + //const float MAX_QLOG7_VAL = 55296.0f; + + const uint32_t MAX_QLOG8 = 247; + //const float MAX_QLOG8_VAL = 60416.0f; + + const uint32_t MAX_QLOG9 = 495; + //const float MAX_QLOG9_VAL = 62976.0f; + + const uint32_t MAX_QLOG10 = 991; + //const float MAX_QLOG10_VAL = 64256.0f; + + const uint32_t MAX_QLOG11 = 1983; + //const float MAX_QLOG11_VAL = 64896.0f; + + const uint32_t MAX_QLOG12 = 3967; + //const float MAX_QLOG12_VAL = 65216.0f; + + const uint32_t MAX_QLOG16 = 63487; + const float MAX_QLOG16_VAL = 65504.0f; + + // TODO: Should be called something like "NUM_MODE11_ENDPOINT_VALUES" + const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4; + + // This is not lossless + inline half_float qlog_to_half(uint32_t qlog, uint32_t bits) + { + assert((bits >= 7U) && (bits <= 16U)); + assert(qlog < (1U << bits)); + + int C = qlog << (16 - bits); + return astc_helpers::qlog16_to_half(C); + } + + void astc_hdr_core_init(); + + void decode_mode7_to_qlog12_ise20( + const uint8_t* pEndpoints, + int e[2][3], + int* pScale); + + bool decode_mode7_to_qlog12( + const uint8_t* pEndpoints, + int e[2][3], + int* pScale, + uint32_t ise_endpoint_range); + + void decode_mode11_to_qlog12_ise20( + const uint8_t* pEndpoints, + int e[2][3]); + + bool decode_mode11_to_qlog12( + const uint8_t* pEndpoints, + int e[2][3], + uint32_t ise_endpoint_range); + + bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk); + bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk); + + bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk); + bool astc_hdr_transcode_to_bc6h(const 
astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk); + + namespace astc_6x6_hdr + { + const uint32_t MAX_ASTC_HDR_6X6_DIM = 32768; + const int32_t REUSE_MAX_BUFFER_ROWS = 5; // 1+-(-4), so we need to buffer 5 rows total + + struct block_mode_desc + { + bool m_dp; + uint32_t m_cem; + uint32_t m_num_partitions; + uint32_t m_grid_x; + uint32_t m_grid_y; + + // the coding ISE ranges (which may not be valid ASTC ranges for this configuration) + uint32_t m_endpoint_ise_range; + uint32_t m_weight_ise_range; + + // the physical/output ASTC decompression ISE ranges (i.e. what the decompressor must output) + uint32_t m_transcode_endpoint_ise_range; + uint32_t m_transcode_weight_ise_range; + + uint32_t m_flags; + int m_dp_channel; + }; + + // Lack of level flag indicates level 3+ + const uint32_t BASIST_HDR_6X6_LEVEL0 = 1; + const uint32_t BASIST_HDR_6X6_LEVEL1 = 2; + const uint32_t BASIST_HDR_6X6_LEVEL2 = 4; + + const uint32_t TOTAL_BLOCK_MODE_DECS = 75; + extern const block_mode_desc g_block_mode_descs[TOTAL_BLOCK_MODE_DECS]; + + void copy_weight_grid(bool dual_plane, uint32_t grid_x, uint32_t grid_y, const uint8_t* transcode_weights, astc_helpers::log_astc_block& decomp_blk); + + enum class encoding_type + { + cInvalid = -1, + cRun = 0, + cSolid = 1, + cReuse = 2, + cBlock = 3, + cTotal + }; + + const uint32_t REUSE_XY_DELTA_BITS = 5; + const uint32_t NUM_REUSE_XY_DELTAS = 1 << REUSE_XY_DELTA_BITS; + + struct reuse_xy_delta + { + int8_t m_x, m_y; + }; + + extern const reuse_xy_delta g_reuse_xy_deltas[NUM_REUSE_XY_DELTAS]; + + const uint32_t RUN_CODE = 0b000, RUN_CODE_LEN = 3; + const uint32_t SOLID_CODE = 0b100, SOLID_CODE_LEN = 3; + const uint32_t REUSE_CODE = 0b10, REUSE_CODE_LEN = 2; + const uint32_t BLOCK_CODE = 0b1, BLOCK_CODE_LEN = 1; + + enum class endpoint_mode + { + cInvalid = -1, + + cRaw = 0, + cUseLeft, + cUseUpper, + cUseLeftDelta, + cUseUpperDelta, + + cTotal + }; + + enum class block_mode + { + cInvalid = -1, + + cBMTotalModes = TOTAL_BLOCK_MODE_DECS 
+ }; + + const uint32_t NUM_ENDPOINT_DELTA_BITS = 5; + + const uint32_t NUM_UNIQUE_PARTITIONS2 = 521; + extern const uint32_t g_part2_unique_index_to_seed[NUM_UNIQUE_PARTITIONS2]; + + const uint32_t NUM_UNIQUE_PARTITIONS3 = 333; + extern const uint32_t g_part3_unique_index_to_seed[NUM_UNIQUE_PARTITIONS3]; + + bool decode_values(basist::bitwise_decoder& decoder, uint32_t total_values, uint32_t ise_range, uint8_t* pValues); + + void requantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_ise_vals, uint32_t to_ise_range); + + void requantize_ise_endpoints(uint32_t cem, uint32_t src_ise_endpoint_range, const uint8_t* pSrc_endpoints, uint32_t dst_ise_endpoint_range, uint8_t* pDst_endpoints); + + const uint32_t BC6H_NUM_DIFF_ENDPOINT_MODES_TO_TRY_2 = 2; + const uint32_t BC6H_NUM_DIFF_ENDPOINT_MODES_TO_TRY_4 = 4; + const uint32_t BC6H_NUM_DIFF_ENDPOINT_MODES_TO_TRY_9 = 9; + + struct fast_bc6h_params + { + uint32_t m_num_diff_endpoint_modes_to_try; + uint32_t m_max_2subset_pats_to_try; + + bool m_hq_ls; + bool m_brute_force_weight4_assignment; + + fast_bc6h_params() + { + init(); + } + + void init() + { + m_hq_ls = true; + m_num_diff_endpoint_modes_to_try = BC6H_NUM_DIFF_ENDPOINT_MODES_TO_TRY_2; + m_max_2subset_pats_to_try = 1; + m_brute_force_weight4_assignment = false; + } + }; + + void fast_encode_bc6h(const basist::half_float* pPixels, basist::bc6h_block* pBlock, const fast_bc6h_params ¶ms); + + bool decode_6x6_hdr(const uint8_t* pComp_data, uint32_t comp_data_size, basisu::vector2D& decoded_blocks, uint32_t& width, uint32_t& height); + + } // namespace astc_6x6_hdr + +} // namespace basist + diff --git a/thirdparty/basisu/transcoder/basisu_astc_helpers.h b/thirdparty/basisu/transcoder/basisu_astc_helpers.h new file mode 100644 index 000000000..428a964b5 --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_astc_helpers.h @@ -0,0 +1,3714 @@ +// basisu_astc_helpers.h +// Be sure to define ASTC_HELPERS_IMPLEMENTATION 
somewhere to get the implementation, otherwise you only get the header. +#ifndef BASISU_ASTC_HELPERS_HEADER +#define BASISU_ASTC_HELPERS_HEADER + +#include +#include +#include +#include + +namespace astc_helpers +{ + const uint32_t MAX_WEIGHT_VALUE = 64; // grid texel weights must range from [0,64] + const uint32_t MIN_GRID_DIM = 2; // the minimum dimension of a block's weight grid + const uint32_t MIN_BLOCK_DIM = 4, MAX_BLOCK_DIM = 12; // the valid block dimensions in texels + const uint32_t MAX_GRID_WEIGHTS = 64; // a block may have a maximum of 64 weight grid values + const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4; + + static const uint32_t NUM_ASTC_BLOCK_SIZES = 14; + extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2]; + + // The Color Endpoint Modes (CEM's) + enum cems + { + CEM_LDR_LUM_DIRECT = 0, + CEM_LDR_LUM_BASE_PLUS_OFS = 1, + CEM_HDR_LUM_LARGE_RANGE = 2, + CEM_HDR_LUM_SMALL_RANGE = 3, + CEM_LDR_LUM_ALPHA_DIRECT = 4, + CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS = 5, + CEM_LDR_RGB_BASE_SCALE = 6, + CEM_HDR_RGB_BASE_SCALE = 7, + CEM_LDR_RGB_DIRECT = 8, + CEM_LDR_RGB_BASE_PLUS_OFFSET = 9, + CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A = 10, + CEM_HDR_RGB = 11, + CEM_LDR_RGBA_DIRECT = 12, + CEM_LDR_RGBA_BASE_PLUS_OFFSET = 13, + CEM_HDR_RGB_LDR_ALPHA = 14, + CEM_HDR_RGB_HDR_ALPHA = 15 + }; + + // All Bounded Integer Sequence Coding (BISE or ISE) ranges. + // Weights: Ranges [0,11] are valid. + // Endpoints: Ranges [4,20] are valid. 
+ enum bise_levels + { + BISE_2_LEVELS = 0, + BISE_3_LEVELS = 1, + BISE_4_LEVELS = 2, + BISE_5_LEVELS = 3, + BISE_6_LEVELS = 4, + BISE_8_LEVELS = 5, + BISE_10_LEVELS = 6, + BISE_12_LEVELS = 7, + BISE_16_LEVELS = 8, + BISE_20_LEVELS = 9, + BISE_24_LEVELS = 10, + BISE_32_LEVELS = 11, + BISE_40_LEVELS = 12, + BISE_48_LEVELS = 13, + BISE_64_LEVELS = 14, + BISE_80_LEVELS = 15, + BISE_96_LEVELS = 16, + BISE_128_LEVELS = 17, + BISE_160_LEVELS = 18, + BISE_192_LEVELS = 19, + BISE_256_LEVELS = 20 + }; + + const uint32_t TOTAL_ISE_RANGES = 21; + + // Valid endpoint ISE ranges + const uint32_t FIRST_VALID_ENDPOINT_ISE_RANGE = BISE_6_LEVELS; // 4 + const uint32_t LAST_VALID_ENDPOINT_ISE_RANGE = BISE_256_LEVELS; // 20 + const uint32_t TOTAL_ENDPOINT_ISE_RANGES = LAST_VALID_ENDPOINT_ISE_RANGE - FIRST_VALID_ENDPOINT_ISE_RANGE + 1; + + // Valid weight ISE ranges + const uint32_t FIRST_VALID_WEIGHT_ISE_RANGE = BISE_2_LEVELS; // 0 + const uint32_t LAST_VALID_WEIGHT_ISE_RANGE = BISE_32_LEVELS; // 11 + const uint32_t TOTAL_WEIGHT_ISE_RANGES = LAST_VALID_WEIGHT_ISE_RANGE - FIRST_VALID_WEIGHT_ISE_RANGE + 1; + + // The ISE range table. + extern const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3]; // 0=bits (0 to 8), 1=trits (0 or 1), 2=quints (0 or 1) + + // Possible Color Component Select values, used in dual plane mode. + // The CCS component will be interpolated using the 2nd weight plane. 
// Color Component Select: which channel's weights come from the 2nd (odd) plane
// in dual plane mode. The name lists the 3 channels using plane 0, then the
// channel using plane 1 (e.g. CCS_GBA_R: G/B/A on plane 0, R on plane 1).
enum ccs
{
    CCS_GBA_R = 0,
    CCS_RBA_G = 1,
    CCS_RGA_B = 2,
    CCS_RGB_A = 3
};

// A physical (packed) 128-bit ASTC block.
struct astc_block
{
    uint32_t m_vals[4];
};

const uint32_t MAX_PARTITIONS = 4;				// Max # of partitions or subsets for single plane mode
const uint32_t MAX_DUAL_PLANE_PARTITIONS = 3;	// Max # of partitions or subsets for dual plane mode
const uint32_t NUM_PARTITION_PATTERNS = 1024;	// Total # of partition pattern seeds (10-bits)
const uint32_t MAX_ENDPOINTS = 18;				// Maximum # of endpoint values in a block

// A logical (unpacked) description of an ASTC block: its mode/configuration,
// ISE-encoded weights and ISE-encoded endpoints.
struct log_astc_block
{
    bool m_error_flag;

    bool m_solid_color_flag_ldr, m_solid_color_flag_hdr;

    uint8_t m_user_mode; // user defined value, not used in this module

    // Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr
    uint8_t m_grid_width, m_grid_height; // weight grid dimensions, not the dimension of the block

    bool m_dual_plane;

    uint8_t m_weight_ise_range;		// 0-11
    uint8_t m_endpoint_ise_range;	// 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking

    uint8_t m_color_component_selector; // 0-3, controls which channel uses the 2nd (odd) weights, only used in dual plane mode

    uint8_t m_num_partitions; // or the # of subsets, 1-4 (1-3 if dual plane mode)
    uint16_t m_partition_id;  // 10-bits, must be 0 if m_num_partitions==1

    uint8_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's

    // Weight storage overlaps solid color storage: a void-extent (solid color)
    // block has no weight grid.
    union
    {
        // ISE weight grid values. In dual plane mode, the order is p0,p1, p0,p1, etc.
        uint8_t m_weights[MAX_GRID_WEIGHTS];
        uint16_t m_solid_color[4];
    };

    // ISE endpoint values
    // Endpoint order examples:
    // 1 subset LA   : LL0 LH0 AL0 AH0
    // 1 subset RGB  : RL0 RH0 GL0 GH0 BL0 BH0
    // 1 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0
    // 2 subset LA   : LL0 LH0 AL0 AH0 LL1 LH1 AL1 AH1
    // 2 subset RGB  : RL0 RH0 GL0 GH0 BL0 BH0 RL1 RH1 GL1 GH1 BL1 BH1
    // 2 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 RL1 RH1 GL1 GH1 BL1 BH1 AL1 AH1
    uint8_t m_endpoints[MAX_ENDPOINTS];

    // Zeroes the whole logical block (valid: the struct is POD-like).
    void clear()
    {
        memset(this, 0, sizeof(*this));
    }
};

// Open interval
// Debug-only range check: asserts l <= v < h and passes v through unchanged.
inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }

// Extracts bit field [low, high] (inclusive) from val.
inline uint32_t get_bits(uint32_t val, int low, int high)
{
    const int num_bits = (high - low) + 1;
    assert((num_bits >= 1) && (num_bits <= 32));

    val >>= low;
    if (num_bits != 32)
        val &= ((1u << num_bits) - 1);

    return val;
}

// Returns the number of levels in the given ISE range.
inline uint32_t get_ise_levels(uint32_t ise_range)
{
    assert(ise_range < TOTAL_ISE_RANGES);
    return (1 + 2 * g_ise_range_table[ise_range][1] + 4 * g_ise_range_table[ise_range][2]) << g_ise_range_table[ise_range][0];
}

// Returns the total # of bits a BISE sequence of "count" values occupies in the given range.
inline int get_ise_sequence_bits(int count, int range)
{
    // See 18.22 Data Size Determination - note this will be <= the # of bits actually written by encode_bise(). (It's magic.)
    int total_bits = g_ise_range_table[range][0] * count;
    total_bits += (g_ise_range_table[range][1] * 8 * count + 4) / 5;
    total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3;
    return total_bits;
}

// ASTC weight interpolation: blends endpoints l/h with dequantized weight w in [0,64], with rounding.
inline uint32_t weight_interpolate(uint32_t l, uint32_t h, uint32_t w)
{
    assert(w <= MAX_WEIGHT_VALUE);
    return (l * (64 - w) + h * w + 32) >> 6;
}

// Packs num_vals ISE symbols into pDst (128-bit block) starting at bit_pos using BISE for the given range.
void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats = nullptr);

// Optional bit-usage accounting gathered while packing a block.
struct pack_stats
{
    uint32_t m_header_bits;
    uint32_t m_endpoint_bits;
    uint32_t m_weight_bits;

    inline pack_stats() { clear(); }
    inline void clear() { memset(this, 0, sizeof(*this)); }
};

// Packs a logical to physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions.
bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr, pack_stats *pStats = nullptr);

// Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component.
void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a, pack_stats *pStats = nullptr);

// Pack HDR void extent (16-bit values are FP16/half floats - no NaN/Inf's)
void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats = nullptr);

// These helpers are all quite slow, but are useful for table preparation.

// Dequantizes ISE encoded endpoint val to [0,255]
uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range); // ISE ranges 4-20 (FIRST/LAST_VALID_ENDPOINT_ISE_RANGE)

// Dequantizes ISE encoded weight val to [0,64]
uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range); // ISE ranges 0-11 (FIRST/LAST_VALID_WEIGHT_ISE_RANGE)

// Return the ISE symbol whose dequantized value is closest to v
// (v in [0,255] for endpoints, [0,64] for weights). Brute-force searches - slow.
uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range);
uint32_t find_nearest_bise_weight(int v, uint32_t ise_range);

// Builds quantization/dequantization lookup tables for one ISE range.
void create_quant_tables(
    uint8_t* pVal_to_ise,  // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
    uint8_t* pISE_to_val,  // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
    uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
    uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
    uint32_t ise_range,    // ise range, [4,20] for endpoints, [0,11] for weights
    bool weight_flag);     // false if block endpoints, true if weights

// True if the CEM is LDR.
bool is_cem_ldr(uint32_t mode);
inline bool is_cem_hdr(uint32_t mode) { return !is_cem_ldr(mode); }

// True if the passed in dimensions are a valid ASTC block size. There are 14 supported configs, from 4x4 (8bpp) to 12x12 (.89bpp).
bool is_valid_block_size(uint32_t w, uint32_t h);

bool block_has_any_hdr_cems(const log_astc_block& log_blk);
bool block_has_any_ldr_cems(const log_astc_block& log_blk);

// Returns the # of endpoint values for the given CEM.
+ inline uint32_t get_num_cem_values(uint32_t cem) { assert(cem <= 15); return 2 + 2 * (cem >> 2); } + + struct dequant_table + { + basisu::vector m_val_to_ise; // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65] + basisu::vector m_ISE_to_val; // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels] + basisu::vector m_ISE_to_rank; // returns the level rank index given an ISE symbol, [levels] + basisu::vector m_rank_to_ISE; // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels] + + void init(bool weight_flag, uint32_t num_levels, bool init_rank_tabs) + { + m_val_to_ise.resize(weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256); + m_ISE_to_val.resize(num_levels); + if (init_rank_tabs) + { + m_ISE_to_rank.resize(num_levels); + m_rank_to_ISE.resize(num_levels); + } + } + }; + + struct dequant_tables + { + dequant_table m_weights[TOTAL_WEIGHT_ISE_RANGES]; + dequant_table m_endpoints[TOTAL_ENDPOINT_ISE_RANGES]; + + const dequant_table& get_weight_tab(uint32_t range) const + { + assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE)); + return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE]; + } + + dequant_table& get_weight_tab(uint32_t range) + { + assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE)); + return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE]; + } + + const dequant_table& get_endpoint_tab(uint32_t range) const + { + assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE)); + return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE]; + } + + dequant_table& get_endpoint_tab(uint32_t range) + { + assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE)); + return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE]; + } + + void init(bool init_rank_tabs) + { + for (uint32_t range = FIRST_VALID_WEIGHT_ISE_RANGE; range <= LAST_VALID_WEIGHT_ISE_RANGE; range++) + { + 
const uint32_t num_levels = get_ise_levels(range); + dequant_table& tab = get_weight_tab(range); + + tab.init(true, num_levels, init_rank_tabs); + + create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, true); + } + + for (uint32_t range = FIRST_VALID_ENDPOINT_ISE_RANGE; range <= LAST_VALID_ENDPOINT_ISE_RANGE; range++) + { + const uint32_t num_levels = get_ise_levels(range); + dequant_table& tab = get_endpoint_tab(range); + + tab.init(false, num_levels, init_rank_tabs); + + create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, false); + } + } + }; + + extern dequant_tables g_dequant_tables; + void init_tables(bool init_rank_tabs); + + struct weighted_sample + { + uint8_t m_src_x; + uint8_t m_src_y; + uint8_t m_weights[2][2]; // [y][x], scaled by 16, round by adding 8 + }; + + void compute_upsample_weights( + int block_width, int block_height, + int weight_grid_width, int weight_grid_height, + weighted_sample* pWeights); // there will be block_width * block_height bilinear samples + + void upsample_weight_grid( + uint32_t bx, uint32_t by, // destination/to dimension + uint32_t wx, uint32_t wy, // source/from dimension + const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx] + uint8_t* pDst_weights); // [by][bx] + + // Procedurally returns the texel partition/subset index given the block coordinate and config. 
int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block);

// ASTC "blue contraction" endpoint transform (inputs r,g,b,a; outputs dr,dg,db,da).
void blue_contract(
    int r, int g, int b, int a,
    int& dr, int& dg, int& db, int& da);

void bit_transfer_signed(int& a, int& b);

// Decodes the raw ISE endpoint values pE for the given CEM into low/high endpoint pairs.
void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t* pE);

typedef uint16_t half_float;
half_float float_to_half(float val, bool toward_zero);
float half_to_float(half_float hval);

// Notes:
// qlog16_to_half(half_to_qlog16(half_val_as_int)) == half_val_as_int (is lossless)
// However, this is not lossless in the general sense.
inline half_float qlog16_to_half(int k)
{
    assert((k >= 0) && (k <= 0xFFFF));

    // Split the qlog16 value into exponent and mantissa fields.
    int E = (k & 0xF800) >> 11;
    int M = k & 0x7FF;

    // Piecewise-linear mantissa remap (see the ASTC HDR endpoint decode rules).
    int Mt;
    if (M < 512)
        Mt = 3 * M;
    else if (M >= 1536)
        Mt = 5 * M - 2048;
    else
        Mt = 4 * M - 512;

    return (half_float)((E << 10) + (Mt >> 3));
}

const int MAX_RGB9E5 = 0xff80;
void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b);
uint32_t pack_rgb9e5(float r, float g, float b);

// Output pixel format requested from decode_block().
enum decode_mode
{
    cDecodeModeSRGB8 = 0,	// returns uint8_t's, not valid on HDR blocks
    cDecodeModeLDR8 = 1,	// returns uint8_t's, not valid on HDR blocks
    cDecodeModeHDR16 = 2,	// returns uint16_t's (half floats), valid on all LDR/HDR blocks
    cDecodeModeRGB9E5 = 3	// returns uint32_t's, packed as RGB 9E5 (shared exponent), see https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
};

// Decodes logical block to output pixels.
+ // pPixels must point to either 32-bit pixel values (SRGB8/LDR8/9E5) or 64-bit pixel values (HDR16) + bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode); + + void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t *pBits128, uint32_t bit_ofs); + + // Unpack a physical ASTC encoded GPU texture block to a logical block description. + bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height); + +} // namespace astc_helpers + +#endif // BASISU_ASTC_HELPERS_HEADER + +//------------------------------------------------------------------ + +#ifdef BASISU_ASTC_HELPERS_IMPLEMENTATION + +namespace astc_helpers +{ + template inline T my_min(T a, T b) { return (a < b) ? a : b; } + template inline T my_max(T a, T b) { return (a > b) ? a : b; } + + const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] = { + { 4, 4 }, { 5, 4 }, { 5, 5 }, { 6, 5 }, + { 6, 6 }, { 8, 5 }, { 8, 6 }, { 10, 5 }, + { 10, 6 }, { 8, 8 }, { 10, 8 }, { 10, 10 }, + { 12, 10 }, { 12, 12 } + }; + + const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3] = + { + //b t q + //2 3 5 // rng ise_index notes + { 1, 0, 0 }, // 0..1 0 + { 0, 1, 0 }, // 0..2 1 + { 2, 0, 0 }, // 0..3 2 + { 0, 0, 1 }, // 0..4 3 + { 1, 1, 0 }, // 0..5 4 min endpoint ISE index + { 3, 0, 0 }, // 0..7 5 + { 1, 0, 1 }, // 0..9 6 + { 2, 1, 0 }, // 0..11 7 + { 4, 0, 0 }, // 0..15 8 + { 2, 0, 1 }, // 0..19 9 + { 3, 1, 0 }, // 0..23 10 + { 5, 0, 0 }, // 0..31 11 max weight ISE index + { 3, 0, 1 }, // 0..39 12 + { 4, 1, 0 }, // 0..47 13 + { 6, 0, 0 }, // 0..63 14 + { 4, 0, 1 }, // 0..79 15 + { 5, 1, 0 }, // 0..95 16 + { 7, 0, 0 }, // 0..127 17 + { 5, 0, 1 }, // 0..159 18 + { 6, 1, 0 }, // 0..191 19 + { 8, 0, 0 }, // 0..255 20 + }; + + static inline void astc_set_bits_1_to_9(uint32_t* pDst, uint32_t& bit_offset, uint32_t code, uint32_t codesize) + { + uint8_t* pBuf = reinterpret_cast(pDst); + + 
assert(codesize <= 9); + if (codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t val = code << byte_bit_offset; + + uint32_t index = bit_offset >> 3; + pBuf[index] |= (uint8_t)val; + + if (codesize > (8 - byte_bit_offset)) + pBuf[index + 1] |= (uint8_t)(val >> 8); + + bit_offset += codesize; + } + } + + static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high) + { + return (bits >> low) & ((1 << (high - low + 1)) - 1); + } + + // Writes bits to output in an endian safe way + static inline void astc_set_bits(uint32_t* pOutput, uint32_t& bit_pos, uint32_t value, uint32_t total_bits) + { + assert(total_bits <= 31); + assert(value < (1u << total_bits)); + + uint8_t* pBytes = reinterpret_cast(pOutput); + + while (total_bits) + { + const uint32_t bits_to_write = my_min(total_bits, 8 - (bit_pos & 7)); + + pBytes[bit_pos >> 3] |= static_cast(value << (bit_pos & 7)); + + bit_pos += bits_to_write; + total_bits -= bits_to_write; + value >>= bits_to_write; + } + } + + static const uint8_t g_astc_quint_encode[125] = + { + 0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57, + 58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104, + 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54, + 126, 127, 94, 95, 62, 39, 47, 55, 63, 7 /*31 - results in the same decode as 7*/ + }; + + // Encodes 3 values to output, usable for any range that uses quints and bits + static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t* pStats) + { + // First extract the quints and the bits from the 3 input values + int quints = 0, bits[3]; + const uint32_t bit_mask = (1 << n) - 1; + for (int i = 0; i < 
3; i++) + { + static const int s_muls[3] = { 1, 5, 25 }; + + const int t = pValues[i] >> n; + + quints += t * s_muls[i]; + bits[i] = pValues[i] & bit_mask; + } + + // Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits. + // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding + + assert(quints < 125); + const int T = g_astc_quint_encode[quints]; + + // Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96. + astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) | + (bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3); + + if (pStats) + *pStats += n * 3 + 7; + } + + static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39, + 43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154, + 131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202, + 208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224, + 225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159, + 191, 
223, 124, 125, 126 }; + + // Encodes 5 values to output, usable for any range that uses trits and bits + static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t *pStats) + { + // First extract the trits and the bits from the 5 input values + int trits = 0, bits[5]; + const uint32_t bit_mask = (1 << n) - 1; + for (int i = 0; i < 5; i++) + { + static const int s_muls[5] = { 1, 3, 9, 27, 81 }; + + const int t = pValues[i] >> n; + + trits += t * s_muls[i]; + bits[i] = pValues[i] & bit_mask; + } + + // Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits. + // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding + + assert(trits < 243); + const int T = g_astc_trit_encode[trits]; + + // Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94. + astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2); + + astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) | + (bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6); + + if (pStats) + *pStats += n * 5 + 8; + } + + // Packs values using ASTC's BISE to output buffer. 
+ void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats) + { + uint32_t temp[5] = { 0 }; + + const int num_bits = g_ise_range_table[range][0]; + + int group_size = 0; + if (g_ise_range_table[range][1]) + group_size = 5; + else if (g_ise_range_table[range][2]) + group_size = 3; + +#ifndef NDEBUG + const uint32_t num_levels = get_ise_levels(range); + for (int i = 0; i < num_vals; i++) + { + assert(pSrc_vals[i] < num_levels); + } +#endif + + if (group_size) + { + // Range has trits or quints - pack each group of 5 or 3 values + const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3); + + for (int group_index = 0; group_index < total_groups; group_index++) + { + uint8_t vals[5] = { 0 }; + + const int limit = my_min(group_size, num_vals - group_index * group_size); + for (int i = 0; i < limit; i++) + vals[i] = pSrc_vals[group_index * group_size + i]; + + // Note this always writes a group of 3 or 5 bits values, even for incomplete groups. So it can write more than needed. + // get_ise_sequence_bits() returns the # of bits that must be written for proper decoding. 
+ if (group_size == 5) + astc_encode_trits(temp, vals, bit_pos, num_bits, pStats); + else + astc_encode_quints(temp, vals, bit_pos, num_bits, pStats); + } + } + else + { + for (int i = 0; i < num_vals; i++) + astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits); + + if (pStats) + *pStats += num_vals * num_bits; + } + + pDst[0] |= temp[0]; pDst[1] |= temp[1]; + pDst[2] |= temp[2]; pDst[3] |= temp[3]; + } + + inline uint32_t rev_dword(uint32_t bits) + { + uint32_t v = (bits << 16) | (bits >> 16); + v = ((v & 0x00ff00ff) << 8) | ((v & 0xff00ff00) >> 8); v = ((v & 0x0f0f0f0f) << 4) | ((v & 0xf0f0f0f0) >> 4); + v = ((v & 0x33333333) << 2) | ((v & 0xcccccccc) >> 2); v = ((v & 0x55555555) << 1) | ((v & 0xaaaaaaaa) >> 1); + return v; + } + + static inline bool is_packable(int value, int num_bits) { assert((num_bits >= 1) && (num_bits < 31)); return (value >= 0) && (value < (1 << num_bits)); } + + static bool get_config_bits(const log_astc_block &log_block, uint32_t &config_bits) + { + config_bits = 0; + + const int W = log_block.m_grid_width, H = log_block.m_grid_height; + + const uint32_t P = log_block.m_weight_ise_range >= 6; // high precision + const uint32_t Dp_P = (log_block.m_dual_plane << 1) | P; // pack dual plane+high precision bits + + // See Tables 81-82 + // Compute p from weight range + uint32_t p = 2 + log_block.m_weight_ise_range - (P ? 6 : 0); + + // Rearrange p's bits to p0 p2 p1 + p = (p >> 1) + ((p & 1) << 2); + + // Try encoding each row of table 82. 
+ + // W+4 H+2 + if (is_packable(W - 4, 2) && is_packable(H - 2, 2)) + { + config_bits = (Dp_P << 9) | ((W - 4) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | (p & 3); + return true; + } + + // W+8 H+2 + if (is_packable(W - 8, 2) && is_packable(H - 2, 2)) + { + config_bits = (Dp_P << 9) | ((W - 8) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 4 | (p & 3); + return true; + } + + // W+2 H+8 + if (is_packable(W - 2, 2) && is_packable(H - 8, 2)) + { + config_bits = (Dp_P << 9) | ((H - 8) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 8 | (p & 3); + return true; + } + + // W+2 H+6 + if (is_packable(W - 2, 2) && is_packable(H - 6, 1)) + { + config_bits = (Dp_P << 9) | ((H - 6) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3); + return true; + } + + // W+2 H+2 + if (is_packable(W - 2, 1) && is_packable(H - 2, 2)) + { + config_bits = (Dp_P << 9) | ((W) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3); + return true; + } + + // 12 H+2 + if ((W == 12) && is_packable(H - 2, 2)) + { + config_bits = (Dp_P << 9) | ((H - 2) << 5) | (p << 2); + return true; + } + + // W+2 12 + if ((H == 12) && is_packable(W - 2, 2)) + { + config_bits = (Dp_P << 9) | (1 << 7) | ((W - 2) << 5) | (p << 2); + return true; + } + + // 6 10 + if ((W == 6) && (H == 10)) + { + config_bits = (Dp_P << 9) | (3 << 7) | (p << 2); + return true; + } + + // 10 6 + if ((W == 10) && (H == 6)) + { + config_bits = (Dp_P << 9) | (0b1101 << 5) | (p << 2); + return true; + } + + // W+6 H+6 (no dual plane or high prec) + if ((!Dp_P) && is_packable(W - 6, 2) && is_packable(H - 6, 2)) + { + config_bits = ((H - 6) << 9) | 256 | ((W - 6) << 5) | (p << 2); + return true; + } + + // Failed: unsupported weight grid dimensions or config. 
+ return false; + } + + bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range, pack_stats *pStats) + { + memset(&phys_block, 0, sizeof(phys_block)); + + if (pExpected_endpoint_range) + *pExpected_endpoint_range = -1; + + assert(!log_block.m_error_flag); + if (log_block.m_error_flag) + return false; + + if (log_block.m_solid_color_flag_ldr) + { + pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats); + return true; + } + else if (log_block.m_solid_color_flag_hdr) + { + pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats); + return true; + } + + if ((log_block.m_num_partitions < 1) || (log_block.m_num_partitions > MAX_PARTITIONS)) + return false; + + // Max usable weight range is 11 + if (log_block.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE) + return false; + + // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints + if ((log_block.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_block.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE)) + return false; + + if (log_block.m_color_component_selector > 3) + return false; + + // TODO: sanity check grid width/height vs. block's physical width/height + + uint32_t config_bits = 0; + if (!get_config_bits(log_block, config_bits)) + return false; + + uint32_t bit_pos = 0; + astc_set_bits(&phys_block.m_vals[0], bit_pos, config_bits, 11); + if (pStats) + pStats->m_header_bits += 11; + + const uint32_t total_grid_weights = (log_block.m_dual_plane ? 
2 : 1) * (log_block.m_grid_width * log_block.m_grid_height); + const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range); + + // 18.24 Illegal Encodings + if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96)) + return false; + + uint32_t total_extra_bits = 0; + + astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_num_partitions - 1, 2); + if (pStats) + pStats->m_header_bits += 2; + + if (log_block.m_num_partitions > 1) + { + if (log_block.m_partition_id >= NUM_PARTITION_PATTERNS) + return false; + + astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_partition_id, 10); + if (pStats) + pStats->m_header_bits += 10; + + uint32_t highest_cem = 0, lowest_cem = UINT32_MAX; + for (uint32_t j = 0; j < log_block.m_num_partitions; j++) + { + highest_cem = my_max(highest_cem, log_block.m_color_endpoint_modes[j]); + lowest_cem = my_min(lowest_cem, log_block.m_color_endpoint_modes[j]); + } + + if (highest_cem > 15) + return false; + + // Ensure CEM range is contiguous + if (((highest_cem >> 2) > (1 + (lowest_cem >> 2)))) + return false; + + // See tables 79/80 + uint32_t encoded_cem = log_block.m_color_endpoint_modes[0] << 2; + if (lowest_cem != highest_cem) + { + encoded_cem = my_min(3, 1 + (lowest_cem >> 2)); + + // See tables at 23.11 Color Endpoint Mode + for (uint32_t j = 0; j < log_block.m_num_partitions; j++) + { + const int M = log_block.m_color_endpoint_modes[j] & 3; + + const int C = (log_block.m_color_endpoint_modes[j] >> 2) - ((encoded_cem & 3) - 1); + if ((C & 1) != C) + return false; + + encoded_cem |= (C << (2 + j)) | (M << (2 + log_block.m_num_partitions + 2 * j)); + } + + total_extra_bits = 3 * log_block.m_num_partitions - 4; + + if ((total_weight_bits + total_extra_bits) > 128) + return false; + + uint32_t cem_bit_pos = 128 - total_weight_bits - total_extra_bits; + astc_set_bits(&phys_block.m_vals[0], cem_bit_pos, encoded_cem >> 
6, total_extra_bits); + if (pStats) + pStats->m_header_bits += total_extra_bits; + } + + astc_set_bits(&phys_block.m_vals[0], bit_pos, encoded_cem & 0x3f, 6); + if (pStats) + pStats->m_header_bits += 6; + } + else + { + if (log_block.m_partition_id) + return false; + if (log_block.m_color_endpoint_modes[0] > 15) + return false; + + astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_color_endpoint_modes[0], 4); + if (pStats) + pStats->m_header_bits += 4; + } + + if (log_block.m_dual_plane) + { + if (log_block.m_num_partitions > 3) + return false; + + total_extra_bits += 2; + + uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits; + astc_set_bits(&phys_block.m_vals[0], ccs_bit_pos, log_block.m_color_component_selector, 2); + if (pStats) + pStats->m_header_bits += 2; + } + + const uint32_t total_config_bits = bit_pos + total_extra_bits; + const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits; + if (num_remaining_bits < 0) + return false; + + uint32_t total_cem_vals = 0; + for (uint32_t j = 0; j < log_block.m_num_partitions; j++) + total_cem_vals += 2 + 2 * (log_block.m_color_endpoint_modes[j] >> 2); + + if (total_cem_vals > MAX_ENDPOINTS) + return false; + + int endpoint_ise_range = -1; + for (int k = 20; k > 0; k--) + { + int bits = get_ise_sequence_bits(total_cem_vals, k); + if (bits <= num_remaining_bits) + { + endpoint_ise_range = k; + break; + } + } + + // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints + if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE) + return false; + + // Ensure the caller utilized the right endpoint ISE range. 
+ if ((int)log_block.m_endpoint_ise_range != endpoint_ise_range) + { + if (pExpected_endpoint_range) + *pExpected_endpoint_range = endpoint_ise_range; + return false; + } + + if (pStats) + { + pStats->m_endpoint_bits += get_ise_sequence_bits(total_cem_vals, endpoint_ise_range); + pStats->m_weight_bits += get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range); + } + + // Pack endpoints forwards + encode_bise(&phys_block.m_vals[0], log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range); + + // Pack weights backwards + uint32_t weight_data[4] = { 0 }; + encode_bise(weight_data, log_block.m_weights, 0, total_grid_weights, log_block.m_weight_ise_range); + + for (uint32_t i = 0; i < 4; i++) + phys_block.m_vals[i] |= rev_dword(weight_data[3 - i]); + + return true; + } + + static inline uint32_t bit_replication_scale(uint32_t src, int num_src_bits, int num_dst_bits) + { + assert(num_src_bits <= num_dst_bits); + assert((src & ((1 << num_src_bits) - 1)) == src); + + uint32_t dst = 0; + for (int shift = num_dst_bits - num_src_bits; shift > -num_src_bits; shift -= num_src_bits) + dst |= (shift >= 0) ? 
(src << shift) : (src >> -shift); + + return dst; + } + + uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range) + { + assert((ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE)); + assert(val < get_ise_levels(ise_range)); + + uint32_t u = 0; + + switch (ise_range) + { + case 5: + { + u = bit_replication_scale(val, 3, 8); + break; + } + case 8: + { + u = bit_replication_scale(val, 4, 8); + break; + } + case 11: + { + u = bit_replication_scale(val, 5, 8); + break; + } + case 14: + { + u = bit_replication_scale(val, 6, 8); + break; + } + case 17: + { + u = bit_replication_scale(val, 7, 8); + break; + } + case 20: + { + u = val; + break; + } + case 4: + case 6: + case 7: + case 9: + case 10: + case 12: + case 13: + case 15: + case 16: + case 18: + case 19: + { + const uint32_t num_bits = g_ise_range_table[ise_range][0]; + const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits); + const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints); + + // compute Table 103 row index + const int range_index = (num_bits * 2 + (num_quints ? 1 : 0)) - 2; + + assert(range_index >= 0 && range_index <= 10); + + uint32_t bits = val & ((1 << num_bits) - 1); + uint32_t tval = val >> num_bits; + + assert(tval < (num_trits ? 3U : 5U)); + + uint32_t a = bits & 1; + uint32_t b = (bits >> 1) & 1; + uint32_t c = (bits >> 2) & 1; + uint32_t d = (bits >> 3) & 1; + uint32_t e = (bits >> 4) & 1; + uint32_t f = (bits >> 5) & 1; + + uint32_t A = a ? 
511 : 0; + uint32_t B = 0; + + switch (range_index) + { + case 2: + { + // 876543210 + // b000b0bb0 + B = (b << 1) | (b << 2) | (b << 4) | (b << 8); + break; + } + case 3: + { + // 876543210 + // b0000bb00 + B = (b << 2) | (b << 3) | (b << 8); + break; + } + case 4: + { + // 876543210 + // cb000cbcb + B = b | (c << 1) | (b << 2) | (c << 3) | (b << 7) | (c << 8); + break; + } + case 5: + { + // 876543210 + // cb0000cbc + B = c | (b << 1) | (c << 2) | (b << 7) | (c << 8); + break; + } + case 6: + { + // 876543210 + // dcb000dcb + B = b | (c << 1) | (d << 2) | (b << 6) | (c << 7) | (d << 8); + break; + } + case 7: + { + // 876543210 + // dcb0000dc + B = c | (d << 1) | (b << 6) | (c << 7) | (d << 8); + break; + } + case 8: + { + // 876543210 + // edcb000ed + B = d | (e << 1) | (b << 5) | (c << 6) | (d << 7) | (e << 8); + break; + } + case 9: + { + // 876543210 + // edcb0000e + B = e | (b << 5) | (c << 6) | (d << 7) | (e << 8); + break; + } + case 10: + { + // 876543210 + // fedcb000f + B = f | (b << 4) | (c << 5) | (d << 6) | (e << 7) | (f << 8); + break; + } + default: + break; + } + + static uint8_t C_vals[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 }; + uint32_t C = C_vals[range_index]; + uint32_t D = tval; + + u = D * C + B; + u = u ^ A; + u = (A & 0x80) | (u >> 2); + + break; + } + default: + { + assert(0); + break; + } + } + + return u; + } + + uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range) + { + assert(val < get_ise_levels(ise_range)); + + uint32_t u = 0; + switch (ise_range) + { + case 0: + { + u = val ? 
63 : 0; + break; + } + case 1: // 0-2 + { + const uint8_t s_tab_0_2[3] = { 0, 32, 63 }; + u = s_tab_0_2[val]; + break; + } + case 2: // 0-3 + { + u = bit_replication_scale(val, 2, 6); + break; + } + case 3: // 0-4 + { + const uint8_t s_tab_0_4[5] = { 0, 16, 32, 47, 63 }; + u = s_tab_0_4[val]; + break; + } + case 5: // 0-7 + { + u = bit_replication_scale(val, 3, 6); + break; + } + case 8: // 0-15 + { + u = bit_replication_scale(val, 4, 6); + break; + } + case 11: // 0-31 + { + u = bit_replication_scale(val, 5, 6); + break; + } + case 4: // 0-5 + case 6: // 0-9 + case 7: // 0-11 + case 9: // 0-19 + case 10: // 0-23 + { + const uint32_t num_bits = g_ise_range_table[ise_range][0]; + const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits); + const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints); + + // compute Table 103 row index + const int range_index = num_bits * 2 + (num_quints ? 1 : 0); + + // Extract bits and tris/quints from value + const uint32_t bits = val & ((1u << num_bits) - 1); + const uint32_t D = val >> num_bits; + + assert(D < (num_trits ? 3U : 5U)); + + // Now dequantize + // See Table 103. ASTC weight unquantization parameters + static const uint32_t C_table[5] = { 50, 28, 23, 13, 11 }; + + const uint32_t a = bits & 1, b = (bits >> 1) & 1, c = (bits >> 2) & 1; + + const uint32_t A = (a == 0) ? 0 : 0x7F; + + uint32_t B = 0; + if (range_index == 4) + B = ((b << 6) | (b << 2) | (b << 0)); + else if (range_index == 5) + B = ((b << 6) | (b << 1)); + else if (range_index == 6) + B = ((c << 6) | (b << 5) | (c << 1) | (b << 0)); + + const uint32_t C = C_table[range_index - 2]; + + u = D * C + B; + u = u ^ A; + u = (A & 0x20) | (u >> 2); + break; + } + default: + assert(0); + break; + } + + if (u > 32) + u++; + + return u; + } + + // Returns the nearest ISE symbol given a [0,255] endpoint value. 
+ uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range) + { + assert(ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE && ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE); + + const uint32_t total_levels = get_ise_levels(ise_range); + int best_e = INT_MAX, best_index = 0; + for (uint32_t i = 0; i < total_levels; i++) + { + const int qv = dequant_bise_endpoint(i, ise_range); + int e = labs(v - qv); + if (e < best_e) + { + best_e = e; + best_index = i; + if (!best_e) + break; + } + } + return best_index; + } + + // Returns the nearest ISE weight given a [0,64] endpoint value. + uint32_t find_nearest_bise_weight(int v, uint32_t ise_range) + { + assert(ise_range >= FIRST_VALID_WEIGHT_ISE_RANGE && ise_range <= LAST_VALID_WEIGHT_ISE_RANGE); + assert(v <= (int)MAX_WEIGHT_VALUE); + + const uint32_t total_levels = get_ise_levels(ise_range); + int best_e = INT_MAX, best_index = 0; + for (uint32_t i = 0; i < total_levels; i++) + { + const int qv = dequant_bise_weight(i, ise_range); + int e = labs(v - qv); + if (e < best_e) + { + best_e = e; + best_index = i; + if (!best_e) + break; + } + } + return best_index; + } + + void create_quant_tables( + uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65] + uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels] + uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels] + uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels] + uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights + bool weight_flag) // false if block endpoints, true if weights + { + const uint32_t num_dequant_vals = weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256; + + for (uint32_t i = 0; i < num_dequant_vals; i++) + { + uint32_t bise_index = weight_flag ? 
astc_helpers::find_nearest_bise_weight(i, ise_range) : astc_helpers::find_nearest_bise_endpoint(i, ise_range); + + if (pVal_to_ise) + pVal_to_ise[i] = (uint8_t)bise_index; + + if (pISE_to_val) + pISE_to_val[bise_index] = weight_flag ? (uint8_t)astc_helpers::dequant_bise_weight(bise_index, ise_range) : (uint8_t)astc_helpers::dequant_bise_endpoint(bise_index, ise_range); + } + + if (pISE_to_rank || pRank_to_ISE) + { + const uint32_t num_levels = get_ise_levels(ise_range); + + if (!g_ise_range_table[ise_range][1] && !g_ise_range_table[ise_range][2]) + { + // Only bits + for (uint32_t i = 0; i < num_levels; i++) + { + if (pISE_to_rank) + pISE_to_rank[i] = (uint8_t)i; + + if (pRank_to_ISE) + pRank_to_ISE[i] = (uint8_t)i; + } + } + else + { + // Range has trits or quints + uint32_t vals[256]; + for (uint32_t i = 0; i < num_levels; i++) + { + uint32_t v = weight_flag ? astc_helpers::dequant_bise_weight(i, ise_range) : astc_helpers::dequant_bise_endpoint(i, ise_range); + + // Low=ISE value + // High=dequantized value + vals[i] = (v << 16) | i; + } + + // Sorts by dequantized value + std::sort(vals, vals + num_levels); + + for (uint32_t rank = 0; rank < num_levels; rank++) + { + uint32_t ise_val = (uint8_t)vals[rank]; + + if (pISE_to_rank) + pISE_to_rank[ise_val] = (uint8_t)rank; + + if (pRank_to_ISE) + pRank_to_ISE[rank] = (uint8_t)ise_val; + } + } + } + } + + void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats) + { + uint8_t* pDst = (uint8_t*)&blk.m_vals[0]; + memset(pDst, 0xFF, 16); + + pDst[0] = 0b11111100; + pDst[1] = 0b11111101; + + pDst[8] = (uint8_t)rh; + pDst[9] = (uint8_t)(rh >> 8); + pDst[10] = (uint8_t)gh; + pDst[11] = (uint8_t)(gh >> 8); + pDst[12] = (uint8_t)bh; + pDst[13] = (uint8_t)(bh >> 8); + pDst[14] = (uint8_t)ah; + pDst[15] = (uint8_t)(ah >> 8); + + if (pStats) + pStats->m_header_bits += 128; + } + + // rh-ah are half-floats + void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t 
gh, uint16_t bh, uint16_t ah, pack_stats *pStats) + { + uint8_t* pDst = (uint8_t*)&blk.m_vals[0]; + memset(pDst, 0xFF, 16); + + pDst[0] = 0b11111100; + + pDst[8] = (uint8_t)rh; + pDst[9] = (uint8_t)(rh >> 8); + pDst[10] = (uint8_t)gh; + pDst[11] = (uint8_t)(gh >> 8); + pDst[12] = (uint8_t)bh; + pDst[13] = (uint8_t)(bh >> 8); + pDst[14] = (uint8_t)ah; + pDst[15] = (uint8_t)(ah >> 8); + + if (pStats) + pStats->m_header_bits += 128; + } + + bool is_cem_ldr(uint32_t mode) + { + switch (mode) + { + case CEM_LDR_LUM_DIRECT: + case CEM_LDR_LUM_BASE_PLUS_OFS: + case CEM_LDR_LUM_ALPHA_DIRECT: + case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS: + case CEM_LDR_RGB_BASE_SCALE: + case CEM_LDR_RGB_DIRECT: + case CEM_LDR_RGB_BASE_PLUS_OFFSET: + case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: + case CEM_LDR_RGBA_DIRECT: + case CEM_LDR_RGBA_BASE_PLUS_OFFSET: + return true; + default: + break; + } + + return false; + } + + bool is_valid_block_size(uint32_t w, uint32_t h) + { + assert((w >= MIN_BLOCK_DIM) && (w <= MAX_BLOCK_DIM)); + assert((h >= MIN_BLOCK_DIM) && (h <= MAX_BLOCK_DIM)); + +#define SIZECHK(x, y) if ((w == (x)) && (h == (y))) return true; + SIZECHK(4, 4); + SIZECHK(5, 4); + + SIZECHK(5, 5); + + SIZECHK(6, 5); + SIZECHK(6, 6); + + SIZECHK(8, 5); + SIZECHK(8, 6); + SIZECHK(10, 5); + SIZECHK(10, 6); + + SIZECHK(8, 8); + SIZECHK(10, 8); + SIZECHK(10, 10); + + SIZECHK(12, 10); + SIZECHK(12, 12); +#undef SIZECHK + + return false; + } + + bool block_has_any_hdr_cems(const log_astc_block& log_blk) + { + assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS)); + + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + if (is_cem_hdr(log_blk.m_color_endpoint_modes[i])) + return true; + + return false; + } + + bool block_has_any_ldr_cems(const log_astc_block& log_blk) + { + assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS)); + + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + if 
(!is_cem_hdr(log_blk.m_color_endpoint_modes[i])) + return true; + + return false; + } + + dequant_tables g_dequant_tables; + + void precompute_texel_partitions_4x4(); + void precompute_texel_partitions_6x6(); + + void init_tables(bool init_rank_tabs) + { + g_dequant_tables.init(init_rank_tabs); + + precompute_texel_partitions_4x4(); + precompute_texel_partitions_6x6(); + } + + void compute_upsample_weights( + int block_width, int block_height, + int weight_grid_width, int weight_grid_height, + weighted_sample* pWeights) // there will be block_width * block_height bilinear samples + { + const uint32_t scaleX = (1024 + block_width / 2) / (block_width - 1); + const uint32_t scaleY = (1024 + block_height / 2) / (block_height - 1); + + for (int texelY = 0; texelY < block_height; texelY++) + { + for (int texelX = 0; texelX < block_width; texelX++) + { + const uint32_t gX = (scaleX * texelX * (weight_grid_width - 1) + 32) >> 6; + const uint32_t gY = (scaleY * texelY * (weight_grid_height - 1) + 32) >> 6; + const uint32_t jX = gX >> 4; + const uint32_t jY = gY >> 4; + const uint32_t fX = gX & 0xf; + const uint32_t fY = gY & 0xf; + const uint32_t w11 = (fX * fY + 8) >> 4; + const uint32_t w10 = fY - w11; + const uint32_t w01 = fX - w11; + const uint32_t w00 = 16 - fX - fY + w11; + + weighted_sample& s = pWeights[texelX + texelY * block_width]; + s.m_src_x = (uint8_t)jX; + s.m_src_y = (uint8_t)jY; + s.m_weights[0][0] = (uint8_t)w00; + s.m_weights[0][1] = (uint8_t)w01; + s.m_weights[1][0] = (uint8_t)w10; + s.m_weights[1][1] = (uint8_t)w11; + } + } + } + + // Should be dequantized [0,64] weights + void upsample_weight_grid( + uint32_t bx, uint32_t by, // destination/to dimension + uint32_t wx, uint32_t wy, // source/from dimension + const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx] + uint8_t* pDst_weights) // [by][bx] + { + assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12)); + assert((wx >= 2) && (wy >= 2) && (wx <= bx) && 
(wy <= by)); + + const uint32_t total_src_weights = wx * wy; + const uint32_t total_dst_weights = bx * by; + + if (total_src_weights == total_dst_weights) + { + memcpy(pDst_weights, pSrc_weights, total_src_weights); + return; + } + + weighted_sample weights[12 * 12]; + compute_upsample_weights(bx, by, wx, wy, weights); + + const weighted_sample* pS = weights; + + for (uint32_t y = 0; y < by; y++) + { + for (uint32_t x = 0; x < bx; x++, ++pS) + { + const uint32_t w00 = pS->m_weights[0][0]; + const uint32_t w01 = pS->m_weights[0][1]; + const uint32_t w10 = pS->m_weights[1][0]; + const uint32_t w11 = pS->m_weights[1][1]; + + assert(w00 || w01 || w10 || w11); + + const uint32_t sx = pS->m_src_x, sy = pS->m_src_y; + + uint32_t total = 8; + if (w00) total += pSrc_weights[bounds_check(sx + sy * wx, 0U, total_src_weights)] * w00; + if (w01) total += pSrc_weights[bounds_check(sx + 1 + sy * wx, 0U, total_src_weights)] * w01; + if (w10) total += pSrc_weights[bounds_check(sx + (sy + 1) * wx, 0U, total_src_weights)] * w10; + if (w11) total += pSrc_weights[bounds_check(sx + 1 + (sy + 1) * wx, 0U, total_src_weights)] * w11; + + pDst_weights[x + y * bx] = (uint8_t)(total >> 4); + } + } + } + + inline uint32_t hash52(uint32_t v) + { + uint32_t p = v; + p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4; + p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3; + p ^= p << 6; p ^= p >> 17; + return p; + } + + // small_block = num_blk_pixels < 31 + int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block) + { + assert(zIn == 0); + + const uint32_t x = small_block ? xIn << 1 : xIn; + const uint32_t y = small_block ? yIn << 1 : yIn; + const uint32_t z = small_block ? 
zIn << 1 : zIn; + const uint32_t seed = seedIn + 1024 * (num_partitions - 1); + const uint32_t rnum = hash52(seed); + + uint8_t seed1 = (uint8_t)(rnum & 0xf); + uint8_t seed2 = (uint8_t)((rnum >> 4) & 0xf); + uint8_t seed3 = (uint8_t)((rnum >> 8) & 0xf); + uint8_t seed4 = (uint8_t)((rnum >> 12) & 0xf); + uint8_t seed5 = (uint8_t)((rnum >> 16) & 0xf); + uint8_t seed6 = (uint8_t)((rnum >> 20) & 0xf); + uint8_t seed7 = (uint8_t)((rnum >> 24) & 0xf); + uint8_t seed8 = (uint8_t)((rnum >> 28) & 0xf); + uint8_t seed9 = (uint8_t)((rnum >> 18) & 0xf); + uint8_t seed10 = (uint8_t)((rnum >> 22) & 0xf); + uint8_t seed11 = (uint8_t)((rnum >> 26) & 0xf); + uint8_t seed12 = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf); + + seed1 = (uint8_t)(seed1 * seed1); + seed2 = (uint8_t)(seed2 * seed2); + seed3 = (uint8_t)(seed3 * seed3); + seed4 = (uint8_t)(seed4 * seed4); + seed5 = (uint8_t)(seed5 * seed5); + seed6 = (uint8_t)(seed6 * seed6); + seed7 = (uint8_t)(seed7 * seed7); + seed8 = (uint8_t)(seed8 * seed8); + seed9 = (uint8_t)(seed9 * seed9); + seed10 = (uint8_t)(seed10 * seed10); + seed11 = (uint8_t)(seed11 * seed11); + seed12 = (uint8_t)(seed12 * seed12); + + const int shA = (seed & 2) != 0 ? 4 : 5; + const int shB = (num_partitions == 3) ? 6 : 5; + const int sh1 = (seed & 1) != 0 ? shA : shB; + const int sh2 = (seed & 1) != 0 ? shB : shA; + const int sh3 = (seed & 0x10) != 0 ? 
sh1 : sh2; + + seed1 = (uint8_t)(seed1 >> sh1); + seed2 = (uint8_t)(seed2 >> sh2); + seed3 = (uint8_t)(seed3 >> sh1); + seed4 = (uint8_t)(seed4 >> sh2); + seed5 = (uint8_t)(seed5 >> sh1); + seed6 = (uint8_t)(seed6 >> sh2); + seed7 = (uint8_t)(seed7 >> sh1); + seed8 = (uint8_t)(seed8 >> sh2); + seed9 = (uint8_t)(seed9 >> sh3); + seed10 = (uint8_t)(seed10 >> sh3); + seed11 = (uint8_t)(seed11 >> sh3); + seed12 = (uint8_t)(seed12 >> sh3); + + const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14)); + const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10)); + const int c = (num_partitions >= 3) ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0; + const int d = (num_partitions >= 4) ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0; + + return (a >= b && a >= c && a >= d) ? 0 + : (b >= c && b >= d) ? 1 + : (c >= d) ? 2 + : 3; + } + + // 4x4, 2 and 3 subsets + static uint32_t g_texel_partitions_4x4[1024][2]; + + // 6x6, 2 and 3 subsets (2 subsets low 4 bits, 3 subsets high 4 bits) + static uint8_t g_texel_partitions_6x6[1024][6 * 6]; + + void precompute_texel_partitions_4x4() + { + for (uint32_t p = 0; p < 1024; p++) + { + uint32_t v2 = 0, v3 = 0; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t shift = x * 2 + y * 8; + v2 |= (compute_texel_partition(p, x, y, 0, 2, true) << shift); + v3 |= (compute_texel_partition(p, x, y, 0, 3, true) << shift); + } + } + + g_texel_partitions_4x4[p][0] = v2; + g_texel_partitions_4x4[p][1] = v3; + } + } + + void precompute_texel_partitions_6x6() + { + for (uint32_t p = 0; p < 1024; p++) + { + for (uint32_t y = 0; y < 6; y++) + { + for (uint32_t x = 0; x < 6; x++) + { + const uint32_t p2 = compute_texel_partition(p, x, y, 0, 2, false); + const uint32_t p3 = compute_texel_partition(p, x, y, 0, 3, false); + + assert((p2 <= 1) && (p3 <= 2)); + g_texel_partitions_6x6[p][x + y * 6] = (uint8_t)((p3 << 4) | p2); + } + } + } + } + + static 
inline int get_precompute_texel_partitions_4x4(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions) + { + assert(g_texel_partitions_4x4[1][0]); + assert(seed < 1024); + assert((x <= 3) && (y <= 3)); + assert((num_partitions >= 2) && (num_partitions <= 3)); + + const uint32_t shift = x * 2 + y * 8; + return (g_texel_partitions_4x4[seed][num_partitions - 2] >> shift) & 3; + } + + static inline int get_precompute_texel_partitions_6x6(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions) + { + assert(g_texel_partitions_6x6[0][0]); + assert(seed < 1024); + assert((x <= 5) && (y <= 5)); + assert((num_partitions >= 2) && (num_partitions <= 3)); + + const uint32_t shift = (num_partitions == 3) ? 4 : 0; + return (g_texel_partitions_6x6[seed][x + y * 6] >> shift) & 3; + } + + void blue_contract( + int r, int g, int b, int a, + int &dr, int &dg, int &db, int &da) + { + dr = (r + b) >> 1; + dg = (g + b) >> 1; + db = b; + da = a; + } + + inline void bit_transfer_signed(int& a, int& b) + { + b >>= 1; + b |= (a & 0x80); + a >>= 1; + a &= 0x3F; + if ((a & 0x20) != 0) + a -= 0x40; + } + + static inline int clamp(int a, int l, int h) + { + if (a < l) + a = l; + else if (a > h) + a = h; + return a; + } + + static inline float clampf(float a, float l, float h) + { + if (a < l) + a = l; + else if (a > h) + a = h; + return a; + } + + inline int sign_extend(int src, int num_src_bits) + { + assert((num_src_bits >= 2) && (num_src_bits <= 31)); + + const bool negative = (src & (1 << (num_src_bits - 1))) != 0; + if (negative) + return src | ~((1 << num_src_bits) - 1); + else + return src & ((1 << num_src_bits) - 1); + } + + // endpoints is [4][2] + void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t *pE) + { + assert(cem_index <= CEM_HDR_RGB_HDR_ALPHA); + + int v0 = pE[0], v1 = pE[1]; + + int& e0_r = pEndpoints[0][0], &e0_g = pEndpoints[1][0], &e0_b = pEndpoints[2][0], &e0_a = pEndpoints[3][0]; + int& e1_r = pEndpoints[0][1], &e1_g = 
pEndpoints[1][1], &e1_b = pEndpoints[2][1], &e1_a = pEndpoints[3][1]; + + switch (cem_index) + { + case CEM_LDR_LUM_DIRECT: + { + e0_r = v0; e1_r = v1; + e0_g = v0; e1_g = v1; + e0_b = v0; e1_b = v1; + e0_a = 0xFF; e1_a = 0xFF; + break; + } + case CEM_LDR_LUM_BASE_PLUS_OFS: + { + int l0 = (v0 >> 2) | (v1 & 0xc0); + int l1 = l0 + (v1 & 0x3f); + + if (l1 > 0xFF) + l1 = 0xFF; + + e0_r = l0; e1_r = l1; + e0_g = l0; e1_g = l1; + e0_b = l0; e1_b = l1; + e0_a = 0xFF; e1_a = 0xFF; + break; + } + case CEM_LDR_LUM_ALPHA_DIRECT: + { + int v2 = pE[2], v3 = pE[3]; + + e0_r = v0; e1_r = v1; + e0_g = v0; e1_g = v1; + e0_b = v0; e1_b = v1; + e0_a = v2; e1_a = v3; + break; + } + case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS: + { + int v2 = pE[2], v3 = pE[3]; + + bit_transfer_signed(v1, v0); + bit_transfer_signed(v3, v2); + + e0_r = v0; e1_r = v0 + v1; + e0_g = v0; e1_g = v0 + v1; + e0_b = v0; e1_b = v0 + v1; + e0_a = v2; e1_a = v2 + v3; + + for (uint32_t c = 0; c < 4; c++) + { + pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255); + pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255); + } + + break; + } + case CEM_LDR_RGB_BASE_SCALE: + { + int v2 = pE[2], v3 = pE[3]; + + e0_r = (v0 * v3) >> 8; e1_r = v0; + e0_g = (v1 * v3) >> 8; e1_g = v1; + e0_b = (v2 * v3) >> 8; e1_b = v2; + e0_a = 0xFF; e1_a = 0xFF; + + break; + } + case CEM_LDR_RGB_DIRECT: + { + int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5]; + + if ((v1 + v3 + v5) >= (v0 + v2 + v4)) + { + e0_r = v0; e1_r = v1; + e0_g = v2; e1_g = v3; + e0_b = v4; e1_b = v5; + e0_a = 0xFF; e1_a = 0xFF; + } + else + { + blue_contract(v1, v3, v5, 0xFF, e0_r, e0_g, e0_b, e0_a); + blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a); + } + + break; + } + case CEM_LDR_RGB_BASE_PLUS_OFFSET: + { + int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5]; + + bit_transfer_signed(v1, v0); + bit_transfer_signed(v3, v2); + bit_transfer_signed(v5, v4); + + if ((v1 + v3 + v5) >= 0) + { + e0_r = v0; e1_r = v0 + v1; + e0_g = v2; e1_g = v2 + v3; + e0_b = v4; e1_b = v4 + 
v5; + e0_a = 0xFF; e1_a = 0xFF; + } + else + { + blue_contract(v0 + v1, v2 + v3, v4 + v5, 0xFF, e0_r, e0_g, e0_b, e0_a); + blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a); + } + + for (uint32_t c = 0; c < 4; c++) + { + pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255); + pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255); + } + + break; + } + case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: + { + int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5]; + + e0_r = (v0 * v3) >> 8; e1_r = v0; + e0_g = (v1 * v3) >> 8; e1_g = v1; + e0_b = (v2 * v3) >> 8; e1_b = v2; + e0_a = v4; e1_a = v5; + + break; + } + case CEM_LDR_RGBA_DIRECT: + { + int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7]; + + if ((v1 + v3 + v5) >= (v0 + v2 + v4)) + { + e0_r = v0; e1_r = v1; + e0_g = v2; e1_g = v3; + e0_b = v4; e1_b = v5; + e0_a = v6; e1_a = v7; + } + else + { + blue_contract(v1, v3, v5, v7, e0_r, e0_g, e0_b, e0_a); + blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a); + } + + break; + } + case CEM_LDR_RGBA_BASE_PLUS_OFFSET: + { + int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7]; + + bit_transfer_signed(v1, v0); + bit_transfer_signed(v3, v2); + bit_transfer_signed(v5, v4); + bit_transfer_signed(v7, v6); + + if ((v1 + v3 + v5) >= 0) + { + e0_r = v0; e1_r = v0 + v1; + e0_g = v2; e1_g = v2 + v3; + e0_b = v4; e1_b = v4 + v5; + e0_a = v6; e1_a = v6 + v7; + } + else + { + blue_contract(v0 + v1, v2 + v3, v4 + v5, v6 + v7, e0_r, e0_g, e0_b, e0_a); + blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a); + } + + for (uint32_t c = 0; c < 4; c++) + { + pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255); + pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255); + } + + break; + } + case CEM_HDR_LUM_LARGE_RANGE: + { + int y0, y1; + if (v1 >= v0) + { + y0 = (v0 << 4); + y1 = (v1 << 4); + } + else + { + y0 = (v1 << 4) + 8; + y1 = (v0 << 4) - 8; + } + + e0_r = y0; e1_r = y1; + e0_g = y0; e1_g = y1; + e0_b = y0; e1_b = y1; + e0_a = 0x780; e1_a = 0x780; + + 
break; + } + case CEM_HDR_LUM_SMALL_RANGE: + { + int y0, y1, d; + + if ((v0 & 0x80) != 0) + { + y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2); + d = (v1 & 0x1F) << 2; + } + else + { + y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1); + d = (v1 & 0x0F) << 1; + } + + y1 = y0 + d; + if (y1 > 0xFFF) + y1 = 0xFFF; + + e0_r = y0; e1_r = y1; + e0_g = y0; e1_g = y1; + e0_b = y0; e1_b = y1; + e0_a = 0x780; e1_a = 0x780; + + break; + } + case CEM_HDR_RGB_BASE_SCALE: + { + int v2 = pE[2], v3 = pE[3]; + + int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4); + + int majcomp, mode; + if ((modeval & 0xC) != 0xC) + { + majcomp = modeval >> 2; + mode = modeval & 3; + } + else if (modeval != 0xF) + { + majcomp = modeval & 3; + mode = 4; + } + else + { + majcomp = 0; + mode = 5; + } + + int red = v0 & 0x3f; + int green = v1 & 0x1f; + int blue = v2 & 0x1f; + int scale = v3 & 0x1f; + + int x0 = (v1 >> 6) & 1; + int x1 = (v1 >> 5) & 1; + int x2 = (v2 >> 6) & 1; + int x3 = (v2 >> 5) & 1; + int x4 = (v3 >> 7) & 1; + int x5 = (v3 >> 6) & 1; + int x6 = (v3 >> 5) & 1; + + int ohm = 1 << mode; + if (ohm & 0x30) green |= x0 << 6; + if (ohm & 0x3A) green |= x1 << 5; + if (ohm & 0x30) blue |= x2 << 6; + if (ohm & 0x3A) blue |= x3 << 5; + if (ohm & 0x3D) scale |= x6 << 5; + if (ohm & 0x2D) scale |= x5 << 6; + if (ohm & 0x04) scale |= x4 << 7; + if (ohm & 0x3B) red |= x4 << 6; + if (ohm & 0x04) red |= x3 << 6; + if (ohm & 0x10) red |= x5 << 7; + if (ohm & 0x0F) red |= x2 << 7; + if (ohm & 0x05) red |= x1 << 8; + if (ohm & 0x0A) red |= x0 << 8; + if (ohm & 0x05) red |= x0 << 9; + if (ohm & 0x02) red |= x6 << 9; + if (ohm & 0x01) red |= x3 << 10; + if (ohm & 0x02) red |= x5 << 10; + + static const int s_shamts[6] = { 1,1,2,3,4,5 }; + + const int shamt = s_shamts[mode]; + red <<= shamt; + green <<= shamt; + blue <<= shamt; + scale <<= shamt; + + if (mode != 5) + { + green = red - green; + blue = red - blue; + } + + if (majcomp == 1) + std::swap(red, green); + + if (majcomp == 2) + 
std::swap(red, blue); + + e1_r = clamp(red, 0, 0xFFF); + e1_g = clamp(green, 0, 0xFFF); + e1_b = clamp(blue, 0, 0xFFF); + e1_a = 0x780; + + e0_r = clamp(red - scale, 0, 0xFFF); + e0_g = clamp(green - scale, 0, 0xFFF); + e0_b = clamp(blue - scale, 0, 0xFFF); + e0_a = 0x780; + + break; + } + case CEM_HDR_RGB_HDR_ALPHA: + case CEM_HDR_RGB_LDR_ALPHA: + case CEM_HDR_RGB: + { + int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5]; + + int majcomp = ((v4 & 0x80) >> 7) | ((v5 & 0x80) >> 6); + + e0_a = 0x780; + e1_a = 0x780; + + if (majcomp == 3) + { + e0_r = v0 << 4; + e0_g = v2 << 4; + e0_b = (v4 & 0x7f) << 5; + + e1_r = v1 << 4; + e1_g = v3 << 4; + e1_b = (v5 & 0x7f) << 5; + } + else + { + int mode = ((v1 & 0x80) >> 7) | ((v2 & 0x80) >> 6) | ((v3 & 0x80) >> 5); + int va = v0 | ((v1 & 0x40) << 2); + int vb0 = v2 & 0x3f; + int vb1 = v3 & 0x3f; + int vc = v1 & 0x3f; + int vd0 = v4 & 0x7f; + int vd1 = v5 & 0x7f; + + static const int s_dbitstab[8] = { 7,6,7,6,5,6,5,6 }; + vd0 = sign_extend(vd0, s_dbitstab[mode]); + vd1 = sign_extend(vd1, s_dbitstab[mode]); + + int x0 = (v2 >> 6) & 1; + int x1 = (v3 >> 6) & 1; + int x2 = (v4 >> 6) & 1; + int x3 = (v5 >> 6) & 1; + int x4 = (v4 >> 5) & 1; + int x5 = (v5 >> 5) & 1; + + int ohm = 1 << mode; + if (ohm & 0xA4) va |= x0 << 9; + if (ohm & 0x08) va |= x2 << 9; + if (ohm & 0x50) va |= x4 << 9; + if (ohm & 0x50) va |= x5 << 10; + if (ohm & 0xA0) va |= x1 << 10; + if (ohm & 0xC0) va |= x2 << 11; + if (ohm & 0x04) vc |= x1 << 6; + if (ohm & 0xE8) vc |= x3 << 6; + if (ohm & 0x20) vc |= x2 << 7; + if (ohm & 0x5B) vb0 |= x0 << 6; + if (ohm & 0x5B) vb1 |= x1 << 6; + if (ohm & 0x12) vb0 |= x2 << 7; + if (ohm & 0x12) vb1 |= x3 << 7; + + int shamt = (mode >> 1) ^ 3; + va = (uint32_t)va << shamt; + vb0 = (uint32_t)vb0 << shamt; + vb1 = (uint32_t)vb1 << shamt; + vc = (uint32_t)vc << shamt; + vd0 = (uint32_t)vd0 << shamt; + vd1 = (uint32_t)vd1 << shamt; + + e1_r = clamp(va, 0, 0xFFF); + e1_g = clamp(va - vb0, 0, 0xFFF); + e1_b = clamp(va - vb1, 0, 
0xFFF); + + e0_r = clamp(va - vc, 0, 0xFFF); + e0_g = clamp(va - vb0 - vc - vd0, 0, 0xFFF); + e0_b = clamp(va - vb1 - vc - vd1, 0, 0xFFF); + + if (majcomp == 1) + { + std::swap(e0_r, e0_g); + std::swap(e1_r, e1_g); + } + else if (majcomp == 2) + { + std::swap(e0_r, e0_b); + std::swap(e1_r, e1_b); + } + } + + if (cem_index == CEM_HDR_RGB_LDR_ALPHA) + { + int v6 = pE[6], v7 = pE[7]; + + e0_a = v6; + e1_a = v7; + } + else if (cem_index == CEM_HDR_RGB_HDR_ALPHA) + { + int v6 = pE[6], v7 = pE[7]; + + // Extract mode bits + int mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2); + v6 &= 0x7F; + v7 &= 0x7F; + + if (mode == 3) + { + e0_a = v6 << 5; + e1_a = v7 << 5; + } + else + { + v6 |= (v7 << (mode + 1)) & 0x780; + v7 &= (0x3F >> mode); + v7 ^= (0x20 >> mode); + v7 -= (0x20 >> mode); + v6 <<= (4 - mode); + v7 <<= (4 - mode); + + v7 += v6; + v7 = clamp(v7, 0, 0xFFF); + e0_a = v6; + e1_a = v7; + } + } + + break; + } + default: + { + assert(0); + for (uint32_t c = 0; c < 4; c++) + { + pEndpoints[c][0] = 0; + pEndpoints[c][1] = 0; + } + break; + } + } + } + + static inline bool is_half_inf_or_nan(half_float v) + { + return get_bits(v, 10, 14) == 31; + } + + // This float->half conversion matches how "F32TO16" works on Intel GPU's. 
+ half_float float_to_half(float val, bool toward_zero) + { + union { float f; int32_t i; uint32_t u; } fi = { val }; + const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1; + int s = flt_s, e = 0, m = 0; + + // inf/NaN + if (flt_e == 0xff) + { + e = 31; + if (flt_m != 0) // NaN + m = 1; + } + // not zero or denormal + else if (flt_e != 0) + { + int new_exp = flt_e - 127; + if (new_exp > 15) + e = 31; + else if (new_exp < -14) + { + if (toward_zero) + m = (int)truncf((1 << 24) * fabsf(fi.f)); + else + m = lrintf((1 << 24) * fabsf(fi.f)); + } + else + { + e = new_exp + 15; + if (toward_zero) + m = (int)truncf((float)flt_m * (1.0f / (float)(1 << 13))); + else + m = lrintf((float)flt_m * (1.0f / (float)(1 << 13))); + } + } + + assert((0 <= m) && (m <= 1024)); + if (m == 1024) + { + e++; + m = 0; + } + + assert((s >= 0) && (s <= 1)); + assert((e >= 0) && (e <= 31)); + assert((m >= 0) && (m <= 1023)); + + half_float result = (half_float)((s << 15) | (e << 10) | m); + return result; + } + + float half_to_float(half_float hval) + { + union { float f; uint32_t u; } x = { 0 }; + + uint32_t s = ((uint32_t)hval >> 15) & 1; + uint32_t e = ((uint32_t)hval >> 10) & 0x1F; + uint32_t m = (uint32_t)hval & 0x3FF; + + if (!e) + { + if (!m) + { + // +- 0 + x.u = s << 31; + return x.f; + } + else + { + // denormalized + while (!(m & 0x00000400)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x00000400; + } + } + else if (e == 31) + { + if (m == 0) + { + // +/- INF + x.u = (s << 31) | 0x7f800000; + return x.f; + } + else + { + // +/- NaN + x.u = (s << 31) | 0x7f800000 | (m << 13); + return x.f; + } + } + + e = e + (127 - 15); + m = m << 13; + + assert(s <= 1); + assert(m <= 0x7FFFFF); + assert(e <= 255); + + x.u = m | (e << 23) | (s << 31); + return x.f; + } + + // See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt + const int RGB9E5_EXPONENT_BITS = 5, RGB9E5_MANTISSA_BITS = 9, RGB9E5_EXP_BIAS = 15, 
RGB9E5_MAX_VALID_BIASED_EXP = 31; + const int MAX_RGB9E5_EXP = (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS); + const int RGB9E5_MANTISSA_VALUES = (1 << RGB9E5_MANTISSA_BITS); + const int MAX_RGB9E5_MANTISSA = (RGB9E5_MANTISSA_VALUES - 1); + //const int MAX_RGB9E5 = (int)(((float)MAX_RGB9E5_MANTISSA) / RGB9E5_MANTISSA_VALUES * (1 << MAX_RGB9E5_EXP)); + const int EPSILON_RGB9E5 = (int)((1.0f / (float)RGB9E5_MANTISSA_VALUES) / (float)(1 << RGB9E5_EXP_BIAS)); + + void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b) + { + int x = packed & 511; + int y = (packed >> 9) & 511; + int z = (packed >> 18) & 511; + int w = (packed >> 27) & 31; + + const float scale = powf(2.0f, static_cast<float>(w - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS)); + + r = x * scale; + g = y * scale; + b = z * scale; + } + + // floor_log2 is not correct for the denorm and zero values, but we are going to do a max of this value with the minimum rgb9e5 exponent that will hide these problem cases. + static inline int floor_log2(float x) + { + union float754 + { + unsigned int raw; + float value; + }; + + float754 f; + f.value = x; + // Extract float exponent + return ((f.raw >> 23) & 0xFF) - 127; + } + + static inline int maximumi(int a, int b) { return (a > b) ? a : b; } + static inline float maximumf(float a, float b) { return (a > b) ?
a : b; } + + uint32_t pack_rgb9e5(float r, float g, float b) + { + r = clampf(r, 0.0f, MAX_RGB9E5); + g = clampf(g, 0.0f, MAX_RGB9E5); + b = clampf(b, 0.0f, MAX_RGB9E5); + + float maxrgb = maximumf(maximumf(r, g), b); + int exp_shared = maximumi(-RGB9E5_EXP_BIAS - 1, floor_log2(maxrgb)) + 1 + RGB9E5_EXP_BIAS; + assert((exp_shared >= 0) && (exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP)); + + float denom = powf(2.0f, (float)(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS)); + + int maxm = (int)floorf((maxrgb / denom) + 0.5f); + if (maxm == (MAX_RGB9E5_MANTISSA + 1)) + { + denom *= 2; + exp_shared += 1; + assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP); + } + else + { + assert(maxm <= MAX_RGB9E5_MANTISSA); + } + + int rm = (int)floorf((r / denom) + 0.5f); + int gm = (int)floorf((g / denom) + 0.5f); + int bm = (int)floorf((b / denom) + 0.5f); + + assert((rm >= 0) && (rm <= MAX_RGB9E5_MANTISSA)); + assert((gm >= 0) && (gm <= MAX_RGB9E5_MANTISSA)); + assert((bm >= 0) && (bm <= MAX_RGB9E5_MANTISSA)); + + return rm | (gm << 9) | (bm << 18) | (exp_shared << 27); + } + + static inline int clz17(uint32_t x) + { + assert(x <= 0x1FFFF); + x &= 0x1FFFF; + + if (!x) + return 17; + + uint32_t n = 0; + while ((x & 0x10000) == 0) + { + x <<= 1u; + n++; + } + + return n; + } + + static inline uint32_t pack_rgb9e5_ldr_astc(int Cr, int Cg, int Cb) + { + int lz = clz17(Cr | Cg | Cb | 1); + if (Cr == 65535) { Cr = 65536; lz = 0; } + if (Cg == 65535) { Cg = 65536; lz = 0; } + if (Cb == 65535) { Cb = 65536; lz = 0; } + Cr <<= lz; Cg <<= lz; Cb <<= lz; + Cr = (Cr >> 8) & 0x1FF; + Cg = (Cg >> 8) & 0x1FF; + Cb = (Cb >> 8) & 0x1FF; + uint32_t exponent = 16 - lz; + uint32_t texel = (exponent << 27) | (Cb << 18) | (Cg << 9) | Cr; + return texel; + } + + static inline uint32_t pack_rgb9e5_hdr_astc(int Cr, int Cg, int Cb) + { + if (Cr > 0x7c00) Cr = 0; else if (Cr == 0x7c00) Cr = 0x7bff; + if (Cg > 0x7c00) Cg = 0; else if (Cg == 0x7c00) Cg = 0x7bff; + if (Cb > 0x7c00) Cb = 0; else if (Cb == 
0x7c00) Cb = 0x7bff; + int Re = (Cr >> 10) & 0x1F; + int Ge = (Cg >> 10) & 0x1F; + int Be = (Cb >> 10) & 0x1F; + int Rex = (Re == 0) ? 1 : Re; + int Gex = (Ge == 0) ? 1 : Ge; + int Bex = (Be == 0) ? 1 : Be; + int Xm = ((Cr | Cg | Cb) & 0x200) >> 9; + int Xe = Re | Ge | Be; + uint32_t rshift, gshift, bshift, expo; + + if (Xe == 0) + { + expo = rshift = gshift = bshift = Xm; + } + else if (Re >= Ge && Re >= Be) + { + expo = Rex + 1; + rshift = 2; + gshift = Rex - Gex + 2; + bshift = Rex - Bex + 2; + } + else if (Ge >= Be) + { + expo = Gex + 1; + rshift = Gex - Rex + 2; + gshift = 2; + bshift = Gex - Bex + 2; + } + else + { + expo = Bex + 1; + rshift = Bex - Rex + 2; + gshift = Bex - Gex + 2; + bshift = 2; + } + + int Rm = (Cr & 0x3FF) | (Re == 0 ? 0 : 0x400); + int Gm = (Cg & 0x3FF) | (Ge == 0 ? 0 : 0x400); + int Bm = (Cb & 0x3FF) | (Be == 0 ? 0 : 0x400); + Rm = (Rm >> rshift) & 0x1FF; + Gm = (Gm >> gshift) & 0x1FF; + Bm = (Bm >> bshift) & 0x1FF; + + uint32_t texel = (expo << 27) | (Bm << 18) | (Gm << 9) | (Rm << 0); + return texel; + } + + // Important: pPixels is either 32-bit/texel or 64-bit/texel. 
+ bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode) + { + assert(is_valid_block_size(blk_width, blk_height)); + + assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()); + if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()) + return false; + + const uint32_t num_blk_pixels = blk_width * blk_height; + + // Write block error color + if (dec_mode == cDecodeModeHDR16) + { + // NaN's + memset(pPixels, 0xFF, num_blk_pixels * sizeof(half_float) * 4); + } + else if (dec_mode == cDecodeModeRGB9E5) + { + const uint32_t purple_9e5 = pack_rgb9e5(1.0f, 0.0f, 1.0f); + + for (uint32_t i = 0; i < num_blk_pixels; i++) + ((uint32_t*)pPixels)[i] = purple_9e5; + } + else + { + for (uint32_t i = 0; i < num_blk_pixels; i++) + ((uint32_t*)pPixels)[i] = 0xFFFF00FF; + } + + if (log_blk.m_error_flag) + { + // Should this return false? It's not an invalid logical block config, though. + return false; + } + + // Handle solid color blocks + if (log_blk.m_solid_color_flag_ldr) + { + // LDR solid block + if (dec_mode == cDecodeModeHDR16) + { + // Convert LDR pixels to half-float + half_float h[4]; + for (uint32_t c = 0; c < 4; c++) + h[c] = (log_blk.m_solid_color[c] == 0xFFFF) ? 0x3C00 : float_to_half((float)log_blk.m_solid_color[c] * (1.0f / 65536.0f), true); + + for (uint32_t i = 0; i < num_blk_pixels; i++) + memcpy((uint16_t*)pPixels + i * 4, h, sizeof(half_float) * 4); + } + else if (dec_mode == cDecodeModeRGB9E5) + { + float r = (log_blk.m_solid_color[0] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[0] * (1.0f / 65536.0f)); + float g = (log_blk.m_solid_color[1] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[1] * (1.0f / 65536.0f)); + float b = (log_blk.m_solid_color[2] == 0xFFFF) ? 
1.0f : ((float)log_blk.m_solid_color[2] * (1.0f / 65536.0f)); + + const uint32_t packed = pack_rgb9e5(r, g, b); + + for (uint32_t i = 0; i < num_blk_pixels; i++) + ((uint32_t*)pPixels)[i] = packed; + } + else + { + // Convert LDR pixels to 8-bits + for (uint32_t i = 0; i < num_blk_pixels; i++) + for (uint32_t c = 0; c < 4; c++) + ((uint8_t*)pPixels)[i * 4 + c] = (log_blk.m_solid_color[c] >> 8); + } + + return true; + } + else if (log_blk.m_solid_color_flag_hdr) + { + // HDR solid block, decode mode must be half-float or RGB9E5 + if (dec_mode == cDecodeModeHDR16) + { + for (uint32_t i = 0; i < num_blk_pixels; i++) + memcpy((uint16_t*)pPixels + i * 4, log_blk.m_solid_color, sizeof(half_float) * 4); + } + else if (dec_mode == cDecodeModeRGB9E5) + { + float r = half_to_float(log_blk.m_solid_color[0]); + float g = half_to_float(log_blk.m_solid_color[1]); + float b = half_to_float(log_blk.m_solid_color[2]); + + const uint32_t packed = pack_rgb9e5(r, g, b); + + for (uint32_t i = 0; i < num_blk_pixels; i++) + ((uint32_t*)pPixels)[i] = packed; + } + else + { + return false; + } + + return true; + } + + // Sanity check block's config + if ((log_blk.m_grid_width < 2) || (log_blk.m_grid_height < 2)) + return false; + if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height)) + return false; + + if ((log_blk.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_blk.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE)) + return false; + if ((log_blk.m_weight_ise_range < FIRST_VALID_WEIGHT_ISE_RANGE) || (log_blk.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE)) + return false; + if ((log_blk.m_num_partitions < 1) || (log_blk.m_num_partitions > MAX_PARTITIONS)) + return false; + if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > MAX_DUAL_PLANE_PARTITIONS)) + return false; + if (log_blk.m_partition_id >= NUM_PARTITION_PATTERNS) + return false; + if ((log_blk.m_num_partitions == 1) && (log_blk.m_partition_id > 0)) + return false; + if 
(log_blk.m_color_component_selector > 3) + return false; + + const uint32_t total_endpoint_levels = get_ise_levels(log_blk.m_endpoint_ise_range); + const uint32_t total_weight_levels = get_ise_levels(log_blk.m_weight_ise_range); + + bool is_ldr_endpoints[MAX_PARTITIONS]; + + // Check CEM's + uint32_t total_cem_vals = 0; + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + { + if (log_blk.m_color_endpoint_modes[i] > 15) + return false; + + total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[i]); + + is_ldr_endpoints[i] = is_cem_ldr(log_blk.m_color_endpoint_modes[i]); + } + + if (total_cem_vals > MAX_ENDPOINTS) + return false; + + const dequant_table& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range); + const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data(); + + // Dequantized endpoints to [0,255] + uint8_t dequantized_endpoints[MAX_ENDPOINTS]; + for (uint32_t i = 0; i < total_cem_vals; i++) + { + if (log_blk.m_endpoints[i] >= total_endpoint_levels) + return false; + dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]]; + } + + // Dequantize weights to [0,64] + uint8_t dequantized_weights[2][12 * 12]; + + const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range); + const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data(); + + const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height; + for (uint32_t i = 0; i < total_weight_vals; i++) + { + if (log_blk.m_weights[i] >= total_weight_levels) + return false; + + const uint32_t plane_index = log_blk.m_dual_plane ? (i & 1) : 0; + const uint32_t grid_index = log_blk.m_dual_plane ? (i >> 1) : i; + + dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]]; + } + + // Upsample weight grid. 
[0,64] weights + uint8_t upsampled_weights[2][12 * 12]; + + upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[0][0], &upsampled_weights[0][0]); + if (log_blk.m_dual_plane) + upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[1][0], &upsampled_weights[1][0]); + + // Decode CEM's + int endpoints[4][4][2]; // [subset][comp][l/h] + + uint32_t endpoint_val_index = 0; + for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++) + { + const uint32_t cem_index = log_blk.m_color_endpoint_modes[subset]; + + decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]); + + endpoint_val_index += get_num_cem_values(cem_index); + } + + // Decode texels + const bool small_block = num_blk_pixels < 31; + const bool use_precomputed_texel_partitions_4x4 = (blk_width == 4) && (blk_height == 4) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3); + const bool use_precomputed_texel_partitions_6x6 = (blk_width == 6) && (blk_height == 6) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3); + const uint32_t ccs = log_blk.m_dual_plane ? 
log_blk.m_color_component_selector : UINT32_MAX; + + bool success = true; + + if (dec_mode == cDecodeModeRGB9E5) + { + // returns uint32_t's + for (uint32_t y = 0; y < blk_height; y++) + { + for (uint32_t x = 0; x < blk_width; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + uint32_t subset = 0; + if (log_blk.m_num_partitions > 1) + { + if (use_precomputed_texel_partitions_4x4) + subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + else if (use_precomputed_texel_partitions_6x6) + subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + else + subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block); + } + + int comp[3]; + + for (uint32_t c = 0; c < 3; c++) + { + const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index]; + + if (is_ldr_endpoints[subset]) + { + assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF)); + assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF)); + + int le = endpoints[subset][c][0]; + int he = endpoints[subset][c][1]; + + le = (le << 8) | le; + he = (he << 8) | he; + + int k = weight_interpolate(le, he, w); + assert((k >= 0) && (k <= 0xFFFF)); + + comp[c] = k; // 1.0 + } + else + { + assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF)); + assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF)); + + int le = endpoints[subset][c][0] << 4; + int he = endpoints[subset][c][1] << 4; + + int qlog16 = weight_interpolate(le, he, w); + + comp[c] = qlog16_to_half(qlog16); + + if (is_half_inf_or_nan((half_float)comp[c])) + comp[c] = 0x7BFF; + } + + } // c + + uint32_t packed; + if (is_ldr_endpoints[subset]) + packed = pack_rgb9e5_ldr_astc(comp[0], comp[1], comp[2]); + else + packed = pack_rgb9e5_hdr_astc(comp[0], comp[1], comp[2]); + + ((uint32_t*)pPixels)[pixel_index] = packed; + + } // x + } // 
y + } + else if (dec_mode == cDecodeModeHDR16) + { + // Note: must round towards zero when converting float to half for ASTC (18.19 Weight Application) + + // returns half floats + for (uint32_t y = 0; y < blk_height; y++) + { + for (uint32_t x = 0; x < blk_width; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + uint32_t subset = 0; + if (log_blk.m_num_partitions > 1) + { + if (use_precomputed_texel_partitions_4x4) + subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + else if (use_precomputed_texel_partitions_6x6) + subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + else + subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block); + } + + for (uint32_t c = 0; c < 4; c++) + { + const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index]; + + half_float o; + + if ( (is_ldr_endpoints[subset]) || + ((log_blk.m_color_endpoint_modes[subset] == CEM_HDR_RGB_LDR_ALPHA) && (c == 3)) ) + { + assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF)); + assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF)); + + int le = endpoints[subset][c][0]; + int he = endpoints[subset][c][1]; + + le = (le << 8) | le; + he = (he << 8) | he; + + int k = weight_interpolate(le, he, w); + assert((k >= 0) && (k <= 0xFFFF)); + + if (k == 0xFFFF) + o = 0x3C00; // 1.0 + else + o = float_to_half((float)k * (1.0f / 65536.0f), true); + } + else + { + assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF)); + assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF)); + + int le = endpoints[subset][c][0] << 4; + int he = endpoints[subset][c][1] << 4; + + int qlog16 = weight_interpolate(le, he, w); + + o = qlog16_to_half(qlog16); + + if (is_half_inf_or_nan(o)) + o = 0x7BFF; + } + + ((half_float*)pPixels)[pixel_index * 4 + c] = o; + } + + } // x + } // y 
+ } + else + { + // returns uint8_t's + for (uint32_t y = 0; y < blk_height; y++) + { + for (uint32_t x = 0; x < blk_width; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + uint32_t subset = 0; + if (log_blk.m_num_partitions > 1) + { + if (use_precomputed_texel_partitions_4x4) + subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + else if (use_precomputed_texel_partitions_6x6) + subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + else + subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block); + } + + if (!is_ldr_endpoints[subset]) + { + ((uint32_t*)pPixels)[pixel_index * 4] = 0xFFFF00FF; + success = false; + } + else + { + for (uint32_t c = 0; c < 4; c++) + { + const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index]; + + int le = endpoints[subset][c][0]; + int he = endpoints[subset][c][1]; + + // FIXME: the spec is apparently wrong? this matches ARM's and Google's decoder + //if ((dec_mode == cDecodeModeSRGB8) && (c <= 2)) + // See https://github.com/ARM-software/astc-encoder/issues/447 + if (dec_mode == cDecodeModeSRGB8) + { + le = (le << 8) | 0x80; + he = (he << 8) | 0x80; + } + else + { + le = (le << 8) | le; + he = (he << 8) | he; + } + + uint32_t k = weight_interpolate(le, he, w); + + // FIXME: This is what the spec says to do in LDR mode, but this is not what ARM's decoder does + // See decompress_symbolic_block(), decode_texel() and unorm16_to_sf16. + // It seems to effectively divide by 65535.0 and convert to FP16, then back to float, mul by 255.0, add .5 and then convert to 8-bit. 
+ ((uint8_t*)pPixels)[pixel_index * 4 + c] = (uint8_t)(k >> 8); + } + } + + } // x + } // y + } + + return success; + } + + //------------------------------------------------ + // Physical to logical block decoding + + // unsigned 128-bit int, with some signed helpers + class uint128 + { + uint64_t m_lo, m_hi; + + public: + uint128() = default; + inline uint128(uint64_t lo) : m_lo(lo), m_hi(0) { } + inline uint128(uint64_t lo, uint64_t hi) : m_lo(lo), m_hi(hi) { } + inline uint128(const uint128& other) : m_lo(other.m_lo), m_hi(other.m_hi) { } + + inline uint128& set_signed(int64_t lo) { m_lo = lo; m_hi = (lo < 0) ? UINT64_MAX : 0; return *this; } + inline uint128& set(uint64_t lo) { m_lo = lo; m_hi = 0; return *this; } + + inline explicit operator uint8_t () const { return (uint8_t)m_lo; } + inline explicit operator uint16_t () const { return (uint16_t)m_lo; } + inline explicit operator uint32_t () const { return (uint32_t)m_lo; } + inline explicit operator uint64_t () const { return m_lo; } + + inline uint128& operator= (const uint128& rhs) { m_lo = rhs.m_lo; m_hi = rhs.m_hi; return *this; } + inline uint128& operator= (const uint64_t val) { m_lo = val; m_hi = 0; return *this; } + + inline uint64_t get_low() const { return m_lo; } + inline uint64_t& get_low() { return m_lo; } + + inline uint64_t get_high() const { return m_hi; } + inline uint64_t& get_high() { return m_hi; } + + inline bool operator== (const uint128& rhs) const { return (m_lo == rhs.m_lo) && (m_hi == rhs.m_hi); } + inline bool operator!= (const uint128& rhs) const { return (m_lo != rhs.m_lo) || (m_hi != rhs.m_hi); } + + inline bool operator< (const uint128& rhs) const + { + if (m_hi < rhs.m_hi) + return true; + + if (m_hi == rhs.m_hi) + { + if (m_lo < rhs.m_lo) + return true; + } + + return false; + } + + inline bool operator> (const uint128& rhs) const { return (rhs < *this); } + + inline bool operator<= (const uint128& rhs) const { return (*this == rhs) || (*this < rhs); } + inline bool 
operator>= (const uint128& rhs) const { return (*this == rhs) || (*this > rhs); } + + inline bool is_zero() const { return (m_lo == 0) && (m_hi == 0); } + inline bool is_all_ones() const { return (m_lo == UINT64_MAX) && (m_hi == UINT64_MAX); } + inline bool is_non_zero() const { return (m_lo != 0) || (m_hi != 0); } + inline explicit operator bool() const { return is_non_zero(); } + inline bool is_signed() const { return ((int64_t)m_hi) < 0; } + + inline bool signed_less(const uint128& rhs) const + { + const bool l_signed = is_signed(), r_signed = rhs.is_signed(); + + if (l_signed == r_signed) + return *this < rhs; + + if (l_signed && !r_signed) + return true; + + assert(!l_signed && r_signed); + return false; + } + + inline bool signed_greater(const uint128& rhs) const { return rhs.signed_less(*this); } + inline bool signed_less_equal(const uint128& rhs) const { return !rhs.signed_less(*this); } + inline bool signed_greater_equal(const uint128& rhs) const { return !signed_less(rhs); } + + double get_double() const + { + double res = 0; + + if (m_hi) + res = (double)m_hi * pow(2.0f, 64.0f); + + res += (double)m_lo; + + return res; + } + + double get_signed_double() const + { + if (is_signed()) + return -(uint128(*this).abs().get_double()); + else + return get_double(); + } + + inline uint128 abs() const + { + uint128 res(*this); + if (res.is_signed()) + res = -res; + return res; + } + + inline uint128& operator<<= (int shift) + { + assert(shift >= 0); + if (shift < 0) + return *this; + + m_hi = (shift >= 64) ? ((shift >= 128) ? 0 : (m_lo << (shift - 64))) : (m_hi << shift); + + if ((shift) && (shift < 64)) + m_hi |= (m_lo >> (64 - shift)); + + m_lo = (shift >= 64) ? 0 : (m_lo << shift); + + return *this; + } + + inline uint128 operator<< (int shift) const { uint128 res(*this); res <<= shift; return res; } + + inline uint128& operator>>= (int shift) + { + assert(shift >= 0); + if (shift < 0) + return *this; + + m_lo = (shift >= 64) ? ((shift >= 128) ? 
0 : (m_hi >> (shift - 64))) : (m_lo >> shift); + + if ((shift) && (shift < 64)) + m_lo |= (m_hi << (64 - shift)); + + m_hi = (shift >= 64) ? 0 : (m_hi >> shift); + + return *this; + } + + inline uint128 operator>> (int shift) const { uint128 res(*this); res >>= shift; return res; } + + inline uint128 signed_shift_right(int shift) const + { + uint128 res(*this); + res >>= shift; + + if (is_signed()) + { + uint128 x(0U); + x = ~x; + x >>= shift; + res |= (~x); + } + + return res; + } + + inline uint128& operator |= (const uint128& rhs) { m_lo |= rhs.m_lo; m_hi |= rhs.m_hi; return *this; } + inline uint128 operator | (const uint128& rhs) const { uint128 res(*this); res |= rhs; return res; } + + inline uint128& operator &= (const uint128& rhs) { m_lo &= rhs.m_lo; m_hi &= rhs.m_hi; return *this; } + inline uint128 operator & (const uint128& rhs) const { uint128 res(*this); res &= rhs; return res; } + + inline uint128& operator ^= (const uint128& rhs) { m_lo ^= rhs.m_lo; m_hi ^= rhs.m_hi; return *this; } + inline uint128 operator ^ (const uint128& rhs) const { uint128 res(*this); res ^= rhs; return res; } + + inline uint128 operator ~() const { return uint128(~m_lo, ~m_hi); } + + inline uint128 operator -() const { uint128 res(~*this); if (++res.m_lo == 0) ++res.m_hi; return res; } + + // prefix + inline uint128 operator ++() + { + if (++m_lo == 0) + ++m_hi; + return *this; + } + + // postfix + inline uint128 operator ++(int) + { + uint128 res(*this); + if (++m_lo == 0) + ++m_hi; + return res; + } + + // prefix + inline uint128 operator --() + { + const uint64_t t = m_lo; + if (--m_lo > t) + --m_hi; + return *this; + } + + // postfix + inline uint128 operator --(int) + { + const uint64_t t = m_lo; + uint128 res(*this); + if (--m_lo > t) + --m_hi; + return res; + } + + inline uint128& operator+= (const uint128& rhs) + { + const uint64_t t = m_lo + rhs.m_lo; + m_hi = m_hi + rhs.m_hi + (t < m_lo); + m_lo = t; + return *this; + } + + inline uint128 operator+ (const uint128& 
rhs) const { uint128 res(*this); res += rhs; return res; } + + inline uint128& operator-= (const uint128& rhs) + { + const uint64_t t = m_lo - rhs.m_lo; + m_hi = m_hi - rhs.m_hi - (t > m_lo); + m_lo = t; + return *this; + } + + inline uint128 operator- (const uint128& rhs) const { uint128 res(*this); res -= rhs; return res; } + + // computes bit by bit, very slow + uint128& operator*=(const uint128& rhs) + { + uint128 temp(*this), result(0U); + + for (uint128 bitmask(rhs); bitmask; bitmask >>= 1, temp <<= 1) + if (bitmask.get_low() & 1) + result += temp; + + *this = result; + return *this; + } + + uint128 operator*(const uint128& rhs) const { uint128 res(*this); res *= rhs; return res; } + + // computes bit by bit, very slow + friend uint128 divide(const uint128& dividend, const uint128& divisor, uint128& remainder) + { + remainder = 0; + + if (!divisor) + { + assert(0); + return ~uint128(0U); + } + + uint128 quotient(0), one(1); + + for (int i = 127; i >= 0; i--) + { + remainder = (remainder << 1) | ((dividend >> i) & one); + if (remainder >= divisor) + { + remainder -= divisor; + quotient |= (one << i); + } + } + + return quotient; + } + + uint128 operator/(const uint128& rhs) const { uint128 remainder, res; res = divide(*this, rhs, remainder); return res; } + uint128 operator/=(const uint128& rhs) { uint128 remainder; *this = divide(*this, rhs, remainder); return *this; } + + uint128 operator%(const uint128& rhs) const { uint128 remainder; divide(*this, rhs, remainder); return remainder; } + uint128 operator%=(const uint128& rhs) { uint128 remainder; divide(*this, rhs, remainder); *this = remainder; return *this; } + + void print_hex(FILE* pFile) const + { + fprintf(pFile, "0x%016llx%016llx", (unsigned long long int)m_hi, (unsigned long long int)m_lo); + } + + void format_unsigned(std::string& res) const + { + basisu::vector digits; + digits.reserve(39 + 1); + + uint128 k(*this), ten(10); + do + { + uint128 r; + k = divide(k, ten, r); + 
digits.push_back((uint8_t)r); + } while (k); + + for (int i = (int)digits.size() - 1; i >= 0; i--) + res += ('0' + digits[i]); + } + + void format_signed(std::string& res) const + { + uint128 val(*this); + + if (val.is_signed()) + { + res.push_back('-'); + val = -val; + } + + val.format_unsigned(res); + } + + void print_unsigned(FILE* pFile) + { + std::string str; + format_unsigned(str); + fprintf(pFile, "%s", str.c_str()); + } + + void print_signed(FILE* pFile) + { + std::string str; + format_signed(str); + fprintf(pFile, "%s", str.c_str()); + } + + uint128 get_reversed_bits() const + { + uint128 res; + + const uint32_t* pSrc = (const uint32_t*)this; + uint32_t* pDst = (uint32_t*)&res; + + pDst[0] = rev_dword(pSrc[3]); + pDst[1] = rev_dword(pSrc[2]); + pDst[2] = rev_dword(pSrc[1]); + pDst[3] = rev_dword(pSrc[0]); + + return res; + } + + uint128 get_byteswapped() const + { + uint128 res; + + const uint8_t* pSrc = (const uint8_t*)this; + uint8_t* pDst = (uint8_t*)&res; + + for (uint32_t i = 0; i < 16; i++) + pDst[i] = pSrc[15 - i]; + + return res; + } + + inline uint64_t get_bits64(uint32_t bit_ofs, uint32_t bit_len) const + { + assert(bit_ofs < 128); + assert(bit_len && (bit_len <= 64) && ((bit_ofs + bit_len) <= 128)); + + uint128 res(*this); + res >>= bit_ofs; + + const uint64_t bitmask = (bit_len == 64) ? 
UINT64_MAX : ((1ull << bit_len) - 1); + return res.get_low() & bitmask; + } + + inline uint32_t get_bits(uint32_t bit_ofs, uint32_t bit_len) const + { + assert(bit_len <= 32); + return (uint32_t)get_bits64(bit_ofs, bit_len); + } + + inline uint32_t next_bits(uint32_t& bit_ofs, uint32_t len) const + { + assert(len && (len <= 32)); + uint32_t x = get_bits(bit_ofs, len); + bit_ofs += len; + return x; + } + + inline uint128& set_bits(uint64_t val, uint32_t bit_ofs, uint32_t num_bits) + { + assert(bit_ofs < 128); + assert(num_bits && (num_bits <= 64) && ((bit_ofs + num_bits) <= 128)); + + uint128 bitmask(1); + bitmask = (bitmask << num_bits) - 1; + assert(uint128(val) <= bitmask); + + bitmask <<= bit_ofs; + *this &= ~bitmask; + + *this = *this | (uint128(val) << bit_ofs); + return *this; + } + }; + + static bool decode_void_extent(const uint128& bits, log_astc_block& log_blk) + { + if (bits.get_bits(10, 2) != 0b11) + return false; + + uint32_t bit_ofs = 12; + const uint32_t min_s = bits.next_bits(bit_ofs, 13); + const uint32_t max_s = bits.next_bits(bit_ofs, 13); + const uint32_t min_t = bits.next_bits(bit_ofs, 13); + const uint32_t max_t = bits.next_bits(bit_ofs, 13); + assert(bit_ofs == 64); + + const bool all_extents_all_ones = (min_s == 0x1FFF) && (max_s == 0x1FFF) && (min_t == 0x1FFF) && (max_t == 0x1FFF); + + if (!all_extents_all_ones && ((min_s >= max_s) || (min_t >= max_t))) + return false; + + const bool hdr_flag = bits.get_bits(9, 1) != 0; + + if (hdr_flag) + log_blk.m_solid_color_flag_hdr = true; + else + log_blk.m_solid_color_flag_ldr = true; + + log_blk.m_solid_color[0] = (uint16_t)bits.get_bits(64, 16); + log_blk.m_solid_color[1] = (uint16_t)bits.get_bits(80, 16); + log_blk.m_solid_color[2] = (uint16_t)bits.get_bits(96, 16); + log_blk.m_solid_color[3] = (uint16_t)bits.get_bits(112, 16); + + if (log_blk.m_solid_color_flag_hdr) + { + for (uint32_t c = 0; c < 4; c++) + if (is_half_inf_or_nan(log_blk.m_solid_color[c])) + return false; + } + + return true; + } 
+ + struct astc_dec_row + { + int8_t Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs; + }; + + static const astc_dec_row s_dec_rows[10] = + { + // Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs; + { 10, 9, 7, 2, 5, 2, 4, 2, 4, 0, 1 }, // 4 2 + { 10, 9, 7, 2, 5, 2, 8, 2, 4, 0, 1 }, // 8 2 + { 10, 9, 5, 2, 7, 2, 2, 8, 4, 0, 1 }, // 2 8 + { 10, 9, 5, 2, 7, 1, 2, 6, 4, 0, 1 }, // 2 6 + + { 10, 9, 7, 1, 5, 2, 2, 2, 4, 0, 1 }, // 2 2 + { 10, 9, 0, 0, 5, 2, 12, 2, 4, 2, 3 }, // 12 2 + { 10, 9, 5, 2, 0, 0, 2, 12, 4, 2, 3 }, // 2 12 + { 10, 9, 0, 0, 0, 0, 6, 10, 4, 2, 3 }, // 6 10 + + { 10, 9, 0, 0, 0, 0, 10, 6, 4, 2, 3 }, // 10 6 + { -1, -1, 5, 2, 9, 2, 6, 6, 4, 2, 3 }, // 6 6 + }; + + static bool decode_config(const uint128& bits, log_astc_block& log_blk) + { + // Reserved + if (bits.get_bits(0, 4) == 0) + return false; + + // Reserved + if ((bits.get_bits(0, 2) == 0) && (bits.get_bits(6, 3) == 0b111)) + { + if (bits.get_bits(2, 4) != 0b1111) + return false; + } + + // Void extent + if (bits.get_bits(0, 9) == 0b111111100) + return decode_void_extent(bits, log_blk); + + // Check rows + const uint32_t x0_2 = bits.get_bits(0, 2), x2_2 = bits.get_bits(2, 2); + const uint32_t x5_4 = bits.get_bits(5, 4), x8_1 = bits.get_bits(8, 1); + const uint32_t x7_2 = bits.get_bits(7, 2); + + int row_index = -1; + if (x0_2 == 0) + { + if (x7_2 == 0b00) + row_index = 5; + else if (x7_2 == 0b01) + row_index = 6; + else if (x5_4 == 0b1100) + row_index = 7; + else if (x5_4 == 0b1101) + row_index = 8; + else if (x7_2 == 0b10) + row_index = 9; + } + else + { + if (x2_2 == 0b00) + row_index = 0; + else if (x2_2 == 0b01) + row_index = 1; + else if (x2_2 == 0b10) + row_index = 2; + else if ((x2_2 == 0b11) && (x8_1 == 0)) + row_index = 3; + else if ((x2_2 == 0b11) && (x8_1 == 1)) + row_index = 4; + } + if (row_index < 0) + return false; + + const astc_dec_row& r = s_dec_rows[row_index]; + + bool P = false, Dp = false; + uint32_t 
W = r.W_bias, H = r.H_bias; + + if (r.P_ofs >= 0) + P = bits.get_bits(r.P_ofs, 1) != 0; + + if (r.Dp_ofs >= 0) + Dp = bits.get_bits(r.Dp_ofs, 1) != 0; + + if (r.W_size) + W += bits.get_bits(r.W_ofs, r.W_size); + + if (r.H_size) + H += bits.get_bits(r.H_ofs, r.H_size); + + assert((W >= MIN_GRID_DIM) && (W <= MAX_BLOCK_DIM)); + assert((H >= MIN_GRID_DIM) && (H <= MAX_BLOCK_DIM)); + + int p0 = bits.get_bits(r.p0_ofs, 1); + int p1 = bits.get_bits(r.p1_ofs, 1); + int p2 = bits.get_bits(r.p2_ofs, 1); + + uint32_t p = p0 | (p1 << 1) | (p2 << 2); + if (p < 2) + return false; + + log_blk.m_grid_width = (uint8_t)W; + log_blk.m_grid_height = (uint8_t)H; + + log_blk.m_weight_ise_range = (uint8_t)((p - 2) + (P * BISE_10_LEVELS)); + assert(log_blk.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE); + + log_blk.m_dual_plane = Dp; + + return true; + } + + static inline uint32_t read_le_dword(const uint8_t* pBytes) + { + return (pBytes[0]) | (pBytes[1] << 8U) | (pBytes[2] << 16U) | (pBytes[3] << 24U); + } + + // See 18.12.Integer Sequence Encoding - tables computed by executing the decoder functions with all possible 8/7-bit inputs. 
+ static const uint8_t s_trit_decode[256][5] = + { + {0,0,0,0,0},{1,0,0,0,0},{2,0,0,0,0},{0,0,2,0,0},{0,1,0,0,0},{1,1,0,0,0},{2,1,0,0,0},{1,0,2,0,0}, + {0,2,0,0,0},{1,2,0,0,0},{2,2,0,0,0},{2,0,2,0,0},{0,2,2,0,0},{1,2,2,0,0},{2,2,2,0,0},{2,0,2,0,0}, + {0,0,1,0,0},{1,0,1,0,0},{2,0,1,0,0},{0,1,2,0,0},{0,1,1,0,0},{1,1,1,0,0},{2,1,1,0,0},{1,1,2,0,0}, + {0,2,1,0,0},{1,2,1,0,0},{2,2,1,0,0},{2,1,2,0,0},{0,0,0,2,2},{1,0,0,2,2},{2,0,0,2,2},{0,0,2,2,2}, + {0,0,0,1,0},{1,0,0,1,0},{2,0,0,1,0},{0,0,2,1,0},{0,1,0,1,0},{1,1,0,1,0},{2,1,0,1,0},{1,0,2,1,0}, + {0,2,0,1,0},{1,2,0,1,0},{2,2,0,1,0},{2,0,2,1,0},{0,2,2,1,0},{1,2,2,1,0},{2,2,2,1,0},{2,0,2,1,0}, + {0,0,1,1,0},{1,0,1,1,0},{2,0,1,1,0},{0,1,2,1,0},{0,1,1,1,0},{1,1,1,1,0},{2,1,1,1,0},{1,1,2,1,0}, + {0,2,1,1,0},{1,2,1,1,0},{2,2,1,1,0},{2,1,2,1,0},{0,1,0,2,2},{1,1,0,2,2},{2,1,0,2,2},{1,0,2,2,2}, + {0,0,0,2,0},{1,0,0,2,0},{2,0,0,2,0},{0,0,2,2,0},{0,1,0,2,0},{1,1,0,2,0},{2,1,0,2,0},{1,0,2,2,0}, + {0,2,0,2,0},{1,2,0,2,0},{2,2,0,2,0},{2,0,2,2,0},{0,2,2,2,0},{1,2,2,2,0},{2,2,2,2,0},{2,0,2,2,0}, + {0,0,1,2,0},{1,0,1,2,0},{2,0,1,2,0},{0,1,2,2,0},{0,1,1,2,0},{1,1,1,2,0},{2,1,1,2,0},{1,1,2,2,0}, + {0,2,1,2,0},{1,2,1,2,0},{2,2,1,2,0},{2,1,2,2,0},{0,2,0,2,2},{1,2,0,2,2},{2,2,0,2,2},{2,0,2,2,2}, + {0,0,0,0,2},{1,0,0,0,2},{2,0,0,0,2},{0,0,2,0,2},{0,1,0,0,2},{1,1,0,0,2},{2,1,0,0,2},{1,0,2,0,2}, + {0,2,0,0,2},{1,2,0,0,2},{2,2,0,0,2},{2,0,2,0,2},{0,2,2,0,2},{1,2,2,0,2},{2,2,2,0,2},{2,0,2,0,2}, + {0,0,1,0,2},{1,0,1,0,2},{2,0,1,0,2},{0,1,2,0,2},{0,1,1,0,2},{1,1,1,0,2},{2,1,1,0,2},{1,1,2,0,2}, + {0,2,1,0,2},{1,2,1,0,2},{2,2,1,0,2},{2,1,2,0,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,0,2,2,2}, + {0,0,0,0,1},{1,0,0,0,1},{2,0,0,0,1},{0,0,2,0,1},{0,1,0,0,1},{1,1,0,0,1},{2,1,0,0,1},{1,0,2,0,1}, + {0,2,0,0,1},{1,2,0,0,1},{2,2,0,0,1},{2,0,2,0,1},{0,2,2,0,1},{1,2,2,0,1},{2,2,2,0,1},{2,0,2,0,1}, + {0,0,1,0,1},{1,0,1,0,1},{2,0,1,0,1},{0,1,2,0,1},{0,1,1,0,1},{1,1,1,0,1},{2,1,1,0,1},{1,1,2,0,1}, + 
{0,2,1,0,1},{1,2,1,0,1},{2,2,1,0,1},{2,1,2,0,1},{0,0,1,2,2},{1,0,1,2,2},{2,0,1,2,2},{0,1,2,2,2}, + {0,0,0,1,1},{1,0,0,1,1},{2,0,0,1,1},{0,0,2,1,1},{0,1,0,1,1},{1,1,0,1,1},{2,1,0,1,1},{1,0,2,1,1}, + {0,2,0,1,1},{1,2,0,1,1},{2,2,0,1,1},{2,0,2,1,1},{0,2,2,1,1},{1,2,2,1,1},{2,2,2,1,1},{2,0,2,1,1}, + {0,0,1,1,1},{1,0,1,1,1},{2,0,1,1,1},{0,1,2,1,1},{0,1,1,1,1},{1,1,1,1,1},{2,1,1,1,1},{1,1,2,1,1}, + {0,2,1,1,1},{1,2,1,1,1},{2,2,1,1,1},{2,1,2,1,1},{0,1,1,2,2},{1,1,1,2,2},{2,1,1,2,2},{1,1,2,2,2}, + {0,0,0,2,1},{1,0,0,2,1},{2,0,0,2,1},{0,0,2,2,1},{0,1,0,2,1},{1,1,0,2,1},{2,1,0,2,1},{1,0,2,2,1}, + {0,2,0,2,1},{1,2,0,2,1},{2,2,0,2,1},{2,0,2,2,1},{0,2,2,2,1},{1,2,2,2,1},{2,2,2,2,1},{2,0,2,2,1}, + {0,0,1,2,1},{1,0,1,2,1},{2,0,1,2,1},{0,1,2,2,1},{0,1,1,2,1},{1,1,1,2,1},{2,1,1,2,1},{1,1,2,2,1}, + {0,2,1,2,1},{1,2,1,2,1},{2,2,1,2,1},{2,1,2,2,1},{0,2,1,2,2},{1,2,1,2,2},{2,2,1,2,2},{2,1,2,2,2}, + {0,0,0,1,2},{1,0,0,1,2},{2,0,0,1,2},{0,0,2,1,2},{0,1,0,1,2},{1,1,0,1,2},{2,1,0,1,2},{1,0,2,1,2}, + {0,2,0,1,2},{1,2,0,1,2},{2,2,0,1,2},{2,0,2,1,2},{0,2,2,1,2},{1,2,2,1,2},{2,2,2,1,2},{2,0,2,1,2}, + {0,0,1,1,2},{1,0,1,1,2},{2,0,1,1,2},{0,1,2,1,2},{0,1,1,1,2},{1,1,1,1,2},{2,1,1,1,2},{1,1,2,1,2}, + {0,2,1,1,2},{1,2,1,1,2},{2,2,1,1,2},{2,1,2,1,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,1,2,2,2} + }; + + static const uint8_t s_quint_decode[128][3] = + { + {0,0,0},{1,0,0},{2,0,0},{3,0,0},{4,0,0},{0,4,0},{4,4,0},{4,4,4}, + {0,1,0},{1,1,0},{2,1,0},{3,1,0},{4,1,0},{1,4,0},{4,4,1},{4,4,4}, + {0,2,0},{1,2,0},{2,2,0},{3,2,0},{4,2,0},{2,4,0},{4,4,2},{4,4,4}, + {0,3,0},{1,3,0},{2,3,0},{3,3,0},{4,3,0},{3,4,0},{4,4,3},{4,4,4}, + {0,0,1},{1,0,1},{2,0,1},{3,0,1},{4,0,1},{0,4,1},{4,0,4},{0,4,4}, + {0,1,1},{1,1,1},{2,1,1},{3,1,1},{4,1,1},{1,4,1},{4,1,4},{1,4,4}, + {0,2,1},{1,2,1},{2,2,1},{3,2,1},{4,2,1},{2,4,1},{4,2,4},{2,4,4}, + {0,3,1},{1,3,1},{2,3,1},{3,3,1},{4,3,1},{3,4,1},{4,3,4},{3,4,4}, + {0,0,2},{1,0,2},{2,0,2},{3,0,2},{4,0,2},{0,4,2},{2,0,4},{3,0,4}, + 
{0,1,2},{1,1,2},{2,1,2},{3,1,2},{4,1,2},{1,4,2},{2,1,4},{3,1,4}, + {0,2,2},{1,2,2},{2,2,2},{3,2,2},{4,2,2},{2,4,2},{2,2,4},{3,2,4}, + {0,3,2},{1,3,2},{2,3,2},{3,3,2},{4,3,2},{3,4,2},{2,3,4},{3,3,4}, + {0,0,3},{1,0,3},{2,0,3},{3,0,3},{4,0,3},{0,4,3},{0,0,4},{1,0,4}, + {0,1,3},{1,1,3},{2,1,3},{3,1,3},{4,1,3},{1,4,3},{0,1,4},{1,1,4}, + {0,2,3},{1,2,3},{2,2,3},{3,2,3},{4,2,3},{2,4,3},{0,2,4},{1,2,4}, + {0,3,3},{1,3,3},{2,3,3},{3,3,3},{4,3,3},{3,4,3},{0,3,4},{1,3,4} + }; + + static void decode_trit_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val) + { + assert((num_vals >= 1) && (num_vals <= 5)); + uint32_t m[5] = { 0 }, T = 0; + + static const uint8_t s_t_bits[5] = { 2, 2, 1, 2, 1 }; + + for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++) + { + if (bits_per_val) + m[c] = bits.next_bits(bit_ofs, bits_per_val); + T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs); + T_ofs += s_t_bits[c]; + } + + const uint8_t (&p_trits)[5] = s_trit_decode[T]; + + for (uint32_t i = 0; i < num_vals; i++) + pVals[i] = (uint8_t)((p_trits[i] << bits_per_val) | m[i]); + } + + static void decode_quint_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val) + { + assert((num_vals >= 1) && (num_vals <= 3)); + uint32_t m[3] = { 0 }, T = 0; + + static const uint8_t s_t_bits[3] = { 3, 2, 2 }; + + for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++) + { + if (bits_per_val) + m[c] = bits.next_bits(bit_ofs, bits_per_val); + T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs); + T_ofs += s_t_bits[c]; + } + + const uint8_t (&p_quints)[3] = s_quint_decode[T]; + + for (uint32_t i = 0; i < num_vals; i++) + pVals[i] = (uint8_t)((p_quints[i] << bits_per_val) | m[i]); + } + + static void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t bit_ofs) + { + assert(num_vals && (ise_range < TOTAL_ISE_RANGES)); + + const uint32_t bits_per_val = 
g_ise_range_table[ise_range][0]; + + if (g_ise_range_table[ise_range][1]) + { + // Trits+bits, 5 vals per block, 7 bits extra per block + const uint32_t total_blocks = (num_vals + 4) / 5; + for (uint32_t b = 0; b < total_blocks; b++) + { + const uint32_t num_vals_in_block = std::min(num_vals - 5 * b, 5); + decode_trit_block(pVals + 5 * b, num_vals_in_block, bits, bit_ofs, bits_per_val); + } + } + else if (g_ise_range_table[ise_range][2]) + { + // Quints+bits, 3 vals per block, 8 bits extra per block + const uint32_t total_blocks = (num_vals + 2) / 3; + for (uint32_t b = 0; b < total_blocks; b++) + { + const uint32_t num_vals_in_block = std::min(num_vals - 3 * b, 3); + decode_quint_block(pVals + 3 * b, num_vals_in_block, bits, bit_ofs, bits_per_val); + } + } + else + { + assert(bits_per_val); + + // Only bits + for (uint32_t i = 0; i < num_vals; i++) + pVals[i] = (uint8_t)bits.next_bits(bit_ofs, bits_per_val); + } + } + + void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t* pBits128, uint32_t bit_ofs) + { + const uint128 bits( + (uint64_t)read_le_dword(pBits128) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t))) << 32), + (uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 3)) << 32)); + + return decode_bise(ise_range, pVals, num_vals, bits, bit_ofs); + } + + // Decodes a physical ASTC block to a logical ASTC block. + // blk_width/blk_height are only used to validate the weight grid's dimensions. 
+ bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height) + { + assert(is_valid_block_size(blk_width, blk_height)); + + const uint8_t* pS = (uint8_t*)pASTC_block; + + log_blk.clear(); + log_blk.m_error_flag = true; + + const uint128 bits( + (uint64_t)read_le_dword(pS) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t))) << 32), + (uint64_t)read_le_dword(pS + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t) * 3)) << 32)); + + const uint128 rev_bits(bits.get_reversed_bits()); + + if (!decode_config(bits, log_blk)) + return false; + + if (log_blk.m_solid_color_flag_hdr || log_blk.m_solid_color_flag_ldr) + { + // Void extent + log_blk.m_error_flag = false; + return true; + } + + // Check grid dimensions + if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height)) + return false; + + // Now we have the grid width/height, dual plane, weight ISE range + + const uint32_t total_grid_weights = (log_blk.m_dual_plane ? 2 : 1) * (log_blk.m_grid_width * log_blk.m_grid_height); + const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_blk.m_weight_ise_range); + + // 18.24 Illegal Encodings + if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96)) + return false; + + const uint32_t end_of_weight_bit_ofs = 128 - total_weight_bits; + + uint32_t total_extra_bits = 0; + + // Right before the weight bits, there may be extra CEM bits, then the 2 CCS bits if dual plane. 
+ + log_blk.m_num_partitions = (uint8_t)(bits.get_bits(11, 2) + 1); + if (log_blk.m_num_partitions == 1) + log_blk.m_color_endpoint_modes[0] = (uint8_t)(bits.get_bits(13, 4)); // read CEM bits + else + { + // 2 or more partitions + if (log_blk.m_dual_plane && (log_blk.m_num_partitions == 4)) + return false; + + log_blk.m_partition_id = (uint16_t)bits.get_bits(13, 10); + + uint32_t cem_bits = bits.get_bits(23, 6); + + if ((cem_bits & 3) == 0) + { + // All CEM's the same + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + log_blk.m_color_endpoint_modes[i] = (uint8_t)(cem_bits >> 2); + } + else + { + // CEM's different, but within up to 2 adjacent classes + const uint32_t first_cem_index = ((cem_bits & 3) - 1) * 4; + + total_extra_bits = 3 * log_blk.m_num_partitions - 4; + + if ((total_weight_bits + total_extra_bits) > 128) + return false; + + uint32_t cem_bit_pos = end_of_weight_bit_ofs - total_extra_bits; + + uint32_t c[4] = { 0 }, m[4] = { 0 }; + + cem_bits >>= 2; + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++, cem_bits >>= 1) + c[i] = cem_bits & 1; + + switch (log_blk.m_num_partitions) + { + case 2: + { + m[0] = cem_bits & 3; + m[1] = bits.next_bits(cem_bit_pos, 2); + break; + } + case 3: + { + m[0] = cem_bits & 1; + m[0] |= (bits.next_bits(cem_bit_pos, 1) << 1); + m[1] = bits.next_bits(cem_bit_pos, 2); + m[2] = bits.next_bits(cem_bit_pos, 2); + break; + } + case 4: + { + for (uint32_t i = 0; i < 4; i++) + m[i] = bits.next_bits(cem_bit_pos, 2); + break; + } + default: + { + assert(0); + break; + } + } + + assert(cem_bit_pos == end_of_weight_bit_ofs); + + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + { + log_blk.m_color_endpoint_modes[i] = (uint8_t)(first_cem_index + (c[i] * 4) + m[i]); + assert(log_blk.m_color_endpoint_modes[i] <= 15); + } + } + } + + // Now we have all the CEM indices. 
+ + if (log_blk.m_dual_plane) + { + // Read CCS bits, beneath any CEM bits + total_extra_bits += 2; + + if (total_extra_bits > end_of_weight_bit_ofs) + return false; + + uint32_t ccs_bit_pos = end_of_weight_bit_ofs - total_extra_bits; + log_blk.m_color_component_selector = (uint8_t)(bits.get_bits(ccs_bit_pos, 2)); + } + + uint32_t config_bit_pos = 11 + 2; // config+num_parts + if (log_blk.m_num_partitions == 1) + config_bit_pos += 4; // CEM bits + else + config_bit_pos += 10 + 6; // part_id+CEM bits + + // config+num_parts+total_extra_bits (CEM extra+CCS) + uint32_t total_config_bits = config_bit_pos + total_extra_bits; + + // Compute number of remaining bits in block + const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits; + if (num_remaining_bits < 0) + return false; + + // Compute total number of ISE encoded color endpoint mode values + uint32_t total_cem_vals = 0; + for (uint32_t j = 0; j < log_blk.m_num_partitions; j++) + total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[j]); + + if (total_cem_vals > MAX_ENDPOINTS) + return false; + + // Infer endpoint ISE range based off the # of values we need to encode, and the # of remaining bits in the block + int endpoint_ise_range = -1; + for (int k = 20; k > 0; k--) + { + int b = get_ise_sequence_bits(total_cem_vals, k); + if (b <= num_remaining_bits) + { + endpoint_ise_range = k; + break; + } + } + + // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints + if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE) + return false; + + log_blk.m_endpoint_ise_range = (uint8_t)endpoint_ise_range; + + // Decode endpoints forwards in block + decode_bise(log_blk.m_endpoint_ise_range, log_blk.m_endpoints, total_cem_vals, bits, config_bit_pos); + + // Decode grid weights backwards in block + decode_bise(log_blk.m_weight_ise_range, log_blk.m_weights, total_grid_weights, rev_bits, 0); + + log_blk.m_error_flag = false; + + return true; + } + +} // 
namespace astc_helpers + +#endif //BASISU_ASTC_HELPERS_IMPLEMENTATION diff --git a/thirdparty/basisu/transcoder/basisu_containers.h b/thirdparty/basisu/transcoder/basisu_containers.h new file mode 100644 index 000000000..82b78cba6 --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_containers.h @@ -0,0 +1,4202 @@ +// basisu_containers.h +#pragma once +#include +#include +#include +#include +#include + +#if defined(__linux__) && !defined(ANDROID) +// Only for malloc_usable_size() in basisu_containers_impl.h +#include +#define HAS_MALLOC_USABLE_SIZE 1 +#endif + +// Set to 1 to always check vector operator[], front(), and back() even in release. +#define BASISU_VECTOR_FORCE_CHECKING 0 + +// If 1, the vector container will not query the CRT to get the size of resized memory blocks. +#define BASISU_VECTOR_DETERMINISTIC 1 + +#ifdef _MSC_VER +#define BASISU_FORCE_INLINE __forceinline +#else +#define BASISU_FORCE_INLINE inline +#endif + +#define BASISU_HASHMAP_TEST 0 + +namespace basisu +{ + enum { cInvalidIndex = -1 }; + + template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } + + template inline S maximum(S a, S b) { return (a > b) ? a : b; } + template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } + template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } + + template inline S minimum(S a, S b) { return (a < b) ? 
a : b; } + template inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } + template inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } + +#ifdef _MSC_VER + __declspec(noreturn) +#else + [[noreturn]] +#endif + void container_abort(const char* pMsg, ...); + + namespace helpers + { + inline bool is_power_of_2(uint32_t x) { return x && ((x & (x - 1U)) == 0U); } + inline bool is_power_of_2(uint64_t x) { return x && ((x & (x - 1U)) == 0U); } + + template const T& minimum(const T& a, const T& b) { return (b < a) ? b : a; } + template const T& maximum(const T& a, const T& b) { return (a < b) ? b : a; } + + inline uint32_t floor_log2i(uint32_t v) + { + uint32_t l = 0; + while (v > 1U) + { + v >>= 1; + l++; + } + return l; + } + + inline uint32_t floor_log2i(uint64_t v) + { + uint32_t l = 0; + while (v > 1U) + { + v >>= 1; + l++; + } + return l; + } + + inline uint32_t next_pow2(uint32_t val) + { + val--; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + + inline uint64_t next_pow2(uint64_t val) + { + val--; + val |= val >> 32; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + } // namespace helpers + + template + inline T* construct(T* p) + { + return new (static_cast(p)) T; + } + + template + inline T* construct(T* p, const U& init) + { + return new (static_cast(p)) T(init); + } + + template + inline void construct_array(T* p, size_t n) + { + T* q = p + n; + for (; p != q; ++p) + new (static_cast(p)) T; + } + + template + inline void construct_array(T* p, size_t n, const U& init) + { + T* q = p + n; + for (; p != q; ++p) + new (static_cast(p)) T(init); + } + + template + inline void destruct(T* p) + { + p->~T(); + } + + template inline void destruct_array(T* p, size_t n) + { + T* q = p + n; + for (; p != q; ++p) + p->~T(); + } + + template + struct scalar_type + { + enum { cFlag = 
false }; + static inline void construct(T* p) { basisu::construct(p); } + static inline void construct(T* p, const T& init) { basisu::construct(p, init); } + static inline void construct_array(T* p, size_t n) { basisu::construct_array(p, n); } + static inline void destruct(T* p) { basisu::destruct(p); } + static inline void destruct_array(T* p, size_t n) { basisu::destruct_array(p, n); } + }; + + template struct scalar_type + { + enum { cFlag = true }; + static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } + static inline void construct(T** p, T* init) { *p = init; } + static inline void construct_array(T** p, size_t n) { memset(p, 0, sizeof(T*) * n); } + static inline void destruct(T** p) { p; } + static inline void destruct_array(T** p, size_t n) { p, n; } + }; + +#define BASISU_DEFINE_BUILT_IN_TYPE(X) \ + template<> struct scalar_type { \ + enum { cFlag = true }; \ + static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ + static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ + static inline void construct_array(X* p, size_t n) { memset(p, 0, sizeof(X) * n); } \ + static inline void destruct(X* p) { p; } \ + static inline void destruct_array(X* p, size_t n) { p, n; } }; + + BASISU_DEFINE_BUILT_IN_TYPE(bool) + BASISU_DEFINE_BUILT_IN_TYPE(char) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned char) + BASISU_DEFINE_BUILT_IN_TYPE(short) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned short) + BASISU_DEFINE_BUILT_IN_TYPE(int) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned int) + BASISU_DEFINE_BUILT_IN_TYPE(long) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned long) +#ifdef __GNUC__ + BASISU_DEFINE_BUILT_IN_TYPE(long long) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned long long) +#else + BASISU_DEFINE_BUILT_IN_TYPE(__int64) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned __int64) +#endif + BASISU_DEFINE_BUILT_IN_TYPE(float) + BASISU_DEFINE_BUILT_IN_TYPE(double) + BASISU_DEFINE_BUILT_IN_TYPE(long double) + +#undef BASISU_DEFINE_BUILT_IN_TYPE + + template + struct 
bitwise_movable { enum { cFlag = false }; }; + +#define BASISU_DEFINE_BITWISE_MOVABLE(Q) template<> struct bitwise_movable { enum { cFlag = true }; }; + + template + struct bitwise_copyable { enum { cFlag = false }; }; + +#define BASISU_DEFINE_BITWISE_COPYABLE(Q) template<> struct bitwise_copyable { enum { cFlag = true }; }; + +#define BASISU_IS_POD(T) __is_pod(T) + +#define BASISU_IS_SCALAR_TYPE(T) (scalar_type::cFlag) + +#if !defined(BASISU_HAVE_STD_TRIVIALLY_COPYABLE) && defined(__GNUC__) && (__GNUC__ < 5) +#define BASISU_IS_TRIVIALLY_COPYABLE(...) __is_trivially_copyable(__VA_ARGS__) +#else +#define BASISU_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value +#endif + + // TODO: clean this up, it's still confusing (copying vs. movable). +#define BASISU_IS_BITWISE_COPYABLE(T) (BASISU_IS_SCALAR_TYPE(T) || BASISU_IS_POD(T) || BASISU_IS_TRIVIALLY_COPYABLE(T) || std::is_trivial::value || (bitwise_copyable::cFlag)) + +#define BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(T) (BASISU_IS_BITWISE_COPYABLE(T) || (bitwise_movable::cFlag)) + +#define BASISU_HAS_DESTRUCTOR(T) ((!scalar_type::cFlag) && (!__is_pod(T)) && (!std::is_trivially_destructible::value)) + + typedef char(&yes_t)[1]; + typedef char(&no_t)[2]; + + template yes_t class_test(int U::*); + template no_t class_test(...); + + template struct is_class + { + enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; + }; + + template struct is_pointer + { + enum { value = false }; + }; + + template struct is_pointer + { + enum { value = true }; + }; + + struct empty_type { }; + + BASISU_DEFINE_BITWISE_COPYABLE(empty_type); + BASISU_DEFINE_BITWISE_MOVABLE(empty_type); + + template struct rel_ops + { + friend bool operator!=(const T& x, const T& y) { return (!(x == y)); } + friend bool operator> (const T& x, const T& y) { return (y < x); } + friend bool operator<=(const T& x, const T& y) { return (!(y < x)); } + friend bool operator>=(const T& x, const T& y) { return (!(x < y)); } + }; + + struct 
elemental_vector + { + void* m_p; + size_t m_size; + size_t m_capacity; + + typedef void (*object_mover)(void* pDst, void* pSrc, size_t num); + + bool increase_capacity(size_t min_new_capacity, bool grow_hint, size_t element_size, object_mover pRelocate, bool nofail); + }; + + // Returns true if a+b would overflow a size_t. + inline bool add_overflow_check(size_t a, size_t b) + { + size_t c = a + b; + return c < a; + } + + // Returns false on overflow, true if OK. + template + inline bool can_fit_into_size_t(T val) + { + static_assert(std::is_integral::value, "T must be an integral type"); + + return (val >= 0) && (static_cast(val) == val); + } + + // Returns true if a*b would overflow a size_t. + inline bool mul_overflow_check(size_t a, size_t b) + { + // Avoid the division on 32-bit platforms + if (sizeof(size_t) == sizeof(uint32_t)) + return !can_fit_into_size_t(static_cast(a) * b); + else + return b && (a > (SIZE_MAX / b)); + } + + template + class writable_span; + + template + class readable_span + { + public: + using value_type = T; + using size_type = size_t; + using const_pointer = const T*; + using const_reference = const T&; + using const_iterator = const T*; + + inline readable_span() : + m_p(nullptr), + m_size(0) + { + } + + inline readable_span(const writable_span& other); + inline readable_span& operator= (const writable_span& rhs); + + inline readable_span(const_pointer p, size_t n) + { + set(p, n); + } + + inline readable_span(const_pointer s, const_pointer e) + { + set(s, e); + } + + inline readable_span(const readable_span& other) : + m_p(other.m_p), + m_size(other.m_size) + { + assert(!m_size || m_p); + } + + inline readable_span(readable_span&& other) : + m_p(other.m_p), + m_size(other.m_size) + { + assert(!m_size || m_p); + + other.m_p = nullptr; + other.m_size = 0; + } + + template + inline readable_span(const T(&arr)[N]) : + m_p(arr), + m_size(N) + { + } + + template + inline readable_span& set(const T(&arr)[N]) + { + m_p = arr; + m_size = N; 
+ return *this; + } + + inline readable_span& set(const_pointer p, size_t n) + { + if (!p && n) + { + assert(0); + m_p = nullptr; + m_size = 0; + } + else + { + m_p = p; + m_size = n; + } + + return *this; + } + + inline readable_span& set(const_pointer s, const_pointer e) + { + if ((e < s) || (!s && e)) + { + assert(0); + m_p = nullptr; + m_size = 0; + } + else + { + m_p = s; + m_size = e - s; + } + + return *this; + } + + inline bool operator== (const readable_span& rhs) const + { + return (m_p == rhs.m_p) && (m_size == rhs.m_size); + } + + inline bool operator!= (const readable_span& rhs) const + { + return (m_p != rhs.m_p) || (m_size != rhs.m_size); + } + + // only true if the region is totally inside the span + inline bool is_inside_ptr(const_pointer p, size_t n) const + { + if (!is_valid()) + { + assert(0); + return false; + } + + if (!p) + { + assert(!n); + return false; + } + + return (p >= m_p) && ((p + n) <= end()); + } + + inline bool is_inside(size_t ofs, size_t size) const + { + if (add_overflow_check(ofs, size)) + { + assert(0); + return false; + } + + if (!is_valid()) + { + assert(0); + return false; + } + + if ((ofs + size) > m_size) + return false; + + return true; + } + + inline readable_span subspan(size_t ofs, size_t n) const + { + if (!is_valid()) + { + assert(0); + return readable_span((const_pointer)nullptr, (size_t)0); + } + + if (add_overflow_check(ofs, n)) + { + assert(0); + return readable_span((const_pointer)nullptr, (size_t)0); + } + + if ((ofs + n) > m_size) + { + assert(0); + return readable_span((const_pointer)nullptr, (size_t)0); + } + + return readable_span(m_p + ofs, n); + } + + void clear() + { + m_p = nullptr; + m_size = 0; + } + + inline bool empty() const { return !m_size; } + + // true if the span is non-nullptr and is not empty + inline bool is_valid() const { return m_p && m_size; } + + inline bool is_nullptr() const { return m_p == nullptr; } + + inline size_t size() const { return m_size; } + inline size_t size_in_bytes() 
const { assert(can_fit_into_size_t((uint64_t)m_size * sizeof(T))); return m_size * sizeof(T); } + + inline const_pointer get_ptr() const { return m_p; } + + inline const_iterator begin() const { return m_p; } + inline const_iterator end() const { assert(m_p || !m_size); return m_p + m_size; } + + inline const_iterator cbegin() const { return m_p; } + inline const_iterator cend() const { assert(m_p || !m_size); return m_p + m_size; } + + inline const_reference front() const + { + if (!(m_p && m_size)) + container_abort("readable_span invalid\n"); + + return m_p[0]; + } + + inline const_reference back() const + { + if (!(m_p && m_size)) + container_abort("readable_span invalid\n"); + + return m_p[m_size - 1]; + } + + inline readable_span& operator= (const readable_span& rhs) + { + m_p = rhs.m_p; + m_size = rhs.m_size; + return *this; + } + + inline readable_span& operator= (readable_span&& rhs) + { + if (this != &rhs) + { + m_p = rhs.m_p; + m_size = rhs.m_size; + rhs.m_p = nullptr; + rhs.m_size = 0; + } + + return *this; + } + + inline const_reference operator* () const + { + if (!(m_p && m_size)) + container_abort("readable_span invalid\n"); + + return *m_p; + } + + inline const_pointer operator-> () const + { + if (!(m_p && m_size)) + container_abort("readable_span invalid\n"); + + return m_p; + } + + inline readable_span& remove_prefix(size_t n) + { + if ((!m_p) || (n > m_size)) + { + assert(0); + return *this; + } + + m_p += n; + m_size -= n; + return *this; + } + + inline readable_span& remove_suffix(size_t n) + { + if ((!m_p) || (n > m_size)) + { + assert(0); + return *this; + } + + m_size -= n; + return *this; + } + + inline readable_span& enlarge(size_t n) + { + if (!m_p) + { + assert(0); + return *this; + } + + if (add_overflow_check(m_size, n)) + { + assert(0); + return *this; + } + + m_size += n; + return *this; + } + + bool copy_from(size_t src_ofs, size_t src_size, T* pDst, size_t dst_ofs) const + { + if (!src_size) + return true; + + if (!pDst) + { + 
assert(0); + return false; + } + + if (!is_inside(src_ofs, src_size)) + { + assert(0); + return false; + } + + const_pointer pS = m_p + src_ofs; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + const uint64_t num_bytes = (uint64_t)src_size * sizeof(T); + + if (!can_fit_into_size_t(num_bytes)) + { + assert(0); + return false; + } + + memcpy(pDst, pS, (size_t)num_bytes); + } + else + { + T* pD = pDst + dst_ofs; + T* pDst_end = pD + src_size; + + while (pD != pDst_end) + *pD++ = *pS++; + } + + return true; + } + + inline const_reference operator[] (size_t idx) const + { + if ((!is_valid()) || (idx >= m_size)) + container_abort("readable_span: invalid span or index\n"); + + return m_p[idx]; + } + + inline uint16_t read_le16(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint16_t))) + { + assert(0); + return false; + } + + const uint8_t a = (uint8_t)m_p[ofs]; + const uint8_t b = (uint8_t)m_p[ofs + 1]; + return a | (b << 8u); + } + + template + inline R read_val(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(R))) + { + assert(0); + return (R)0; + } + + return *reinterpret_cast(&m_p[ofs]); + } + + inline uint16_t read_be16(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint16_t))) + { + assert(0); + return 0; + } + + const uint8_t b = (uint8_t)m_p[ofs]; + const uint8_t a = (uint8_t)m_p[ofs + 1]; + return a | (b << 8u); + } + + inline uint32_t read_le32(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint32_t))) + { + assert(0); + return 0; + } + + const uint8_t a = (uint8_t)m_p[ofs]; + const uint8_t b = (uint8_t)m_p[ofs + 1]; + const uint8_t c = (uint8_t)m_p[ofs + 2]; + const uint8_t d = (uint8_t)m_p[ofs + 3]; + return a | (b << 8u) | (c << 16u) | (d << 24u); + } + + inline uint32_t read_be32(size_t ofs) const + { + 
static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint32_t))) + { + assert(0); + return 0; + } + + const uint8_t d = (uint8_t)m_p[ofs]; + const uint8_t c = (uint8_t)m_p[ofs + 1]; + const uint8_t b = (uint8_t)m_p[ofs + 2]; + const uint8_t a = (uint8_t)m_p[ofs + 3]; + return a | (b << 8u) | (c << 16u) | (d << 24u); + } + + inline uint64_t read_le64(size_t ofs) const + { + if (!add_overflow_check(ofs, sizeof(uint64_t))) + { + assert(0); + return 0; + } + const uint64_t l = read_le32(ofs); + const uint64_t h = read_le32(ofs + sizeof(uint32_t)); + return l | (h << 32u); + } + + inline uint64_t read_be64(size_t ofs) const + { + if (!add_overflow_check(ofs, sizeof(uint64_t))) + { + assert(0); + return 0; + } + const uint64_t h = read_be32(ofs); + const uint64_t l = read_be32(ofs + sizeof(uint32_t)); + return l | (h << 32u); + } + + private: + const_pointer m_p; + size_t m_size; + }; + + template + class writable_span + { + friend readable_span; + + public: + using value_type = T; + using size_type = size_t; + using const_pointer = const T*; + using const_reference = const T&; + using const_iterator = const T*; + using pointer = T*; + using reference = T&; + using iterator = T*; + + inline writable_span() : + m_p(nullptr), + m_size(0) + { + } + + inline writable_span(T* p, size_t n) + { + set(p, n); + } + + inline writable_span(T* s, T* e) + { + set(s, e); + } + + inline writable_span(const writable_span& other) : + m_p(other.m_p), + m_size(other.m_size) + { + assert(!m_size || m_p); + } + + inline writable_span(writable_span&& other) : + m_p(other.m_p), + m_size(other.m_size) + { + assert(!m_size || m_p); + + other.m_p = nullptr; + other.m_size = 0; + } + + template + inline writable_span(T(&arr)[N]) : + m_p(arr), + m_size(N) + { + } + + readable_span get_readable_span() const + { + return readable_span(m_p, m_size); + } + + template + inline writable_span& set(T(&arr)[N]) + { + m_p = arr; + m_size = N; + return *this; + } + + inline 
writable_span& set(T* p, size_t n) + { + if (!p && n) + { + assert(0); + m_p = nullptr; + m_size = 0; + } + else + { + m_p = p; + m_size = n; + } + + return *this; + } + + inline writable_span& set(T* s, T* e) + { + if ((e < s) || (!s && e)) + { + assert(0); + m_p = nullptr; + m_size = 0; + } + else + { + m_p = s; + m_size = e - s; + } + + return *this; + } + + inline bool operator== (const writable_span& rhs) const + { + return (m_p == rhs.m_p) && (m_size == rhs.m_size); + } + + inline bool operator== (const readable_span& rhs) const + { + return (m_p == rhs.m_p) && (m_size == rhs.m_size); + } + + inline bool operator!= (const writable_span& rhs) const + { + return (m_p != rhs.m_p) || (m_size != rhs.m_size); + } + + inline bool operator!= (const readable_span& rhs) const + { + return (m_p != rhs.m_p) || (m_size != rhs.m_size); + } + + // only true if the region is totally inside the span + inline bool is_inside_ptr(const_pointer p, size_t n) const + { + if (!is_valid()) + { + assert(0); + return false; + } + + if (!p) + { + assert(!n); + return false; + } + + return (p >= m_p) && ((p + n) <= end()); + } + + inline bool is_inside(size_t ofs, size_t size) const + { + if (add_overflow_check(ofs, size)) + { + assert(0); + return false; + } + + if (!is_valid()) + { + assert(0); + return false; + } + + if ((ofs + size) > m_size) + return false; + + return true; + } + + inline writable_span subspan(size_t ofs, size_t n) const + { + if (!is_valid()) + { + assert(0); + return writable_span((T*)nullptr, (size_t)0); + } + + if (add_overflow_check(ofs, n)) + { + assert(0); + return writable_span((T*)nullptr, (size_t)0); + } + + if ((ofs + n) > m_size) + { + assert(0); + return writable_span((T*)nullptr, (size_t)0); + } + + return writable_span(m_p + ofs, n); + } + + void clear() + { + m_p = nullptr; + m_size = 0; + } + + inline bool empty() const { return !m_size; } + + // true if the span is non-nullptr and is not empty + inline bool is_valid() const { return m_p && m_size; 
} + + inline bool is_nullptr() const { return m_p == nullptr; } + + inline size_t size() const { return m_size; } + inline size_t size_in_bytes() const { assert(can_fit_into_size_t((uint64_t)m_size * sizeof(T))); return m_size * sizeof(T); } + + inline T* get_ptr() const { return m_p; } + + inline iterator begin() const { return m_p; } + inline iterator end() const { assert(m_p || !m_size); return m_p + m_size; } + + inline const_iterator cbegin() const { return m_p; } + inline const_iterator cend() const { assert(m_p || !m_size); return m_p + m_size; } + + inline T& front() const + { + if (!(m_p && m_size)) + container_abort("writable_span invalid\n"); + + return m_p[0]; + } + + inline T& back() const + { + if (!(m_p && m_size)) + container_abort("writable_span invalid\n"); + + return m_p[m_size - 1]; + } + + inline writable_span& operator= (const writable_span& rhs) + { + m_p = rhs.m_p; + m_size = rhs.m_size; + return *this; + } + + inline writable_span& operator= (writable_span&& rhs) + { + if (this != &rhs) + { + m_p = rhs.m_p; + m_size = rhs.m_size; + rhs.m_p = nullptr; + rhs.m_size = 0; + } + + return *this; + } + + inline T& operator* () const + { + if (!(m_p && m_size)) + container_abort("writable_span invalid\n"); + + return *m_p; + } + + inline T* operator-> () const + { + if (!(m_p && m_size)) + container_abort("writable_span invalid\n"); + + return m_p; + } + + inline bool set_all(size_t ofs, size_t size, const_reference val) + { + if (!size) + return true; + + if (!is_inside(ofs, size)) + { + assert(0); + return false; + } + + T* pDst = m_p + ofs; + + if ((sizeof(T) == sizeof(uint8_t)) && (BASISU_IS_BITWISE_COPYABLE(T))) + { + memset(pDst, (int)((uint8_t)val), size); + } + else + { + + T* pDst_end = pDst + size; + + while (pDst != pDst_end) + *pDst++ = val; + } + + return true; + } + + inline bool set_all(const_reference val) + { + return set_all(0, m_size, val); + } + + inline writable_span& remove_prefix(size_t n) + { + if ((!m_p) || (n > m_size)) + 
{ + assert(0); + return *this; + } + + m_p += n; + m_size -= n; + return *this; + } + + inline writable_span& remove_suffix(size_t n) + { + if ((!m_p) || (n > m_size)) + { + assert(0); + return *this; + } + + m_size -= n; + return *this; + } + + inline writable_span& enlarge(size_t n) + { + if (!m_p) + { + assert(0); + return *this; + } + + if (add_overflow_check(m_size, n)) + { + assert(0); + return *this; + } + + m_size += n; + return *this; + } + + // copy from this span to the destination ptr + bool copy_from(size_t src_ofs, size_t src_size, T* pDst, size_t dst_ofs) const + { + if (!src_size) + return true; + + if (!pDst) + { + assert(0); + return false; + } + + if (!is_inside(src_ofs, src_size)) + { + assert(0); + return false; + } + + const_pointer pS = m_p + src_ofs; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + const uint64_t num_bytes = (uint64_t)src_size * sizeof(T); + + if (!can_fit_into_size_t(num_bytes)) + { + assert(0); + return false; + } + + memcpy(pDst, pS, (size_t)num_bytes); + } + else + { + T* pD = pDst + dst_ofs; + T* pDst_end = pD + src_size; + + while (pD != pDst_end) + *pD++ = *pS++; + } + + return true; + } + + // copy from the source ptr into this span + bool copy_into(const_pointer pSrc, size_t src_ofs, size_t src_size, size_t dst_ofs) const + { + if (!src_size) + return true; + + if (!pSrc) + { + assert(0); + return false; + } + + if (add_overflow_check(src_ofs, src_size) || add_overflow_check(dst_ofs, src_size)) + { + assert(0); + return false; + } + + if (!is_valid()) + { + assert(0); + return false; + } + + if (!is_inside(dst_ofs, src_size)) + { + assert(0); + return false; + } + + const_pointer pS = pSrc + src_ofs; + T* pD = m_p + dst_ofs; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + const uint64_t num_bytes = (uint64_t)src_size * sizeof(T); + + if (!can_fit_into_size_t(num_bytes)) + { + assert(0); + return false; + } + + memcpy(pD, pS, (size_t)num_bytes); + } + else + { + T* pDst_end = pD + src_size; + + while (pD != pDst_end) + 
*pD++ = *pS++; + } + + return true; + } + + // copy from a source span into this span + bool copy_into(const readable_span& src, size_t src_ofs, size_t src_size, size_t dst_ofs) const + { + if (!src.is_inside(src_ofs, src_size)) + { + assert(0); + return false; + } + + return copy_into(src.get_ptr(), src_ofs, src_size, dst_ofs); + } + + // copy from a source span into this span + bool copy_into(const writable_span& src, size_t src_ofs, size_t src_size, size_t dst_ofs) const + { + if (!src.is_inside(src_ofs, src_size)) + { + assert(0); + return false; + } + + return copy_into(src.get_ptr(), src_ofs, src_size, dst_ofs); + } + + inline T& operator[] (size_t idx) const + { + if ((!is_valid()) || (idx >= m_size)) + container_abort("writable_span: invalid span or index\n"); + + return m_p[idx]; + } + + template + inline R read_val(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(R))) + { + assert(0); + return (R)0; + } + + return *reinterpret_cast(&m_p[ofs]); + } + + template + inline bool write_val(size_t ofs, R val) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(R))) + { + assert(0); + return false; + } + + *reinterpret_cast(&m_p[ofs]) = val; + return true; + } + + inline bool write_le16(size_t ofs, uint16_t val) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint16_t))) + { + assert(0); + return false; + } + + m_p[ofs] = (uint8_t)val; + m_p[ofs + 1] = (uint8_t)(val >> 8u); + return true; + } + + inline bool write_be16(size_t ofs, uint16_t val) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint16_t))) + { + assert(0); + return false; + } + + m_p[ofs + 1] = (uint8_t)val; + m_p[ofs] = (uint8_t)(val >> 8u); + return true; + } + + inline bool write_le32(size_t ofs, uint32_t val) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if 
(!is_inside(ofs, sizeof(uint32_t))) + { + assert(0); + return false; + } + + m_p[ofs] = (uint8_t)val; + m_p[ofs + 1] = (uint8_t)(val >> 8u); + m_p[ofs + 2] = (uint8_t)(val >> 16u); + m_p[ofs + 3] = (uint8_t)(val >> 24u); + return true; + } + + inline bool write_be32(size_t ofs, uint32_t val) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint32_t))) + { + assert(0); + return false; + } + + m_p[ofs + 3] = (uint8_t)val; + m_p[ofs + 2] = (uint8_t)(val >> 8u); + m_p[ofs + 1] = (uint8_t)(val >> 16u); + m_p[ofs] = (uint8_t)(val >> 24u); + return true; + } + + inline bool write_le64(size_t ofs, uint64_t val) const + { + if (!add_overflow_check(ofs, sizeof(uint64_t))) + { + assert(0); + return false; + } + + return write_le32(ofs, (uint32_t)val) && write_le32(ofs + sizeof(uint32_t), (uint32_t)(val >> 32u)); + } + + inline bool write_be64(size_t ofs, uint64_t val) const + { + if (!add_overflow_check(ofs, sizeof(uint64_t))) + { + assert(0); + return false; + } + + return write_be32(ofs + sizeof(uint32_t), (uint32_t)val) && write_be32(ofs, (uint32_t)(val >> 32u)); + } + + inline uint16_t read_le16(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint16_t))) + { + assert(0); + return 0; + } + + const uint8_t a = (uint8_t)m_p[ofs]; + const uint8_t b = (uint8_t)m_p[ofs + 1]; + return a | (b << 8u); + } + + inline uint16_t read_be16(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint16_t))) + { + assert(0); + return 0; + } + + const uint8_t b = (uint8_t)m_p[ofs]; + const uint8_t a = (uint8_t)m_p[ofs + 1]; + return a | (b << 8u); + } + + inline uint32_t read_le32(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint32_t))) + { + assert(0); + return 0; + } + + const uint8_t a = (uint8_t)m_p[ofs]; + const uint8_t b = (uint8_t)m_p[ofs + 1]; + const uint8_t 
c = (uint8_t)m_p[ofs + 2]; + const uint8_t d = (uint8_t)m_p[ofs + 3]; + return a | (b << 8u) | (c << 16u) | (d << 24u); + } + + inline uint32_t read_be32(size_t ofs) const + { + static_assert(sizeof(T) == 1, "T must be byte size"); + + if (!is_inside(ofs, sizeof(uint32_t))) + { + assert(0); + return 0; + } + + const uint8_t d = (uint8_t)m_p[ofs]; + const uint8_t c = (uint8_t)m_p[ofs + 1]; + const uint8_t b = (uint8_t)m_p[ofs + 2]; + const uint8_t a = (uint8_t)m_p[ofs + 3]; + return a | (b << 8u) | (c << 16u) | (d << 24u); + } + + inline uint64_t read_le64(size_t ofs) const + { + if (!add_overflow_check(ofs, sizeof(uint64_t))) + { + assert(0); + return 0; + } + const uint64_t l = read_le32(ofs); + const uint64_t h = read_le32(ofs + sizeof(uint32_t)); + return l | (h << 32u); + } + + inline uint64_t read_be64(size_t ofs) const + { + if (!add_overflow_check(ofs, sizeof(uint64_t))) + { + assert(0); + return 0; + } + const uint64_t h = read_be32(ofs); + const uint64_t l = read_be32(ofs + sizeof(uint32_t)); + return l | (h << 32u); + } + + private: + T* m_p; + size_t m_size; + }; + + template + inline readable_span::readable_span(const writable_span& other) : + m_p(other.m_p), + m_size(other.m_size) + { + } + + template + inline readable_span& readable_span::operator= (const writable_span& rhs) + { + m_p = rhs.m_p; + m_size = rhs.m_size; + return *this; + } + + template + inline bool span_copy(const writable_span& dst, const readable_span& src) + { + return dst.copy_into(src, 0, src.size(), 0); + } + + template + inline bool span_copy(const writable_span& dst, const writable_span& src) + { + return dst.copy_into(src, 0, src.size(), 0); + } + + template + inline bool span_copy(const writable_span& dst, size_t dst_ofs, const writable_span& src, size_t src_ofs, size_t len) + { + return dst.copy_into(src, src_ofs, len, dst_ofs); + } + + template + inline bool span_copy(const writable_span& dst, size_t dst_ofs, const readable_span& src, size_t src_ofs, size_t len) + { + 
return dst.copy_into(src, src_ofs, len, dst_ofs); + } + + template + class vector : public rel_ops< vector > + { + public: + typedef T* iterator; + typedef const T* const_iterator; + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + + inline vector() : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + } + + inline vector(size_t n, const T& init) : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + increase_capacity(n, false); + construct_array(m_p, n, init); + m_size = n; + } + + inline vector(vector&& other) : + m_p(other.m_p), + m_size(other.m_size), + m_capacity(other.m_capacity) + { + other.m_p = nullptr; + other.m_size = 0; + other.m_capacity = 0; + } + + inline vector(const vector& other) : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + increase_capacity(other.m_size, false); + + m_size = other.m_size; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif +#endif + if ((m_p) && (other.m_p)) + { + memcpy(m_p, other.m_p, m_size * sizeof(T)); + } +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif + } + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (size_t i = m_size; i > 0; i--) + construct(pDst++, *pSrc++); + } + } + + inline explicit vector(size_t size) : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + resize(size); + } + + inline explicit vector(std::initializer_list init_list) : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + resize(init_list.size()); + + size_t idx = 0; + for (const T& elem : init_list) + m_p[idx++] = elem; + + assert(idx == m_size); + } + + inline vector(const readable_span& rs) : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + set(rs); + } + + inline vector(const writable_span& ws) : + m_p(nullptr), + m_size(0), + m_capacity(0) + { + set(ws); + } + + // Set contents of 
vector to contents of the readable span + bool set(const readable_span& rs) + { + if (!rs.is_valid()) + { + assert(0); + return false; + } + + const size_t new_size = rs.size(); + + // Could call resize(), but it'll redundantly construct trivial types. + if (m_size != new_size) + { + if (new_size < m_size) + { + if (BASISU_HAS_DESTRUCTOR(T)) + { + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + } + } + else + { + if (new_size > m_capacity) + { + if (!increase_capacity(new_size, false, true)) + return false; + } + } + + // Don't bother constructing trivial types, because we're going to memcpy() over them anyway. + if (!BASISU_IS_BITWISE_COPYABLE(T)) + { + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + + if (!rs.copy_from(0, rs.size(), m_p, 0)) + { + assert(0); + return false; + } + + return true; + } + + // Set contents of vector to contents of the writable span + inline bool set(const writable_span& ws) + { + return set(ws.get_readable_span()); + } + + inline ~vector() + { + if (m_p) + { + if (BASISU_HAS_DESTRUCTOR(T)) + { + scalar_type::destruct_array(m_p, m_size); + } + + free(m_p); + } + } + + inline vector& operator= (const vector& other) + { + if (this == &other) + return *this; + + if (m_capacity >= other.m_size) + resize(0); + else + { + clear(); + increase_capacity(other.m_size, false); + } + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif +#endif + if ((m_p) && (other.m_p)) + memcpy((void *)m_p, other.m_p, other.m_size * sizeof(T)); +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif + } + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (size_t i = other.m_size; i > 0; i--) + construct(pDst++, *pSrc++); + } + + m_size = other.m_size; + + return *this; + } + + inline vector& operator= (vector&& rhs) + { + if (this != &rhs) + 
{ + clear(); + + m_p = rhs.m_p; + m_size = rhs.m_size; + m_capacity = rhs.m_capacity; + + rhs.m_p = nullptr; + rhs.m_size = 0; + rhs.m_capacity = 0; + } + return *this; + } + + BASISU_FORCE_INLINE const T* begin() const { return m_p; } + BASISU_FORCE_INLINE T* begin() { return m_p; } + + BASISU_FORCE_INLINE const T* end() const { return m_p + m_size; } + BASISU_FORCE_INLINE T* end() { return m_p + m_size; } + + BASISU_FORCE_INLINE bool empty() const { return !m_size; } + + BASISU_FORCE_INLINE size_t size() const { return m_size; } + BASISU_FORCE_INLINE uint32_t size_u32() const { assert(m_size <= UINT32_MAX); return static_cast(m_size); } + + BASISU_FORCE_INLINE size_t size_in_bytes() const { return m_size * sizeof(T); } + BASISU_FORCE_INLINE uint32_t size_in_bytes_u32() const { assert((m_size * sizeof(T)) <= UINT32_MAX); return static_cast(m_size * sizeof(T)); } + + BASISU_FORCE_INLINE size_t capacity() const { return m_capacity; } + +#if !BASISU_VECTOR_FORCE_CHECKING + BASISU_FORCE_INLINE const T& operator[] (size_t i) const { assert(i < m_size); return m_p[i]; } + BASISU_FORCE_INLINE T& operator[] (size_t i) { assert(i < m_size); return m_p[i]; } +#else + BASISU_FORCE_INLINE const T& operator[] (size_t i) const + { + if (i >= m_size) + container_abort("vector::operator[] invalid index: %zu, max entries %u, type size %zu\n", i, m_size, sizeof(T)); + + return m_p[i]; + } + BASISU_FORCE_INLINE T& operator[] (size_t i) + { + if (i >= m_size) + container_abort("vector::operator[] invalid index: %zu, max entries %u, type size %zu\n", i, m_size, sizeof(T)); + + return m_p[i]; + } +#endif + + // at() always includes range checking, even in final builds, unlike operator []. 
+ BASISU_FORCE_INLINE const T& at(size_t i) const + { + if (i >= m_size) + container_abort("vector::at() invalid index: %zu, max entries %u, type size %zu\n", i, m_size, sizeof(T)); + + return m_p[i]; + } + BASISU_FORCE_INLINE T& at(size_t i) + { + if (i >= m_size) + container_abort("vector::at() invalid index: %zu, max entries %u, type size %zu\n", i, m_size, sizeof(T)); + + return m_p[i]; + } + +#if !BASISU_VECTOR_FORCE_CHECKING + BASISU_FORCE_INLINE const T& front() const { assert(m_size); return m_p[0]; } + BASISU_FORCE_INLINE T& front() { assert(m_size); return m_p[0]; } + + BASISU_FORCE_INLINE const T& back() const { assert(m_size); return m_p[m_size - 1]; } + BASISU_FORCE_INLINE T& back() { assert(m_size); return m_p[m_size - 1]; } +#else + BASISU_FORCE_INLINE const T& front() const + { + if (!m_size) + container_abort("front: vector is empty, type size %zu\n", sizeof(T)); + + return m_p[0]; + } + BASISU_FORCE_INLINE T& front() + { + if (!m_size) + container_abort("front: vector is empty, type size %zu\n", sizeof(T)); + + return m_p[0]; + } + + BASISU_FORCE_INLINE const T& back() const + { + if (!m_size) + container_abort("back: vector is empty, type size %zu\n", sizeof(T)); + + return m_p[m_size - 1]; + } + BASISU_FORCE_INLINE T& back() + { + if (!m_size) + container_abort("back: vector is empty, type size %zu\n", sizeof(T)); + + return m_p[m_size - 1]; + } +#endif + + BASISU_FORCE_INLINE const T* get_ptr() const { return m_p; } + BASISU_FORCE_INLINE T* get_ptr() { return m_p; } + + BASISU_FORCE_INLINE const T* data() const { return m_p; } + BASISU_FORCE_INLINE T* data() { return m_p; } + + // clear() sets the container to empty, then frees the allocated block. 
+ inline void clear() + { + if (m_p) + { + if (BASISU_HAS_DESTRUCTOR(T)) + { + scalar_type::destruct_array(m_p, m_size); + } + + free(m_p); + + m_p = nullptr; + m_size = 0; + m_capacity = 0; + } + } + + inline void clear_no_destruction() + { + if (m_p) + { + free(m_p); + m_p = nullptr; + m_size = 0; + m_capacity = 0; + } + } + + inline void reserve(size_t new_capacity) + { + if (!try_reserve(new_capacity)) + container_abort("vector:reserve: try_reserve failed!\n"); + } + + inline bool try_reserve(size_t new_capacity) + { + if (new_capacity > m_capacity) + { + if (!increase_capacity(new_capacity, false, true)) + return false; + } + else if (new_capacity < m_capacity) + { + // Must work around the lack of a "decrease_capacity()" method. + // This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize. + vector tmp; + if (!tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false, true)) + return false; + + tmp = *this; + swap(tmp); + } + + return true; + } + + // try_resize(0) sets the container to empty, but does not free the allocated block. + inline bool try_resize(size_t new_size, bool grow_hint = false) + { + if (m_size != new_size) + { + if (new_size < m_size) + { + if (BASISU_HAS_DESTRUCTOR(T)) + { + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + } + } + else + { + if (new_size > m_capacity) + { + if (!increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint, true)) + return false; + } + + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + + return true; + } + + // resize(0) sets the container to empty, but does not free the allocated block. 
+ inline void resize(size_t new_size, bool grow_hint = false) + { + if (!try_resize(new_size, grow_hint)) + container_abort("vector::resize failed, new size %zu\n", new_size); + } + + // If size >= capacity/2, reset() sets the container's size to 0 but doesn't free the allocated block (because the container may be similarly loaded in the future). + // Otherwise it blows away the allocated block. See http://www.codercorner.com/blog/?p=494 + inline void reset() + { + if (m_size >= (m_capacity >> 1)) + resize(0); + else + clear(); + } + + inline T* try_enlarge(size_t i) + { + size_t cur_size = m_size; + + if (add_overflow_check(cur_size, i)) + return nullptr; + + if (!try_resize(cur_size + i, true)) + return nullptr; + + return get_ptr() + cur_size; + } + + inline T* enlarge(size_t i) + { + T* p = try_enlarge(i); + if (!p) + container_abort("vector::enlarge failed, amount %zu!\n", i); + return p; + } + + BASISU_FORCE_INLINE void push_back(const T& obj) + { + assert(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + container_abort("vector::push_back: vector too large\n"); + + increase_capacity(m_size + 1, true); + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + } + + BASISU_FORCE_INLINE void push_back_value(T&& obj) + { + assert(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + container_abort("vector::push_back_value: vector too large\n"); + + increase_capacity(m_size + 1, true); + } + + new ((void*)(m_p + m_size)) T(std::move(obj)); + m_size++; + } + + inline bool try_push_back(const T& obj) + { + assert(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + return false; + + if (!increase_capacity(m_size + 1, true, true)) + return false; + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + + return true; + } + + inline bool 
try_push_back(T&& obj) + { + assert(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + return false; + + if (!increase_capacity(m_size + 1, true, true)) + return false; + } + + new ((void*)(m_p + m_size)) T(std::move(obj)); + m_size++; + + return true; + } + + // obj is explictly passed in by value, not ref + inline void push_back_value(T obj) + { + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + container_abort("vector::push_back_value: vector too large\n"); + + increase_capacity(m_size + 1, true); + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + } + + // obj is explictly passed in by value, not ref + inline bool try_push_back_value(T obj) + { + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + return false; + + if (!increase_capacity(m_size + 1, true, true)) + return false; + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + + return true; + } + + template + BASISU_FORCE_INLINE void emplace_back(Args&&... args) + { + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + container_abort("vector::enlarge: vector too large\n"); + + increase_capacity(m_size + 1, true); + } + + new ((void*)(m_p + m_size)) T(std::forward(args)...); // perfect forwarding + m_size++; + } + + template + BASISU_FORCE_INLINE bool try_emplace_back(Args&&... 
args) + { + if (m_size >= m_capacity) + { + if (add_overflow_check(m_size, 1)) + return false; + + if (!increase_capacity(m_size + 1, true, true)) + return false; + } + + new ((void*)(m_p + m_size)) T(std::forward(args)...); // perfect forwarding + m_size++; + + return true; + } + + inline void pop_back() + { + assert(m_size); + + if (m_size) + { + m_size--; + scalar_type::destruct(&m_p[m_size]); + } + } + + inline bool try_insert(size_t index, const T* p, size_t n) + { + assert(index <= m_size); + + if (index > m_size) + return false; + + if (!n) + return true; + + const size_t orig_size = m_size; + + if (add_overflow_check(m_size, n)) + return false; + + if (!try_resize(m_size + n, true)) + return false; + + const size_t num_to_move = orig_size - index; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + // This overwrites the destination object bits, but bitwise copyable means we don't need to worry about destruction. + memmove(m_p + index + n, m_p + index, sizeof(T) * num_to_move); + } + else + { + const T* pSrc = m_p + orig_size - 1; + T* pDst = const_cast(pSrc) + n; + + for (size_t i = 0; i < num_to_move; i++) + { + assert((uint64_t)(pDst - m_p) < (uint64_t)m_size); + + *pDst = std::move(*pSrc); + pDst--; + pSrc--; + } + } + + T* pDst = m_p + index; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + // This copies in the new bits, overwriting the existing objects, which is OK for copyable types that don't need destruction. 
+ memcpy(pDst, p, sizeof(T) * n); + } + else + { + for (size_t i = 0; i < n; i++) + { + assert((uint64_t)(pDst - m_p) < (uint64_t)m_size); + *pDst++ = *p++; + } + } + + return true; + } + + inline void insert(size_t index, const T* p, size_t n) + { + if (!try_insert(index, p, n)) + container_abort("vector::insert() failed!\n"); + } + + inline bool try_insert(T* p, const T& obj) + { + if (p < begin()) + { + assert(0); + return false; + } + + uint64_t ofs = p - begin(); + + if (ofs > m_size) + { + assert(0); + return false; + } + + if ((size_t)ofs != ofs) + { + assert(0); + return false; + } + + return try_insert((size_t)ofs, &obj, 1); + } + + inline void insert(T* p, const T& obj) + { + if (!try_insert(p, obj)) + container_abort("vector::insert() failed!\n"); + } + + // push_front() isn't going to be very fast - it's only here for usability. + inline void push_front(const T& obj) + { + insert(0, &obj, 1); + } + + inline bool try_push_front(const T& obj) + { + return try_insert(0, &obj, 1); + } + + vector& append(const vector& other) + { + if (other.m_size) + insert(m_size, &other[0], other.m_size); + return *this; + } + + bool try_append(const vector& other) + { + if (other.m_size) + return try_insert(m_size, &other[0], other.m_size); + + return true; + } + + vector& append(const T* p, size_t n) + { + if (n) + insert(m_size, p, n); + return *this; + } + + bool try_append(const T* p, size_t n) + { + if (n) + return try_insert(m_size, p, n); + + return true; + } + + inline bool erase(size_t start, size_t n) + { + if (add_overflow_check(start, n)) + { + assert(0); + return false; + } + + assert((start + n) <= m_size); + + if ((start + n) > m_size) + { + assert(0); + return false; + } + + if (!n) + return true; + + const size_t num_to_move = m_size - (start + n); + + T* pDst = m_p + start; + + const T* pSrc = m_p + start + n; + + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(T)) + { + // This test is overly cautious. 
+ if ((!BASISU_IS_BITWISE_COPYABLE(T)) || (BASISU_HAS_DESTRUCTOR(T))) + { + // Type has been marked explictly as bitwise movable, which means we can move them around but they may need to be destructed. + // First destroy the erased objects. + scalar_type::destruct_array(pDst, n); + } + + // Copy "down" the objects to preserve, filling in the empty slots. + +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif +#endif + + memmove((void *)pDst, pSrc, num_to_move * sizeof(T)); + +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif + } + else + { + // Type is not bitwise copyable or movable. + // Move them down one at a time by using the equals operator, and destroying anything that's left over at the end. + T* pDst_end = pDst + num_to_move; + + while (pDst != pDst_end) + { + *pDst = std::move(*pSrc); + + ++pDst; + ++pSrc; + } + + scalar_type::destruct_array(pDst_end, n); + } + + m_size -= n; + + return true; + } + + inline bool erase_index(size_t index) + { + return erase(index, 1); + } + + inline bool erase(T* p) + { + assert((p >= m_p) && (p < (m_p + m_size))); + + if (p < m_p) + return false; + + return erase_index(static_cast(p - m_p)); + } + + inline bool erase(T* pFirst, T* pEnd) + { + assert(pFirst <= pEnd); + assert(pFirst >= begin() && pFirst <= end()); + assert(pEnd >= begin() && pEnd <= end()); + + if ((pFirst < begin()) || (pEnd < pFirst)) + { + assert(0); + return false; + } + + uint64_t ofs = pFirst - begin(); + if ((size_t)ofs != ofs) + { + assert(0); + return false; + } + + uint64_t n = pEnd - pFirst; + if ((size_t)n != n) + { + assert(0); + return false; + } + + return erase((size_t)ofs, (size_t)n); + } + + bool erase_unordered(size_t index) + { + if (index >= m_size) + { + assert(0); + return false; + } + + if ((index + 1) < m_size) + { + (*this)[index] = std::move(back()); + } + + pop_back(); + return true; + } + + inline bool operator== 
(const vector& rhs) const + { + if (m_size != rhs.m_size) + return false; + else if (m_size) + { + if (scalar_type::cFlag) + return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; + else + { + const T* pSrc = m_p; + const T* pDst = rhs.m_p; + for (size_t i = m_size; i; i--) + if (!(*pSrc++ == *pDst++)) + return false; + } + } + + return true; + } + + inline bool operator< (const vector& rhs) const + { + const size_t min_size = helpers::minimum(m_size, rhs.m_size); + + const T* pSrc = m_p; + const T* pSrc_end = m_p + min_size; + const T* pDst = rhs.m_p; + + while ((pSrc < pSrc_end) && (*pSrc == *pDst)) + { + pSrc++; + pDst++; + } + + if (pSrc < pSrc_end) + return *pSrc < *pDst; + + return m_size < rhs.m_size; + } + + inline void swap(vector& other) + { + std::swap(m_p, other.m_p); + std::swap(m_size, other.m_size); + std::swap(m_capacity, other.m_capacity); + } + + inline void sort() + { + std::sort(begin(), end()); + } + + inline void unique() + { + if (!empty()) + { + sort(); + + resize(std::unique(begin(), end()) - begin()); + } + } + + inline void reverse() + { + const size_t j = m_size >> 1; + + for (size_t i = 0; i < j; i++) + std::swap(m_p[i], m_p[m_size - 1 - i]); + } + + inline bool find(const T& key, size_t &idx) const + { + idx = 0; + + const T* p = m_p; + const T* p_end = m_p + m_size; + + size_t index = 0; + + while (p != p_end) + { + if (key == *p) + { + idx = index; + return true; + } + + p++; + index++; + } + + return false; + } + + inline bool find_sorted(const T& key, size_t& idx) const + { + idx = 0; + + if (!m_size) + return false; + + // Inclusive range + size_t low = 0, high = m_size - 1; + + while (low <= high) + { + size_t mid = (size_t)(((uint64_t)low + (uint64_t)high) >> 1); + + const T* pTrial_key = m_p + mid; + + // Sanity check comparison operator + assert(!((*pTrial_key < key) && (key < *pTrial_key))); + + if (*pTrial_key < key) + { + if (add_overflow_check(mid, 1)) + break; + + low = mid + 1; + } + else if (key < *pTrial_key) + { + if 
(!mid) + break; + + high = mid - 1; + } + else + { + idx = mid; + return true; + } + } + + return false; + } + + inline size_t count_occurences(const T& key) const + { + size_t c = 0; + + const T* p = m_p; + const T* p_end = m_p + m_size; + + while (p != p_end) + { + if (key == *p) + c++; + + p++; + } + + return c; + } + + inline void set_all(const T& o) + { + if ((sizeof(T) == 1) && (scalar_type::cFlag)) + { +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif +#endif + memset(m_p, *reinterpret_cast(&o), m_size); + +#ifndef __EMSCRIPTEN__ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#endif + } + else + { + T* pDst = m_p; + T* pDst_end = pDst + m_size; + while (pDst != pDst_end) + *pDst++ = o; + } + } + + // Caller assumes ownership of the heap block associated with the container. Container is cleared. + // Caller must use free() on the returned pointer. + inline void* assume_ownership() + { + T* p = m_p; + m_p = nullptr; + m_size = 0; + m_capacity = 0; + return p; + } + + // Caller is granting ownership of the indicated heap block. + // Block must have size constructed elements, and have enough room for capacity elements. + // The block must have been allocated using malloc(). + // Important: This method is used in Basis Universal. If you change how this container allocates memory, you'll need to change any users of this method. + inline bool grant_ownership(T* p, size_t size, size_t capacity) + { + // To prevent the caller from obviously shooting themselves in the foot. + if (((p + capacity) > m_p) && (p < (m_p + m_capacity))) + { + // Can grant ownership of a block inside the container itself! 
+ assert(0); + return false; + } + + if (size > capacity) + { + assert(0); + return false; + } + + if (!p) + { + if (capacity) + { + assert(0); + return false; + } + } + else if (!capacity) + { + assert(0); + return false; + } + + clear(); + m_p = p; + m_size = size; + m_capacity = capacity; + return true; + } + + readable_span get_readable_span() const + { + return readable_span(m_p, m_size); + } + + writable_span get_writable_span() + { + return writable_span(m_p, m_size); + } + + private: + T* m_p; + size_t m_size; // the number of constructed objects + size_t m_capacity; // the size of the allocation + + template struct is_vector { enum { cFlag = false }; }; + template struct is_vector< vector > { enum { cFlag = true }; }; + + static void object_mover(void* pDst_void, void* pSrc_void, size_t num) + { + T* pSrc = static_cast(pSrc_void); + T* const pSrc_end = pSrc + num; + T* pDst = static_cast(pDst_void); + + while (pSrc != pSrc_end) + { + new ((void*)(pDst)) T(std::move(*pSrc)); + scalar_type::destruct(pSrc); + + ++pSrc; + ++pDst; + } + } + + inline bool increase_capacity(size_t min_new_capacity, bool grow_hint, bool nofail = false) + { + return reinterpret_cast(this)->increase_capacity( + min_new_capacity, grow_hint, sizeof(T), + (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(T) || (is_vector::cFlag)) ? nullptr : object_mover, nofail); + } + }; + + template struct bitwise_movable< vector > { enum { cFlag = true }; }; + + // Hash map + // rg TODO 9/8/2024: I've upgraded this class to support 64-bit size_t, and it needs a lot more testing. + + const uint32_t SIZE_T_BITS = sizeof(size_t) * 8U; + + inline uint32_t safe_shift_left(uint32_t v, uint32_t l) + { + return (l < 32U) ? (v << l) : 0; + } + + inline uint64_t safe_shift_left(uint64_t v, uint32_t l) + { + return (l < 64U) ? 
(v << l) : 0; + } + + template + struct hasher + { + inline size_t operator() (const T& key) const { return static_cast(key); } + }; + + template + struct equal_to + { + inline bool operator()(const T& a, const T& b) const { return a == b; } + }; + + // Important: The Hasher and Equals objects must be bitwise movable! + template, typename Equals = equal_to > + class hash_map + { + public: + class iterator; + class const_iterator; + + private: + friend class iterator; + friend class const_iterator; + + enum state + { + cStateInvalid = 0, + cStateValid = 1 + }; + + enum + { + cMinHashSize = 4U + }; + + public: + typedef hash_map hash_map_type; + typedef std::pair value_type; + typedef Key key_type; + typedef Value referent_type; + typedef Hasher hasher_type; + typedef Equals equals_type; + + hash_map() : + m_num_valid(0), + m_grow_threshold(0), + m_hash_shift(SIZE_T_BITS) + { + static_assert((SIZE_T_BITS == 32) || (SIZE_T_BITS == 64), "SIZE_T_BITS must be 32 or 64"); + } + + hash_map(const hash_map& other) : + m_values(other.m_values), + m_num_valid(other.m_num_valid), + m_grow_threshold(other.m_grow_threshold), + m_hash_shift(other.m_hash_shift), + m_hasher(other.m_hasher), + m_equals(other.m_equals) + { + static_assert((SIZE_T_BITS == 32) || (SIZE_T_BITS == 64), "SIZE_T_BITS must be 32 or 64"); + } + + hash_map(hash_map&& other) : + m_values(std::move(other.m_values)), + m_num_valid(other.m_num_valid), + m_grow_threshold(other.m_grow_threshold), + m_hash_shift(other.m_hash_shift), + m_hasher(std::move(other.m_hasher)), + m_equals(std::move(other.m_equals)) + { + static_assert((SIZE_T_BITS == 32) || (SIZE_T_BITS == 64), "SIZE_T_BITS must be 32 or 64"); + + other.m_hash_shift = SIZE_T_BITS; + other.m_num_valid = 0; + other.m_grow_threshold = 0; + } + + hash_map& operator= (const hash_map& other) + { + if (this == &other) + return *this; + + clear(); + + m_values = other.m_values; + m_hash_shift = other.m_hash_shift; + m_num_valid = other.m_num_valid; + 
m_grow_threshold = other.m_grow_threshold; + m_hasher = other.m_hasher; + m_equals = other.m_equals; + + return *this; + } + + hash_map& operator= (hash_map&& other) + { + if (this == &other) + return *this; + + clear(); + + m_values = std::move(other.m_values); + m_hash_shift = other.m_hash_shift; + m_num_valid = other.m_num_valid; + m_grow_threshold = other.m_grow_threshold; + m_hasher = std::move(other.m_hasher); + m_equals = std::move(other.m_equals); + + other.m_hash_shift = SIZE_T_BITS; + other.m_num_valid = 0; + other.m_grow_threshold = 0; + + return *this; + } + + inline ~hash_map() + { + clear(); + } + + inline const Equals& get_equals() const { return m_equals; } + inline Equals& get_equals() { return m_equals; } + inline void set_equals(const Equals& equals) { m_equals = equals; } + + inline const Hasher& get_hasher() const { return m_hasher; } + inline Hasher& get_hasher() { return m_hasher; } + inline void set_hasher(const Hasher& hasher) { m_hasher = hasher; } + + inline void clear() + { + if (m_values.empty()) + return; + + if (BASISU_HAS_DESTRUCTOR(Key) || BASISU_HAS_DESTRUCTOR(Value)) + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + size_t num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + destruct_value_type(p); + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + + m_values.clear_no_destruction(); + + m_hash_shift = SIZE_T_BITS; + m_num_valid = 0; + m_grow_threshold = 0; + } + + inline void reset() + { + if (!m_num_valid) + return; + + if (BASISU_HAS_DESTRUCTOR(Key) || BASISU_HAS_DESTRUCTOR(Value)) + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + size_t num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + destruct_value_type(p); + p->state = cStateInvalid; + + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + else if (sizeof(node) <= 16) + { + memset(&m_values[0], 0, m_values.size_in_bytes()); + } + else + { + node* 
p = &get_node(0); + node* p_end = p + m_values.size(); + + size_t num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + p->state = cStateInvalid; + + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + + m_num_valid = 0; + } + + inline size_t size() + { + return m_num_valid; + } + + inline size_t get_table_size() + { + return m_values.size(); + } + + inline bool empty() + { + return !m_num_valid; + } + + inline bool reserve(size_t new_capacity) + { + if (!new_capacity) + return true; + + uint64_t new_hash_size = new_capacity; + + new_hash_size = new_hash_size * 2ULL; + + if (!helpers::is_power_of_2(new_hash_size)) + new_hash_size = helpers::next_pow2(new_hash_size); + + new_hash_size = helpers::maximum(cMinHashSize, new_hash_size); + + if (!can_fit_into_size_t(new_hash_size)) + { + assert(0); + return false; + } + + assert(new_hash_size >= new_capacity); + + if (new_hash_size <= m_values.size()) + return true; + + return rehash((size_t)new_hash_size); + } + + class iterator + { + friend class hash_map; + friend class hash_map::const_iterator; + + public: + inline iterator() : m_pTable(nullptr), m_index(0) { } + inline iterator(hash_map_type& table, size_t index) : m_pTable(&table), m_index(index) { } + inline iterator(const iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + + inline iterator& operator= (const iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + // post-increment + inline iterator operator++(int) + { + iterator result(*this); + ++*this; + return result; + } + + // pre-increment + inline iterator& operator++() + { + probe(); + return *this; + } + + inline value_type& operator*() const { return *get_cur(); } + inline value_type* operator->() const { return get_cur(); } + + inline bool operator == (const iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const iterator& b) const { 
return !(*this == b); } + inline bool operator == (const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const const_iterator& b) const { return !(*this == b); } + + private: + hash_map_type* m_pTable; + size_t m_index; + + inline value_type* get_cur() const + { + assert(m_pTable && (m_index < m_pTable->m_values.size())); + assert(m_pTable->get_node_state(m_index) == cStateValid); + + return &m_pTable->get_node(m_index); + } + + inline void probe() + { + assert(m_pTable); + m_index = m_pTable->find_next(m_index); + } + }; + + class const_iterator + { + friend class hash_map; + friend class hash_map::iterator; + + public: + inline const_iterator() : m_pTable(nullptr), m_index(0) { } + inline const_iterator(const hash_map_type& table, size_t index) : m_pTable(&table), m_index(index) { } + inline const_iterator(const iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + inline const_iterator(const const_iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + + inline const_iterator& operator= (const const_iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + inline const_iterator& operator= (const iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + // post-increment + inline const_iterator operator++(int) + { + const_iterator result(*this); + ++*this; + return result; + } + + // pre-increment + inline const_iterator& operator++() + { + probe(); + return *this; + } + + inline const value_type& operator*() const { return *get_cur(); } + inline const value_type* operator->() const { return get_cur(); } + + inline bool operator == (const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const const_iterator& b) const { return !(*this == b); } + inline bool operator == (const iterator& b) const { return 
(m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const iterator& b) const { return !(*this == b); } + + private: + const hash_map_type* m_pTable; + size_t m_index; + + inline const value_type* get_cur() const + { + assert(m_pTable && (m_index < m_pTable->m_values.size())); + assert(m_pTable->get_node_state(m_index) == cStateValid); + + return &m_pTable->get_node(m_index); + } + + inline void probe() + { + assert(m_pTable); + m_index = m_pTable->find_next(m_index); + } + }; + + inline const_iterator begin() const + { + if (!m_num_valid) + return end(); + + return const_iterator(*this, find_next(std::numeric_limits::max())); + } + + inline const_iterator end() const + { + return const_iterator(*this, m_values.size()); + } + + inline iterator begin() + { + if (!m_num_valid) + return end(); + + return iterator(*this, find_next(std::numeric_limits::max())); + } + + inline iterator end() + { + return iterator(*this, m_values.size()); + } + + // insert_result.first will always point to inserted key/value (or the already existing key/value). + // insert_result.second will be true if a new key/value was inserted, or false if the key already existed (in which case first will point to the already existing value). + typedef std::pair insert_result; + + inline insert_result insert(const Key& k, const Value& v = Value()) + { + insert_result result; + if (!insert_no_grow(result, k, v)) + { + if (!try_grow()) + container_abort("hash_map::try_grow() failed"); + + // This must succeed. 
+ if (!insert_no_grow(result, k, v)) + container_abort("hash_map::insert() failed"); + } + + return result; + } + + inline bool try_insert(insert_result& result, const Key& k, const Value& v = Value()) + { + if (!insert_no_grow(result, k, v)) + { + if (!try_grow()) + return false; + + if (!insert_no_grow(result, k, v)) + return false; + } + + return true; + } + + inline insert_result insert(Key&& k, Value&& v = Value()) + { + insert_result result; + if (!insert_no_grow_move(result, std::move(k), std::move(v))) + { + if (!try_grow()) + container_abort("hash_map::try_grow() failed"); + + // This must succeed. + if (!insert_no_grow_move(result, std::move(k), std::move(v))) + container_abort("hash_map::insert() failed"); + } + + return result; + } + + inline bool try_insert(insert_result& result, Key&& k, Value&& v = Value()) + { + if (!insert_no_grow_move(result, std::move(k), std::move(v))) + { + if (!try_grow()) + return false; + + if (!insert_no_grow_move(result, std::move(k), std::move(v))) + return false; + } + + return true; + } + + inline insert_result insert(const value_type& v) + { + return insert(v.first, v.second); + } + + inline bool try_insert(insert_result& result, const value_type& v) + { + return try_insert(result, v.first, v.second); + } + + inline insert_result insert(value_type&& v) + { + return insert(std::move(v.first), std::move(v.second)); + } + + inline bool try_insert(insert_result& result, value_type&& v) + { + return try_insert(result, std::move(v.first), std::move(v.second)); + } + + inline const_iterator find(const Key& k) const + { + return const_iterator(*this, find_index(k)); + } + + inline iterator find(const Key& k) + { + return iterator(*this, find_index(k)); + } + + inline bool contains(const Key& k) const + { + const size_t idx = find_index(k); + return idx != m_values.size(); + } + + inline bool erase(const Key& k) + { + size_t i = find_index(k); + + if (i >= m_values.size()) + return false; + + node* pDst = &get_node(i); + 
destruct_value_type(pDst); + pDst->state = cStateInvalid; + + m_num_valid--; + + for (; ; ) + { + size_t r, j = i; + + node* pSrc = pDst; + + do + { + if (!i) + { + i = m_values.size() - 1; + pSrc = &get_node(i); + } + else + { + i--; + pSrc--; + } + + if (!pSrc->state) + return true; + + r = hash_key(pSrc->first); + + } while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r)); + + move_node(pDst, pSrc); + + pDst = pSrc; + } + } + + inline void swap(hash_map_type& other) + { + m_values.swap(other.m_values); + std::swap(m_hash_shift, other.m_hash_shift); + std::swap(m_num_valid, other.m_num_valid); + std::swap(m_grow_threshold, other.m_grow_threshold); + std::swap(m_hasher, other.m_hasher); + std::swap(m_equals, other.m_equals); + } + + private: + struct node : public value_type + { + uint8_t state; + }; + + static inline void construct_value_type(value_type* pDst, const Key& k, const Value& v) + { + if (BASISU_IS_BITWISE_COPYABLE(Key)) + memcpy(&pDst->first, &k, sizeof(Key)); + else + scalar_type::construct(&pDst->first, k); + + if (BASISU_IS_BITWISE_COPYABLE(Value)) + memcpy(&pDst->second, &v, sizeof(Value)); + else + scalar_type::construct(&pDst->second, v); + } + + static inline void construct_value_type(value_type* pDst, const value_type* pSrc) + { + if ((BASISU_IS_BITWISE_COPYABLE(Key)) && (BASISU_IS_BITWISE_COPYABLE(Value))) + { + memcpy(pDst, pSrc, sizeof(value_type)); + } + else + { + if (BASISU_IS_BITWISE_COPYABLE(Key)) + memcpy(&pDst->first, &pSrc->first, sizeof(Key)); + else + scalar_type::construct(&pDst->first, pSrc->first); + + if (BASISU_IS_BITWISE_COPYABLE(Value)) + memcpy(&pDst->second, &pSrc->second, sizeof(Value)); + else + scalar_type::construct(&pDst->second, pSrc->second); + } + } + + static inline void destruct_value_type(value_type* p) + { + scalar_type::destruct(&p->first); + scalar_type::destruct(&p->second); + } + + // Moves nodes *pSrc to *pDst efficiently from one hashmap to another. 
+ // pDst should NOT be constructed on entry. + static inline void move_node(node* pDst, node* pSrc, bool update_src_state = true) + { + assert(!pDst->state); + + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Key) && BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) + { + memcpy(pDst, pSrc, sizeof(node)); + + assert(pDst->state == cStateValid); + } + else + { + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Key)) + memcpy(&pDst->first, &pSrc->first, sizeof(Key)); + else + { + new ((void*)&pDst->first) Key(std::move(pSrc->first)); + scalar_type::destruct(&pSrc->first); + } + + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) + memcpy(&pDst->second, &pSrc->second, sizeof(Value)); + else + { + new ((void*)&pDst->second) Value(std::move(pSrc->second)); + scalar_type::destruct(&pSrc->second); + } + + pDst->state = cStateValid; + } + + if (update_src_state) + pSrc->state = cStateInvalid; + } + + struct raw_node + { + inline raw_node() + { + node* p = reinterpret_cast(this); + p->state = cStateInvalid; + } + + // In practice, this should never be called (right?). We manage destruction ourselves. 
+ inline ~raw_node() + { + node* p = reinterpret_cast(this); + if (p->state) + hash_map_type::destruct_value_type(p); + } + + inline raw_node(const raw_node& other) + { + node* pDst = reinterpret_cast(this); + const node* pSrc = reinterpret_cast(&other); + + if (pSrc->state) + { + hash_map_type::construct_value_type(pDst, pSrc); + pDst->state = cStateValid; + } + else + pDst->state = cStateInvalid; + } + + inline raw_node& operator= (const raw_node& rhs) + { + if (this == &rhs) + return *this; + + node* pDst = reinterpret_cast(this); + const node* pSrc = reinterpret_cast(&rhs); + + if (pSrc->state) + { + if (pDst->state) + { + pDst->first = pSrc->first; + pDst->second = pSrc->second; + } + else + { + hash_map_type::construct_value_type(pDst, pSrc); + pDst->state = cStateValid; + } + } + else if (pDst->state) + { + hash_map_type::destruct_value_type(pDst); + pDst->state = cStateInvalid; + } + + return *this; + } + + uint8_t m_bits[sizeof(node)]; + }; + + typedef basisu::vector node_vector; + + node_vector m_values; + + size_t m_num_valid; + size_t m_grow_threshold; + + uint32_t m_hash_shift; + + Hasher m_hasher; + Equals m_equals; + + inline size_t hash_key(const Key& k) const + { + assert((safe_shift_left(static_cast(1), (SIZE_T_BITS - m_hash_shift))) == m_values.size()); + + // Fibonacci hashing + if (SIZE_T_BITS == 32) + { + assert(m_hash_shift != 32); + + uint32_t hash = static_cast(m_hasher(k)); + hash = (2654435769U * hash) >> m_hash_shift; + + assert(hash < m_values.size()); + return (size_t)hash; + } + else + { + assert(m_hash_shift != 64); + + uint64_t hash = static_cast(m_hasher(k)); + hash = (0x9E3779B97F4A7C15ULL * hash) >> m_hash_shift; + + assert(hash < m_values.size()); + return (size_t)hash; + } + } + + inline const node& get_node(size_t index) const + { + return *reinterpret_cast(&m_values[index]); + } + + inline node& get_node(size_t index) + { + return *reinterpret_cast(&m_values[index]); + } + + inline state get_node_state(size_t index) const + { 
+ return static_cast(get_node(index).state); + } + + inline void set_node_state(size_t index, bool valid) + { + get_node(index).state = valid; + } + + inline bool try_grow() + { + uint64_t n = m_values.size() * 2ULL; + + if (!helpers::is_power_of_2(n)) + n = helpers::next_pow2(n); + + if (!can_fit_into_size_t(n)) + { + assert(0); + return false; + } + + return rehash(helpers::maximum(cMinHashSize, (size_t)n)); + } + + // new_hash_size must be a power of 2. + inline bool rehash(size_t new_hash_size) + { + if (!helpers::is_power_of_2((uint64_t)new_hash_size)) + { + assert(0); + return false; + } + + if (new_hash_size < m_num_valid) + { + assert(0); + return false; + } + + if (new_hash_size == m_values.size()) + return true; + + hash_map new_map; + if (!new_map.m_values.try_resize(new_hash_size)) + return false; + + new_map.m_hash_shift = SIZE_T_BITS - helpers::floor_log2i((uint64_t)new_hash_size); + assert(new_hash_size == safe_shift_left(static_cast(1), SIZE_T_BITS - new_map.m_hash_shift)); + + new_map.m_grow_threshold = std::numeric_limits::max(); + + node* pNode = reinterpret_cast(m_values.begin()); + node* pNode_end = pNode + m_values.size(); + + while (pNode != pNode_end) + { + if (pNode->state) + { + new_map.move_into(pNode); + + if (new_map.m_num_valid == m_num_valid) + break; + } + + pNode++; + } + + new_map.m_grow_threshold = new_hash_size >> 1U; + if (new_hash_size & 1) + new_map.m_grow_threshold++; + + m_values.clear_no_destruction(); + m_hash_shift = SIZE_T_BITS; + + swap(new_map); + + return true; + } + + inline size_t find_next(size_t index) const + { + index++; + + if (index >= m_values.size()) + return index; + + const node* pNode = &get_node(index); + + for (; ; ) + { + if (pNode->state) + break; + + if (++index >= m_values.size()) + break; + + pNode++; + } + + return index; + } + + inline size_t find_index(const Key& k) const + { + if (m_num_valid) + { + size_t index = hash_key(k); + const node* pNode = &get_node(index); + + if (pNode->state) + { + 
if (m_equals(pNode->first, k)) + return index; + + const size_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (index == orig_index) + break; + + if (!pNode->state) + break; + + if (m_equals(pNode->first, k)) + return index; + } + } + } + + return m_values.size(); + } + + inline bool insert_no_grow(insert_result& result, const Key& k, const Value& v) + { + if (!m_values.size()) + return false; + + size_t index = hash_key(k); + node* pNode = &get_node(index); + + if (pNode->state) + { + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + + const size_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (orig_index == index) + return false; + + if (!pNode->state) + break; + + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + } + } + + if (m_num_valid >= m_grow_threshold) + return false; + + construct_value_type(pNode, k, v); + + pNode->state = cStateValid; + + m_num_valid++; + assert(m_num_valid <= m_values.size()); + + result.first = iterator(*this, index); + result.second = true; + + return true; + } + + // Move user supplied key/value into a node. + static inline void move_value_type(value_type* pDst, Key&& k, Value&& v) + { + // Not checking for is MOVABLE because the caller could later destruct k and/or v (what state do we set them to?) + if (BASISU_IS_BITWISE_COPYABLE(Key)) + { + memcpy(&pDst->first, &k, sizeof(Key)); + } + else + { + new ((void*)&pDst->first) Key(std::move(k)); + // No destruction - user will do that (we don't own k). 
+ } + + if (BASISU_IS_BITWISE_COPYABLE(Value)) + { + memcpy(&pDst->second, &v, sizeof(Value)); + } + else + { + new ((void*)&pDst->second) Value(std::move(v)); + // No destruction - user will do that (we don't own v). + } + } + + // Insert user provided k/v, by moving, into the current hash table + inline bool insert_no_grow_move(insert_result& result, Key&& k, Value&& v) + { + if (!m_values.size()) + return false; + + size_t index = hash_key(k); + node* pNode = &get_node(index); + + if (pNode->state) + { + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + + const size_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (orig_index == index) + return false; + + if (!pNode->state) + break; + + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + } + } + + if (m_num_valid >= m_grow_threshold) + return false; + + move_value_type(pNode, std::move(k), std::move(v)); + + pNode->state = cStateValid; + + m_num_valid++; + assert(m_num_valid <= m_values.size()); + + result.first = iterator(*this, index); + result.second = true; + + return true; + } + + // Insert pNode by moving into the current hash table + inline void move_into(node* pNode) + { + size_t index = hash_key(pNode->first); + node* pDst_node = &get_node(index); + + if (pDst_node->state) + { + const size_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pDst_node = &get_node(index); + } + else + { + index--; + pDst_node--; + } + + if (index == orig_index) + { + assert(false); + return; + } + + if (!pDst_node->state) + break; + } + } + + // No need to update the source node's state (it's going away) + move_node(pDst_node, pNode, false); + + m_num_valid++; + } + }; + + template + struct bitwise_movable< hash_map > { enum { 
cFlag = true }; }; + +#if BASISU_HASHMAP_TEST + extern void hash_map_test(); +#endif + + // String formatting + inline std::string string_format(const char* pFmt, ...) + { + char buf[2048]; + + va_list args; + va_start(args, pFmt); +#ifdef _WIN32 + vsprintf_s(buf, sizeof(buf), pFmt, args); +#else + vsnprintf(buf, sizeof(buf), pFmt, args); +#endif + va_end(args); + + return std::string(buf); + } + + enum class variant_type + { + cInvalid, + cI32, cU32, + cI64, cU64, + cFlt, cDbl, cBool, + cStrPtr, cStdStr + }; + + struct fmt_variant + { + union + { + int32_t m_i32; + uint32_t m_u32; + int64_t m_i64; + uint64_t m_u64; + float m_flt; + double m_dbl; + bool m_bool; + const char* m_pStr; + }; + + std::string m_str; + + variant_type m_type; + + inline fmt_variant() : + m_u64(0), + m_type(variant_type::cInvalid) + { + } + + inline fmt_variant(const fmt_variant& other) : + m_u64(other.m_u64), + m_str(other.m_str), + m_type(other.m_type) + { + } + + inline fmt_variant(fmt_variant&& other) : + m_u64(other.m_u64), + m_str(std::move(other.m_str)), + m_type(other.m_type) + { + other.m_type = variant_type::cInvalid; + other.m_u64 = 0; + } + + inline fmt_variant& operator= (fmt_variant&& other) + { + if (this == &other) + return *this; + + m_type = other.m_type; + m_u64 = other.m_u64; + m_str = std::move(other.m_str); + + other.m_type = variant_type::cInvalid; + other.m_u64 = 0; + + return *this; + } + + inline fmt_variant& operator= (const fmt_variant& rhs) + { + if (this == &rhs) + return *this; + + m_u64 = rhs.m_u64; + m_type = rhs.m_type; + m_str = rhs.m_str; + + return *this; + } + + inline fmt_variant(int32_t v) : m_i32(v), m_type(variant_type::cI32) { } + inline fmt_variant(uint32_t v) : m_u32(v), m_type(variant_type::cU32) { } + inline fmt_variant(int64_t v) : m_i64(v), m_type(variant_type::cI64) { } + inline fmt_variant(uint64_t v) : m_u64(v), m_type(variant_type::cU64) { } +#ifdef _MSC_VER + inline fmt_variant(unsigned long v) : m_u64(v), m_type(variant_type::cU64) {} + 
inline fmt_variant(long v) : m_i64(v), m_type(variant_type::cI64) {} +#endif + inline fmt_variant(float v) : m_flt(v), m_type(variant_type::cFlt) { } + inline fmt_variant(double v) : m_dbl(v), m_type(variant_type::cDbl) { } + inline fmt_variant(const char* pStr) : m_pStr(pStr), m_type(variant_type::cStrPtr) { } + inline fmt_variant(const std::string& str) : m_u64(0), m_str(str), m_type(variant_type::cStdStr) { } + inline fmt_variant(bool val) : m_bool(val), m_type(variant_type::cBool) { } + + bool to_string(std::string& res, std::string& fmt) const; + }; + + typedef basisu::vector fmt_variant_vec; + + bool fmt_variants(std::string& res, const char* pFmt, const fmt_variant_vec& variants); + + template + inline bool fmt_string(std::string& res, const char* pFmt, Args&&... args) + { + return fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward(args))... }); + } + + template + inline std::string fmt_string(const char* pFmt, Args&&... args) + { + std::string res; + fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward(args))... }); + return res; + } + + template + inline int fmt_printf(const char* pFmt, Args&&... args) + { + std::string res; + if (!fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward(args))... })) + return EOF; + + return fputs(res.c_str(), stdout); + } + + template + inline int fmt_fprintf(FILE* pFile, const char* pFmt, Args&&... args) + { + std::string res; + if (!fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward(args))... })) + return EOF; + + return fputs(res.c_str(), pFile); + } + + // fixed_array - zero initialized by default, operator[] is always bounds checked. 
+ template + class fixed_array + { + static_assert(N >= 1, "fixed_array size must be at least 1"); + + public: + using value_type = T; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using reference = T&; + using const_reference = const T&; + using pointer = T*; + using const_pointer = const T*; + using iterator = T*; + using const_iterator = const T*; + + T m_data[N]; + + BASISU_FORCE_INLINE fixed_array() + { + initialize_array(); + } + + BASISU_FORCE_INLINE fixed_array(std::initializer_list list) + { + assert(list.size() <= N); + + std::size_t copy_size = std::min(list.size(), N); + std::copy_n(list.begin(), copy_size, m_data); // Copy up to min(list.size(), N) + + if (list.size() < N) + { + // Initialize the rest of the array + std::fill(m_data + copy_size, m_data + N, T{}); + } + } + + BASISU_FORCE_INLINE T& operator[](std::size_t index) + { + if (index >= N) + container_abort("fixed_array: Index out of bounds."); + return m_data[index]; + } + + BASISU_FORCE_INLINE const T& operator[](std::size_t index) const + { + if (index >= N) + container_abort("fixed_array: Index out of bounds."); + return m_data[index]; + } + + BASISU_FORCE_INLINE T* begin() { return m_data; } + BASISU_FORCE_INLINE const T* begin() const { return m_data; } + + BASISU_FORCE_INLINE T* end() { return m_data + N; } + BASISU_FORCE_INLINE const T* end() const { return m_data + N; } + + BASISU_FORCE_INLINE const T* data() const { return m_data; } + BASISU_FORCE_INLINE T* data() { return m_data; } + + BASISU_FORCE_INLINE const T& front() const { return m_data[0]; } + BASISU_FORCE_INLINE T& front() { return m_data[0]; } + + BASISU_FORCE_INLINE const T& back() const { return m_data[N - 1]; } + BASISU_FORCE_INLINE T& back() { return m_data[N - 1]; } + + BASISU_FORCE_INLINE constexpr std::size_t size() const { return N; } + + BASISU_FORCE_INLINE void clear() + { + initialize_array(); // Reinitialize the array + } + + BASISU_FORCE_INLINE void set_all(const T& value) + { + 
std::fill(m_data, m_data + N, value); + } + + BASISU_FORCE_INLINE readable_span get_readable_span() const + { + return readable_span(m_data, N); + } + + BASISU_FORCE_INLINE writable_span get_writable_span() + { + return writable_span(m_data, N); + } + + private: + BASISU_FORCE_INLINE void initialize_array() + { + if constexpr (std::is_integral::value || std::is_floating_point::value) + memset(m_data, 0, sizeof(m_data)); + else + std::fill(m_data, m_data + N, T{}); + } + + BASISU_FORCE_INLINE T& access_element(std::size_t index) + { + if (index >= N) + container_abort("fixed_array: Index out of bounds."); + return m_data[index]; + } + + BASISU_FORCE_INLINE const T& access_element(std::size_t index) const + { + if (index >= N) + container_abort("fixed_array: Index out of bounds."); + return m_data[index]; + } + }; + + // 2D array + + template + class vector2D + { + typedef basisu::vector vec_type; + + uint32_t m_width, m_height; + vec_type m_values; + + public: + vector2D() : + m_width(0), + m_height(0) + { + } + + vector2D(uint32_t w, uint32_t h) : + m_width(0), + m_height(0) + { + resize(w, h); + } + + vector2D(const vector2D& other) + { + *this = other; + } + + vector2D(vector2D&& other) : + m_width(0), + m_height(0) + { + *this = std::move(other); + } + + vector2D& operator= (const vector2D& other) + { + if (this != &other) + { + m_width = other.m_width; + m_height = other.m_height; + m_values = other.m_values; + } + return *this; + } + + vector2D& operator= (vector2D&& other) + { + if (this != &other) + { + m_width = other.m_width; + m_height = other.m_height; + m_values = std::move(other.m_values); + + other.m_width = 0; + other.m_height = 0; + } + return *this; + } + + inline bool operator== (const vector2D& rhs) const + { + return (m_width == rhs.m_width) && (m_height == rhs.m_height) && (m_values == rhs.m_values); + } + + inline size_t size_in_bytes() const { return m_values.size_in_bytes(); } + + inline uint32_t get_width() const { return m_width; } + 
inline uint32_t get_height() const { return m_height; } + + inline const T& operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } + inline T& operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } + + inline size_t size() const { return m_values.size(); } + + inline const T& operator[] (uint32_t i) const { return m_values[i]; } + inline T& operator[] (uint32_t i) { return m_values[i]; } + + inline const T& at_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline T& at_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + void clear() + { + m_width = 0; + m_height = 0; + m_values.clear(); + } + + void set_all(const T& val) + { + vector_set_all(m_values, val); + } + + inline const T* get_ptr() const { return m_values.data(); } + inline T* get_ptr() { return m_values.data(); } + + vector2D& resize(uint32_t new_width, uint32_t new_height) + { + if ((m_width == new_width) && (m_height == new_height)) + return *this; + + const uint64_t total_vals = (uint64_t)new_width * new_height; + + if (!can_fit_into_size_t(total_vals)) + { + // What can we do? + assert(0); + return *this; + } + + vec_type oldVals((size_t)total_vals); + oldVals.swap(m_values); + + const uint32_t w = minimum(m_width, new_width); + const uint32_t h = minimum(m_height, new_height); + + if ((w) && (h)) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + m_values[x + y * new_width] = oldVals[x + y * m_width]; + } + + m_width = new_width; + m_height = new_height; + + return *this; + } + + bool try_resize(uint32_t new_width, uint32_t new_height) + { + if ((m_width == new_width) && (m_height == new_height)) + return true; + + const uint64_t total_vals = (uint64_t)new_width * new_height; + + if (!can_fit_into_size_t(total_vals)) + { + // What can we do? 
+ assert(0); + return false; + } + + vec_type oldVals; + if (!oldVals.try_resize((size_t)total_vals)) + return false; + + oldVals.swap(m_values); + + const uint32_t w = minimum(m_width, new_width); + const uint32_t h = minimum(m_height, new_height); + + if ((w) && (h)) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + m_values[x + y * new_width] = oldVals[x + y * m_width]; + } + + m_width = new_width; + m_height = new_height; + + return true; + } + + const vector2D& extract_block_clamped(T* pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const + { + // HACK HACK + if (((src_x + w) > m_width) || ((src_y + h) > m_height)) + { + // Slower clamping case + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = at_clamped(src_x + x, src_y + y); + } + else + { + const T* pSrc = &m_values[src_x + src_y * m_width]; + + for (uint32_t y = 0; y < h; y++) + { + memcpy(pDst, pSrc, w * sizeof(T)); + pSrc += m_width; + pDst += w; + } + } + + return *this; + } + }; + +} // namespace basisu + +namespace std +{ + template + inline void swap(basisu::vector& a, basisu::vector& b) + { + a.swap(b); + } + + template + inline void swap(basisu::hash_map& a, basisu::hash_map& b) + { + a.swap(b); + } + +} // namespace std diff --git a/thirdparty/basisu/transcoder/basisu_containers_impl.h b/thirdparty/basisu/transcoder/basisu_containers_impl.h new file mode 100644 index 000000000..3d7aaddca --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_containers_impl.h @@ -0,0 +1,814 @@ +// basisu_containers_impl.h +// Do not include directly + +#include + +#ifdef _MSC_VER +#pragma warning (disable:4127) // warning C4127: conditional expression is constant +#endif + +namespace basisu +{ + // A container operation has internally panicked in an unrecoverable way. + // Either an allocation has failed, or a range or consistency check has failed. 
+#ifdef _MSC_VER + __declspec(noreturn) +#else + [[noreturn]] +#endif + void container_abort(const char* pMsg, ...) + { + assert(0); + + va_list args; + va_start(args, pMsg); + + char buf[1024] = {}; + +#ifdef _MSC_VER + vsprintf_s(buf, sizeof(buf), pMsg, args); +#else + vsnprintf(buf, sizeof(buf), pMsg, args); +#endif + va_end(args); + + fputs(buf, stderr); + + std::terminate(); + } + + bool elemental_vector::increase_capacity(size_t min_new_capacity, bool grow_hint, size_t element_size, object_mover pMover, bool nofail_flag) + { + assert(m_size <= m_capacity); + assert(min_new_capacity >= m_size); + assert(element_size); + + // Basic sanity check min_new_capacity + if (!can_fit_into_size_t((uint64_t)min_new_capacity * element_size)) + { + assert(0); + + if (nofail_flag) + return false; + + container_abort("elemental_vector::increase_capacity: requesting too many elements\n"); + } + + // Check for sane library limits + if (sizeof(void*) == sizeof(uint64_t)) + { + // 16 GB + assert(min_new_capacity < (0x400000000ULL / element_size)); + } + else + { + // ~1.99 GB + assert(min_new_capacity < (0x7FFF0000U / element_size)); + } + + // If vector is already large enough just return. 
+ if (m_capacity >= min_new_capacity) + return true; + + uint64_t new_capacity_u64 = min_new_capacity; + + if ((grow_hint) && (!helpers::is_power_of_2(new_capacity_u64))) + { + new_capacity_u64 = helpers::next_pow2(new_capacity_u64); + + if (!can_fit_into_size_t(new_capacity_u64)) + { + assert(0); + + if (nofail_flag) + return false; + + container_abort("elemental_vector::increase_capacity: vector too large\n"); + } + } + + const uint64_t desired_size_u64 = element_size * new_capacity_u64; + + if (!can_fit_into_size_t(desired_size_u64)) + { + assert(0); + + if (nofail_flag) + return false; + + container_abort("elemental_vector::increase_capacity: vector too large\n"); + } + + const size_t desired_size = static_cast(desired_size_u64); + + size_t actual_size = 0; + BASISU_NOTE_UNUSED(actual_size); + + if (!pMover) + { + void* new_p = realloc(m_p, desired_size); + if (!new_p) + { + assert(0); + + if (nofail_flag) + return false; + + container_abort("elemental_vector::increase_capacity: realloc() failed allocating %zu bytes", desired_size); + } + +#if BASISU_VECTOR_DETERMINISTIC + actual_size = desired_size; +#elif defined(_MSC_VER) + actual_size = _msize(new_p); +#elif HAS_MALLOC_USABLE_SIZE + actual_size = malloc_usable_size(new_p); +#else + actual_size = desired_size; +#endif + m_p = new_p; + } + else + { + void* new_p = malloc(desired_size); + if (!new_p) + { + assert(0); + if (nofail_flag) + return false; + + container_abort("elemental_vector::increase_capacity: malloc() failed allocating %zu bytes", desired_size); + } + +#if BASISU_VECTOR_DETERMINISTIC + actual_size = desired_size; +#elif defined(_MSC_VER) + actual_size = _msize(new_p); +#elif HAS_MALLOC_USABLE_SIZE + actual_size = malloc_usable_size(new_p); +#else + actual_size = desired_size; +#endif + + (*pMover)(new_p, m_p, m_size); + + if (m_p) + free(m_p); + + m_p = new_p; + } + +#if BASISU_VECTOR_DETERMINISTIC + m_capacity = static_cast(new_capacity_u64); +#else + if (actual_size > desired_size) + 
m_capacity = static_cast(actual_size / element_size); + else + m_capacity = static_cast(new_capacity_u64); +#endif + + return true; + } + +#if BASISU_HASHMAP_TEST + +#define HASHMAP_TEST_VERIFY(c) do { if (!(c)) handle_hashmap_test_verify_failure(__LINE__); } while(0) + + static void handle_hashmap_test_verify_failure(int line) + { + container_abort("HASHMAP_TEST_VERIFY() faild on line %i\n", line); + } + + class counted_obj + { + public: + counted_obj(uint32_t v = 0) : + m_val(v) + { + m_count++; + } + + counted_obj(const counted_obj& obj) : + m_val(obj.m_val) + { + if (m_val != UINT64_MAX) + m_count++; + } + + counted_obj(counted_obj&& obj) : + m_val(obj.m_val) + { + obj.m_val = UINT64_MAX; + } + + counted_obj& operator= (counted_obj&& rhs) + { + if (this != &rhs) + { + m_val = rhs.m_val; + rhs.m_val = UINT64_MAX; + } + return *this; + } + + ~counted_obj() + { + if (m_val != UINT64_MAX) + { + assert(m_count > 0); + m_count--; + } + } + + static uint32_t m_count; + + uint64_t m_val; + + operator size_t() const { return (size_t)m_val; } + + bool operator== (const counted_obj& rhs) const { return m_val == rhs.m_val; } + bool operator== (const uint32_t rhs) const { return m_val == rhs; } + + }; + + uint32_t counted_obj::m_count; + + static uint32_t urand32() + { + uint32_t a = rand(); + uint32_t b = rand() << 15; + uint32_t c = rand() << (32 - 15); + return a ^ b ^ c; + } + + static int irand32(int l, int h) + { + assert(l < h); + if (l >= h) + return l; + + uint32_t range = static_cast(h - l); + + uint32_t rnd = urand32(); + + uint32_t rnd_range = static_cast((((uint64_t)range) * ((uint64_t)rnd)) >> 32U); + + int result = l + rnd_range; + assert((result >= l) && (result < h)); + return result; + } + + void hash_map_test() + { + { + basisu::hash_map s; + uint_vec k; + + for (uint32_t i = 0; i < 1000000; i++) + { + s.insert(i); + k.push_back(i); + } + + for (uint32_t i = 0; i < k.size(); i++) + { + uint32_t r = rand() ^ (rand() << 15); + + uint32_t j = i + (r % 
(k.size() - i)); + + std::swap(k[i], k[j]); + } + + basisu::hash_map s1(s); + + for (uint32_t i = 0; i < 1000000; i++) + { + auto res = s.find(i); + HASHMAP_TEST_VERIFY(res != s.end()); + HASHMAP_TEST_VERIFY(res->first == i); + s.erase(i); + } + + for (uint32_t it = 0; it < 1000000; it++) + { + uint32_t i = k[it]; + + auto res = s1.find(i); + HASHMAP_TEST_VERIFY(res != s.end()); + HASHMAP_TEST_VERIFY(res->first == i); + s1.erase(i); + } + + for (uint32_t i = 0; i < 1000000; i++) + { + auto res = s.find(i); + HASHMAP_TEST_VERIFY(res == s.end()); + + auto res1 = s1.find(i); + HASHMAP_TEST_VERIFY(res1 == s1.end()); + } + + HASHMAP_TEST_VERIFY(s.empty()); + HASHMAP_TEST_VERIFY(s1.empty()); + } + + { + typedef basisu::hash_map< uint32_t, basisu::vector > hm; + hm q; + + basisu::vector a, b; + a.push_back(1); + b.push_back(2); + b.push_back(3); + + basisu::vector c(b); + + hm::insert_result ir; + q.try_insert(ir, 1, std::move(a)); + q.try_insert(ir, 2, std::move(b)); + q.try_insert(ir, std::make_pair(3, c)); + } + + { + typedef basisu::hash_map my_hash_map; + my_hash_map m; + counted_obj a, b; + m.insert(std::move(a), std::move(b)); + } + + { + basisu::hash_map k; + basisu::hash_map l; + std::swap(k, l); + + k.begin(); + k.end(); + k.clear(); + k.empty(); + k.erase(0); + k.insert(0, 1); + k.find(0); + k.get_equals(); + k.get_hasher(); + k.get_table_size(); + k.reset(); + k.reserve(1); + k = l; + k.set_equals(l.get_equals()); + k.set_hasher(l.get_hasher()); + k.get_table_size(); + } + + uint32_t seed = 0; + for (; ; ) + { + seed++; + + typedef basisu::hash_map my_hash_map; + my_hash_map m; + + const uint32_t n = irand32(1, 100000); + + printf("%u\n", n); + + srand(seed); // r1.seed(seed); + + basisu::vector q; + + uint32_t count = 0; + for (uint32_t i = 0; i < n; i++) + { + uint32_t v = urand32() & 0x7FFFFFFF; + my_hash_map::insert_result res = m.insert(counted_obj(v), counted_obj(v ^ 0xdeadbeef)); + if (res.second) + { + count++; + q.push_back(v); + } + } + + 
HASHMAP_TEST_VERIFY(m.size() == count); + + srand(seed); + + my_hash_map cm(m); + m.clear(); + m = cm; + cm.reset(); + + for (uint32_t i = 0; i < n; i++) + { + uint32_t v = urand32() & 0x7FFFFFFF; + my_hash_map::const_iterator it = m.find(counted_obj(v)); + HASHMAP_TEST_VERIFY(it != m.end()); + HASHMAP_TEST_VERIFY(it->first == v); + HASHMAP_TEST_VERIFY(it->second == (v ^ 0xdeadbeef)); + } + + for (uint32_t t = 0; t < 2; t++) + { + const uint32_t nd = irand32(1, q.size_u32() + 1); + for (uint32_t i = 0; i < nd; i++) + { + uint32_t p = irand32(0, q.size_u32()); + + int k = q[p]; + if (k >= 0) + { + q[p] = -k - 1; + + bool s = m.erase(counted_obj(k)); + HASHMAP_TEST_VERIFY(s); + } + } + + typedef basisu::hash_map uint_hash_set; + uint_hash_set s; + + for (uint32_t i = 0; i < q.size(); i++) + { + int v = q[i]; + + if (v >= 0) + { + my_hash_map::const_iterator it = m.find(counted_obj(v)); + HASHMAP_TEST_VERIFY(it != m.end()); + HASHMAP_TEST_VERIFY(it->first == (uint32_t)v); + HASHMAP_TEST_VERIFY(it->second == ((uint32_t)v ^ 0xdeadbeef)); + + s.insert(v); + } + else + { + my_hash_map::const_iterator it = m.find(counted_obj(-v - 1)); + HASHMAP_TEST_VERIFY(it == m.end()); + } + } + + uint32_t found_count = 0; + for (my_hash_map::const_iterator it = m.begin(); it != m.end(); ++it) + { + HASHMAP_TEST_VERIFY(it->second == ((uint32_t)it->first ^ 0xdeadbeef)); + + uint_hash_set::const_iterator fit(s.find((uint32_t)it->first)); + HASHMAP_TEST_VERIFY(fit != s.end()); + + HASHMAP_TEST_VERIFY(fit->first == it->first); + + found_count++; + } + + HASHMAP_TEST_VERIFY(found_count == s.size()); + } + + HASHMAP_TEST_VERIFY(counted_obj::m_count == m.size() * 2); + } + } + +#endif // BASISU_HASHMAP_TEST + + // String formatting + + bool fmt_variant::to_string(std::string& res, std::string& fmt) const + { + res.resize(0); + + // Scan for allowed formatting characters. 
+ for (size_t i = 0; i < fmt.size(); i++) + { + const char c = fmt[i]; + + if (isdigit(c) || (c == '.') || (c == ' ') || (c == '#') || (c == '+') || (c == '-')) + continue; + + if (isalpha(c)) + { + if ((i + 1) == fmt.size()) + continue; + } + + return false; + } + + if (fmt.size() && (fmt.back() == 'c')) + { + if ((m_type == variant_type::cI32) || (m_type == variant_type::cU32)) + { + if (m_u32 > 255) + return false; + + // Explictly allowing caller to pass in a char of 0, which is ignored. + if (m_u32) + res.push_back((uint8_t)m_u32); + return true; + } + else + return false; + } + + switch (m_type) + { + case variant_type::cInvalid: + { + return false; + } + case variant_type::cI32: + { + if (fmt.size()) + { + int e = fmt.back(); + if (isalpha(e)) + { + if ((e != 'x') && (e != 'X') && (e != 'i') && (e != 'd') && (e != 'u')) + return false; + } + else + { + fmt += "i"; + } + + res = string_format((std::string("%") + fmt).c_str(), m_i32); + } + else + { + res = string_format("%i", m_i32); + } + break; + } + case variant_type::cU32: + { + if (fmt.size()) + { + int e = fmt.back(); + if (isalpha(e)) + { + if ((e != 'x') && (e != 'X') && (e != 'i') && (e != 'd') && (e != 'u')) + return false; + } + else + { + fmt += "u"; + } + + res = string_format((std::string("%") + fmt).c_str(), m_u32); + } + else + { + res = string_format("%u", m_u32); + } + break; + } + case variant_type::cI64: + { + if (fmt.size()) + { + int e = fmt.back(); + if (isalpha(e)) + { + if (e == 'x') + { + fmt.pop_back(); + fmt += PRIx64; + } + else if (e == 'X') + { + fmt.pop_back(); + fmt += PRIX64; + } + else + return false; + } + else + { + fmt += PRId64; + } + + res = string_format((std::string("%") + fmt).c_str(), m_i64); + } + else + { + res = string_format("%" PRId64, m_i64); + } + break; + } + case variant_type::cU64: + { + if (fmt.size()) + { + int e = fmt.back(); + if (isalpha(e)) + { + if (e == 'x') + { + fmt.pop_back(); + fmt += PRIx64; + } + else if (e == 'X') + { + fmt.pop_back(); + fmt 
+= PRIX64; + } + else + return false; + } + else + { + fmt += PRIu64; + } + + res = string_format((std::string("%") + fmt).c_str(), m_u64); + } + else + { + res = string_format("%" PRIu64, m_u64); + } + break; + } + case variant_type::cFlt: + { + if (fmt.size()) + { + int e = fmt.back(); + if (isalpha(e)) + { + if ((e != 'f') && (e != 'g') && (e != 'e') && (e != 'E')) + return false; + } + else + { + fmt += "f"; + } + + res = string_format((std::string("%") + fmt).c_str(), m_flt); + } + else + { + res = string_format("%f", m_flt); + } + break; + } + case variant_type::cDbl: + { + if (fmt.size()) + { + int e = fmt.back(); + if (isalpha(e)) + { + if ((e != 'f') && (e != 'g') && (e != 'e') && (e != 'E')) + return false; + } + else + { + fmt += "f"; + } + + res = string_format((std::string("%") + fmt).c_str(), m_dbl); + } + else + { + res = string_format("%f", m_dbl); + } + break; + } + case variant_type::cStrPtr: + { + if (fmt.size()) + return false; + if (!m_pStr) + return false; + res = m_pStr; + break; + } + case variant_type::cBool: + { + if (fmt.size()) + return false; + res = m_bool ? 
"true" : "false"; + break; + } + case variant_type::cStdStr: + { + if (fmt.size()) + return false; + res = m_str; + break; + } + default: + { + return false; + } + } + + return true; + } + + bool fmt_variants(std::string& res, const char* pFmt, const fmt_variant_vec& variants) + { + res.resize(0); + + // Must specify a format string + if (!pFmt) + { + assert(0); + return false; + } + + // Check format string's length + const size_t fmt_len = strlen(pFmt); + if (!fmt_len) + { + if (variants.size()) + { + assert(0); + return false; + } + return true; + } + + // Wildly estimate output length + res.reserve(fmt_len + 32); + + std::string var_fmt; + var_fmt.reserve(16); + + std::string tmp; + tmp.reserve(16); + + size_t variant_index = 0; + bool inside_brackets = false; + const char* p = pFmt; + + while (*p) + { + const uint8_t c = *p++; + + if (inside_brackets) + { + if (c == '}') + { + inside_brackets = false; + + if (variant_index >= variants.size()) + { + assert(0); + return false; + } + + if (!variants[variant_index].to_string(tmp, var_fmt)) + { + assert(0); + return false; + } + + res += tmp; + + variant_index++; + } + else + { + // Check for forbidden formatting characters. 
+ if ((c == '*') || (c == 'n') || (c == '%')) + { + assert(0); + return false; + } + + var_fmt.push_back(c); + } + } + else if (c == '{') + { + // Check for escaped '{' + if (*p == '{') + { + res.push_back((char)c); + p++; + } + else + { + inside_brackets = true; + var_fmt.resize(0); + } + } + else + { + res.push_back((char)c); + } + } + + if (inside_brackets) + { + assert(0); + return false; + } + + if (variant_index != variants.size()) + { + assert(0); + return false; + } + + return true; + } + +} // namespace basisu diff --git a/thirdparty/basisu/transcoder/basisu_file_headers.h b/thirdparty/basisu/transcoder/basisu_file_headers.h new file mode 100644 index 000000000..5c1606625 --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_file_headers.h @@ -0,0 +1,146 @@ +// basis_file_headers.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "basisu_transcoder_internal.h" + +namespace basist +{ + // Slice desc header flags + enum basis_slice_desc_flags + { + cSliceDescFlagsHasAlpha = 1, + + // Video only: Frame doesn't refer to previous frame (no usage of conditional replenishment pred symbols) + // Currently the first frame is always an I-Frame, all subsequent frames are P-Frames. This will eventually be changed to periodic I-Frames. 
+ cSliceDescFlagsFrameIsIFrame = 2 + }; + +#pragma pack(push) +#pragma pack(1) + struct basis_slice_desc + { + basisu::packed_uint<3> m_image_index; // The index of the source image provided to the encoder (will always appear in order from first to last, first image index is 0, no skipping allowed) + basisu::packed_uint<1> m_level_index; // The mipmap level index (mipmaps will always appear from largest to smallest) + basisu::packed_uint<1> m_flags; // enum basis_slice_desc_flags + + basisu::packed_uint<2> m_orig_width; // The original image width (may not be a multiple of 4 pixels) + basisu::packed_uint<2> m_orig_height; // The original image height (may not be a multiple of 4 pixels) + + basisu::packed_uint<2> m_num_blocks_x; // The slice's block X dimensions. Each block is 4x4 or 6x6 pixels. The slice's pixel resolution may or may not be a power of 2. + basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. + + basisu::packed_uint<4> m_file_ofs; // Offset from the start of the file to the start of the slice's data + basisu::packed_uint<4> m_file_size; // The size of the compressed slice data in bytes + + basisu::packed_uint<2> m_slice_data_crc16; // The CRC16 of the compressed slice data, for extra-paranoid use cases + }; + + // File header files + enum basis_header_flags + { + // Always set for ETC1S files. Not set for UASTC files. + cBASISHeaderFlagETC1S = 1, + + // Set if the texture had to be Y flipped before encoding. The actual interpretation of this (is Y up or down?) is up to the user. + cBASISHeaderFlagYFlipped = 2, + + // Set if any slices contain alpha (for ETC1S, if the odd slices contain alpha data) + cBASISHeaderFlagHasAlphaSlices = 4, + + // For ETC1S files, this will be true if the file utilizes a codebook from another .basis file. + cBASISHeaderFlagUsesGlobalCodebook = 8, + + // Set if the texture data is sRGB, otherwise it's linear. + // In reality, we have no idea if the texture data is actually linear or sRGB. 
This is the m_perceptual parameter passed to the compressor. + cBASISHeaderFlagSRGB = 16, + }; + + // The image type field attempts to describe how to interpret the image data in a Basis file. + // The encoder library doesn't really do anything special or different with these texture types, this is mostly here for the benefit of the user. + // We do make sure the various constraints are followed (2DArray/cubemap/videoframes/volume implies that each image has the same resolution and # of mipmap levels, etc., cubemap implies that the # of image slices is a multiple of 6) + enum basis_texture_type + { + cBASISTexType2D = 0, // An arbitrary array of 2D RGB or RGBA images with optional mipmaps, array size = # images, each image may have a different resolution and # of mipmap levels + cBASISTexType2DArray = 1, // An array of 2D RGB or RGBA images with optional mipmaps, array size = # images, each image has the same resolution and mipmap levels + cBASISTexTypeCubemapArray = 2, // an array of cubemap levels, total # of images must be divisable by 6, in X+, X-, Y+, Y-, Z+, Z- order, with optional mipmaps + cBASISTexTypeVideoFrames = 3, // An array of 2D video frames, with optional mipmaps, # frames = # images, each image has the same resolution and # of mipmap levels + cBASISTexTypeVolume = 4, // A 3D texture with optional mipmaps, Z dimension = # images, each image has the same resolution and # of mipmap levels + + cBASISTexTypeTotal + }; + + enum + { + cBASISMaxUSPerFrame = 0xFFFFFF + }; + + enum class basis_tex_format + { + cETC1S = 0, + cUASTC4x4 = 1, + cUASTC_HDR_4x4 = 2, + cASTC_HDR_6x6 = 3, + cASTC_HDR_6x6_INTERMEDIATE = 4, + cTotalFormats + }; + + struct basis_file_header + { + enum + { + cBASISSigValue = ('B' << 8) | 's', + cBASISFirstVersion = 0x10 + }; + + basisu::packed_uint<2> m_sig; // 2 byte file signature + basisu::packed_uint<2> m_ver; // Baseline file version + basisu::packed_uint<2> m_header_size; // Header size in bytes, sizeof(basis_file_header) + 
basisu::packed_uint<2> m_header_crc16; // CRC16 of the remaining header data + + basisu::packed_uint<4> m_data_size; // The total size of all data after the header + basisu::packed_uint<2> m_data_crc16; // The CRC16 of all data after the header + + basisu::packed_uint<3> m_total_slices; // The total # of compressed slices (1 slice per image, or 2 for alpha .basis files) + + basisu::packed_uint<3> m_total_images; // The total # of images + + basisu::packed_uint<1> m_tex_format; // enum basis_tex_format + basisu::packed_uint<2> m_flags; // enum basist::header_flags + basisu::packed_uint<1> m_tex_type; // enum basist::basis_texture_type + basisu::packed_uint<3> m_us_per_frame; // Framerate of video, in microseconds per frame + + basisu::packed_uint<4> m_reserved; // For future use + basisu::packed_uint<4> m_userdata0; // For client use + basisu::packed_uint<4> m_userdata1; // For client use + + basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook + basisu::packed_uint<4> m_endpoint_cb_file_ofs; // The compressed endpoint codebook's file offset relative to the start of the file + basisu::packed_uint<3> m_endpoint_cb_file_size; // The compressed endpoint codebook's size in bytes + + basisu::packed_uint<2> m_total_selectors; // The number of selectors in the endpoint codebook + basisu::packed_uint<4> m_selector_cb_file_ofs; // The compressed selectors codebook's file offset relative to the start of the file + basisu::packed_uint<3> m_selector_cb_file_size; // The compressed selector codebook's size in bytes + + basisu::packed_uint<4> m_tables_file_ofs; // The file offset of the compressed Huffman codelength tables, for decompressing slices + basisu::packed_uint<4> m_tables_file_size; // The file size in bytes of the compressed huffman codelength tables + + basisu::packed_uint<4> m_slice_desc_file_ofs; // The file offset to the slice description array, usually follows the header + + basisu::packed_uint<4> m_extended_file_ofs; // 
The file offset of the "extended" header and compressed data, for future use + basisu::packed_uint<4> m_extended_file_size; // The file size in bytes of the "extended" header and compressed data, for future use + }; +#pragma pack (pop) + +} // namespace basist diff --git a/thirdparty/basisu/transcoder/basisu_transcoder.cpp b/thirdparty/basisu/transcoder/basisu_transcoder.cpp new file mode 100644 index 000000000..da8cd43c7 --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_transcoder.cpp @@ -0,0 +1,23974 @@ +// basisu_transcoder.cpp +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "basisu_transcoder.h" +#include "basisu_containers_impl.h" + +#include "basisu_astc_hdr_core.h" + +#define BASISU_ASTC_HELPERS_IMPLEMENTATION +#include "basisu_astc_helpers.h" + +#include + +#if defined(_MSC_VER) + #include // For __popcnt intrinsic +#endif + +#ifndef BASISD_IS_BIG_ENDIAN +// TODO: This doesn't work on OSX. How can this be so difficult? +//#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN) +// #define BASISD_IS_BIG_ENDIAN (1) +//#else + #define BASISD_IS_BIG_ENDIAN (0) +//#endif +#endif + +#ifndef BASISD_USE_UNALIGNED_WORD_READS + #ifdef __EMSCRIPTEN__ + // Can't use unaligned loads/stores with WebAssembly. 
+ #define BASISD_USE_UNALIGNED_WORD_READS (0) + #elif defined(_M_AMD64) || defined(_M_IX86) || defined(__i386__) || defined(__x86_64__) + #define BASISD_USE_UNALIGNED_WORD_READS (1) + #else + #define BASISD_USE_UNALIGNED_WORD_READS (0) + #endif +#endif + +// Using unaligned loads and stores causes errors when using UBSan. Jam it off. +#if defined(__has_feature) +#if __has_feature(undefined_behavior_sanitizer) +#undef BASISD_USE_UNALIGNED_WORD_READS +#define BASISD_USE_UNALIGNED_WORD_READS 0 +#endif +#endif + +#define BASISD_SUPPORTED_BASIS_VERSION (0x13) + +#ifndef BASISD_SUPPORT_KTX2 + #error Must have defined BASISD_SUPPORT_KTX2 +#endif + +#ifndef BASISD_SUPPORT_KTX2_ZSTD +#error Must have defined BASISD_SUPPORT_KTX2_ZSTD +#endif + +// Set to 1 for fuzz testing. This will disable all CRC16 checks on headers and compressed data. +#ifndef BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS + #define BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS 0 +#endif + +#ifndef BASISD_SUPPORT_DXT1 + #define BASISD_SUPPORT_DXT1 1 +#endif + +#ifndef BASISD_SUPPORT_DXT5A + #define BASISD_SUPPORT_DXT5A 1 +#endif + +// Disable all BC7 transcoders if necessary (useful when cross compiling to Javascript) +#if defined(BASISD_SUPPORT_BC7) && !BASISD_SUPPORT_BC7 + #ifndef BASISD_SUPPORT_BC7_MODE5 + #define BASISD_SUPPORT_BC7_MODE5 0 + #endif +#endif // !BASISD_SUPPORT_BC7 + +// BC7 mode 5 supports both opaque and opaque+alpha textures, and uses less memory BC1. +#ifndef BASISD_SUPPORT_BC7_MODE5 + #define BASISD_SUPPORT_BC7_MODE5 1 +#endif + +#ifndef BASISD_SUPPORT_PVRTC1 + #define BASISD_SUPPORT_PVRTC1 1 +#endif + +#ifndef BASISD_SUPPORT_ETC2_EAC_A8 + #define BASISD_SUPPORT_ETC2_EAC_A8 1 +#endif + +// Set BASISD_SUPPORT_UASTC to 0 to completely disable support for transcoding UASTC files. 
+#ifndef BASISD_SUPPORT_UASTC + #define BASISD_SUPPORT_UASTC 1 +#endif + +#ifndef BASISD_SUPPORT_ASTC + #define BASISD_SUPPORT_ASTC 1 +#endif + +// Note that if BASISD_SUPPORT_ATC is enabled, BASISD_SUPPORT_DXT5A should also be enabled for alpha support. +#ifndef BASISD_SUPPORT_ATC + #define BASISD_SUPPORT_ATC 1 +#endif + +// Support for ETC2 EAC R11 and ETC2 EAC RG11 +#ifndef BASISD_SUPPORT_ETC2_EAC_RG11 + #define BASISD_SUPPORT_ETC2_EAC_RG11 1 +#endif + +// If BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY is 1, opaque blocks will be transcoded to ASTC at slightly higher quality (higher than BC1), but the transcoder tables will be 2x as large. +// This impacts grayscale and grayscale+alpha textures the most. +#ifndef BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY + #ifdef __EMSCRIPTEN__ + // Let's assume size matters more than quality when compiling with emscripten. + #define BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY 0 + #else + // Compiling native, so an extra 64K lookup table is probably acceptable. 
+ #define BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY 1 + #endif +#endif + +#ifndef BASISD_SUPPORT_FXT1 + #define BASISD_SUPPORT_FXT1 1 +#endif + +#ifndef BASISD_SUPPORT_PVRTC2 + #define BASISD_SUPPORT_PVRTC2 1 +#endif + +#if BASISD_SUPPORT_PVRTC2 + #if !BASISD_SUPPORT_ATC + #error BASISD_SUPPORT_ATC must be 1 if BASISD_SUPPORT_PVRTC2 is 1 + #endif +#endif + +#if BASISD_SUPPORT_ATC + #if !BASISD_SUPPORT_DXT5A + #error BASISD_SUPPORT_DXT5A must be 1 if BASISD_SUPPORT_ATC is 1 + #endif +#endif + +#ifndef BASISD_SUPPORT_UASTC_HDR + #define BASISD_SUPPORT_UASTC_HDR 1 +#endif + +#define BASISD_WRITE_NEW_BC7_MODE5_TABLES 0 +#define BASISD_WRITE_NEW_DXT1_TABLES 0 +#define BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES 0 +#define BASISD_WRITE_NEW_ASTC_TABLES 0 +#define BASISD_WRITE_NEW_ATC_TABLES 0 +#define BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES 0 + +#ifndef BASISD_ENABLE_DEBUG_FLAGS + #define BASISD_ENABLE_DEBUG_FLAGS 0 +#endif + +// If KTX2 support is enabled, we may need Zstd for decompression of supercompressed UASTC files. Include this header. +#if BASISD_SUPPORT_KTX2 + // If BASISD_SUPPORT_KTX2_ZSTD is 0, UASTC files compressed with Zstd cannot be loaded. + #if BASISD_SUPPORT_KTX2_ZSTD + // We only use two Zstd API's: ZSTD_decompress() and ZSTD_isError() + #include "../zstd/zstd.h" + #endif +#endif + +#if BASISD_SUPPORT_UASTC_HDR +using namespace basist::astc_6x6_hdr; +#endif + +namespace basisu +{ + bool g_debug_printf; + + void enable_debug_printf(bool enabled) + { + g_debug_printf = enabled; + } + + void debug_printf(const char* pFmt, ...) 
+ { +#if BASISU_FORCE_DEVEL_MESSAGES + g_debug_printf = true; +#endif + if (g_debug_printf) + { + va_list args; + va_start(args, pFmt); + vprintf(pFmt, args); + va_end(args); + } + } + + void debug_puts(const char* p) + { +#if BASISU_FORCE_DEVEL_MESSAGES + g_debug_printf = true; +#endif + if (g_debug_printf) + { + //puts(p); + printf("%s", p); + } + } +} // namespace basisu + +namespace basist +{ +#if BASISD_ENABLE_DEBUG_FLAGS + static uint32_t g_debug_flags = 0; +#endif + + uint32_t get_debug_flags() + { +#if BASISD_ENABLE_DEBUG_FLAGS + return g_debug_flags; +#else + return 0; +#endif + } + + void set_debug_flags(uint32_t f) + { + BASISU_NOTE_UNUSED(f); +#if BASISD_ENABLE_DEBUG_FLAGS + g_debug_flags = f; +#endif + } + + inline uint16_t byteswap_uint16(uint16_t v) + { + return static_cast((v >> 8) | (v << 8)); + } + + static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; } + static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } + static inline float saturate(float value) { return clampf(value, 0, 1.0f); } + + static inline uint8_t mul_8(uint32_t v, uint32_t q) { v = v * q + 128; return (uint8_t)((v + (v >> 8)) >> 8); } + static inline int mul_8bit(int a, int b) { int t = a * b + 128; return (t + (t >> 8)) >> 8; } + static inline int lerp_8bit(int a, int b, int s) { assert(a >= 0 && a <= 255); assert(b >= 0 && b <= 255); assert(s >= 0 && s <= 255); return a + mul_8bit(b - a, s); } + + struct vec2F + { + float c[2]; + + inline vec2F() {} + + inline vec2F(float s) { c[0] = s; c[1] = s; } + inline vec2F(float x, float y) { c[0] = x; c[1] = y; } + + inline void set(float x, float y) { c[0] = x; c[1] = y; } + + inline float dot(const vec2F& o) const { return (c[0] * o.c[0]) + (c[1] * o.c[1]); } + + inline float operator[] (uint32_t index) const { assert(index < 2); return 
c[index]; } + inline float& operator[] (uint32_t index) { assert(index < 2); return c[index]; } + + inline vec2F& clamp(float l, float h) + { + c[0] = basisu::clamp(c[0], l, h); + c[1] = basisu::clamp(c[1], l, h); + return *this; + } + + static vec2F lerp(const vec2F& a, const vec2F& b, float s) + { + vec2F res; + for (uint32_t i = 0; i < 2; i++) + res[i] = basisu::lerp(a[i], b[i], s); + return res; + } + }; + + struct vec3F + { + float c[3]; + + inline vec3F() {} + + inline vec3F(float s) { c[0] = s; c[1] = s; c[2] = s; } + inline vec3F(float x, float y, float z) { c[0] = x; c[1] = y; c[2] = z; } + + inline void set(float x, float y, float z) { c[0] = x; c[1] = y; c[2] = z; } + + inline float dot(const vec3F& o) const { return (c[0] * o.c[0]) + (c[1] * o.c[1]) + (c[2] * o.c[2]); } + + inline float operator[] (uint32_t index) const { assert(index < 3); return c[index]; } + inline float &operator[] (uint32_t index) { assert(index < 3); return c[index]; } + + inline vec3F& clamp(float l, float h) + { + c[0] = basisu::clamp(c[0], l, h); + c[1] = basisu::clamp(c[1], l, h); + c[2] = basisu::clamp(c[2], l, h); + return *this; + } + + static vec3F lerp(const vec3F& a, const vec3F& b, float s) + { + vec3F res; + for (uint32_t i = 0; i < 3; i++) + res[i] = basisu::lerp(a[i], b[i], s); + return res; + } + }; + + uint16_t crc16(const void* r, size_t size, uint16_t crc) + { + crc = ~crc; + + const uint8_t* p = static_cast(r); + for (; size; --size) + { + const uint16_t q = *p++ ^ (crc >> 8); + uint16_t k = (q >> 4) ^ q; + crc = (((crc << 8) ^ k) ^ (k << 5)) ^ (k << 12); + } + + return static_cast(~crc); + } + + struct vec4F + { + float c[4]; + + inline void set(float x, float y, float z, float w) { c[0] = x; c[1] = y; c[2] = z; c[3] = w; } + + float operator[] (uint32_t index) const { assert(index < 4); return c[index]; } + float& operator[] (uint32_t index) { assert(index < 4); return c[index]; } + }; + + enum etc_constants + { + cETC1BytesPerBlock = 8U, + + cETC1SelectorBits 
= 2U, + cETC1SelectorValues = 1U << cETC1SelectorBits, + cETC1SelectorMask = cETC1SelectorValues - 1U, + + cETC1BlockShift = 2U, + cETC1BlockSize = 1U << cETC1BlockShift, + + cETC1LSBSelectorIndicesBitOffset = 0, + cETC1MSBSelectorIndicesBitOffset = 16, + + cETC1FlipBitOffset = 32, + cETC1DiffBitOffset = 33, + + cETC1IntenModifierNumBits = 3, + cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, + cETC1RightIntenModifierTableBitOffset = 34, + cETC1LeftIntenModifierTableBitOffset = 37, + + // Base+Delta encoding (5 bit bases, 3 bit delta) + cETC1BaseColorCompNumBits = 5, + cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, + + cETC1DeltaColorCompNumBits = 3, + cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, + cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, + + cETC1BaseColor5RBitOffset = 59, + cETC1BaseColor5GBitOffset = 51, + cETC1BaseColor5BBitOffset = 43, + + cETC1DeltaColor3RBitOffset = 56, + cETC1DeltaColor3GBitOffset = 48, + cETC1DeltaColor3BBitOffset = 40, + + // Absolute (non-delta) encoding (two 4-bit per component bases) + cETC1AbsColorCompNumBits = 4, + cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, + + cETC1AbsColor4R1BitOffset = 60, + cETC1AbsColor4G1BitOffset = 52, + cETC1AbsColor4B1BitOffset = 44, + + cETC1AbsColor4R2BitOffset = 56, + cETC1AbsColor4G2BitOffset = 48, + cETC1AbsColor4B2BitOffset = 40, + + cETC1ColorDeltaMin = -4, + cETC1ColorDeltaMax = 3, + + // Delta3: + // 0 1 2 3 4 5 6 7 + // 000 001 010 011 100 101 110 111 + // 0 1 2 3 -4 -3 -2 -1 + }; + +#define DECLARE_ETC1_INTEN_TABLE(name, N) \ + static const int name[cETC1IntenModifierValues][cETC1SelectorValues] = \ + { \ + { N * -8, N * -2, N * 2, N * 8 },{ N * -17, N * -5, N * 5, N * 17 },{ N * -29, N * -9, N * 9, N * 29 },{ N * -42, N * -13, N * 13, N * 42 }, \ + { N * -60, N * -18, N * 18, N * 60 },{ N * -80, N * -24, N * 24, N * 80 },{ N * -106, N * -33, N * 33, N * 106 },{ N * -183, N * -47, N * 47, N * 183 } \ + }; + + 
DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables, 1); + DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables16, 16); + DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables48, 3 * 16); + + //const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; + const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; + + static const uint8_t g_etc_5_to_8[32] = { 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255 }; + + struct decoder_etc_block + { + // big endian uint64: + // bit ofs: 56 48 40 32 24 16 8 0 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + union + { + uint64_t m_uint64; + + uint32_t m_uint32[2]; + + uint8_t m_bytes[8]; + + struct + { + signed m_dred2 : 3; + uint32_t m_red1 : 5; + + signed m_dgreen2 : 3; + uint32_t m_green1 : 5; + + signed m_dblue2 : 3; + uint32_t m_blue1 : 5; + + uint32_t m_flip : 1; + uint32_t m_diff : 1; + uint32_t m_cw2 : 3; + uint32_t m_cw1 : 3; + + uint32_t m_selectors; + } m_differential; + }; + + inline void clear() + { + assert(sizeof(*this) == 8); + basisu::clear_obj(*this); + } + + inline void set_byte_bits(uint32_t ofs, uint32_t num, uint32_t bits) + { + assert((ofs + num) <= 64U); + assert(num && (num < 32U)); + assert((ofs >> 3) == ((ofs + num - 1) >> 3)); + assert(bits < (1U << num)); + const uint32_t byte_ofs = 7 - (ofs >> 3); + const uint32_t byte_bit_ofs = ofs & 7; + const uint32_t mask = (1 << num) - 1; + m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs); + m_bytes[byte_ofs] |= (bits << byte_bit_ofs); + } + + inline void set_flip_bit(bool flip) + { + m_bytes[3] &= ~1; + m_bytes[3] |= static_cast(flip); + } + + inline void set_diff_bit(bool diff) + { + m_bytes[3] &= ~2; + m_bytes[3] |= (static_cast(diff) << 1); + } + + // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1) + inline void set_inten_table(uint32_t subblock_id, uint32_t t) + { + assert(subblock_id < 2); + assert(t < 8); + const uint32_t 
ofs = subblock_id ? 2 : 5; + m_bytes[3] &= ~(7 << ofs); + m_bytes[3] |= (t << ofs); + } + + // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables. + inline void set_selector(uint32_t x, uint32_t y, uint32_t val) + { + assert((x | y | val) < 4); + const uint32_t bit_index = x * 4 + y; + + uint8_t* p = &m_bytes[7 - (bit_index >> 3)]; + + const uint32_t byte_bit_ofs = bit_index & 7; + const uint32_t mask = 1 << byte_bit_ofs; + + static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 }; + const uint32_t etc1_val = s_selector_index_to_etc1[val]; + + const uint32_t lsb = etc1_val & 1; + const uint32_t msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + + // Returned encoded selector value ranges from 0-3 (this is NOT a direct index into g_etc1_inten_tables, see get_selector()) + inline uint32_t get_raw_selector(uint32_t x, uint32_t y) const + { + assert((x | y) < 4); + + const uint32_t bit_index = x * 4 + y; + const uint32_t byte_bit_ofs = bit_index & 7; + const uint8_t* p = &m_bytes[7 - (bit_index >> 3)]; + const uint32_t lsb = (p[0] >> byte_bit_ofs) & 1; + const uint32_t msb = (p[-2] >> byte_bit_ofs) & 1; + const uint32_t val = lsb | (msb << 1); + + return val; + } + + // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. 
+ inline uint32_t get_selector(uint32_t x, uint32_t y) const + { + static const uint8_t s_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; + return s_etc1_to_selector_index[get_raw_selector(x, y)]; + } + + inline void set_raw_selector_bits(uint32_t bits) + { + m_bytes[4] = static_cast(bits); + m_bytes[5] = static_cast(bits >> 8); + m_bytes[6] = static_cast(bits >> 16); + m_bytes[7] = static_cast(bits >> 24); + } + + inline bool are_all_selectors_the_same() const + { + uint32_t v = *reinterpret_cast(&m_bytes[4]); + + if ((v == 0xFFFFFFFF) || (v == 0xFFFF) || (!v) || (v == 0xFFFF0000)) + return true; + + return false; + } + + inline void set_raw_selector_bits(uint8_t byte0, uint8_t byte1, uint8_t byte2, uint8_t byte3) + { + m_bytes[4] = byte0; + m_bytes[5] = byte1; + m_bytes[6] = byte2; + m_bytes[7] = byte3; + } + + inline uint32_t get_raw_selector_bits() const + { + return m_bytes[4] | (m_bytes[5] << 8) | (m_bytes[6] << 16) | (m_bytes[7] << 24); + } + + inline void set_base4_color(uint32_t idx, uint16_t c) + { + if (idx) + { + set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15); + set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15); + } + else + { + set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15); + set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15); + } + } + + inline void set_base5_color(uint16_t c) + { + set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31); + set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31); + set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31); + } + + void set_delta3_color(uint16_t c) + { + set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7); + set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7); + set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7); + } + + void set_block_color4(const color32& c0_unscaled, const color32& c1_unscaled) + { + set_diff_bit(false); 
+ + set_base4_color(0, pack_color4(c0_unscaled, false)); + set_base4_color(1, pack_color4(c1_unscaled, false)); + } + + void set_block_color5(const color32& c0_unscaled, const color32& c1_unscaled) + { + set_diff_bit(true); + + set_base5_color(pack_color5(c0_unscaled, false)); + + int dr = c1_unscaled.r - c0_unscaled.r; + int dg = c1_unscaled.g - c0_unscaled.g; + int db = c1_unscaled.b - c0_unscaled.b; + + set_delta3_color(pack_delta3(dr, dg, db)); + } + + bool set_block_color5_check(const color32& c0_unscaled, const color32& c1_unscaled) + { + set_diff_bit(true); + + set_base5_color(pack_color5(c0_unscaled, false)); + + int dr = c1_unscaled.r - c0_unscaled.r; + int dg = c1_unscaled.g - c0_unscaled.g; + int db = c1_unscaled.b - c0_unscaled.b; + + if (((dr < cETC1ColorDeltaMin) || (dr > cETC1ColorDeltaMax)) || + ((dg < cETC1ColorDeltaMin) || (dg > cETC1ColorDeltaMax)) || + ((db < cETC1ColorDeltaMin) || (db > cETC1ColorDeltaMax))) + return false; + + set_delta3_color(pack_delta3(dr, dg, db)); + + return true; + } + + inline uint32_t get_byte_bits(uint32_t ofs, uint32_t num) const + { + assert((ofs + num) <= 64U); + assert(num && (num <= 8U)); + assert((ofs >> 3) == ((ofs + num - 1) >> 3)); + const uint32_t byte_ofs = 7 - (ofs >> 3); + const uint32_t byte_bit_ofs = ofs & 7; + return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1); + } + + inline uint16_t get_base5_color() const + { + const uint32_t r = get_byte_bits(cETC1BaseColor5RBitOffset, 5); + const uint32_t g = get_byte_bits(cETC1BaseColor5GBitOffset, 5); + const uint32_t b = get_byte_bits(cETC1BaseColor5BBitOffset, 5); + return static_cast(b | (g << 5U) | (r << 10U)); + } + + inline uint16_t get_base4_color(uint32_t idx) const + { + uint32_t r, g, b; + if (idx) + { + r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); + } + else + { + r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); + g = 
get_byte_bits(cETC1AbsColor4G1BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); + } + return static_cast(b | (g << 4U) | (r << 8U)); + } + + inline color32 get_base5_color_unscaled() const + { + return color32(m_differential.m_red1, m_differential.m_green1, m_differential.m_blue1, 255); + } + + inline bool get_flip_bit() const + { + return (m_bytes[3] & 1) != 0; + } + + inline bool get_diff_bit() const + { + return (m_bytes[3] & 2) != 0; + } + + inline uint32_t get_inten_table(uint32_t subblock_id) const + { + assert(subblock_id < 2); + const uint32_t ofs = subblock_id ? 2 : 5; + return (m_bytes[3] >> ofs) & 7; + } + + inline uint16_t get_delta3_color() const + { + const uint32_t r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); + const uint32_t g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); + const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); + return static_cast(b | (g << 3U) | (r << 6U)); + } + + void get_block_colors(color32* pBlock_colors, uint32_t subblock_index) const + { + color32 b; + + if (get_diff_bit()) + { + if (subblock_index) + unpack_color5(b, get_base5_color(), get_delta3_color(), true, 255); + else + unpack_color5(b, get_base5_color(), true); + } + else + { + b = unpack_color4(get_base4_color(subblock_index), true, 255); + } + + const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)]; + + pBlock_colors[0].set_noclamp_rgba(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set_noclamp_rgba(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set_noclamp_rgba(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set_noclamp_rgba(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + + static uint16_t pack_color4(const color32& 
color, bool scaled, uint32_t bias = 127U) + { + return pack_color4(color.r, color.g, color.b, scaled, bias); + } + + static uint16_t pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U) + { + if (scaled) + { + r = (r * 15U + bias) / 255U; + g = (g * 15U + bias) / 255U; + b = (b * 15U + bias) / 255U; + } + + r = basisu::minimum(r, 15U); + g = basisu::minimum(g, 15U); + b = basisu::minimum(b, 15U); + + return static_cast(b | (g << 4U) | (r << 8U)); + } + + static uint16_t pack_color5(const color32& color, bool scaled, uint32_t bias = 127U) + { + return pack_color5(color.r, color.g, color.b, scaled, bias); + } + + static uint16_t pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U) + { + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 31U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + + r = basisu::minimum(r, 31U); + g = basisu::minimum(g, 31U); + b = basisu::minimum(b, 31U); + + return static_cast(b | (g << 5U) | (r << 10U)); + } + + uint16_t pack_delta3(const color32& color) + { + return pack_delta3(color.r, color.g, color.b); + } + + uint16_t pack_delta3(int r, int g, int b) + { + assert((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); + assert((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); + assert((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); + if (r < 0) r += 8; + if (g < 0) g += 8; + if (b < 0) b += 8; + return static_cast(b | (g << 3) | (r << 6)); + } + + static void unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3) + { + r = (packed_delta3 >> 6) & 7; + g = (packed_delta3 >> 3) & 7; + b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + } + + static color32 unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha) + { + uint32_t b = packed_color5 & 31U; + uint32_t g = (packed_color5 >> 5U) & 31U; + uint32_t r = (packed_color5 >> 10U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g 
<< 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + assert(alpha <= 255); + + return color32(cNoClamp, r, g, b, alpha); + } + + static void unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, bool scaled) + { + color32 c(unpack_color5(packed_color5, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + static void unpack_color5(color32& result, uint16_t packed_color5, bool scaled) + { + result = unpack_color5(packed_color5, scaled, 255); + } + + static bool unpack_color5(color32& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) + { + int dr, dg, db; + unpack_delta3(dr, dg, db, packed_delta3); + + int r = ((packed_color5 >> 10U) & 31U) + dr; + int g = ((packed_color5 >> 5U) & 31U) + dg; + int b = (packed_color5 & 31U) + db; + + bool success = true; + if (static_cast(r | g | b) > 31U) + { + success = false; + r = basisu::clamp(r, 0, 31); + g = basisu::clamp(g, 0, 31); + b = basisu::clamp(b, 0, 31); + } + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + result.set_noclamp_rgba(r, g, b, basisu::minimum(alpha, 255U)); + return success; + } + + static color32 unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha) + { + uint32_t b = packed_color4 & 15U; + uint32_t g = (packed_color4 >> 4U) & 15U; + uint32_t r = (packed_color4 >> 8U) & 15U; + + if (scaled) + { + b = (b << 4U) | b; + g = (g << 4U) | g; + r = (r << 4U) | r; + } + + return color32(cNoClamp, r, g, b, basisu::minimum(alpha, 255U)); + } + + static void unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled) + { + color32 c(unpack_color4(packed_color4, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + static void get_diff_subblock_colors(color32* pDst, uint16_t packed_color5, uint32_t table_idx) + { + assert(table_idx < cETC1IntenModifierValues); + const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint32_t r, g, 
b; + unpack_color5(r, g, b, packed_color5, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(clamp255(ir + y0), clamp255(ig + y0), clamp255(ib + y0), 255); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(clamp255(ir + y1), clamp255(ig + y1), clamp255(ib + y1), 255); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(clamp255(ir + y2), clamp255(ig + y2), clamp255(ib + y2), 255); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(clamp255(ir + y3), clamp255(ig + y3), clamp255(ib + y3), 255); + } + + static int clamp255(int x) + { + if (x & 0xFFFFFF00) + { + if (x < 0) + x = 0; + else if (x > 255) + x = 255; + } + + return x; + } + + static void get_block_colors5(color32* pBlock_colors, const color32& base_color5, uint32_t inten_table) + { + color32 b(base_color5); + + b.r = (b.r << 3) | (b.r >> 2); + b.g = (b.g << 3) | (b.g >> 2); + b.b = (b.b << 3) | (b.b >> 2); + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + + static void get_block_color5(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t& r, uint32_t &g, uint32_t &b) + { + assert(index < 4); + + uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2); + uint32_t bg = (base_color5.g << 3) | (base_color5.g >> 2); + uint32_t bb = (base_color5.b << 3) | (base_color5.b >> 2); + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + r = 
clamp255(br + pInten_table[index]); + g = clamp255(bg + pInten_table[index]); + b = clamp255(bb + pInten_table[index]); + } + + static void get_block_color5_r(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t &r) + { + assert(index < 4); + + uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2); + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + r = clamp255(br + pInten_table[index]); + } + + static void get_block_colors5_g(int* pBlock_colors, const color32& base_color5, uint32_t inten_table) + { + const int g = (base_color5.g << 3) | (base_color5.g >> 2); + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + pBlock_colors[0] = clamp255(g + pInten_table[0]); + pBlock_colors[1] = clamp255(g + pInten_table[1]); + pBlock_colors[2] = clamp255(g + pInten_table[2]); + pBlock_colors[3] = clamp255(g + pInten_table[3]); + } + + static void get_block_colors5_bounds(color32* pBlock_colors, const color32& base_color5, uint32_t inten_table, uint32_t l = 0, uint32_t h = 3) + { + color32 b(base_color5); + + b.r = (b.r << 3) | (b.r >> 2); + b.g = (b.g << 3) | (b.g >> 2); + b.b = (b.b << 3) | (b.b >> 2); + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[l]), clamp255(b.g + pInten_table[l]), clamp255(b.b + pInten_table[l]), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[h]), clamp255(b.g + pInten_table[h]), clamp255(b.b + pInten_table[h]), 255); + } + + static void get_block_colors5_bounds_g(uint32_t* pBlock_colors, const color32& base_color5, uint32_t inten_table, uint32_t l = 0, uint32_t h = 3) + { + color32 b(base_color5); + + b.g = (b.g << 3) | (b.g >> 2); + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + pBlock_colors[0] = clamp255(b.g + pInten_table[l]); + pBlock_colors[1] = clamp255(b.g + pInten_table[h]); + } + }; + + enum dxt_constants + { + cDXT1SelectorBits = 2U, cDXT1SelectorValues = 1U << cDXT1SelectorBits, 
cDXT1SelectorMask = cDXT1SelectorValues - 1U, + cDXT5SelectorBits = 3U, cDXT5SelectorValues = 1U << cDXT5SelectorBits, cDXT5SelectorMask = cDXT5SelectorValues - 1U, + }; + + static const uint8_t g_etc1_x_selector_unpack[4][256] = + { + { + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + }, + { + 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, + 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, + 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, + 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, + }, + + { + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 
0, 0, 0, 0, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, + 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, + 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, + }, + + { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + } + }; + + struct dxt1_block + { + enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 }; + + uint8_t m_low_color[cTotalEndpointBytes]; + uint8_t m_high_color[cTotalEndpointBytes]; + uint8_t m_selectors[cTotalSelectorBytes]; + + inline void clear() { basisu::clear_obj(*this); } + + inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } + inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } + inline void set_low_color(uint16_t c) { m_low_color[0] = static_cast(c & 0xFF); m_low_color[1] = static_cast((c >> 
8) & 0xFF); } + inline void set_high_color(uint16_t c) { m_high_color[0] = static_cast(c & 0xFF); m_high_color[1] = static_cast((c >> 8) & 0xFF); } + inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; } + inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); } + + static uint16_t pack_color(const color32& color, bool scaled, uint32_t bias = 127U) + { + uint32_t r = color.r, g = color.g, b = color.b; + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 63U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + return static_cast(basisu::minimum(b, 31U) | (basisu::minimum(g, 63U) << 5U) | (basisu::minimum(r, 31U) << 11U)); + } + + static uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast(b | (g << 5U) | (r << 11U)); } + }; + + struct dxt_selector_range + { + uint32_t m_low; + uint32_t m_high; + }; + + struct etc1_to_dxt1_56_solution + { + uint8_t m_lo; + uint8_t m_hi; + uint16_t m_err; + }; + +#if BASISD_SUPPORT_DXT1 + static dxt_selector_range g_etc1_to_dxt1_selector_ranges[] = + { + { 0, 3 }, + + { 1, 3 }, + { 0, 2 }, + + { 1, 2 }, + + { 2, 3 }, + { 0, 1 }, + }; + + const uint32_t NUM_ETC1_TO_DXT1_SELECTOR_RANGES = sizeof(g_etc1_to_dxt1_selector_ranges) / sizeof(g_etc1_to_dxt1_selector_ranges[0]); + + static uint32_t g_etc1_to_dxt1_selector_range_index[4][4]; + + const uint32_t NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS = 10; + static const uint8_t g_etc1_to_dxt1_selector_mappings[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][4] = + { + { 0, 0, 1, 1 }, + { 0, 0, 1, 2 }, + { 0, 0, 1, 3 }, + { 0, 0, 2, 3 }, + { 0, 1, 1, 1 }, + { 0, 1, 2, 2 }, + { 0, 1, 2, 3 }, + { 0, 2, 3, 3 }, + { 1, 2, 2, 2 }, + { 1, 2, 3, 3 }, + }; + + static uint8_t 
g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256]; + static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256]; + + static const etc1_to_dxt1_56_solution g_etc1_to_dxt_6[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] = { +#include "basisu_transcoder_tables_dxt1_6.inc" + }; + + static const etc1_to_dxt1_56_solution g_etc1_to_dxt_5[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] = { +#include "basisu_transcoder_tables_dxt1_5.inc" + }; +#endif // BASISD_SUPPORT_DXT1 + +#if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC + // First saw the idea for optimal BC1 single-color block encoding using lookup tables in ryg_dxt. + struct bc1_match_entry + { + uint8_t m_hi; + uint8_t m_lo; + }; + static bc1_match_entry g_bc1_match5_equals_1[256], g_bc1_match6_equals_1[256]; // selector 1, allow equals hi/lo + static bc1_match_entry g_bc1_match5_equals_0[256], g_bc1_match6_equals_0[256]; // selector 0, allow equals hi/lo + + static void prepare_bc1_single_color_table(bc1_match_entry* pTable, const uint8_t* pExpand, int size0, int size1, int sel) + { + for (int i = 0; i < 256; i++) + { + int lowest_e = 256; + for (int lo = 0; lo < size0; lo++) + { + for (int hi = 0; hi < size1; hi++) + { + const int lo_e = pExpand[lo], hi_e = pExpand[hi]; + int e; + + if (sel == 1) + { + // Selector 1 + e = basisu::iabs(((hi_e * 2 + lo_e) / 3) - i); + e += (basisu::iabs(hi_e - lo_e) * 3) / 100; + } + else + { + assert(sel == 0); + + // Selector 0 + e = basisu::iabs(hi_e - i); + } + + if (e < lowest_e) + { + pTable[i].m_hi = static_cast(hi); + pTable[i].m_lo = static_cast(lo); + + lowest_e = e; + } + + } // hi + } // lo + } + } +#endif + +#if BASISD_WRITE_NEW_DXT1_TABLES + static void create_etc1_to_dxt1_5_conversion_table() + { + FILE* pFile = nullptr; + fopen_s(&pFile, "basisu_transcoder_tables_dxt1_5.inc", "w"); + + uint32_t n = 0; + + for (int inten = 
0; inten < 8; inten++) + { + for (uint32_t g = 0; g < 32; g++) + { + color32 block_colors[4]; + decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten); + + for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++) + { + const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low; + const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high; + + for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++) + { + uint32_t best_lo = 0; + uint32_t best_hi = 0; + uint64_t best_err = UINT64_MAX; + + for (uint32_t hi = 0; hi <= 31; hi++) + { + for (uint32_t lo = 0; lo <= 31; lo++) + { + //if (lo == hi) continue; + + uint32_t colors[4]; + + colors[0] = (lo << 3) | (lo >> 2); + colors[3] = (hi << 3) | (hi >> 2); + + colors[1] = (colors[0] * 2 + colors[3]) / 3; + colors[2] = (colors[3] * 2 + colors[0]) / 3; + + uint64_t total_err = 0; + + for (uint32_t s = low_selector; s <= high_selector; s++) + { + int err = block_colors[s].g - colors[g_etc1_to_dxt1_selector_mappings[m][s]]; + + total_err += err * err; + } + + if (total_err < best_err) + { + best_err = total_err; + best_lo = lo; + best_hi = hi; + } + } + } + + assert(best_err <= 0xFFFF); + + //table[g + inten * 32].m_solutions[sr][m].m_lo = static_cast(best_lo); + //table[g + inten * 32].m_solutions[sr][m].m_hi = static_cast(best_hi); + //table[g + inten * 32].m_solutions[sr][m].m_err = static_cast(best_err); + + //assert(best_lo != best_hi); + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err); + n++; + if ((n & 31) == 31) + fprintf(pFile, "\n"); + } // m + } // sr + } // g + } // inten + + fclose(pFile); + } + + static void create_etc1_to_dxt1_6_conversion_table() + { + FILE* pFile = nullptr; + fopen_s(&pFile, "basisu_transcoder_tables_dxt1_6.inc", "w"); + + uint32_t n = 0; + + for (int inten = 0; inten < 8; inten++) + { + for (uint32_t g = 0; g < 32; g++) + { + color32 block_colors[4]; + 
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten); + + for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++) + { + const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low; + const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high; + + for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++) + { + uint32_t best_lo = 0; + uint32_t best_hi = 0; + uint64_t best_err = UINT64_MAX; + + for (uint32_t hi = 0; hi <= 63; hi++) + { + for (uint32_t lo = 0; lo <= 63; lo++) + { + //if (lo == hi) continue; + + uint32_t colors[4]; + + colors[0] = (lo << 2) | (lo >> 4); + colors[3] = (hi << 2) | (hi >> 4); + + colors[1] = (colors[0] * 2 + colors[3]) / 3; + colors[2] = (colors[3] * 2 + colors[0]) / 3; + + uint64_t total_err = 0; + + for (uint32_t s = low_selector; s <= high_selector; s++) + { + int err = block_colors[s].g - colors[g_etc1_to_dxt1_selector_mappings[m][s]]; + + total_err += err * err; + } + + if (total_err < best_err) + { + best_err = total_err; + best_lo = lo; + best_hi = hi; + } + } + } + + assert(best_err <= 0xFFFF); + + //table[g + inten * 32].m_solutions[sr][m].m_lo = static_cast(best_lo); + //table[g + inten * 32].m_solutions[sr][m].m_hi = static_cast(best_hi); + //table[g + inten * 32].m_solutions[sr][m].m_err = static_cast(best_err); + + //assert(best_lo != best_hi); + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err); + n++; + if ((n & 31) == 31) + fprintf(pFile, "\n"); + + } // m + } // sr + } // g + } // inten + + fclose(pFile); + } +#endif + +#if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11 + static const int8_t g_eac_modifier_table[16][8] = + { + { -3, -6, -9, -15, 2, 5, 8, 14 }, + { -3, -7, -10, -13, 2, 6, 9, 12 }, + { -2, -5, -8, -13, 1, 4, 7, 12 }, + { -2, -4, -6, -13, 1, 3, 5, 12 }, + { -3, -6, -8, -12, 2, 5, 7, 11 }, + { -3, -7, -9, -11, 2, 6, 8, 10 }, + { -4, -7, -8, 
-11, 3, 6, 7, 10 }, + { -3, -5, -8, -11, 2, 4, 7, 10 }, + + { -2, -6, -8, -10, 1, 5, 7, 9 }, + { -2, -5, -8, -10, 1, 4, 7, 9 }, + { -2, -4, -8, -10, 1, 3, 7, 9 }, + { -2, -5, -7, -10, 1, 4, 6, 9 }, + { -3, -4, -7, -10, 2, 3, 6, 9 }, + { -1, -2, -3, -10, 0, 1, 2, 9 }, // entry 13 + { -4, -6, -8, -9, 3, 5, 7, 8 }, + { -3, -5, -7, -9, 2, 4, 6, 8 } + }; + + // Used by ETC2 EAC A8 and ETC2 EAC R11/RG11. + struct eac_block + { + uint16_t m_base : 8; + + uint16_t m_table : 4; + uint16_t m_multiplier : 4; + + uint8_t m_selectors[6]; + + uint32_t get_selector(uint32_t x, uint32_t y) const + { + assert((x < 4) && (y < 4)); + + const uint32_t ofs = 45 - (y + x * 4) * 3; + + const uint64_t pixels = get_selector_bits(); + + return (pixels >> ofs) & 7; + } + + void set_selector(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < 4) && (y < 4) && (s < 8)); + + const uint32_t ofs = 45 - (y + x * 4) * 3; + + uint64_t pixels = get_selector_bits(); + + pixels &= ~(7ULL << ofs); + pixels |= (static_cast(s) << ofs); + + set_selector_bits(pixels); + } + + uint64_t get_selector_bits() const + { + uint64_t pixels = ((uint64_t)m_selectors[0] << 40) | ((uint64_t)m_selectors[1] << 32) | + ((uint64_t)m_selectors[2] << 24) | + ((uint64_t)m_selectors[3] << 16) | ((uint64_t)m_selectors[4] << 8) | m_selectors[5]; + return pixels; + } + + void set_selector_bits(uint64_t pixels) + { + m_selectors[0] = (uint8_t)(pixels >> 40); + m_selectors[1] = (uint8_t)(pixels >> 32); + m_selectors[2] = (uint8_t)(pixels >> 24); + m_selectors[3] = (uint8_t)(pixels >> 16); + m_selectors[4] = (uint8_t)(pixels >> 8); + m_selectors[5] = (uint8_t)(pixels); + } + }; + +#endif // #if BASISD_SUPPORT_UASTC BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11 + +#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11 + static const dxt_selector_range s_etc2_eac_selector_ranges[] = + { + { 0, 3 }, + + { 1, 3 }, + { 0, 2 }, + + { 1, 2 }, + }; + + const uint32_t NUM_ETC2_EAC_SELECTOR_RANGES = 
sizeof(s_etc2_eac_selector_ranges) / sizeof(s_etc2_eac_selector_ranges[0]); + + struct etc1_g_to_eac_conversion + { + uint8_t m_base; + uint8_t m_table_mul; // mul*16+table + uint16_t m_trans; // translates ETC1 selectors to ETC2_EAC_A8 + }; +#endif // BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11 + +#if BASISD_SUPPORT_ETC2_EAC_A8 + +#if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES + struct pack_eac_a8_results + { + uint32_t m_base; + uint32_t m_table; + uint32_t m_multiplier; + basisu::vector m_selectors; + basisu::vector m_selectors_temp; + }; + + static uint64_t pack_eac_a8_exhaustive(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels) + { + results.m_selectors.resize(num_pixels); + results.m_selectors_temp.resize(num_pixels); + + uint64_t best_err = UINT64_MAX; + + for (uint32_t base_color = 0; base_color < 256; base_color++) + { + for (uint32_t multiplier = 1; multiplier < 16; multiplier++) + { + for (uint32_t table = 0; table < 16; table++) + { + uint64_t total_err = 0; + + for (uint32_t i = 0; i < num_pixels; i++) + { + const int a = pPixels[i]; + + uint32_t best_s_err = UINT32_MAX; + uint32_t best_s = 0; + for (uint32_t s = 0; s < 8; s++) + { + int v = (int)multiplier * g_eac_modifier_table[table][s] + (int)base_color; + if (v < 0) + v = 0; + else if (v > 255) + v = 255; + + uint32_t err = abs(a - v); + if (err < best_s_err) + { + best_s_err = err; + best_s = s; + } + } + + results.m_selectors_temp[i] = static_cast(best_s); + + total_err += best_s_err * best_s_err; + if (total_err >= best_err) + break; + } + + if (total_err < best_err) + { + best_err = total_err; + results.m_base = base_color; + results.m_multiplier = multiplier; + results.m_table = table; + results.m_selectors.swap(results.m_selectors_temp); + } + + } // table + + } // multiplier + + } // base_color + + return best_err; + } +#endif // BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES + + static +#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES + const +#endif + 
etc1_g_to_eac_conversion s_etc1_g_to_etc2_a8[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] = + { + { { 0,1,3328 },{ 0,1,3328 },{ 0,1,256 },{ 0,1,256 } }, + { { 0,226,3936 },{ 0,226,3936 },{ 0,81,488 },{ 0,81,488 } }, + { { 6,178,4012 },{ 6,178,4008 },{ 0,146,501 },{ 0,130,496 } }, + { { 14,178,4012 },{ 14,178,4008 },{ 8,146,501 },{ 6,82,496 } }, + { { 23,178,4012 },{ 23,178,4008 },{ 17,146,501 },{ 3,228,496 } }, + { { 31,178,4012 },{ 31,178,4008 },{ 25,146,501 },{ 11,228,496 } }, + { { 39,178,4012 },{ 39,178,4008 },{ 33,146,501 },{ 19,228,496 } }, + { { 47,178,4012 },{ 47,178,4008 },{ 41,146,501 },{ 27,228,496 } }, + { { 56,178,4012 },{ 56,178,4008 },{ 50,146,501 },{ 36,228,496 } }, + { { 64,178,4012 },{ 64,178,4008 },{ 58,146,501 },{ 44,228,496 } }, + { { 72,178,4012 },{ 72,178,4008 },{ 66,146,501 },{ 52,228,496 } }, + { { 80,178,4012 },{ 80,178,4008 },{ 74,146,501 },{ 60,228,496 } }, + { { 89,178,4012 },{ 89,178,4008 },{ 83,146,501 },{ 69,228,496 } }, + { { 97,178,4012 },{ 97,178,4008 },{ 91,146,501 },{ 77,228,496 } }, + { { 105,178,4012 },{ 105,178,4008 },{ 99,146,501 },{ 85,228,496 } }, + { { 113,178,4012 },{ 113,178,4008 },{ 107,146,501 },{ 93,228,496 } }, + { { 122,178,4012 },{ 122,178,4008 },{ 116,146,501 },{ 102,228,496 } }, + { { 130,178,4012 },{ 130,178,4008 },{ 124,146,501 },{ 110,228,496 } }, + { { 138,178,4012 },{ 138,178,4008 },{ 132,146,501 },{ 118,228,496 } }, + { { 146,178,4012 },{ 146,178,4008 },{ 140,146,501 },{ 126,228,496 } }, + { { 155,178,4012 },{ 155,178,4008 },{ 149,146,501 },{ 135,228,496 } }, + { { 163,178,4012 },{ 163,178,4008 },{ 157,146,501 },{ 143,228,496 } }, + { { 171,178,4012 },{ 171,178,4008 },{ 165,146,501 },{ 151,228,496 } }, + { { 179,178,4012 },{ 179,178,4008 },{ 173,146,501 },{ 159,228,496 } }, + { { 188,178,4012 },{ 188,178,4008 },{ 182,146,501 },{ 168,228,496 } }, + { { 196,178,4012 },{ 196,178,4008 },{ 190,146,501 },{ 176,228,496 } }, + { { 204,178,4012 },{ 204,178,4008 },{ 198,146,501 },{ 184,228,496 } }, + { { 212,178,4012 },{ 
212,178,4008 },{ 206,146,501 },{ 192,228,496 } }, + { { 221,178,4012 },{ 221,178,4008 },{ 215,146,501 },{ 201,228,496 } }, + { { 229,178,4012 },{ 229,178,4008 },{ 223,146,501 },{ 209,228,496 } }, + { { 235,66,4012 },{ 221,100,4008 },{ 231,146,501 },{ 217,228,496 } }, + { { 211,102,4085 },{ 118,31,4080 },{ 211,102,501 },{ 118,31,496 } }, + { { 1,2,3328 },{ 1,2,3328 },{ 0,1,320 },{ 0,1,320 } }, + { { 7,162,3905 },{ 7,162,3904 },{ 1,17,480 },{ 1,17,480 } }, + { { 15,162,3906 },{ 15,162,3904 },{ 1,117,352 },{ 1,117,352 } }, + { { 23,162,3906 },{ 23,162,3904 },{ 5,34,500 },{ 4,53,424 } }, + { { 32,162,3906 },{ 32,162,3904 },{ 14,34,500 },{ 3,69,424 } }, + { { 40,162,3906 },{ 40,162,3904 },{ 22,34,500 },{ 1,133,496 } }, + { { 48,162,3906 },{ 48,162,3904 },{ 30,34,500 },{ 4,85,496 } }, + { { 56,162,3906 },{ 56,162,3904 },{ 38,34,500 },{ 12,85,496 } }, + { { 65,162,3906 },{ 65,162,3904 },{ 47,34,500 },{ 1,106,424 } }, + { { 73,162,3906 },{ 73,162,3904 },{ 55,34,500 },{ 9,106,424 } }, + { { 81,162,3906 },{ 81,162,3904 },{ 63,34,500 },{ 7,234,496 } }, + { { 89,162,3906 },{ 89,162,3904 },{ 71,34,500 },{ 15,234,496 } }, + { { 98,162,3906 },{ 98,162,3904 },{ 80,34,500 },{ 24,234,496 } }, + { { 106,162,3906 },{ 106,162,3904 },{ 88,34,500 },{ 32,234,496 } }, + { { 114,162,3906 },{ 114,162,3904 },{ 96,34,500 },{ 40,234,496 } }, + { { 122,162,3906 },{ 122,162,3904 },{ 104,34,500 },{ 48,234,496 } }, + { { 131,162,3906 },{ 131,162,3904 },{ 113,34,500 },{ 57,234,496 } }, + { { 139,162,3906 },{ 139,162,3904 },{ 121,34,500 },{ 65,234,496 } }, + { { 147,162,3906 },{ 147,162,3904 },{ 129,34,500 },{ 73,234,496 } }, + { { 155,162,3906 },{ 155,162,3904 },{ 137,34,500 },{ 81,234,496 } }, + { { 164,162,3906 },{ 164,162,3904 },{ 146,34,500 },{ 90,234,496 } }, + { { 172,162,3906 },{ 172,162,3904 },{ 154,34,500 },{ 98,234,496 } }, + { { 180,162,3906 },{ 180,162,3904 },{ 162,34,500 },{ 106,234,496 } }, + { { 188,162,3906 },{ 188,162,3904 },{ 170,34,500 },{ 114,234,496 } }, + { { 197,162,3906 },{ 
197,162,3904 },{ 179,34,500 },{ 123,234,496 } }, + { { 205,162,3906 },{ 205,162,3904 },{ 187,34,500 },{ 131,234,496 } }, + { { 213,162,3906 },{ 213,162,3904 },{ 195,34,500 },{ 139,234,496 } }, + { { 221,162,3906 },{ 221,162,3904 },{ 203,34,500 },{ 147,234,496 } }, + { { 230,162,3906 },{ 230,162,3904 },{ 212,34,500 },{ 156,234,496 } }, + { { 238,162,3906 },{ 174,106,4008 },{ 220,34,500 },{ 164,234,496 } }, + { { 240,178,4001 },{ 182,106,4008 },{ 228,34,500 },{ 172,234,496 } }, + { { 166,108,4085 },{ 115,31,4080 },{ 166,108,501 },{ 115,31,496 } }, + { { 1,68,3328 },{ 1,68,3328 },{ 0,17,384 },{ 0,17,384 } }, + { { 1,148,3904 },{ 1,148,3904 },{ 1,2,384 },{ 1,2,384 } }, + { { 21,18,3851 },{ 21,18,3848 },{ 1,50,488 },{ 1,50,488 } }, + { { 27,195,3851 },{ 29,18,3848 },{ 0,67,488 },{ 0,67,488 } }, + { { 34,195,3907 },{ 38,18,3848 },{ 20,66,482 },{ 0,3,496 } }, + { { 42,195,3907 },{ 46,18,3848 },{ 28,66,482 },{ 2,6,424 } }, + { { 50,195,3907 },{ 54,18,3848 },{ 36,66,482 },{ 4,22,424 } }, + { { 58,195,3907 },{ 62,18,3848 },{ 44,66,482 },{ 3,73,424 } }, + { { 67,195,3907 },{ 71,18,3848 },{ 53,66,482 },{ 3,22,496 } }, + { { 75,195,3907 },{ 79,18,3848 },{ 61,66,482 },{ 2,137,496 } }, + { { 83,195,3907 },{ 87,18,3848 },{ 69,66,482 },{ 1,89,496 } }, + { { 91,195,3907 },{ 95,18,3848 },{ 77,66,482 },{ 9,89,496 } }, + { { 100,195,3907 },{ 104,18,3848 },{ 86,66,482 },{ 18,89,496 } }, + { { 108,195,3907 },{ 112,18,3848 },{ 94,66,482 },{ 26,89,496 } }, + { { 116,195,3907 },{ 120,18,3848 },{ 102,66,482 },{ 34,89,496 } }, + { { 124,195,3907 },{ 128,18,3848 },{ 110,66,482 },{ 42,89,496 } }, + { { 133,195,3907 },{ 137,18,3848 },{ 119,66,482 },{ 51,89,496 } }, + { { 141,195,3907 },{ 145,18,3848 },{ 127,66,482 },{ 59,89,496 } }, + { { 149,195,3907 },{ 153,18,3848 },{ 135,66,482 },{ 67,89,496 } }, + { { 157,195,3907 },{ 161,18,3848 },{ 143,66,482 },{ 75,89,496 } }, + { { 166,195,3907 },{ 170,18,3848 },{ 152,66,482 },{ 84,89,496 } }, + { { 174,195,3907 },{ 178,18,3848 },{ 160,66,482 },{ 
92,89,496 } }, + { { 182,195,3907 },{ 186,18,3848 },{ 168,66,482 },{ 100,89,496 } }, + { { 190,195,3907 },{ 194,18,3848 },{ 176,66,482 },{ 108,89,496 } }, + { { 199,195,3907 },{ 203,18,3848 },{ 185,66,482 },{ 117,89,496 } }, + { { 207,195,3907 },{ 211,18,3848 },{ 193,66,482 },{ 125,89,496 } }, + { { 215,195,3907 },{ 219,18,3848 },{ 201,66,482 },{ 133,89,496 } }, + { { 223,195,3907 },{ 227,18,3848 },{ 209,66,482 },{ 141,89,496 } }, + { { 231,195,3907 },{ 168,89,4008 },{ 218,66,482 },{ 150,89,496 } }, + { { 236,18,3907 },{ 176,89,4008 },{ 226,66,482 },{ 158,89,496 } }, + { { 158,90,4085 },{ 103,31,4080 },{ 158,90,501 },{ 103,31,496 } }, + { { 166,90,4085 },{ 111,31,4080 },{ 166,90,501 },{ 111,31,496 } }, + { { 0,70,3328 },{ 0,70,3328 },{ 0,45,256 },{ 0,45,256 } }, + { { 0,117,3904 },{ 0,117,3904 },{ 0,35,384 },{ 0,35,384 } }, + { { 13,165,3905 },{ 13,165,3904 },{ 3,221,416 },{ 3,221,416 } }, + { { 21,165,3906 },{ 21,165,3904 },{ 11,221,416 },{ 11,221,416 } }, + { { 30,165,3906 },{ 30,165,3904 },{ 7,61,352 },{ 7,61,352 } }, + { { 38,165,3906 },{ 38,165,3904 },{ 2,125,352 },{ 2,125,352 } }, + { { 46,165,3906 },{ 46,165,3904 },{ 2,37,500 },{ 10,125,352 } }, + { { 54,165,3906 },{ 54,165,3904 },{ 10,37,500 },{ 5,61,424 } }, + { { 63,165,3906 },{ 63,165,3904 },{ 19,37,500 },{ 1,189,424 } }, + { { 4,254,4012 },{ 71,165,3904 },{ 27,37,500 },{ 9,189,424 } }, + { { 12,254,4012 },{ 79,165,3904 },{ 35,37,500 },{ 4,77,424 } }, + { { 20,254,4012 },{ 87,165,3904 },{ 43,37,500 },{ 12,77,424 } }, + { { 29,254,4012 },{ 96,165,3904 },{ 52,37,500 },{ 8,93,424 } }, + { { 37,254,4012 },{ 104,165,3904 },{ 60,37,500 },{ 3,141,496 } }, + { { 45,254,4012 },{ 112,165,3904 },{ 68,37,500 },{ 11,141,496 } }, + { { 53,254,4012 },{ 120,165,3904 },{ 76,37,500 },{ 6,93,496 } }, + { { 62,254,4012 },{ 129,165,3904 },{ 85,37,500 },{ 15,93,496 } }, + { { 70,254,4012 },{ 137,165,3904 },{ 93,37,500 },{ 23,93,496 } }, + { { 78,254,4012 },{ 145,165,3904 },{ 101,37,500 },{ 31,93,496 } }, + { { 86,254,4012 },{ 
153,165,3904 },{ 109,37,500 },{ 39,93,496 } }, + { { 95,254,4012 },{ 162,165,3904 },{ 118,37,500 },{ 48,93,496 } }, + { { 103,254,4012 },{ 170,165,3904 },{ 126,37,500 },{ 56,93,496 } }, + { { 111,254,4012 },{ 178,165,3904 },{ 134,37,500 },{ 64,93,496 } }, + { { 119,254,4012 },{ 186,165,3904 },{ 142,37,500 },{ 72,93,496 } }, + { { 128,254,4012 },{ 195,165,3904 },{ 151,37,500 },{ 81,93,496 } }, + { { 136,254,4012 },{ 203,165,3904 },{ 159,37,500 },{ 89,93,496 } }, + { { 212,165,3906 },{ 136,77,4008 },{ 167,37,500 },{ 97,93,496 } }, + { { 220,165,3394 },{ 131,93,4008 },{ 175,37,500 },{ 105,93,496 } }, + { { 214,181,4001 },{ 140,93,4008 },{ 184,37,500 },{ 114,93,496 } }, + { { 222,181,4001 },{ 148,93,4008 },{ 192,37,500 },{ 122,93,496 } }, + { { 114,95,4085 },{ 99,31,4080 },{ 114,95,501 },{ 99,31,496 } }, + { { 122,95,4085 },{ 107,31,4080 },{ 122,95,501 },{ 107,31,496 } }, + { { 0,102,3840 },{ 0,102,3840 },{ 0,18,384 },{ 0,18,384 } }, + { { 5,167,3904 },{ 5,167,3904 },{ 0,13,256 },{ 0,13,256 } }, + { { 4,54,3968 },{ 4,54,3968 },{ 1,67,448 },{ 1,67,448 } }, + { { 30,198,3850 },{ 30,198,3848 },{ 0,3,480 },{ 0,3,480 } }, + { { 39,198,3850 },{ 39,198,3848 },{ 3,52,488 },{ 3,52,488 } }, + { { 47,198,3851 },{ 47,198,3848 },{ 3,4,488 },{ 3,4,488 } }, + { { 55,198,3851 },{ 55,198,3848 },{ 1,70,488 },{ 1,70,488 } }, + { { 54,167,3906 },{ 63,198,3848 },{ 3,22,488 },{ 3,22,488 } }, + { { 62,167,3906 },{ 72,198,3848 },{ 24,118,488 },{ 0,6,496 } }, + { { 70,167,3906 },{ 80,198,3848 },{ 32,118,488 },{ 2,89,488 } }, + { { 78,167,3906 },{ 88,198,3848 },{ 40,118,488 },{ 1,73,496 } }, + { { 86,167,3906 },{ 96,198,3848 },{ 48,118,488 },{ 0,28,424 } }, + { { 95,167,3906 },{ 105,198,3848 },{ 57,118,488 },{ 9,28,424 } }, + { { 103,167,3906 },{ 113,198,3848 },{ 65,118,488 },{ 5,108,496 } }, + { { 111,167,3906 },{ 121,198,3848 },{ 73,118,488 },{ 13,108,496 } }, + { { 119,167,3906 },{ 129,198,3848 },{ 81,118,488 },{ 21,108,496 } }, + { { 128,167,3906 },{ 138,198,3848 },{ 90,118,488 },{ 6,28,496 
} }, + { { 136,167,3906 },{ 146,198,3848 },{ 98,118,488 },{ 14,28,496 } }, + { { 144,167,3906 },{ 154,198,3848 },{ 106,118,488 },{ 22,28,496 } }, + { { 152,167,3906 },{ 162,198,3848 },{ 114,118,488 },{ 30,28,496 } }, + { { 161,167,3906 },{ 171,198,3848 },{ 123,118,488 },{ 39,28,496 } }, + { { 169,167,3906 },{ 179,198,3848 },{ 131,118,488 },{ 47,28,496 } }, + { { 177,167,3906 },{ 187,198,3848 },{ 139,118,488 },{ 55,28,496 } }, + { { 185,167,3906 },{ 195,198,3848 },{ 147,118,488 },{ 63,28,496 } }, + { { 194,167,3906 },{ 120,12,4008 },{ 156,118,488 },{ 72,28,496 } }, + { { 206,198,3907 },{ 116,28,4008 },{ 164,118,488 },{ 80,28,496 } }, + { { 214,198,3907 },{ 124,28,4008 },{ 172,118,488 },{ 88,28,496 } }, + { { 222,198,3395 },{ 132,28,4008 },{ 180,118,488 },{ 96,28,496 } }, + { { 207,134,4001 },{ 141,28,4008 },{ 189,118,488 },{ 105,28,496 } }, + { { 95,30,4085 },{ 86,31,4080 },{ 95,30,501 },{ 86,31,496 } }, + { { 103,30,4085 },{ 94,31,4080 },{ 103,30,501 },{ 94,31,496 } }, + { { 111,30,4085 },{ 102,31,4080 },{ 111,30,501 },{ 102,31,496 } }, + { { 0,104,3840 },{ 0,104,3840 },{ 0,18,448 },{ 0,18,448 } }, + { { 4,39,3904 },{ 4,39,3904 },{ 0,4,384 },{ 0,4,384 } }, + { { 0,56,3968 },{ 0,56,3968 },{ 0,84,448 },{ 0,84,448 } }, + { { 6,110,3328 },{ 6,110,3328 },{ 0,20,448 },{ 0,20,448 } }, + { { 41,200,3850 },{ 41,200,3848 },{ 1,4,480 },{ 1,4,480 } }, + { { 49,200,3850 },{ 49,200,3848 },{ 1,8,416 },{ 1,8,416 } }, + { { 57,200,3851 },{ 57,200,3848 },{ 1,38,488 },{ 1,38,488 } }, + { { 65,200,3851 },{ 65,200,3848 },{ 1,120,488 },{ 1,120,488 } }, + { { 74,200,3851 },{ 74,200,3848 },{ 2,72,488 },{ 2,72,488 } }, + { { 69,6,3907 },{ 82,200,3848 },{ 2,24,488 },{ 2,24,488 } }, + { { 77,6,3907 },{ 90,200,3848 },{ 26,120,488 },{ 10,24,488 } }, + { { 97,63,3330 },{ 98,200,3848 },{ 34,120,488 },{ 2,8,496 } }, + { { 106,63,3330 },{ 107,200,3848 },{ 43,120,488 },{ 3,92,488 } }, + { { 114,63,3330 },{ 115,200,3848 },{ 51,120,488 },{ 11,92,488 } }, + { { 122,63,3330 },{ 123,200,3848 },{ 
59,120,488 },{ 7,76,496 } }, + { { 130,63,3330 },{ 131,200,3848 },{ 67,120,488 },{ 15,76,496 } }, + { { 139,63,3330 },{ 140,200,3848 },{ 76,120,488 },{ 24,76,496 } }, + { { 147,63,3330 },{ 148,200,3848 },{ 84,120,488 },{ 32,76,496 } }, + { { 155,63,3330 },{ 156,200,3848 },{ 92,120,488 },{ 40,76,496 } }, + { { 163,63,3330 },{ 164,200,3848 },{ 100,120,488 },{ 48,76,496 } }, + { { 172,63,3330 },{ 173,200,3848 },{ 109,120,488 },{ 57,76,496 } }, + { { 184,6,3851 },{ 181,200,3848 },{ 117,120,488 },{ 65,76,496 } }, + { { 192,6,3851 },{ 133,28,3936 },{ 125,120,488 },{ 73,76,496 } }, + { { 189,200,3907 },{ 141,28,3936 },{ 133,120,488 },{ 81,76,496 } }, + { { 198,200,3907 },{ 138,108,4000 },{ 142,120,488 },{ 90,76,496 } }, + { { 206,200,3907 },{ 146,108,4000 },{ 150,120,488 },{ 98,76,496 } }, + { { 214,200,3395 },{ 154,108,4000 },{ 158,120,488 },{ 106,76,496 } }, + { { 190,136,4001 },{ 162,108,4000 },{ 166,120,488 },{ 114,76,496 } }, + { { 123,30,4076 },{ 87,15,4080 },{ 123,30,492 },{ 87,15,496 } }, + { { 117,110,4084 },{ 80,31,4080 },{ 117,110,500 },{ 80,31,496 } }, + { { 125,110,4084 },{ 88,31,4080 },{ 125,110,500 },{ 88,31,496 } }, + { { 133,110,4084 },{ 96,31,4080 },{ 133,110,500 },{ 96,31,496 } }, + { { 9,56,3904 },{ 9,56,3904 },{ 0,67,448 },{ 0,67,448 } }, + { { 1,8,3904 },{ 1,8,3904 },{ 1,84,448 },{ 1,84,448 } }, + { { 1,124,3904 },{ 1,124,3904 },{ 0,39,384 },{ 0,39,384 } }, + { { 9,124,3904 },{ 9,124,3904 },{ 1,4,448 },{ 1,4,448 } }, + { { 6,76,3904 },{ 6,76,3904 },{ 0,70,448 },{ 0,70,448 } }, + { { 62,6,3859 },{ 62,6,3856 },{ 2,38,480 },{ 2,38,480 } }, + { { 70,6,3859 },{ 70,6,3856 },{ 5,43,416 },{ 5,43,416 } }, + { { 78,6,3859 },{ 78,6,3856 },{ 2,11,416 },{ 2,11,416 } }, + { { 87,6,3859 },{ 87,6,3856 },{ 0,171,488 },{ 0,171,488 } }, + { { 67,8,3906 },{ 95,6,3856 },{ 8,171,488 },{ 8,171,488 } }, + { { 75,8,3907 },{ 103,6,3856 },{ 5,123,488 },{ 5,123,488 } }, + { { 83,8,3907 },{ 111,6,3856 },{ 2,75,488 },{ 2,75,488 } }, + { { 92,8,3907 },{ 120,6,3856 },{ 0,27,488 },{ 
0,27,488 } }, + { { 100,8,3907 },{ 128,6,3856 },{ 8,27,488 },{ 8,27,488 } }, + { { 120,106,3843 },{ 136,6,3856 },{ 100,6,387 },{ 16,27,488 } }, + { { 128,106,3843 },{ 144,6,3856 },{ 108,6,387 },{ 2,11,496 } }, + { { 137,106,3843 },{ 153,6,3856 },{ 117,6,387 },{ 11,11,496 } }, + { { 145,106,3843 },{ 161,6,3856 },{ 125,6,387 },{ 19,11,496 } }, + { { 163,8,3851 },{ 137,43,3904 },{ 133,6,387 },{ 27,11,496 } }, + { { 171,8,3851 },{ 101,11,4000 },{ 141,6,387 },{ 35,11,496 } }, + { { 180,8,3851 },{ 110,11,4000 },{ 150,6,387 },{ 44,11,496 } }, + { { 188,8,3851 },{ 118,11,4000 },{ 158,6,387 },{ 52,11,496 } }, + { { 172,72,3907 },{ 126,11,4000 },{ 166,6,387 },{ 60,11,496 } }, + { { 174,6,3971 },{ 134,11,4000 },{ 174,6,387 },{ 68,11,496 } }, + { { 183,6,3971 },{ 143,11,4000 },{ 183,6,387 },{ 77,11,496 } }, + { { 191,6,3971 },{ 151,11,4000 },{ 191,6,387 },{ 85,11,496 } }, + { { 199,6,3971 },{ 159,11,4000 },{ 199,6,387 },{ 93,11,496 } }, + { { 92,12,4084 },{ 69,15,4080 },{ 92,12,500 },{ 69,15,496 } }, + { { 101,12,4084 },{ 78,15,4080 },{ 101,12,500 },{ 78,15,496 } }, + { { 109,12,4084 },{ 86,15,4080 },{ 109,12,500 },{ 86,15,496 } }, + { { 117,12,4084 },{ 79,31,4080 },{ 117,12,500 },{ 79,31,496 } }, + { { 125,12,4084 },{ 87,31,4080 },{ 125,12,500 },{ 87,31,496 } }, + { { 71,8,3602 },{ 71,8,3600 },{ 2,21,384 },{ 2,21,384 } }, + { { 79,8,3611 },{ 79,8,3608 },{ 0,69,448 },{ 0,69,448 } }, + { { 87,8,3611 },{ 87,8,3608 },{ 0,23,384 },{ 0,23,384 } }, + { { 95,8,3611 },{ 95,8,3608 },{ 1,5,448 },{ 1,5,448 } }, + { { 104,8,3611 },{ 104,8,3608 },{ 0,88,448 },{ 0,88,448 } }, + { { 112,8,3611 },{ 112,8,3608 },{ 0,72,448 },{ 0,72,448 } }, + { { 120,8,3611 },{ 121,8,3608 },{ 36,21,458 },{ 36,21,456 } }, + { { 133,47,3091 },{ 129,8,3608 },{ 44,21,458 },{ 44,21,456 } }, + { { 142,47,3091 },{ 138,8,3608 },{ 53,21,459 },{ 53,21,456 } }, + { { 98,12,3850 },{ 98,12,3848 },{ 61,21,459 },{ 61,21,456 } }, + { { 106,12,3850 },{ 106,12,3848 },{ 10,92,480 },{ 69,21,456 } }, + { { 114,12,3851 },{ 
114,12,3848 },{ 18,92,480 },{ 77,21,456 } }, + { { 87,12,3906 },{ 87,12,3904 },{ 3,44,488 },{ 86,21,456 } }, + { { 95,12,3906 },{ 95,12,3904 },{ 11,44,488 },{ 94,21,456 } }, + { { 103,12,3906 },{ 103,12,3904 },{ 19,44,488 },{ 102,21,456 } }, + { { 111,12,3907 },{ 111,12,3904 },{ 27,44,489 },{ 110,21,456 } }, + { { 120,12,3907 },{ 120,12,3904 },{ 36,44,489 },{ 119,21,456 } }, + { { 128,12,3907 },{ 128,12,3904 },{ 44,44,489 },{ 127,21,456 } }, + { { 136,12,3907 },{ 136,12,3904 },{ 52,44,489 },{ 135,21,456 } }, + { { 144,12,3907 },{ 144,12,3904 },{ 60,44,489 },{ 143,21,456 } }, + { { 153,12,3907 },{ 153,12,3904 },{ 69,44,490 },{ 152,21,456 } }, + { { 161,12,3395 },{ 149,188,3968 },{ 77,44,490 },{ 160,21,456 } }, + { { 169,12,3395 },{ 198,21,3928 },{ 85,44,490 },{ 168,21,456 } }, + { { 113,95,4001 },{ 201,69,3992 },{ 125,8,483 },{ 176,21,456 } }, + { { 122,95,4001 },{ 200,21,3984 },{ 134,8,483 },{ 185,21,456 } }, + { { 142,8,4067 },{ 208,21,3984 },{ 142,8,483 },{ 193,21,456 } }, + { { 151,8,4067 },{ 47,15,4080 },{ 151,8,483 },{ 47,15,496 } }, + { { 159,8,4067 },{ 55,15,4080 },{ 159,8,483 },{ 55,15,496 } }, + { { 168,8,4067 },{ 64,15,4080 },{ 168,8,483 },{ 64,15,496 } }, + { { 160,40,4075 },{ 72,15,4080 },{ 160,40,491 },{ 72,15,496 } }, + { { 168,40,4075 },{ 80,15,4080 },{ 168,40,491 },{ 80,15,496 } }, + { { 144,8,4082 },{ 88,15,4080 },{ 144,8,498 },{ 88,15,496 } } + }; +#endif // BASISD_SUPPORT_ETC2_EAC_A8 + +#if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES + static void create_etc2_eac_a8_conversion_table() + { + FILE* pFile = fopen("basisu_decoder_tables_etc2_eac_a8.inc", "w"); + + for (uint32_t inten = 0; inten < 8; inten++) + { + for (uint32_t base = 0; base < 32; base++) + { + color32 block_colors[4]; + decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(base, base, base, 255), false), inten); + + fprintf(pFile, "{"); + + for (uint32_t sel_range = 0; sel_range < NUM_ETC2_EAC_SELECTOR_RANGES; sel_range++) + { + const uint32_t 
low_selector = s_etc2_eac_selector_ranges[sel_range].m_low; + const uint32_t high_selector = s_etc2_eac_selector_ranges[sel_range].m_high; + + // We have a ETC1 base color and intensity, and a used selector range from low_selector-high_selector. + // Now find the best ETC2 EAC A8 base/table/multiplier that fits these colors. + + uint8_t pixels[4]; + uint32_t num_pixels = 0; + for (uint32_t s = low_selector; s <= high_selector; s++) + pixels[num_pixels++] = block_colors[s].g; + + pack_eac_a8_results pack_results; + pack_eac_a8_exhaustive(pack_results, pixels, num_pixels); + + etc1_g_to_eac_conversion& c = s_etc1_g_to_etc2_a8[base + inten * 32][sel_range]; + + c.m_base = pack_results.m_base; + c.m_table_mul = pack_results.m_table * 16 + pack_results.m_multiplier; + c.m_trans = 0; + + for (uint32_t s = 0; s < 4; s++) + { + if ((s < low_selector) || (s > high_selector)) + continue; + + uint32_t etc2_selector = pack_results.m_selectors[s - low_selector]; + + c.m_trans |= (etc2_selector << (s * 3)); + } + + fprintf(pFile, "{%u,%u,%u}", c.m_base, c.m_table_mul, c.m_trans); + if (sel_range < (NUM_ETC2_EAC_SELECTOR_RANGES - 1)) + fprintf(pFile, ","); + } + + fprintf(pFile, "},\n"); + } + } + + fclose(pFile); + } +#endif + +#if BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES + struct pack_eac_r11_results + { + uint32_t m_base; + uint32_t m_table; + uint32_t m_multiplier; + basisu::vector m_selectors; + basisu::vector m_selectors_temp; + }; + + static uint64_t pack_eac_r11_exhaustive(pack_eac_r11_results& results, const uint8_t* pPixels, uint32_t num_pixels) + { + results.m_selectors.resize(num_pixels); + results.m_selectors_temp.resize(num_pixels); + + uint64_t best_err = UINT64_MAX; + + for (uint32_t base_color = 0; base_color < 256; base_color++) + { + for (uint32_t multiplier = 0; multiplier < 16; multiplier++) + { + for (uint32_t table = 0; table < 16; table++) + { + uint64_t total_err = 0; + + for (uint32_t i = 0; i < num_pixels; i++) + { + // Convert 8-bit input to 11-bits + 
const int a = (pPixels[i] * 2047 + 128) / 255; + + uint32_t best_s_err = UINT32_MAX; + uint32_t best_s = 0; + for (uint32_t s = 0; s < 8; s++) + { + int v = (int)(multiplier ? (multiplier * 8) : 1) * g_eac_modifier_table[table][s] + (int)base_color * 8 + 4; + if (v < 0) + v = 0; + else if (v > 2047) + v = 2047; + + uint32_t err = abs(a - v); + if (err < best_s_err) + { + best_s_err = err; + best_s = s; + } + } + + results.m_selectors_temp[i] = static_cast(best_s); + + total_err += best_s_err * best_s_err; + if (total_err >= best_err) + break; + } + + if (total_err < best_err) + { + best_err = total_err; + results.m_base = base_color; + results.m_multiplier = multiplier; + results.m_table = table; + results.m_selectors.swap(results.m_selectors_temp); + } + + } // table + + } // multiplier + + } // base_color + + return best_err; + } + + static void create_etc2_eac_r11_conversion_table() + { + FILE* pFile = nullptr; + fopen_s(&pFile, "basisu_decoder_tables_etc2_eac_r11.inc", "w"); + + for (uint32_t inten = 0; inten < 8; inten++) + { + for (uint32_t base = 0; base < 32; base++) + { + color32 block_colors[4]; + decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(base, base, base, 255), false), inten); + + fprintf(pFile, "{"); + + for (uint32_t sel_range = 0; sel_range < NUM_ETC2_EAC_SELECTOR_RANGES; sel_range++) + { + const uint32_t low_selector = s_etc2_eac_selector_ranges[sel_range].m_low; + const uint32_t high_selector = s_etc2_eac_selector_ranges[sel_range].m_high; + + // We have a ETC1 base color and intensity, and a used selector range from low_selector-high_selector. + // Now find the best ETC2 EAC R11 base/table/multiplier that fits these colors. 
+ + uint8_t pixels[4]; + uint32_t num_pixels = 0; + for (uint32_t s = low_selector; s <= high_selector; s++) + pixels[num_pixels++] = block_colors[s].g; + + pack_eac_r11_results pack_results; + pack_eac_r11_exhaustive(pack_results, pixels, num_pixels); + + etc1_g_to_eac_conversion c; + + c.m_base = (uint8_t)pack_results.m_base; + c.m_table_mul = (uint8_t)(pack_results.m_table * 16 + pack_results.m_multiplier); + c.m_trans = 0; + + for (uint32_t s = 0; s < 4; s++) + { + if ((s < low_selector) || (s > high_selector)) + continue; + + uint32_t etc2_selector = pack_results.m_selectors[s - low_selector]; + + c.m_trans |= (etc2_selector << (s * 3)); + } + + fprintf(pFile, "{%u,%u,%u}", c.m_base, c.m_table_mul, c.m_trans); + if (sel_range < (NUM_ETC2_EAC_SELECTOR_RANGES - 1)) + fprintf(pFile, ","); + } + + fprintf(pFile, "},\n"); + } + } + + fclose(pFile); + } +#endif // BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES + +#if BASISD_WRITE_NEW_ASTC_TABLES + static void create_etc1_to_astc_conversion_table_0_47(); + static void create_etc1_to_astc_conversion_table_0_255(); +#endif + +#if BASISD_SUPPORT_ASTC + static void transcoder_init_astc(); +#endif + +#if BASISD_WRITE_NEW_BC7_MODE5_TABLES + static void create_etc1_to_bc7_m5_color_conversion_table(); + static void create_etc1_to_bc7_m5_alpha_conversion_table(); +#endif + +#if BASISD_SUPPORT_BC7_MODE5 + static void transcoder_init_bc7_mode5(); +#endif + +#if BASISD_WRITE_NEW_ATC_TABLES + static void create_etc1s_to_atc_conversion_tables(); +#endif + +#if BASISD_SUPPORT_ATC + static void transcoder_init_atc(); +#endif + +#if BASISD_SUPPORT_PVRTC2 + static void transcoder_init_pvrtc2(); +#endif + +#if BASISD_SUPPORT_UASTC + void uastc_init(); +#endif + +#if BASISD_SUPPORT_UASTC_HDR + namespace astc_6x6_hdr + { + static void init_quantize_tables(); + static void fast_encode_bc6h_init(); + } +#endif + +#if BASISD_SUPPORT_BC7_MODE5 + namespace bc7_mode_5_encoder + { + void encode_bc7_mode5_init(); + } +#endif + + static bool 
g_transcoder_initialized; + + // Library global initialization. Requires ~9 milliseconds when compiled and executed natively on a Core i7 2.2 GHz. + // If this is too slow, these computed tables can easilky be moved to be compiled in. + void basisu_transcoder_init() + { + if (g_transcoder_initialized) + { + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); + return; + } + + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); + +#if BASISD_SUPPORT_UASTC + uastc_init(); +#endif + +#if BASISD_SUPPORT_UASTC_HDR + // TODO: Examine this, optimize for startup time/mem utilization. + astc_helpers::init_tables(false); + + astc_hdr_core_init(); +#endif + +#if BASISD_SUPPORT_ASTC + transcoder_init_astc(); +#endif + +#if BASISD_WRITE_NEW_ASTC_TABLES + create_etc1_to_astc_conversion_table_0_47(); + create_etc1_to_astc_conversion_table_0_255(); + exit(0); +#endif + +#if BASISD_WRITE_NEW_BC7_MODE5_TABLES + create_etc1_to_bc7_m5_color_conversion_table(); + create_etc1_to_bc7_m5_alpha_conversion_table(); + exit(0); +#endif + +#if BASISD_WRITE_NEW_DXT1_TABLES + create_etc1_to_dxt1_5_conversion_table(); + create_etc1_to_dxt1_6_conversion_table(); + exit(0); +#endif + +#if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES + create_etc2_eac_a8_conversion_table(); + exit(0); +#endif + +#if BASISD_WRITE_NEW_ATC_TABLES + create_etc1s_to_atc_conversion_tables(); + exit(0); +#endif + +#if BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES + create_etc2_eac_r11_conversion_table(); + exit(0); +#endif + +#if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC + uint8_t bc1_expand5[32]; + for (int i = 0; i < 32; i++) + bc1_expand5[i] = static_cast((i << 3) | (i >> 2)); + prepare_bc1_single_color_table(g_bc1_match5_equals_1, bc1_expand5, 32, 32, 1); + prepare_bc1_single_color_table(g_bc1_match5_equals_0, bc1_expand5, 1, 32, 0); + + uint8_t bc1_expand6[64]; + for (int i = 0; i < 64; i++) + bc1_expand6[i] = static_cast((i << 2) | (i >> 4)); + 
prepare_bc1_single_color_table(g_bc1_match6_equals_1, bc1_expand6, 64, 64, 1); + prepare_bc1_single_color_table(g_bc1_match6_equals_0, bc1_expand6, 1, 64, 0); + +#if 0 + for (uint32_t i = 0; i < 256; i++) + { + printf("%u %u %u\n", i, (i * 63 + 127) / 255, g_bc1_match6_equals_0[i].m_hi); + } + exit(0); +#endif + +#endif + +#if BASISD_SUPPORT_DXT1 + for (uint32_t i = 0; i < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; i++) + { + uint32_t l = g_etc1_to_dxt1_selector_ranges[i].m_low; + uint32_t h = g_etc1_to_dxt1_selector_ranges[i].m_high; + g_etc1_to_dxt1_selector_range_index[l][h] = i; + } + + for (uint32_t sm = 0; sm < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; sm++) + { + uint8_t etc1_to_dxt1_selector_mappings_raw_dxt1[4]; + uint8_t etc1_to_dxt1_selector_mappings_raw_dxt1_inv[4]; + + for (uint32_t j = 0; j < 4; j++) + { + static const uint8_t s_linear_dxt1_to_dxt1[4] = { 0, 2, 3, 1 }; + static const uint8_t s_dxt1_inverted_xlat[4] = { 1, 0, 3, 2 }; + + etc1_to_dxt1_selector_mappings_raw_dxt1[j] = (uint8_t)s_linear_dxt1_to_dxt1[g_etc1_to_dxt1_selector_mappings[sm][j]]; + etc1_to_dxt1_selector_mappings_raw_dxt1_inv[j] = (uint8_t)s_dxt1_inverted_xlat[etc1_to_dxt1_selector_mappings_raw_dxt1[j]]; + } + + for (uint32_t i = 0; i < 256; i++) + { + uint32_t k = 0, k_inv = 0; + for (uint32_t s = 0; s < 4; s++) + { + k |= (etc1_to_dxt1_selector_mappings_raw_dxt1[(i >> (s * 2)) & 3] << (s * 2)); + k_inv |= (etc1_to_dxt1_selector_mappings_raw_dxt1_inv[(i >> (s * 2)) & 3] << (s * 2)); + } + g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[sm][i] = (uint8_t)k; + g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[sm][i] = (uint8_t)k_inv; + } + } +#endif + +#if BASISD_SUPPORT_BC7_MODE5 + transcoder_init_bc7_mode5(); +#endif + +#if BASISD_SUPPORT_ATC + transcoder_init_atc(); +#endif + +#if BASISD_SUPPORT_PVRTC2 + transcoder_init_pvrtc2(); +#endif + +#if BASISD_SUPPORT_UASTC_HDR + bc6h_enc_init(); + astc_6x6_hdr::init_quantize_tables(); + fast_encode_bc6h_init(); +#endif + +#if BASISD_SUPPORT_BC7_MODE5 + 
bc7_mode_5_encoder::encode_bc7_mode5_init(); +#endif + + g_transcoder_initialized = true; + } + +#if BASISD_SUPPORT_DXT1 + static void convert_etc1s_to_dxt1(dxt1_block* pDst_block, const endpoint *pEndpoints, const selector* pSelector, bool use_threecolor_blocks) + { +#if !BASISD_WRITE_NEW_DXT1_TABLES + const uint32_t low_selector = pSelector->m_lo_selector; + const uint32_t high_selector = pSelector->m_hi_selector; + + const color32& base_color = pEndpoints->m_color5; + const uint32_t inten_table = pEndpoints->m_inten5; + + if (low_selector == high_selector) + { + uint32_t r, g, b; + decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); + + uint32_t mask = 0xAA; + uint32_t max16 = (g_bc1_match5_equals_1[r].m_hi << 11) | (g_bc1_match6_equals_1[g].m_hi << 5) | g_bc1_match5_equals_1[b].m_hi; + uint32_t min16 = (g_bc1_match5_equals_1[r].m_lo << 11) | (g_bc1_match6_equals_1[g].m_lo << 5) | g_bc1_match5_equals_1[b].m_lo; + + if ((!use_threecolor_blocks) && (min16 == max16)) + { + // This is an annoying edge case that impacts BC3. + // This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's. 
+ mask = 0; + + // Make l > h + if (min16 > 0) + min16--; + else + { + // l = h = 0 + assert(min16 == max16 && max16 == 0); + + max16 = 1; + min16 = 0; + mask = 0x55; + } + + assert(max16 > min16); + } + + if (max16 < min16) + { + std::swap(max16, min16); + mask ^= 0x55; + } + + pDst_block->set_low_color(static_cast(max16)); + pDst_block->set_high_color(static_cast(min16)); + pDst_block->m_selectors[0] = static_cast(mask); + pDst_block->m_selectors[1] = static_cast(mask); + pDst_block->m_selectors[2] = static_cast(mask); + pDst_block->m_selectors[3] = static_cast(mask); + + return; + } + else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3)) + { + color32 block_colors[4]; + + decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table); + + const uint32_t r0 = block_colors[0].r; + const uint32_t g0 = block_colors[0].g; + const uint32_t b0 = block_colors[0].b; + + const uint32_t r1 = block_colors[3].r; + const uint32_t g1 = block_colors[3].g; + const uint32_t b1 = block_colors[3].b; + + uint32_t max16 = (g_bc1_match5_equals_0[r0].m_hi << 11) | (g_bc1_match6_equals_0[g0].m_hi << 5) | g_bc1_match5_equals_0[b0].m_hi; + uint32_t min16 = (g_bc1_match5_equals_0[r1].m_hi << 11) | (g_bc1_match6_equals_0[g1].m_hi << 5) | g_bc1_match5_equals_0[b1].m_hi; + + uint32_t l = 0, h = 1; + + if (min16 == max16) + { + // Make l > h + if (min16 > 0) + { + min16--; + + l = 0; + h = 0; + } + else + { + // l = h = 0 + assert(min16 == max16 && max16 == 0); + + max16 = 1; + min16 = 0; + + l = 1; + h = 1; + } + + assert(max16 > min16); + } + + if (max16 < min16) + { + std::swap(max16, min16); + l = 1; + h = 0; + } + + pDst_block->set_low_color((uint16_t)max16); + pDst_block->set_high_color((uint16_t)min16); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = pSelector->get_selector(x, y); + pDst_block->set_selector(x, y, (s == 3) ? 
h : l); + } + } + + return; + } + + const uint32_t selector_range_table = g_etc1_to_dxt1_selector_range_index[low_selector][high_selector]; + + //[32][8][RANGES][MAPPING] + const etc1_to_dxt1_56_solution* pTable_r = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS]; + const etc1_to_dxt1_56_solution* pTable_g = &g_etc1_to_dxt_6[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS]; + const etc1_to_dxt1_56_solution* pTable_b = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS]; + + uint32_t best_err = UINT_MAX; + uint32_t best_mapping = 0; + + assert(NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS == 10); +#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } } + DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4); + DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9); +#undef DO_ITER + + uint32_t l = dxt1_block::pack_unscaled_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo); + uint32_t h = dxt1_block::pack_unscaled_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi); + + const uint8_t* pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[best_mapping][0]; + + if (l < h) + { + std::swap(l, h); + pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[best_mapping][0]; + } + + pDst_block->set_low_color(static_cast(l)); + pDst_block->set_high_color(static_cast(h)); + + if (l == h) + { + uint8_t mask = 0; + + if (!use_threecolor_blocks) + { + // This is an 
annoying edge case that impacts BC3. + + // Make l > h + if (h > 0) + h--; + else + { + // l = h = 0 + assert(l == h && h == 0); + + h = 0; + l = 1; + mask = 0x55; + } + + assert(l > h); + pDst_block->set_low_color(static_cast(l)); + pDst_block->set_high_color(static_cast(h)); + } + + pDst_block->m_selectors[0] = mask; + pDst_block->m_selectors[1] = mask; + pDst_block->m_selectors[2] = mask; + pDst_block->m_selectors[3] = mask; + + return; + } + + pDst_block->m_selectors[0] = pSelectors_xlat_256[pSelector->m_selectors[0]]; + pDst_block->m_selectors[1] = pSelectors_xlat_256[pSelector->m_selectors[1]]; + pDst_block->m_selectors[2] = pSelectors_xlat_256[pSelector->m_selectors[2]]; + pDst_block->m_selectors[3] = pSelectors_xlat_256[pSelector->m_selectors[3]]; +#endif + } + +#if BASISD_ENABLE_DEBUG_FLAGS + static void convert_etc1s_to_dxt1_vis(dxt1_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector, bool use_threecolor_blocks) + { + convert_etc1s_to_dxt1(pDst_block, pEndpoints, pSelector, use_threecolor_blocks); + + if (g_debug_flags & cDebugFlagVisBC1Sels) + { + uint32_t l = dxt1_block::pack_unscaled_color(31, 63, 31); + uint32_t h = dxt1_block::pack_unscaled_color(0, 0, 0); + pDst_block->set_low_color(static_cast(l)); + pDst_block->set_high_color(static_cast(h)); + } + else if (g_debug_flags & cDebugFlagVisBC1Endpoints) + { + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + pDst_block->set_selector(x, y, (y < 2) ? 
0 : 1); + } + } +#endif +#endif + +#if BASISD_SUPPORT_FXT1 + struct fxt1_block + { + union + { + struct + { + uint64_t m_t00 : 2; + uint64_t m_t01 : 2; + uint64_t m_t02 : 2; + uint64_t m_t03 : 2; + uint64_t m_t04 : 2; + uint64_t m_t05 : 2; + uint64_t m_t06 : 2; + uint64_t m_t07 : 2; + uint64_t m_t08 : 2; + uint64_t m_t09 : 2; + uint64_t m_t10 : 2; + uint64_t m_t11 : 2; + uint64_t m_t12 : 2; + uint64_t m_t13 : 2; + uint64_t m_t14 : 2; + uint64_t m_t15 : 2; + uint64_t m_t16 : 2; + uint64_t m_t17 : 2; + uint64_t m_t18 : 2; + uint64_t m_t19 : 2; + uint64_t m_t20 : 2; + uint64_t m_t21 : 2; + uint64_t m_t22 : 2; + uint64_t m_t23 : 2; + uint64_t m_t24 : 2; + uint64_t m_t25 : 2; + uint64_t m_t26 : 2; + uint64_t m_t27 : 2; + uint64_t m_t28 : 2; + uint64_t m_t29 : 2; + uint64_t m_t30 : 2; + uint64_t m_t31 : 2; + } m_lo; + uint64_t m_lo_bits; + uint8_t m_sels[8]; + }; + union + { + struct + { +#ifdef BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING + uint64_t m_b1 : 5; + uint64_t m_g1 : 5; + uint64_t m_r1 : 5; + uint64_t m_b0 : 5; + uint64_t m_g0 : 5; + uint64_t m_r0 : 5; + uint64_t m_b3 : 5; + uint64_t m_g3 : 5; + uint64_t m_r3 : 5; + uint64_t m_b2 : 5; + uint64_t m_g2 : 5; + uint64_t m_r2 : 5; +#else + uint64_t m_b0 : 5; + uint64_t m_g0 : 5; + uint64_t m_r0 : 5; + uint64_t m_b1 : 5; + uint64_t m_g1 : 5; + uint64_t m_r1 : 5; + uint64_t m_b2 : 5; + uint64_t m_g2 : 5; + uint64_t m_r2 : 5; + uint64_t m_b3 : 5; + uint64_t m_g3 : 5; + uint64_t m_r3 : 5; +#endif + uint64_t m_alpha : 1; + uint64_t m_glsb : 2; + uint64_t m_mode : 1; + } m_hi; + uint64_t m_hi_bits; + }; + }; + + static uint8_t conv_dxt1_to_fxt1_sels(uint32_t sels) + { + static uint8_t s_conv_table[16] = { 0, 3, 1, 2, 12, 15, 13, 14, 4, 7, 5, 6, 8, 11, 9, 10 }; + return s_conv_table[sels & 15] | (s_conv_table[sels >> 4] << 4); + } + + static void convert_etc1s_to_fxt1(void *pDst, const endpoint *pEndpoints, const selector *pSelectors, uint32_t fxt1_subblock) + { + fxt1_block* pBlock = static_cast(pDst); + + // CC_MIXED is 
basically DXT1 with different encoding tricks. + // So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless. + // (It's not completely lossless because FXT1 rounds in its color lerps while DXT1 doesn't, but it should be good enough.) + dxt1_block blk; + convert_etc1s_to_dxt1(&blk, pEndpoints, pSelectors, false); + + const uint32_t l = blk.get_low_color(); + const uint32_t h = blk.get_high_color(); + + color32 color0((l >> 11) & 31, (l >> 5) & 63, l & 31, 255); + color32 color1((h >> 11) & 31, (h >> 5) & 63, h & 31, 255); + + uint32_t g0 = color0.g & 1; + uint32_t g1 = color1.g & 1; + + color0.g >>= 1; + color1.g >>= 1; + + blk.m_selectors[0] = conv_dxt1_to_fxt1_sels(blk.m_selectors[0]); + blk.m_selectors[1] = conv_dxt1_to_fxt1_sels(blk.m_selectors[1]); + blk.m_selectors[2] = conv_dxt1_to_fxt1_sels(blk.m_selectors[2]); + blk.m_selectors[3] = conv_dxt1_to_fxt1_sels(blk.m_selectors[3]); + + if ((blk.get_selector(0, 0) >> 1) != (g0 ^ g1)) + { + std::swap(color0, color1); + std::swap(g0, g1); + + blk.m_selectors[0] ^= 0xFF; + blk.m_selectors[1] ^= 0xFF; + blk.m_selectors[2] ^= 0xFF; + blk.m_selectors[3] ^= 0xFF; + } + + if (fxt1_subblock == 0) + { + pBlock->m_hi.m_mode = 1; + pBlock->m_hi.m_alpha = 0; + pBlock->m_hi.m_glsb = g1 | (g1 << 1); + pBlock->m_hi.m_r0 = color0.r; + pBlock->m_hi.m_g0 = color0.g; + pBlock->m_hi.m_b0 = color0.b; + pBlock->m_hi.m_r1 = color1.r; + pBlock->m_hi.m_g1 = color1.g; + pBlock->m_hi.m_b1 = color1.b; + pBlock->m_hi.m_r2 = color0.r; + pBlock->m_hi.m_g2 = color0.g; + pBlock->m_hi.m_b2 = color0.b; + pBlock->m_hi.m_r3 = color1.r; + pBlock->m_hi.m_g3 = color1.g; + pBlock->m_hi.m_b3 = color1.b; + pBlock->m_sels[0] = blk.m_selectors[0]; + pBlock->m_sels[1] = blk.m_selectors[1]; + pBlock->m_sels[2] = blk.m_selectors[2]; + pBlock->m_sels[3] = blk.m_selectors[3]; + + static const uint8_t s_border_dup[4] = { 0, 85, 170, 255 }; + pBlock->m_sels[4] = s_border_dup[blk.m_selectors[0] >> 6]; + pBlock->m_sels[5] = 
s_border_dup[blk.m_selectors[1] >> 6]; + pBlock->m_sels[6] = s_border_dup[blk.m_selectors[2] >> 6]; + pBlock->m_sels[7] = s_border_dup[blk.m_selectors[3] >> 6]; + } + else + { + pBlock->m_hi.m_glsb = (pBlock->m_hi.m_glsb & 1) | (g1 << 1); + pBlock->m_hi.m_r2 = color0.r; + pBlock->m_hi.m_g2 = color0.g; + pBlock->m_hi.m_b2 = color0.b; + pBlock->m_hi.m_r3 = color1.r; + pBlock->m_hi.m_g3 = color1.g; + pBlock->m_hi.m_b3 = color1.b; + pBlock->m_sels[4] = blk.m_selectors[0]; + pBlock->m_sels[5] = blk.m_selectors[1]; + pBlock->m_sels[6] = blk.m_selectors[2]; + pBlock->m_sels[7] = blk.m_selectors[3]; + } + } +#endif // BASISD_SUPPORT_FXT1 +#if BASISD_SUPPORT_DXT5A + static dxt_selector_range s_dxt5a_selector_ranges[] = + { + { 0, 3 }, + + { 1, 3 }, + { 0, 2 }, + + { 1, 2 }, + }; + + const uint32_t NUM_DXT5A_SELECTOR_RANGES = sizeof(s_dxt5a_selector_ranges) / sizeof(s_dxt5a_selector_ranges[0]); + + struct etc1_g_to_dxt5a_conversion + { + uint8_t m_lo, m_hi; + uint16_t m_trans; + }; + + static etc1_g_to_dxt5a_conversion g_etc1_g_to_dxt5a[32 * 8][NUM_DXT5A_SELECTOR_RANGES] = + { + { { 8, 0, 393 },{ 8, 0, 392 },{ 2, 0, 9 },{ 2, 0, 8 }, }, { { 6, 16, 710 },{ 16, 6, 328 },{ 0, 10, 96 },{ 10, 6, 8 }, }, + { { 28, 5, 1327 },{ 24, 14, 328 },{ 8, 18, 96 },{ 18, 14, 8 }, }, { { 36, 13, 1327 },{ 32, 22, 328 },{ 16, 26, 96 },{ 26, 22, 8 }, }, + { { 45, 22, 1327 },{ 41, 31, 328 },{ 25, 35, 96 },{ 35, 31, 8 }, }, { { 53, 30, 1327 },{ 49, 39, 328 },{ 33, 43, 96 },{ 43, 39, 8 }, }, + { { 61, 38, 1327 },{ 57, 47, 328 },{ 41, 51, 96 },{ 51, 47, 8 }, }, { { 69, 46, 1327 },{ 65, 55, 328 },{ 49, 59, 96 },{ 59, 55, 8 }, }, + { { 78, 55, 1327 },{ 74, 64, 328 },{ 58, 68, 96 },{ 68, 64, 8 }, }, { { 86, 63, 1327 },{ 82, 72, 328 },{ 66, 76, 96 },{ 76, 72, 8 }, }, + { { 94, 71, 1327 },{ 90, 80, 328 },{ 74, 84, 96 },{ 84, 80, 8 }, }, { { 102, 79, 1327 },{ 98, 88, 328 },{ 82, 92, 96 },{ 92, 88, 8 }, }, + { { 111, 88, 1327 },{ 107, 97, 328 },{ 91, 101, 96 },{ 101, 97, 8 }, }, { { 119, 96, 1327 },{ 115, 
105, 328 },{ 99, 109, 96 },{ 109, 105, 8 }, }, + { { 127, 104, 1327 },{ 123, 113, 328 },{ 107, 117, 96 },{ 117, 113, 8 }, }, { { 135, 112, 1327 },{ 131, 121, 328 },{ 115, 125, 96 },{ 125, 121, 8 }, }, + { { 144, 121, 1327 },{ 140, 130, 328 },{ 124, 134, 96 },{ 134, 130, 8 }, }, { { 152, 129, 1327 },{ 148, 138, 328 },{ 132, 142, 96 },{ 142, 138, 8 }, }, + { { 160, 137, 1327 },{ 156, 146, 328 },{ 140, 150, 96 },{ 150, 146, 8 }, }, { { 168, 145, 1327 },{ 164, 154, 328 },{ 148, 158, 96 },{ 158, 154, 8 }, }, + { { 177, 154, 1327 },{ 173, 163, 328 },{ 157, 167, 96 },{ 167, 163, 8 }, }, { { 185, 162, 1327 },{ 181, 171, 328 },{ 165, 175, 96 },{ 175, 171, 8 }, }, + { { 193, 170, 1327 },{ 189, 179, 328 },{ 173, 183, 96 },{ 183, 179, 8 }, }, { { 201, 178, 1327 },{ 197, 187, 328 },{ 181, 191, 96 },{ 191, 187, 8 }, }, + { { 210, 187, 1327 },{ 206, 196, 328 },{ 190, 200, 96 },{ 200, 196, 8 }, }, { { 218, 195, 1327 },{ 214, 204, 328 },{ 198, 208, 96 },{ 208, 204, 8 }, }, + { { 226, 203, 1327 },{ 222, 212, 328 },{ 206, 216, 96 },{ 216, 212, 8 }, }, { { 234, 211, 1327 },{ 230, 220, 328 },{ 214, 224, 96 },{ 224, 220, 8 }, }, + { { 243, 220, 1327 },{ 239, 229, 328 },{ 223, 233, 96 },{ 233, 229, 8 }, }, { { 251, 228, 1327 },{ 247, 237, 328 },{ 231, 241, 96 },{ 241, 237, 8 }, }, + { { 239, 249, 3680 },{ 245, 249, 3648 },{ 239, 249, 96 },{ 249, 245, 8 }, }, { { 247, 253, 4040 },{ 255, 253, 8 },{ 247, 253, 456 },{ 255, 253, 8 }, }, + { { 5, 17, 566 },{ 5, 17, 560 },{ 5, 0, 9 },{ 5, 0, 8 }, }, { { 25, 0, 313 },{ 25, 3, 328 },{ 13, 0, 49 },{ 13, 3, 8 }, }, + { { 39, 0, 1329 },{ 33, 11, 328 },{ 11, 21, 70 },{ 21, 11, 8 }, }, { { 47, 7, 1329 },{ 41, 19, 328 },{ 29, 7, 33 },{ 29, 19, 8 }, }, + { { 50, 11, 239 },{ 50, 28, 328 },{ 38, 16, 33 },{ 38, 28, 8 }, }, { { 92, 13, 2423 },{ 58, 36, 328 },{ 46, 24, 33 },{ 46, 36, 8 }, }, + { { 100, 21, 2423 },{ 66, 44, 328 },{ 54, 32, 33 },{ 54, 44, 8 }, }, { { 86, 7, 1253 },{ 74, 52, 328 },{ 62, 40, 33 },{ 62, 52, 8 }, }, + { { 95, 16, 1253 },{ 83, 61, 
328 },{ 71, 49, 33 },{ 71, 61, 8 }, }, { { 103, 24, 1253 },{ 91, 69, 328 },{ 79, 57, 33 },{ 79, 69, 8 }, }, + { { 111, 32, 1253 },{ 99, 77, 328 },{ 87, 65, 33 },{ 87, 77, 8 }, }, { { 119, 40, 1253 },{ 107, 85, 328 },{ 95, 73, 33 },{ 95, 85, 8 }, }, + { { 128, 49, 1253 },{ 116, 94, 328 },{ 104, 82, 33 },{ 104, 94, 8 }, }, { { 136, 57, 1253 },{ 124, 102, 328 },{ 112, 90, 33 },{ 112, 102, 8 }, }, + { { 144, 65, 1253 },{ 132, 110, 328 },{ 120, 98, 33 },{ 120, 110, 8 }, }, { { 152, 73, 1253 },{ 140, 118, 328 },{ 128, 106, 33 },{ 128, 118, 8 }, }, + { { 161, 82, 1253 },{ 149, 127, 328 },{ 137, 115, 33 },{ 137, 127, 8 }, }, { { 169, 90, 1253 },{ 157, 135, 328 },{ 145, 123, 33 },{ 145, 135, 8 }, }, + { { 177, 98, 1253 },{ 165, 143, 328 },{ 153, 131, 33 },{ 153, 143, 8 }, }, { { 185, 106, 1253 },{ 173, 151, 328 },{ 161, 139, 33 },{ 161, 151, 8 }, }, + { { 194, 115, 1253 },{ 182, 160, 328 },{ 170, 148, 33 },{ 170, 160, 8 }, }, { { 202, 123, 1253 },{ 190, 168, 328 },{ 178, 156, 33 },{ 178, 168, 8 }, }, + { { 210, 131, 1253 },{ 198, 176, 328 },{ 186, 164, 33 },{ 186, 176, 8 }, }, { { 218, 139, 1253 },{ 206, 184, 328 },{ 194, 172, 33 },{ 194, 184, 8 }, }, + { { 227, 148, 1253 },{ 215, 193, 328 },{ 203, 181, 33 },{ 203, 193, 8 }, }, { { 235, 156, 1253 },{ 223, 201, 328 },{ 211, 189, 33 },{ 211, 201, 8 }, }, + { { 243, 164, 1253 },{ 231, 209, 328 },{ 219, 197, 33 },{ 219, 209, 8 }, }, { { 183, 239, 867 },{ 239, 217, 328 },{ 227, 205, 33 },{ 227, 217, 8 }, }, + { { 254, 214, 1329 },{ 248, 226, 328 },{ 236, 214, 33 },{ 236, 226, 8 }, }, { { 222, 244, 3680 },{ 234, 244, 3648 },{ 244, 222, 33 },{ 244, 234, 8 }, }, + { { 230, 252, 3680 },{ 242, 252, 3648 },{ 252, 230, 33 },{ 252, 242, 8 }, }, { { 238, 250, 4040 },{ 255, 250, 8 },{ 238, 250, 456 },{ 255, 250, 8 }, }, + { { 9, 29, 566 },{ 9, 29, 560 },{ 9, 0, 9 },{ 9, 0, 8 }, }, { { 17, 37, 566 },{ 17, 37, 560 },{ 17, 0, 9 },{ 17, 0, 8 }, }, + { { 45, 0, 313 },{ 45, 0, 312 },{ 25, 0, 49 },{ 25, 7, 8 }, }, { { 14, 63, 2758 },{ 5, 53, 784 
},{ 15, 33, 70 },{ 33, 15, 8 }, }, + { { 71, 6, 1329 },{ 72, 4, 1328 },{ 42, 4, 33 },{ 42, 24, 8 }, }, { { 70, 3, 239 },{ 70, 2, 232 },{ 50, 12, 33 },{ 50, 32, 8 }, }, + { { 0, 98, 2842 },{ 78, 10, 232 },{ 58, 20, 33 },{ 58, 40, 8 }, }, { { 97, 27, 1329 },{ 86, 18, 232 },{ 66, 28, 33 },{ 66, 48, 8 }, }, + { { 0, 94, 867 },{ 95, 27, 232 },{ 75, 37, 33 },{ 75, 57, 8 }, }, { { 8, 102, 867 },{ 103, 35, 232 },{ 83, 45, 33 },{ 83, 65, 8 }, }, + { { 12, 112, 867 },{ 111, 43, 232 },{ 91, 53, 33 },{ 91, 73, 8 }, }, { { 139, 2, 1253 },{ 119, 51, 232 },{ 99, 61, 33 },{ 99, 81, 8 }, }, + { { 148, 13, 1253 },{ 128, 60, 232 },{ 108, 70, 33 },{ 108, 90, 8 }, }, { { 156, 21, 1253 },{ 136, 68, 232 },{ 116, 78, 33 },{ 116, 98, 8 }, }, + { { 164, 29, 1253 },{ 144, 76, 232 },{ 124, 86, 33 },{ 124, 106, 8 }, }, { { 172, 37, 1253 },{ 152, 84, 232 },{ 132, 94, 33 },{ 132, 114, 8 }, }, + { { 181, 46, 1253 },{ 161, 93, 232 },{ 141, 103, 33 },{ 141, 123, 8 }, }, { { 189, 54, 1253 },{ 169, 101, 232 },{ 149, 111, 33 },{ 149, 131, 8 }, }, + { { 197, 62, 1253 },{ 177, 109, 232 },{ 157, 119, 33 },{ 157, 139, 8 }, }, { { 205, 70, 1253 },{ 185, 117, 232 },{ 165, 127, 33 },{ 165, 147, 8 }, }, + { { 214, 79, 1253 },{ 194, 126, 232 },{ 174, 136, 33 },{ 174, 156, 8 }, }, { { 222, 87, 1253 },{ 202, 134, 232 },{ 182, 144, 33 },{ 182, 164, 8 }, }, + { { 230, 95, 1253 },{ 210, 142, 232 },{ 190, 152, 33 },{ 190, 172, 8 }, }, { { 238, 103, 1253 },{ 218, 150, 232 },{ 198, 160, 33 },{ 198, 180, 8 }, }, + { { 247, 112, 1253 },{ 227, 159, 232 },{ 207, 169, 33 },{ 207, 189, 8 }, }, { { 255, 120, 1253 },{ 235, 167, 232 },{ 215, 177, 33 },{ 215, 197, 8 }, }, + { { 146, 243, 867 },{ 243, 175, 232 },{ 223, 185, 33 },{ 223, 205, 8 }, }, { { 184, 231, 3682 },{ 203, 251, 784 },{ 231, 193, 33 },{ 231, 213, 8 }, }, + { { 193, 240, 3682 },{ 222, 240, 3648 },{ 240, 202, 33 },{ 240, 222, 8 }, }, { { 255, 210, 169 },{ 230, 248, 3648 },{ 248, 210, 33 },{ 248, 230, 8 }, }, + { { 218, 238, 4040 },{ 255, 238, 8 },{ 218, 238, 456 
},{ 255, 238, 8 }, }, { { 226, 246, 4040 },{ 255, 246, 8 },{ 226, 246, 456 },{ 255, 246, 8 }, }, + { { 13, 42, 566 },{ 13, 42, 560 },{ 13, 0, 9 },{ 13, 0, 8 }, }, { { 50, 0, 329 },{ 50, 0, 328 },{ 21, 0, 9 },{ 21, 0, 8 }, }, + { { 29, 58, 566 },{ 67, 2, 1352 },{ 3, 29, 70 },{ 29, 3, 8 }, }, { { 10, 79, 2758 },{ 76, 11, 1352 },{ 11, 37, 70 },{ 37, 11, 8 }, }, + { { 7, 75, 790 },{ 7, 75, 784 },{ 20, 46, 70 },{ 46, 20, 8 }, }, { { 15, 83, 790 },{ 97, 1, 1328 },{ 28, 54, 70 },{ 54, 28, 8 }, }, + { { 101, 7, 1329 },{ 105, 9, 1328 },{ 62, 0, 39 },{ 62, 36, 8 }, }, { { 99, 1, 239 },{ 99, 3, 232 },{ 1, 71, 98 },{ 70, 44, 8 }, }, + { { 107, 11, 239 },{ 108, 12, 232 },{ 10, 80, 98 },{ 79, 53, 8 }, }, { { 115, 19, 239 },{ 116, 20, 232 },{ 18, 88, 98 },{ 87, 61, 8 }, }, + { { 123, 27, 239 },{ 124, 28, 232 },{ 26, 96, 98 },{ 95, 69, 8 }, }, { { 131, 35, 239 },{ 132, 36, 232 },{ 34, 104, 98 },{ 103, 77, 8 }, }, + { { 140, 44, 239 },{ 141, 45, 232 },{ 43, 113, 98 },{ 112, 86, 8 }, }, { { 148, 52, 239 },{ 149, 53, 232 },{ 51, 121, 98 },{ 120, 94, 8 }, }, + { { 156, 60, 239 },{ 157, 61, 232 },{ 59, 129, 98 },{ 128, 102, 8 }, }, { { 164, 68, 239 },{ 165, 69, 232 },{ 67, 137, 98 },{ 136, 110, 8 }, }, + { { 173, 77, 239 },{ 174, 78, 232 },{ 76, 146, 98 },{ 145, 119, 8 }, }, { { 181, 85, 239 },{ 182, 86, 232 },{ 84, 154, 98 },{ 153, 127, 8 }, }, + { { 189, 93, 239 },{ 190, 94, 232 },{ 92, 162, 98 },{ 161, 135, 8 }, }, { { 197, 101, 239 },{ 198, 102, 232 },{ 100, 170, 98 },{ 169, 143, 8 }, }, + { { 206, 110, 239 },{ 207, 111, 232 },{ 109, 179, 98 },{ 178, 152, 8 }, }, { { 214, 118, 239 },{ 215, 119, 232 },{ 117, 187, 98 },{ 186, 160, 8 }, }, + { { 222, 126, 239 },{ 223, 127, 232 },{ 125, 195, 98 },{ 194, 168, 8 }, }, { { 230, 134, 239 },{ 231, 135, 232 },{ 133, 203, 98 },{ 202, 176, 8 }, }, + { { 239, 143, 239 },{ 240, 144, 232 },{ 142, 212, 98 },{ 211, 185, 8 }, }, { { 247, 151, 239 },{ 180, 248, 784 },{ 150, 220, 98 },{ 219, 193, 8 }, }, + { { 159, 228, 3682 },{ 201, 227, 3648 },{ 
158, 228, 98 },{ 227, 201, 8 }, }, { { 181, 249, 3928 },{ 209, 235, 3648 },{ 166, 236, 98 },{ 235, 209, 8 }, }, + { { 255, 189, 169 },{ 218, 244, 3648 },{ 175, 245, 98 },{ 244, 218, 8 }, }, { { 197, 226, 4040 },{ 226, 252, 3648 },{ 183, 253, 98 },{ 252, 226, 8 }, }, + { { 205, 234, 4040 },{ 255, 234, 8 },{ 205, 234, 456 },{ 255, 234, 8 }, }, { { 213, 242, 4040 },{ 255, 242, 8 },{ 213, 242, 456 },{ 255, 242, 8 }, }, + { { 18, 60, 566 },{ 18, 60, 560 },{ 18, 0, 9 },{ 18, 0, 8 }, }, { { 26, 68, 566 },{ 26, 68, 560 },{ 26, 0, 9 },{ 26, 0, 8 }, }, + { { 34, 76, 566 },{ 34, 76, 560 },{ 34, 0, 9 },{ 34, 0, 8 }, }, { { 5, 104, 2758 },{ 98, 5, 1352 },{ 42, 0, 57 },{ 42, 6, 8 }, }, + { { 92, 0, 313 },{ 93, 1, 312 },{ 15, 51, 70 },{ 51, 15, 8 }, }, { { 3, 101, 790 },{ 3, 101, 784 },{ 0, 59, 88 },{ 59, 23, 8 }, }, + { { 14, 107, 790 },{ 11, 109, 784 },{ 31, 67, 70 },{ 67, 31, 8 }, }, { { 19, 117, 790 },{ 19, 117, 784 },{ 39, 75, 70 },{ 75, 39, 8 }, }, + { { 28, 126, 790 },{ 28, 126, 784 },{ 83, 5, 33 },{ 84, 48, 8 }, }, { { 132, 0, 239 },{ 36, 134, 784 },{ 91, 13, 33 },{ 92, 56, 8 }, }, + { { 142, 4, 239 },{ 44, 142, 784 },{ 99, 21, 33 },{ 100, 64, 8 }, }, { { 150, 12, 239 },{ 52, 150, 784 },{ 107, 29, 33 },{ 108, 72, 8 }, }, + { { 159, 21, 239 },{ 61, 159, 784 },{ 116, 38, 33 },{ 117, 81, 8 }, }, { { 167, 29, 239 },{ 69, 167, 784 },{ 124, 46, 33 },{ 125, 89, 8 }, }, + { { 175, 37, 239 },{ 77, 175, 784 },{ 132, 54, 33 },{ 133, 97, 8 }, }, { { 183, 45, 239 },{ 85, 183, 784 },{ 140, 62, 33 },{ 141, 105, 8 }, }, + { { 192, 54, 239 },{ 94, 192, 784 },{ 149, 71, 33 },{ 150, 114, 8 }, }, { { 200, 62, 239 },{ 102, 200, 784 },{ 157, 79, 33 },{ 158, 122, 8 }, }, + { { 208, 70, 239 },{ 110, 208, 784 },{ 165, 87, 33 },{ 166, 130, 8 }, }, { { 216, 78, 239 },{ 118, 216, 784 },{ 173, 95, 33 },{ 174, 138, 8 }, }, + { { 225, 87, 239 },{ 127, 225, 784 },{ 182, 104, 33 },{ 183, 147, 8 }, }, { { 233, 95, 239 },{ 135, 233, 784 },{ 190, 112, 33 },{ 191, 155, 8 }, }, + { { 241, 103, 239 },{ 143, 
241, 784 },{ 198, 120, 33 },{ 199, 163, 8 }, }, { { 111, 208, 3682 },{ 151, 249, 784 },{ 206, 128, 33 },{ 207, 171, 8 }, }, + { { 120, 217, 3682 },{ 180, 216, 3648 },{ 215, 137, 33 },{ 216, 180, 8 }, }, { { 128, 225, 3682 },{ 188, 224, 3648 },{ 223, 145, 33 },{ 224, 188, 8 }, }, + { { 155, 253, 3928 },{ 196, 232, 3648 },{ 231, 153, 33 },{ 232, 196, 8 }, }, { { 144, 241, 3682 },{ 204, 240, 3648 },{ 239, 161, 33 },{ 240, 204, 8 }, }, + { { 153, 250, 3682 },{ 213, 249, 3648 },{ 248, 170, 33 },{ 249, 213, 8 }, }, { { 179, 221, 4040 },{ 255, 221, 8 },{ 179, 221, 456 },{ 255, 221, 8 }, }, + { { 187, 229, 4040 },{ 255, 229, 8 },{ 187, 229, 456 },{ 255, 229, 8 }, }, { { 195, 237, 4040 },{ 255, 237, 8 },{ 195, 237, 456 },{ 255, 237, 8 }, }, + { { 24, 80, 566 },{ 24, 80, 560 },{ 24, 0, 9 },{ 24, 0, 8 }, }, { { 32, 88, 566 },{ 32, 88, 560 },{ 32, 0, 9 },{ 32, 0, 8 }, }, + { { 40, 96, 566 },{ 40, 96, 560 },{ 40, 0, 9 },{ 40, 0, 8 }, }, { { 48, 104, 566 },{ 48, 104, 560 },{ 48, 0, 9 },{ 48, 0, 8 }, }, + { { 9, 138, 2758 },{ 130, 7, 1352 },{ 9, 57, 70 },{ 57, 9, 8 }, }, { { 119, 0, 313 },{ 120, 0, 312 },{ 17, 65, 70 },{ 65, 17, 8 }, }, + { { 0, 128, 784 },{ 128, 6, 312 },{ 25, 73, 70 },{ 73, 25, 8 }, }, { { 6, 137, 790 },{ 5, 136, 784 },{ 33, 81, 70 },{ 81, 33, 8 }, }, + { { 42, 171, 2758 },{ 14, 145, 784 },{ 42, 90, 70 },{ 90, 42, 8 }, }, { { 50, 179, 2758 },{ 22, 153, 784 },{ 50, 98, 70 },{ 98, 50, 8 }, }, + { { 58, 187, 2758 },{ 30, 161, 784 },{ 58, 106, 70 },{ 106, 58, 8 }, }, { { 191, 18, 1329 },{ 38, 169, 784 },{ 112, 9, 33 },{ 114, 66, 8 }, }, + { { 176, 0, 239 },{ 47, 178, 784 },{ 121, 18, 33 },{ 123, 75, 8 }, }, { { 187, 1, 239 },{ 55, 186, 784 },{ 129, 26, 33 },{ 131, 83, 8 }, }, + { { 195, 10, 239 },{ 63, 194, 784 },{ 137, 34, 33 },{ 139, 91, 8 }, }, { { 203, 18, 239 },{ 71, 202, 784 },{ 145, 42, 33 },{ 147, 99, 8 }, }, + { { 212, 27, 239 },{ 80, 211, 784 },{ 154, 51, 33 },{ 156, 108, 8 }, }, { { 220, 35, 239 },{ 88, 219, 784 },{ 162, 59, 33 },{ 164, 116, 8 }, }, + { 
{ 228, 43, 239 },{ 96, 227, 784 },{ 170, 67, 33 },{ 172, 124, 8 }, }, { { 236, 51, 239 },{ 104, 235, 784 },{ 178, 75, 33 },{ 180, 132, 8 }, }, + { { 245, 60, 239 },{ 113, 244, 784 },{ 187, 84, 33 },{ 189, 141, 8 }, }, { { 91, 194, 3680 },{ 149, 197, 3648 },{ 195, 92, 33 },{ 197, 149, 8 }, }, + { { 99, 202, 3680 },{ 157, 205, 3648 },{ 203, 100, 33 },{ 205, 157, 8 }, }, { { 107, 210, 3680 },{ 165, 213, 3648 },{ 211, 108, 33 },{ 213, 165, 8 }, }, + { { 119, 249, 3928 },{ 174, 222, 3648 },{ 220, 117, 33 },{ 222, 174, 8 }, }, { { 127, 255, 856 },{ 182, 230, 3648 },{ 228, 125, 33 },{ 230, 182, 8 }, }, + { { 255, 135, 169 },{ 190, 238, 3648 },{ 236, 133, 33 },{ 238, 190, 8 }, }, { { 140, 243, 3680 },{ 198, 246, 3648 },{ 244, 141, 33 },{ 246, 198, 8 }, }, + { { 151, 207, 4040 },{ 255, 207, 8 },{ 151, 207, 456 },{ 255, 207, 8 }, }, { { 159, 215, 4040 },{ 255, 215, 8 },{ 159, 215, 456 },{ 255, 215, 8 }, }, + { { 167, 223, 4040 },{ 255, 223, 8 },{ 167, 223, 456 },{ 255, 223, 8 }, }, { { 175, 231, 4040 },{ 255, 231, 8 },{ 175, 231, 456 },{ 255, 231, 8 }, }, + { { 33, 106, 566 },{ 33, 106, 560 },{ 33, 0, 9 },{ 33, 0, 8 }, }, { { 41, 114, 566 },{ 41, 114, 560 },{ 41, 0, 9 },{ 41, 0, 8 }, }, + { { 49, 122, 566 },{ 49, 122, 560 },{ 49, 0, 9 },{ 49, 0, 8 }, }, { { 57, 130, 566 },{ 57, 130, 560 },{ 57, 0, 9 },{ 57, 0, 8 }, }, + { { 66, 139, 566 },{ 66, 139, 560 },{ 66, 0, 9 },{ 66, 0, 8 }, }, { { 74, 147, 566 },{ 170, 7, 1352 },{ 8, 74, 70 },{ 74, 8, 8 }, }, + { { 152, 0, 313 },{ 178, 15, 1352 },{ 0, 82, 80 },{ 82, 16, 8 }, }, { { 162, 0, 313 },{ 186, 23, 1352 },{ 24, 90, 70 },{ 90, 24, 8 }, }, + { { 0, 171, 784 },{ 195, 32, 1352 },{ 33, 99, 70 },{ 99, 33, 8 }, }, { { 6, 179, 790 },{ 203, 40, 1352 },{ 41, 107, 70 },{ 107, 41, 8 }, }, + { { 15, 187, 790 },{ 211, 48, 1352 },{ 115, 0, 41 },{ 115, 49, 8 }, }, { { 61, 199, 710 },{ 219, 56, 1352 },{ 57, 123, 70 },{ 123, 57, 8 }, }, + { { 70, 208, 710 },{ 228, 65, 1352 },{ 66, 132, 70 },{ 132, 66, 8 }, }, { { 78, 216, 710 },{ 236, 73, 1352 
},{ 74, 140, 70 },{ 140, 74, 8 }, }, + { { 86, 224, 710 },{ 244, 81, 1352 },{ 145, 7, 33 },{ 148, 82, 8 }, }, { { 222, 8, 233 },{ 252, 89, 1352 },{ 153, 15, 33 },{ 156, 90, 8 }, }, + { { 235, 0, 239 },{ 241, 101, 328 },{ 166, 6, 39 },{ 165, 99, 8 }, }, { { 32, 170, 3680 },{ 249, 109, 328 },{ 0, 175, 98 },{ 173, 107, 8 }, }, + { { 40, 178, 3680 },{ 115, 181, 3648 },{ 8, 183, 98 },{ 181, 115, 8 }, }, { { 48, 186, 3680 },{ 123, 189, 3648 },{ 16, 191, 98 },{ 189, 123, 8 }, }, + { { 57, 195, 3680 },{ 132, 198, 3648 },{ 25, 200, 98 },{ 198, 132, 8 }, }, { { 67, 243, 3928 },{ 140, 206, 3648 },{ 33, 208, 98 },{ 206, 140, 8 }, }, + { { 76, 251, 3928 },{ 148, 214, 3648 },{ 41, 216, 98 },{ 214, 148, 8 }, }, { { 86, 255, 856 },{ 156, 222, 3648 },{ 49, 224, 98 },{ 222, 156, 8 }, }, + { { 255, 93, 169 },{ 165, 231, 3648 },{ 58, 233, 98 },{ 231, 165, 8 }, }, { { 98, 236, 3680 },{ 173, 239, 3648 },{ 66, 241, 98 },{ 239, 173, 8 }, }, + { { 108, 181, 4040 },{ 181, 247, 3648 },{ 74, 249, 98 },{ 247, 181, 8 }, }, { { 116, 189, 4040 },{ 255, 189, 8 },{ 116, 189, 456 },{ 255, 189, 8 }, }, + { { 125, 198, 4040 },{ 255, 198, 8 },{ 125, 198, 456 },{ 255, 198, 8 }, }, { { 133, 206, 4040 },{ 255, 206, 8 },{ 133, 206, 456 },{ 255, 206, 8 }, }, + { { 141, 214, 4040 },{ 255, 214, 8 },{ 141, 214, 456 },{ 255, 214, 8 }, }, { { 149, 222, 4040 },{ 255, 222, 8 },{ 149, 222, 456 },{ 255, 222, 8 }, }, + { { 47, 183, 566 },{ 47, 183, 560 },{ 47, 0, 9 },{ 47, 0, 8 }, }, { { 55, 191, 566 },{ 55, 191, 560 },{ 55, 0, 9 },{ 55, 0, 8 }, }, + { { 63, 199, 566 },{ 63, 199, 560 },{ 63, 0, 9 },{ 63, 0, 8 }, }, { { 71, 207, 566 },{ 71, 207, 560 },{ 71, 0, 9 },{ 71, 0, 8 }, }, + { { 80, 216, 566 },{ 80, 216, 560 },{ 80, 0, 9 },{ 80, 0, 8 }, }, { { 88, 224, 566 },{ 88, 224, 560 },{ 88, 0, 9 },{ 88, 0, 8 }, }, + { { 3, 233, 710 },{ 3, 233, 704 },{ 2, 96, 70 },{ 96, 2, 8 }, }, { { 11, 241, 710 },{ 11, 241, 704 },{ 10, 104, 70 },{ 104, 10, 8 }, }, + { { 20, 250, 710 },{ 20, 250, 704 },{ 19, 113, 70 },{ 113, 19, 8 }, 
}, { { 27, 121, 3654 },{ 27, 121, 3648 },{ 27, 121, 70 },{ 121, 27, 8 }, }, + { { 35, 129, 3654 },{ 35, 129, 3648 },{ 35, 129, 70 },{ 129, 35, 8 }, }, { { 43, 137, 3654 },{ 43, 137, 3648 },{ 43, 137, 70 },{ 137, 43, 8 }, }, + { { 52, 146, 3654 },{ 52, 146, 3648 },{ 52, 146, 70 },{ 146, 52, 8 }, }, { { 60, 154, 3654 },{ 60, 154, 3648 },{ 60, 154, 70 },{ 154, 60, 8 }, }, + { { 68, 162, 3654 },{ 68, 162, 3648 },{ 68, 162, 70 },{ 162, 68, 8 }, }, { { 76, 170, 3654 },{ 76, 170, 3648 },{ 76, 170, 70 },{ 170, 76, 8 }, }, + { { 85, 179, 3654 },{ 85, 179, 3648 },{ 85, 179, 70 },{ 179, 85, 8 }, }, { { 93, 187, 3654 },{ 93, 187, 3648 },{ 93, 187, 70 },{ 187, 93, 8 }, }, + { { 101, 195, 3654 },{ 101, 195, 3648 },{ 101, 195, 70 },{ 195, 101, 8 }, }, { { 109, 203, 3654 },{ 109, 203, 3648 },{ 109, 203, 70 },{ 203, 109, 8 }, }, + { { 118, 212, 3654 },{ 118, 212, 3648 },{ 118, 212, 70 },{ 212, 118, 8 }, }, { { 126, 220, 3654 },{ 126, 220, 3648 },{ 126, 220, 70 },{ 220, 126, 8 }, }, + { { 134, 228, 3654 },{ 134, 228, 3648 },{ 134, 228, 70 },{ 228, 134, 8 }, }, { { 5, 236, 3680 },{ 142, 236, 3648 },{ 5, 236, 96 },{ 236, 142, 8 }, }, + { { 14, 245, 3680 },{ 151, 245, 3648 },{ 14, 245, 96 },{ 245, 151, 8 }, }, { { 23, 159, 4040 },{ 159, 253, 3648 },{ 23, 159, 456 },{ 253, 159, 8 }, }, + { { 31, 167, 4040 },{ 255, 167, 8 },{ 31, 167, 456 },{ 255, 167, 8 }, }, { { 39, 175, 4040 },{ 255, 175, 8 },{ 39, 175, 456 },{ 255, 175, 8 }, }, + { { 48, 184, 4040 },{ 255, 184, 8 },{ 48, 184, 456 },{ 255, 184, 8 }, }, { { 56, 192, 4040 },{ 255, 192, 8 },{ 56, 192, 456 },{ 255, 192, 8 }, }, + { { 64, 200, 4040 },{ 255, 200, 8 },{ 64, 200, 456 },{ 255, 200, 8 }, },{ { 72, 208, 4040 },{ 255, 208, 8 },{ 72, 208, 456 },{ 255, 208, 8 }, }, + + }; + + struct dxt5a_block + { + uint8_t m_endpoints[2]; + + enum { cTotalSelectorBytes = 6 }; + uint8_t m_selectors[cTotalSelectorBytes]; + + inline void clear() + { + basisu::clear_obj(*this); + } + + inline uint32_t get_low_alpha() const + { + return 
m_endpoints[0]; + } + + inline uint32_t get_high_alpha() const + { + return m_endpoints[1]; + } + + inline void set_low_alpha(uint32_t i) + { + assert(i <= UINT8_MAX); + m_endpoints[0] = static_cast(i); + } + + inline void set_high_alpha(uint32_t i) + { + assert(i <= UINT8_MAX); + m_endpoints[1] = static_cast(i); + } + + inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } + + uint32_t get_endpoints_as_word() const { return m_endpoints[0] | (m_endpoints[1] << 8); } + uint32_t get_selectors_as_word(uint32_t index) { assert(index < 3); return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); } + + inline uint32_t get_selector(uint32_t x, uint32_t y) const + { + assert((x < 4U) && (y < 4U)); + + uint32_t selector_index = (y * 4) + x; + uint32_t bit_index = selector_index * cDXT5SelectorBits; + + uint32_t byte_index = bit_index >> 3; + uint32_t bit_ofs = bit_index & 7; + + uint32_t v = m_selectors[byte_index]; + if (byte_index < (cTotalSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); + + return (v >> bit_ofs) & 7; + } + + inline void set_selector(uint32_t x, uint32_t y, uint32_t val) + { + assert((x < 4U) && (y < 4U) && (val < 8U)); + + uint32_t selector_index = (y * 4) + x; + uint32_t bit_index = selector_index * cDXT5SelectorBits; + + uint32_t byte_index = bit_index >> 3; + uint32_t bit_ofs = bit_index & 7; + + uint32_t v = m_selectors[byte_index]; + if (byte_index < (cTotalSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); + + v &= (~(7 << bit_ofs)); + v |= (val << bit_ofs); + + m_selectors[byte_index] = static_cast(v); + if (byte_index < (cTotalSelectorBytes - 1)) + m_selectors[byte_index + 1] = static_cast(v >> 8); + } + + enum { cMaxSelectorValues = 8 }; + + static uint32_t get_block_values6(color32* pDst, uint32_t l, uint32_t h) + { + pDst[0].a = static_cast(l); + pDst[1].a = static_cast(h); + pDst[2].a = static_cast((l * 4 + h) / 5); + pDst[3].a = static_cast((l * 3 + h * 2) / 5); + pDst[4].a = 
static_cast((l * 2 + h * 3) / 5); + pDst[5].a = static_cast((l + h * 4) / 5); + pDst[6].a = 0; + pDst[7].a = 255; + return 6; + } + + static uint32_t get_block_values8(color32* pDst, uint32_t l, uint32_t h) + { + pDst[0].a = static_cast(l); + pDst[1].a = static_cast(h); + pDst[2].a = static_cast((l * 6 + h) / 7); + pDst[3].a = static_cast((l * 5 + h * 2) / 7); + pDst[4].a = static_cast((l * 4 + h * 3) / 7); + pDst[5].a = static_cast((l * 3 + h * 4) / 7); + pDst[6].a = static_cast((l * 2 + h * 5) / 7); + pDst[7].a = static_cast((l + h * 6) / 7); + return 8; + } + + static uint32_t get_block_values(color32* pDst, uint32_t l, uint32_t h) + { + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); + } + }; + + static void convert_etc1s_to_dxt5a(dxt5a_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector) + { + const uint32_t low_selector = pSelector->m_lo_selector; + const uint32_t high_selector = pSelector->m_hi_selector; + + const color32& base_color = pEndpoints->m_color5; + const uint32_t inten_table = pEndpoints->m_inten5; + + if (low_selector == high_selector) + { + uint32_t r; + decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r); + + pDst_block->set_low_alpha(r); + pDst_block->set_high_alpha(r); + pDst_block->m_selectors[0] = 0; + pDst_block->m_selectors[1] = 0; + pDst_block->m_selectors[2] = 0; + pDst_block->m_selectors[3] = 0; + pDst_block->m_selectors[4] = 0; + pDst_block->m_selectors[5] = 0; + return; + } + else if (pSelector->m_num_unique_selectors == 2) + { + color32 block_colors[4]; + + decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table); + + const uint32_t r0 = block_colors[low_selector].r; + const uint32_t r1 = block_colors[high_selector].r; + + pDst_block->set_low_alpha(r0); + pDst_block->set_high_alpha(r1); + + // TODO: Optimize this + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = 
pSelector->get_selector(x, y); + pDst_block->set_selector(x, y, (s == high_selector) ? 1 : 0); + } + } + + return; + } + + uint32_t selector_range_table = 0; + for (selector_range_table = 0; selector_range_table < NUM_DXT5A_SELECTOR_RANGES; selector_range_table++) + if ((low_selector == s_dxt5a_selector_ranges[selector_range_table].m_low) && (high_selector == s_dxt5a_selector_ranges[selector_range_table].m_high)) + break; + if (selector_range_table >= NUM_DXT5A_SELECTOR_RANGES) + selector_range_table = 0; + + const etc1_g_to_dxt5a_conversion* pTable_entry = &g_etc1_g_to_dxt5a[base_color.r + inten_table * 32][selector_range_table]; + + pDst_block->set_low_alpha(pTable_entry->m_lo); + pDst_block->set_high_alpha(pTable_entry->m_hi); + + // TODO: Optimize this (like ETC1->BC1) + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = pSelector->get_selector(x, y); + + uint32_t ds = (pTable_entry->m_trans >> (s * 3)) & 7; + + pDst_block->set_selector(x, y, ds); + } + } + } +#endif //BASISD_SUPPORT_DXT5A + + // PVRTC + +#if BASISD_SUPPORT_PVRTC1 || BASISD_SUPPORT_UASTC + static const uint16_t g_pvrtc_swizzle_table[256] = + { + 0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015, 0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055, 0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115, 0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155, + 0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415, 0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455, 0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515, 0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555, + 0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015, 0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055, 0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115, 0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155, + 0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 
0x1414, 0x1415, 0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455, 0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515, 0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555, + 0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015, 0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055, 0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115, 0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155, + 0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415, 0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455, 0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515, 0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555, + 0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015, 0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055, 0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115, 0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155, + 0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415, 0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455, 0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515, 0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555 + }; + + // Note we can't use simple calculations to convert PVRTC1 encoded endpoint components to/from 8-bits, due to hardware approximations. 
+ static const uint8_t g_pvrtc_5[32] = { 0,8,16,24,33,41,49,57,66,74,82,90,99,107,115,123,132,140,148,156,165,173,181,189,198,206,214,222,231,239,247,255 }; + static const uint8_t g_pvrtc_4[16] = { 0,16,33,49,66,82,99,115,140,156,173,189,206,222,239,255 }; + static const uint8_t g_pvrtc_3[8] = { 0,33,74,107,148,181,222,255 }; + static const uint8_t g_pvrtc_alpha[9] = { 0,34,68,102,136,170,204,238,255 }; + + static const uint8_t g_pvrtc_5_floor[256] = + { + 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3, + 3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7, + 7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11, + 11,11,11,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15, + 15,15,15,15,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19, + 19,19,19,19,19,20,20,20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23, + 23,23,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27, + 27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31 + }; + + static const uint8_t g_pvrtc_5_ceil[256] = + { + 0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4, + 4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8, + 8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,12,12,12,12,12, + 12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,16,16,16,16, + 16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,20,20,20, + 20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23,24,24, + 24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28, + 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31,31,31,31,31 + }; + + static const uint8_t g_pvrtc_4_floor[256] = + { + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11, + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13, + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15 + }; + + static const uint8_t g_pvrtc_4_ceil[256] = + { + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12, + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14, + 14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15 + }; + + static const uint8_t g_pvrtc_3_floor[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7 + }; + + static const uint8_t g_pvrtc_3_ceil[256] = + { + 
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 + }; + + static const uint8_t g_pvrtc_alpha_floor[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8 + }; + + static const uint8_t g_pvrtc_alpha_ceil[256] = + { + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 + }; + + struct pvrtc4_block + { + uint32_t m_modulation; + uint32_t m_endpoints; + + pvrtc4_block() : m_modulation(0), m_endpoints(0) { } + + inline bool operator== (const pvrtc4_block& rhs) const + { + return (m_modulation == rhs.m_modulation) && (m_endpoints == 
rhs.m_endpoints); + } + + inline void clear() + { + m_modulation = 0; + m_endpoints = 0; + } + + inline bool get_block_uses_transparent_modulation() const + { + return (m_endpoints & 1) != 0; + } + + inline void set_block_uses_transparent_modulation(bool m) + { + m_endpoints = (m_endpoints & ~1U) | static_cast(m); + } + + inline bool is_endpoint_opaque(uint32_t endpoint_index) const + { + static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U }; + return (m_endpoints & s_bitmasks[basisu::open_range_check(endpoint_index, 2U)]) != 0; + } + + inline void set_endpoint_opaque(uint32_t endpoint_index, bool opaque) + { + assert(endpoint_index < 2); + static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U }; + if (opaque) + m_endpoints |= s_bitmasks[endpoint_index]; + else + m_endpoints &= ~s_bitmasks[endpoint_index]; + } + + inline color32 get_endpoint_5554(uint32_t endpoint_index) const + { + assert(endpoint_index < 2); + static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF }; + uint32_t packed = (m_endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 16 : 0)) & s_endpoint_mask[endpoint_index]; + + uint32_t r, g, b, a; + if (packed & 0x8000) + { + // opaque 554 or 555 + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = packed & 31; + + if (!endpoint_index) + b |= (b >> 4); + + a = 0xF; + } + else + { + // translucent 4433 or 4443 + r = (packed >> 7) & 0x1E; + g = (packed >> 3) & 0x1E; + b = (packed & 0xF) << 1; + + r |= (r >> 4); + g |= (g >> 4); + + if (!endpoint_index) + b |= (b >> 3); + else + b |= (b >> 4); + + a = (packed >> 11) & 0xE; + } + + assert((r < 32) && (g < 32) && (b < 32) && (a < 16)); + + return color32(r, g, b, a); + } + + inline color32 get_endpoint_8888(uint32_t endpoint_index) const + { + assert(endpoint_index < 2); + static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF }; + uint32_t packed = (m_endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 
16 : 0)) & s_endpoint_mask[endpoint_index]; + + uint32_t r, g, b, a; + if (packed & 0x8000) + { + // opaque 554 or 555 + // 1RRRRRGGGGGBBBBM + // 1RRRRRGGGGGBBBBB + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = packed & 31; + + r = g_pvrtc_5[r]; + g = g_pvrtc_5[g]; + + if (!endpoint_index) + b = g_pvrtc_4[b >> 1]; + else + b = g_pvrtc_5[b]; + + a = 255; + } + else + { + // translucent 4433 or 4443 + // 0AAA RRRR GGGG BBBM + // 0AAA RRRR GGGG BBBB + r = (packed >> 8) & 0xF; + g = (packed >> 4) & 0xF; + b = packed & 0xF; + a = (packed >> 12) & 7; + + r = g_pvrtc_4[r]; + g = g_pvrtc_4[g]; + + if (!endpoint_index) + b = g_pvrtc_3[b >> 1]; + else + b = g_pvrtc_4[b]; + + a = g_pvrtc_alpha[a]; + } + + return color32(r, g, b, a); + } + + inline uint32_t get_endpoint_l8(uint32_t endpoint_index) const + { + color32 c(get_endpoint_8888(endpoint_index)); + return c.r + c.g + c.b + c.a; + } + + inline uint32_t get_opaque_endpoint_l0() const + { + uint32_t packed = m_endpoints & 0xFFFE; + + uint32_t r, g, b; + assert(packed & 0x8000); + + // opaque 554 or 555 + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = packed & 31; + b |= (b >> 4); + + return r + g + b; + } + + inline uint32_t get_opaque_endpoint_l1() const + { + uint32_t packed = m_endpoints >> 16; + + uint32_t r, g, b; + assert(packed & 0x8000); + + // opaque 554 or 555 + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = packed & 31; + + return r + g + b; + } + + static uint32_t get_component_precision_in_bits(uint32_t c, uint32_t endpoint_index, bool opaque_endpoint) + { + static const uint32_t s_comp_prec[4][4] = + { + // R0 G0 B0 A0 R1 G1 B1 A1 + { 4, 4, 3, 3 },{ 4, 4, 4, 3 }, // transparent endpoint + + { 5, 5, 4, 0 },{ 5, 5, 5, 0 } // opaque endpoint + }; + return s_comp_prec[basisu::open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)][basisu::open_range_check(c, 4U)]; + } + + static color32 get_color_precision_in_bits(uint32_t endpoint_index, bool opaque_endpoint) + { + static 
const color32 s_color_prec[4] = + { + color32(4, 4, 3, 3), color32(4, 4, 4, 3), // transparent endpoint + color32(5, 5, 4, 0), color32(5, 5, 5, 0) // opaque endpoint + }; + return s_color_prec[basisu::open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)]; + } + + inline void set_opaque_endpoint_floor(uint32_t endpoint_index, const color32& c) + { + assert(endpoint_index < 2); + const uint32_t m = m_endpoints & 1; + + uint32_t r = g_pvrtc_5_floor[c[0]], g = g_pvrtc_5_floor[c[1]], b = c[2]; + + if (!endpoint_index) + b = g_pvrtc_4_floor[b] << 1; + else + b = g_pvrtc_5_floor[b]; + + // rgba=555 here + assert((r < 32) && (g < 32) && (b < 32)); + + // 1RRRRRGGGGGBBBBM + // 1RRRRRGGGGGBBBBB + + // opaque 554 or 555 + uint32_t packed = 0x8000 | (r << 10) | (g << 5) | b; + if (!endpoint_index) + packed = (packed & ~1) | m; + + assert(packed <= 0xFFFF); + + if (endpoint_index) + m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16); + else + m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; + } + + inline void set_opaque_endpoint_ceil(uint32_t endpoint_index, const color32& c) + { + assert(endpoint_index < 2); + const uint32_t m = m_endpoints & 1; + + uint32_t r = g_pvrtc_5_ceil[c[0]], g = g_pvrtc_5_ceil[c[1]], b = c[2]; + + if (!endpoint_index) + b = g_pvrtc_4_ceil[b] << 1; + else + b = g_pvrtc_5_ceil[b]; + + // rgba=555 here + assert((r < 32) && (g < 32) && (b < 32)); + + // 1RRRRRGGGGGBBBBM + // 1RRRRRGGGGGBBBBB + + // opaque 554 or 555 + uint32_t packed = 0x8000 | (r << 10) | (g << 5) | b; + if (!endpoint_index) + packed |= m; + + assert(packed <= 0xFFFF); + + if (endpoint_index) + m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16); + else + m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; + } + + // opaque endpoints: 554 or 555 + // transparent endpoints: 3443 or 3444 + inline void set_endpoint_raw(uint32_t endpoint_index, const color32& c, bool opaque_endpoint) + { + assert(endpoint_index < 2); + const uint32_t m = m_endpoints & 1; + uint32_t r = 
c[0], g = c[1], b = c[2], a = c[3]; + + uint32_t packed; + + if (opaque_endpoint) + { + if (!endpoint_index) + { + // 554 + // 1RRRRRGGGGGBBBBM + assert((r < 32) && (g < 32) && (b < 16)); + packed = 0x8000 | (r << 10) | (g << 5) | (b << 1) | m; + } + else + { + // 555 + // 1RRRRRGGGGGBBBBB + assert((r < 32) && (g < 32) && (b < 32)); + packed = 0x8000 | (r << 10) | (g << 5) | b; + } + } + else + { + if (!endpoint_index) + { + // 3443 + // 0AAA RRRR GGGG BBBM + assert((r < 16) && (g < 16) && (b < 8) && (a < 8)); + packed = (a << 12) | (r << 8) | (g << 4) | (b << 1) | m; + } + else + { + // 3444 + // 0AAA RRRR GGGG BBBB + assert((r < 16) && (g < 16) && (b < 16) && (a < 8)); + packed = (a << 12) | (r << 8) | (g << 4) | b; + } + } + + assert(packed <= 0xFFFF); + + if (endpoint_index) + m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16); + else + m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; + } + + inline void set_endpoint_floor(uint32_t endpoint_index, const color32& c) + { + assert(endpoint_index < 2); + + int a = g_pvrtc_alpha_floor[c.a]; + if (a == 8) + { + // 554 or 555 + uint32_t r = g_pvrtc_5_floor[c[0]], g = g_pvrtc_5_floor[c[1]], b = c[2]; + + if (!endpoint_index) + b = g_pvrtc_4_floor[b]; + else + b = g_pvrtc_5_floor[b]; + + set_endpoint_raw(endpoint_index, color32(r, g, b, a), true); + } + else + { + // 4433 or 4443 + uint32_t r = g_pvrtc_4_floor[c[0]], g = g_pvrtc_4_floor[c[1]], b = c[2]; + + if (!endpoint_index) + b = g_pvrtc_3_floor[b]; + else + b = g_pvrtc_4_floor[b]; + + set_endpoint_raw(endpoint_index, color32(r, g, b, a), false); + } + } + + inline void set_endpoint_ceil(uint32_t endpoint_index, const color32& c) + { + assert(endpoint_index < 2); + + int a = g_pvrtc_alpha_ceil[c.a]; + if (a == 8) + { + // 554 or 555 + uint32_t r = g_pvrtc_5_ceil[c[0]], g = g_pvrtc_5_ceil[c[1]], b = c[2]; + + if (!endpoint_index) + b = g_pvrtc_4_ceil[b]; + else + b = g_pvrtc_5_ceil[b]; + + set_endpoint_raw(endpoint_index, color32(r, g, b, a), true); + } + else 
+ { + // 4433 or 4443 + uint32_t r = g_pvrtc_4_ceil[c[0]], g = g_pvrtc_4_ceil[c[1]], b = c[2]; + + if (!endpoint_index) + b = g_pvrtc_3_ceil[b]; + else + b = g_pvrtc_4_ceil[b]; + + set_endpoint_raw(endpoint_index, color32(r, g, b, a), false); + } + } + + inline uint32_t get_modulation(uint32_t x, uint32_t y) const + { + assert((x < 4) && (y < 4)); + return (m_modulation >> ((y * 4 + x) * 2)) & 3; + } + + // Scaled by 8 + inline const uint32_t* get_scaled_modulation_values(bool block_uses_transparent_modulation) const + { + static const uint32_t s_block_scales[2][4] = { { 0, 3, 5, 8 },{ 0, 4, 4, 8 } }; + return s_block_scales[block_uses_transparent_modulation]; + } + + // Scaled by 8 + inline uint32_t get_scaled_modulation(uint32_t x, uint32_t y) const + { + return get_scaled_modulation_values(get_block_uses_transparent_modulation())[get_modulation(x, y)]; + } + + inline void set_modulation(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < 4) && (y < 4) && (s < 4)); + uint32_t n = (y * 4 + x) * 2; + m_modulation = (m_modulation & (~(3 << n))) | (s << n); + assert(get_modulation(x, y) == s); + } + + // Assumes modulation was initialized to 0 + inline void set_modulation_fast(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < 4) && (y < 4) && (s < 4)); + uint32_t n = (y * 4 + x) * 2; + m_modulation |= (s << n); + assert(get_modulation(x, y) == s); + } + }; + +#if 0 + static const uint8_t g_pvrtc_bilinear_weights[16][4] = + { + { 4, 4, 4, 4 }, { 2, 6, 2, 6 }, { 8, 0, 8, 0 }, { 6, 2, 6, 2 }, + { 2, 2, 6, 6 }, { 1, 3, 3, 9 }, { 4, 0, 12, 0 }, { 3, 1, 9, 3 }, + { 8, 8, 0, 0 }, { 4, 12, 0, 0 }, { 16, 0, 0, 0 }, { 12, 4, 0, 0 }, + { 6, 6, 2, 2 }, { 3, 9, 1, 3 }, { 12, 0, 4, 0 }, { 9, 3, 3, 1 }, + }; +#endif + + struct pvrtc1_temp_block + { + decoder_etc_block m_etc1_block; + uint32_t m_pvrtc_endpoints; + }; + + static inline uint32_t get_opaque_endpoint_l0(uint32_t endpoints) + { + uint32_t packed = endpoints; + + uint32_t r, g, b; + assert(packed & 0x8000); + + r = 
(packed >> 10) & 31; + g = (packed >> 5) & 31; + b = packed & 30; + b |= (b >> 4); + + return r + g + b; + } + + static inline uint32_t get_opaque_endpoint_l1(uint32_t endpoints) + { + uint32_t packed = endpoints >> 16; + + uint32_t r, g, b; + assert(packed & 0x8000); + + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = packed & 31; + + return r + g + b; + } + + static color32 get_endpoint_8888(uint32_t endpoints, uint32_t endpoint_index) + { + assert(endpoint_index < 2); + static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF }; + uint32_t packed = (endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 16 : 0)) & s_endpoint_mask[endpoint_index]; + + uint32_t r, g, b, a; + if (packed & 0x8000) + { + // opaque 554 or 555 + // 1RRRRRGGGGGBBBBM + // 1RRRRRGGGGGBBBBB + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = packed & 31; + + r = g_pvrtc_5[r]; + g = g_pvrtc_5[g]; + + if (!endpoint_index) + b = g_pvrtc_4[b >> 1]; + else + b = g_pvrtc_5[b]; + + a = 255; + } + else + { + // translucent 4433 or 4443 + // 0AAA RRRR GGGG BBBM + // 0AAA RRRR GGGG BBBB + r = (packed >> 8) & 0xF; + g = (packed >> 4) & 0xF; + b = packed & 0xF; + a = (packed >> 12) & 7; + + r = g_pvrtc_4[r]; + g = g_pvrtc_4[g]; + + if (!endpoint_index) + b = g_pvrtc_3[b >> 1]; + else + b = g_pvrtc_4[b]; + + a = g_pvrtc_alpha[a]; + } + + return color32(r, g, b, a); + } + + static uint32_t get_endpoint_l8(uint32_t endpoints, uint32_t endpoint_index) + { + color32 c(get_endpoint_8888(endpoints, endpoint_index)); + return c.r + c.g + c.b + c.a; + } +#endif + +#if BASISD_SUPPORT_PVRTC1 + // TODO: Support decoding a non-pow2 ETC1S texture into the next larger pow2 PVRTC texture. 
+ static void fixup_pvrtc1_4_modulation_rgb(const decoder_etc_block* pETC_Blocks, const uint32_t* pPVRTC_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y) + { + const uint32_t x_mask = num_blocks_x - 1; + const uint32_t y_mask = num_blocks_y - 1; + const uint32_t x_bits = basisu::total_bits(x_mask); + const uint32_t y_bits = basisu::total_bits(y_mask); + const uint32_t min_bits = basisu::minimum(x_bits, y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; + + uint32_t block_index = 0; + + // really 3x3 + int e0[4][4], e1[4][4]; + + for (int y = 0; y < static_cast(num_blocks_y); y++) + { + const uint32_t* pE_rows[3]; + + for (int ey = 0; ey < 3; ey++) + { + int by = y + ey - 1; + + const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; + + pE_rows[ey] = pE; + + for (int ex = 0; ex < 3; ex++) + { + int bx = 0 + ex - 1; + + const uint32_t e = pE[bx & x_mask]; + + e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; + e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; + } + } + + const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF]; + + for (int x = 0; x < static_cast(num_blocks_x); x++, block_index++) + { + const decoder_etc_block& src_block = pETC_Blocks[block_index]; + + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); + + uint32_t swizzled = x_swizzle | y_swizzle; + if (num_blocks_x != num_blocks_y) + { + swizzled &= swizzle_mask; + + if (num_blocks_x > num_blocks_y) + swizzled |= ((x >> min_bits) << (min_bits * 2)); + else + swizzled |= ((y >> min_bits) << (min_bits * 2)); + } + + pvrtc4_block* pDst_block = static_cast(pDst_blocks) + swizzled; + pDst_block->m_endpoints = pPVRTC_endpoints[block_index]; + + uint32_t base_r = g_etc_5_to_8[src_block.m_differential.m_red1]; + uint32_t base_g = g_etc_5_to_8[src_block.m_differential.m_green1]; + 
uint32_t base_b = g_etc_5_to_8[src_block.m_differential.m_blue1]; + + const int* pInten_table48 = g_etc1_inten_tables48[src_block.m_differential.m_cw1]; + int by = (base_r + base_g + base_b) * 16; + int block_colors_y_x16[4]; + block_colors_y_x16[0] = by + pInten_table48[2]; + block_colors_y_x16[1] = by + pInten_table48[3]; + block_colors_y_x16[2] = by + pInten_table48[1]; + block_colors_y_x16[3] = by + pInten_table48[0]; + + { + const uint32_t ex = 2; + int bx = x + ex - 1; + bx &= x_mask; + +#define DO_ROW(ey) \ + { \ + const uint32_t e = pE_rows[ey][bx]; \ + e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; \ + e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; \ + } + + DO_ROW(0); + DO_ROW(1); + DO_ROW(2); +#undef DO_ROW + } + + uint32_t mod = 0; + + uint32_t lookup_x[4]; + +#define DO_LOOKUP(lx) { \ + const uint32_t byte_ofs = 7 - (((lx) * 4) >> 3); \ + const uint32_t lsb_bits = src_block.m_bytes[byte_ofs] >> (((lx) & 1) * 4); \ + const uint32_t msb_bits = src_block.m_bytes[byte_ofs - 2] >> (((lx) & 1) * 4); \ + lookup_x[lx] = (lsb_bits & 0xF) | ((msb_bits & 0xF) << 4); } + + DO_LOOKUP(0); + DO_LOOKUP(1); + DO_LOOKUP(2); + DO_LOOKUP(3); +#undef DO_LOOKUP + +#define DO_PIX(lx, ly, w0, w1, w2, w3) \ + { \ + int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \ + int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \ + int cl = block_colors_y_x16[g_etc1_x_selector_unpack[ly][lookup_x[lx]]]; \ + int dl = cb_l - ca_l; \ + int vl = cl - ca_l; \ + int p = vl * 16; \ + if (ca_l > cb_l) { p = -p; dl = -dl; } \ + uint32_t m = 0; \ + if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \ + if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \ + if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \ + mod |= m; \ + } + + { + const uint32_t ex = 0, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + 
DO_PIX(0, 0, 4, 4, 4, 4); + DO_PIX(1, 0, 2, 6, 2, 6); + DO_PIX(0, 1, 2, 2, 6, 6); + DO_PIX(1, 1, 1, 3, 3, 9); + } + + { + const uint32_t ex = 1, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 0, 8, 0, 8, 0); + DO_PIX(3, 0, 6, 2, 6, 2); + DO_PIX(2, 1, 4, 0, 12, 0); + DO_PIX(3, 1, 3, 1, 9, 3); + } + + { + const uint32_t ex = 0, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 2, 8, 8, 0, 0); + DO_PIX(1, 2, 4, 12, 0, 0); + DO_PIX(0, 3, 6, 6, 2, 2); + DO_PIX(1, 3, 3, 9, 1, 3); + } + + { + const uint32_t ex = 1, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 2, 16, 0, 0, 0); + DO_PIX(3, 2, 12, 4, 0, 0); + DO_PIX(2, 3, 12, 0, 4, 0); + DO_PIX(3, 3, 9, 3, 3, 1); + } +#undef DO_PIX + + pDst_block->m_modulation = mod; + + e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0]; + e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1]; + e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2]; + + e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0]; + e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1]; + e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2]; + + } // x + } // y + } + + static void fixup_pvrtc1_4_modulation_rgba( + const decoder_etc_block* pETC_Blocks, + const uint32_t* pPVRTC_endpoints, + void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, void *pAlpha_blocks, + const endpoint* pEndpoints, const selector* pSelectors) + { + const uint32_t x_mask = num_blocks_x - 1; + const uint32_t y_mask = num_blocks_y - 1; + const uint32_t x_bits = basisu::total_bits(x_mask); + const uint32_t y_bits = basisu::total_bits(y_mask); + const uint32_t min_bits 
= basisu::minimum(x_bits, y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; + + uint32_t block_index = 0; + + // really 3x3 + int e0[4][4], e1[4][4]; + + for (int y = 0; y < static_cast(num_blocks_y); y++) + { + const uint32_t* pE_rows[3]; + + for (int ey = 0; ey < 3; ey++) + { + int by = y + ey - 1; + + const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; + + pE_rows[ey] = pE; + + for (int ex = 0; ex < 3; ex++) + { + int bx = 0 + ex - 1; + + const uint32_t e = pE[bx & x_mask]; + + e0[ex][ey] = get_endpoint_l8(e, 0); + e1[ex][ey] = get_endpoint_l8(e, 1); + } + } + + const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF]; + + for (int x = 0; x < static_cast(num_blocks_x); x++, block_index++) + { + const decoder_etc_block& src_block = pETC_Blocks[block_index]; + + const uint16_t* pSrc_alpha_block = reinterpret_cast(static_cast(pAlpha_blocks) + x + (y * num_blocks_x)); + const endpoint* pAlpha_endpoints = &pEndpoints[pSrc_alpha_block[0]]; + const selector* pAlpha_selectors = &pSelectors[pSrc_alpha_block[1]]; + + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); + + uint32_t swizzled = x_swizzle | y_swizzle; + if (num_blocks_x != num_blocks_y) + { + swizzled &= swizzle_mask; + + if (num_blocks_x > num_blocks_y) + swizzled |= ((x >> min_bits) << (min_bits * 2)); + else + swizzled |= ((y >> min_bits) << (min_bits * 2)); + } + + pvrtc4_block* pDst_block = static_cast(pDst_blocks) + swizzled; + pDst_block->m_endpoints = pPVRTC_endpoints[block_index]; + + uint32_t base_r = g_etc_5_to_8[src_block.m_differential.m_red1]; + uint32_t base_g = g_etc_5_to_8[src_block.m_differential.m_green1]; + uint32_t base_b = g_etc_5_to_8[src_block.m_differential.m_blue1]; + + const int* pInten_table48 = g_etc1_inten_tables48[src_block.m_differential.m_cw1]; + int by = (base_r + base_g + base_b) * 
16; + int block_colors_y_x16[4]; + block_colors_y_x16[0] = basisu::clamp(by + pInten_table48[0], 0, 48 * 255); + block_colors_y_x16[1] = basisu::clamp(by + pInten_table48[1], 0, 48 * 255); + block_colors_y_x16[2] = basisu::clamp(by + pInten_table48[2], 0, 48 * 255); + block_colors_y_x16[3] = basisu::clamp(by + pInten_table48[3], 0, 48 * 255); + + uint32_t alpha_base_g = g_etc_5_to_8[pAlpha_endpoints->m_color5.g] * 16; + const int* pInten_table16 = g_etc1_inten_tables16[pAlpha_endpoints->m_inten5]; + int alpha_block_colors_x16[4]; + alpha_block_colors_x16[0] = basisu::clamp(alpha_base_g + pInten_table16[0], 0, 16 * 255); + alpha_block_colors_x16[1] = basisu::clamp(alpha_base_g + pInten_table16[1], 0, 16 * 255); + alpha_block_colors_x16[2] = basisu::clamp(alpha_base_g + pInten_table16[2], 0, 16 * 255); + alpha_block_colors_x16[3] = basisu::clamp(alpha_base_g + pInten_table16[3], 0, 16 * 255); + + // clamp((base_r + base_g + base_b) * 16 + color_inten[s] * 48) + clamp(alpha_base_g * 16 + alpha_inten[as] * 16) + + { + const uint32_t ex = 2; + int bx = x + ex - 1; + bx &= x_mask; + +#define DO_ROW(ey) \ + { \ + const uint32_t e = pE_rows[ey][bx]; \ + e0[ex][ey] = get_endpoint_l8(e, 0); \ + e1[ex][ey] = get_endpoint_l8(e, 1); \ + } + + DO_ROW(0); + DO_ROW(1); + DO_ROW(2); +#undef DO_ROW + } + + uint32_t mod = 0; + +#define DO_PIX(lx, ly, w0, w1, w2, w3) \ + { \ + int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \ + int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \ + int cl = block_colors_y_x16[(src_block.m_bytes[4 + ly] >> (lx * 2)) & 3] + alpha_block_colors_x16[(pAlpha_selectors->m_selectors[ly] >> (lx * 2)) & 3]; \ + int dl = cb_l - ca_l; \ + int vl = cl - ca_l; \ + int p = vl * 16; \ + if (ca_l > cb_l) { p = -p; dl = -dl; } \ + uint32_t m = 0; \ + if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \ + if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \ + if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \ + mod |= m; \ + } + + { + 
const uint32_t ex = 0, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 0, 4, 4, 4, 4); + DO_PIX(1, 0, 2, 6, 2, 6); + DO_PIX(0, 1, 2, 2, 6, 6); + DO_PIX(1, 1, 1, 3, 3, 9); + } + + { + const uint32_t ex = 1, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 0, 8, 0, 8, 0); + DO_PIX(3, 0, 6, 2, 6, 2); + DO_PIX(2, 1, 4, 0, 12, 0); + DO_PIX(3, 1, 3, 1, 9, 3); + } + + { + const uint32_t ex = 0, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 2, 8, 8, 0, 0); + DO_PIX(1, 2, 4, 12, 0, 0); + DO_PIX(0, 3, 6, 6, 2, 2); + DO_PIX(1, 3, 3, 9, 1, 3); + } + + { + const uint32_t ex = 1, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 2, 16, 0, 0, 0); + DO_PIX(3, 2, 12, 4, 0, 0); + DO_PIX(2, 3, 12, 0, 4, 0); + DO_PIX(3, 3, 9, 3, 3, 1); + } +#undef DO_PIX + + pDst_block->m_modulation = mod; + + e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0]; + e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1]; + e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2]; + + e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0]; + e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1]; + e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2]; + + } // x + } // y + } +#endif // BASISD_SUPPORT_PVRTC1 + +#if BASISD_SUPPORT_BC7_MODE5 + static dxt_selector_range g_etc1_to_bc7_m5_selector_ranges[] = + { + { 0, 3 }, + { 1, 3 }, + { 0, 2 }, + { 1, 2 }, + { 2, 3 }, + { 0, 1 }, + }; + + const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES = 
sizeof(g_etc1_to_bc7_m5_selector_ranges) / sizeof(g_etc1_to_bc7_m5_selector_ranges[0]); + + static uint32_t g_etc1_to_bc7_m5_selector_range_index[4][4]; + + const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS = 10; + static const uint8_t g_etc1_to_bc7_m5_selector_mappings[NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS][4] = + { + { 0, 0, 1, 1 }, + { 0, 0, 1, 2 }, + { 0, 0, 1, 3 }, + { 0, 0, 2, 3 }, + { 0, 1, 1, 1 }, + { 0, 1, 2, 2 }, + { 0, 1, 2, 3 }, + { 0, 2, 3, 3 }, + { 1, 2, 2, 2 }, + { 1, 2, 3, 3 }, + }; + + struct etc1_to_bc7_m5_solution + { + uint8_t m_lo; + uint8_t m_hi; + uint16_t m_err; + }; + + static const etc1_to_bc7_m5_solution g_etc1_to_bc7_m5_color[32 * 8 * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS * NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES] = { +#include "basisu_transcoder_tables_bc7_m5_color.inc" + }; + + static dxt_selector_range g_etc1_to_bc7_m5a_selector_ranges[] = + { + { 0, 3 }, + { 1, 3 }, + { 0, 2 }, + { 1, 2 }, + { 2, 3 }, + { 0, 1 } + }; + + const uint32_t NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5a_selector_ranges) / sizeof(g_etc1_to_bc7_m5a_selector_ranges[0]); + + static uint32_t g_etc1_to_bc7_m5a_selector_range_index[4][4]; + + struct etc1_g_to_bc7_m5a_conversion + { + uint8_t m_lo, m_hi; + uint8_t m_trans; + }; + + static etc1_g_to_bc7_m5a_conversion g_etc1_g_to_bc7_m5a[8 * 32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES] = + { + #include "basisu_transcoder_tables_bc7_m5_alpha.inc" + }; + + static inline uint32_t set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t cur_ofs) + { + assert(num_bits < 32); + assert(val < (1ULL << num_bits)); + + uint32_t mask = static_cast((1ULL << num_bits) - 1); + + while (num_bits) + { + const uint32_t n = basisu::minimum(8 - (cur_ofs & 7), num_bits); + + pBytes[cur_ofs >> 3] &= ~static_cast(mask << (cur_ofs & 7)); + pBytes[cur_ofs >> 3] |= static_cast(val << (cur_ofs & 7)); + + val >>= n; + mask >>= n; + + num_bits -= n; + cur_ofs += n; + } + + return cur_ofs; + } + + struct bc7_mode_5 + 
{ + union + { + struct + { + uint64_t m_mode : 6; + uint64_t m_rot : 2; + + uint64_t m_r0 : 7; + uint64_t m_r1 : 7; + uint64_t m_g0 : 7; + uint64_t m_g1 : 7; + uint64_t m_b0 : 7; + uint64_t m_b1 : 7; + uint64_t m_a0 : 8; + uint64_t m_a1_0 : 6; + + } m_lo; + + uint64_t m_lo_bits; + }; + + union + { + struct + { + uint64_t m_a1_1 : 2; + + // bit 2 + uint64_t m_c00 : 1; + uint64_t m_c10 : 2; + uint64_t m_c20 : 2; + uint64_t m_c30 : 2; + + uint64_t m_c01 : 2; + uint64_t m_c11 : 2; + uint64_t m_c21 : 2; + uint64_t m_c31 : 2; + + uint64_t m_c02 : 2; + uint64_t m_c12 : 2; + uint64_t m_c22 : 2; + uint64_t m_c32 : 2; + + uint64_t m_c03 : 2; + uint64_t m_c13 : 2; + uint64_t m_c23 : 2; + uint64_t m_c33 : 2; + + // bit 33 + uint64_t m_a00 : 1; + uint64_t m_a10 : 2; + uint64_t m_a20 : 2; + uint64_t m_a30 : 2; + + uint64_t m_a01 : 2; + uint64_t m_a11 : 2; + uint64_t m_a21 : 2; + uint64_t m_a31 : 2; + + uint64_t m_a02 : 2; + uint64_t m_a12 : 2; + uint64_t m_a22 : 2; + uint64_t m_a32 : 2; + + uint64_t m_a03 : 2; + uint64_t m_a13 : 2; + uint64_t m_a23 : 2; + uint64_t m_a33 : 2; + + } m_hi; + + uint64_t m_hi_bits; + }; + }; + +#if BASISD_WRITE_NEW_BC7_MODE5_TABLES + static void create_etc1_to_bc7_m5_color_conversion_table() + { + FILE* pFile = nullptr; + fopen_s(&pFile, "basisu_transcoder_tables_bc7_m5_color.inc", "w"); + + uint32_t n = 0; + + for (int inten = 0; inten < 8; inten++) + { + for (uint32_t g = 0; g < 32; g++) + { + color32 block_colors[4]; + decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten); + + for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES; sr++) + { + const uint32_t low_selector = g_etc1_to_bc7_m5_selector_ranges[sr].m_low; + const uint32_t high_selector = g_etc1_to_bc7_m5_selector_ranges[sr].m_high; + + for (uint32_t m = 0; m < NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS; m++) + { + uint32_t best_lo = 0; + uint32_t best_hi = 0; + uint64_t best_err = UINT64_MAX; + + for (uint32_t hi 
= 0; hi <= 127; hi++) + { + for (uint32_t lo = 0; lo <= 127; lo++) + { + uint32_t colors[4]; + + colors[0] = (lo << 1) | (lo >> 6); + colors[3] = (hi << 1) | (hi >> 6); + + colors[1] = (colors[0] * (64 - 21) + colors[3] * 21 + 32) / 64; + colors[2] = (colors[0] * (64 - 43) + colors[3] * 43 + 32) / 64; + + uint64_t total_err = 0; + + for (uint32_t s = low_selector; s <= high_selector; s++) + { + int err = block_colors[s].g - colors[g_etc1_to_bc7_m5_selector_mappings[m][s]]; + + int err_scale = 1; + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // the low/high selectors which are clamping to either 0 or 255. + if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) + err_scale = 5; + + total_err += (err * err) * err_scale; + } + + if (total_err < best_err) + { + best_err = total_err; + best_lo = lo; + best_hi = hi; + } + } + } + + best_err = basisu::minimum(best_err, 0xFFFF); + + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err); + n++; + if ((n & 31) == 31) + fprintf(pFile, "\n"); + } // m + } // sr + } // g + } // inten + + fclose(pFile); + } + + static void create_etc1_to_bc7_m5_alpha_conversion_table() + { + FILE* pFile = nullptr; + fopen_s(&pFile, "basisu_transcoder_tables_bc7_m5_alpha.inc", "w"); + + uint32_t n = 0; + + for (int inten = 0; inten < 8; inten++) + { + for (uint32_t g = 0; g < 32; g++) + { + color32 block_colors[4]; + decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten); + + for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES; sr++) + { + const uint32_t low_selector = g_etc1_to_bc7_m5a_selector_ranges[sr].m_low; + const uint32_t high_selector = g_etc1_to_bc7_m5a_selector_ranges[sr].m_high; + + uint32_t best_lo = 0; + uint32_t best_hi = 0; + uint64_t best_err = UINT64_MAX; + uint32_t 
best_output_selectors = 0; + + for (uint32_t hi = 0; hi <= 255; hi++) + { + for (uint32_t lo = 0; lo <= 255; lo++) + { + uint32_t colors[4]; + + colors[0] = lo; + colors[3] = hi; + + colors[1] = (colors[0] * (64 - 21) + colors[3] * 21 + 32) / 64; + colors[2] = (colors[0] * (64 - 43) + colors[3] * 43 + 32) / 64; + + uint64_t total_err = 0; + uint32_t output_selectors = 0; + + for (uint32_t s = low_selector; s <= high_selector; s++) + { + int best_mapping_err = INT_MAX; + int best_k = 0; + for (int k = 0; k < 4; k++) + { + int mapping_err = block_colors[s].g - colors[k]; + mapping_err *= mapping_err; + + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // the low/high selectors which are clamping to either 0 or 255. + if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) + mapping_err *= 5; + + if (mapping_err < best_mapping_err) + { + best_mapping_err = mapping_err; + best_k = k; + } + } // k + + total_err += best_mapping_err; + output_selectors |= (best_k << (s * 2)); + } // s + + if (total_err < best_err) + { + best_err = total_err; + best_lo = lo; + best_hi = hi; + best_output_selectors = output_selectors; + } + + } // lo + } // hi + + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, best_output_selectors); + n++; + if ((n & 31) == 31) + fprintf(pFile, "\n"); + + } // sr + } // g + } // inten + + fclose(pFile); + } +#endif // BASISD_WRITE_NEW_BC7_MODE5_TABLES + + struct bc7_m5_match_entry + { + uint8_t m_hi; + uint8_t m_lo; + }; + + static bc7_m5_match_entry g_bc7_m5_equals_1[256] = + { + {0,0},{1,0},{3,0},{4,0},{6,0},{7,0},{9,0},{10,0},{12,0},{13,0},{15,0},{16,0},{18,0},{20,0},{21,0},{23,0}, + {24,0},{26,0},{27,0},{29,0},{30,0},{32,0},{33,0},{35,0},{36,0},{38,0},{39,0},{41,0},{42,0},{44,0},{45,0},{47,0}, + 
{48,0},{50,0},{52,0},{53,0},{55,0},{56,0},{58,0},{59,0},{61,0},{62,0},{64,0},{65,0},{66,0},{68,0},{69,0},{71,0}, + {72,0},{74,0},{75,0},{77,0},{78,0},{80,0},{82,0},{83,0},{85,0},{86,0},{88,0},{89,0},{91,0},{92,0},{94,0},{95,0}, + {97,0},{98,0},{100,0},{101,0},{103,0},{104,0},{106,0},{107,0},{109,0},{110,0},{112,0},{114,0},{115,0},{117,0},{118,0},{120,0}, + {121,0},{123,0},{124,0},{126,0},{127,0},{127,1},{126,2},{126,3},{127,3},{127,4},{126,5},{126,6},{127,6},{127,7},{126,8},{126,9}, + {127,9},{127,10},{126,11},{126,12},{127,12},{127,13},{126,14},{125,15},{127,15},{126,16},{126,17},{127,17},{127,18},{126,19},{126,20},{127,20}, + {127,21},{126,22},{126,23},{127,23},{127,24},{126,25},{126,26},{127,26},{127,27},{126,28},{126,29},{127,29},{127,30},{126,31},{126,32},{127,32}, + {127,33},{126,34},{126,35},{127,35},{127,36},{126,37},{126,38},{127,38},{127,39},{126,40},{126,41},{127,41},{127,42},{126,43},{126,44},{127,44}, + {127,45},{126,46},{125,47},{127,47},{126,48},{126,49},{127,49},{127,50},{126,51},{126,52},{127,52},{127,53},{126,54},{126,55},{127,55},{127,56}, + {126,57},{126,58},{127,58},{127,59},{126,60},{126,61},{127,61},{127,62},{126,63},{125,64},{126,64},{126,65},{127,65},{127,66},{126,67},{126,68}, + {127,68},{127,69},{126,70},{126,71},{127,71},{127,72},{126,73},{126,74},{127,74},{127,75},{126,76},{125,77},{127,77},{126,78},{126,79},{127,79}, + {127,80},{126,81},{126,82},{127,82},{127,83},{126,84},{126,85},{127,85},{127,86},{126,87},{126,88},{127,88},{127,89},{126,90},{126,91},{127,91}, + {127,92},{126,93},{126,94},{127,94},{127,95},{126,96},{126,97},{127,97},{127,98},{126,99},{126,100},{127,100},{127,101},{126,102},{126,103},{127,103}, + {127,104},{126,105},{126,106},{127,106},{127,107},{126,108},{125,109},{127,109},{126,110},{126,111},{127,111},{127,112},{126,113},{126,114},{127,114},{127,115}, + {126,116},{126,117},{127,117},{127,118},{126,119},{126,120},{127,120},{127,121},{126,122},{126,123},{127,123},{127,124},{126,125},{126,126},{127,126},{127,127} + }; 
+ + static void transcoder_init_bc7_mode5() + { +#if 0 + // This is a little too much work to do at init time, so precompute it. + for (int i = 0; i < 256; i++) + { + int lowest_e = 256; + for (int lo = 0; lo < 128; lo++) + { + for (int hi = 0; hi < 128; hi++) + { + const int lo_e = (lo << 1) | (lo >> 6); + const int hi_e = (hi << 1) | (hi >> 6); + + // Selector 1 + int v = (lo_e * (64 - 21) + hi_e * 21 + 32) >> 6; + int e = abs(v - i); + + if (e < lowest_e) + { + g_bc7_m5_equals_1[i].m_hi = static_cast(hi); + g_bc7_m5_equals_1[i].m_lo = static_cast(lo); + + lowest_e = e; + } + + } // hi + + } // lo + + printf("{%u,%u},", g_bc7_m5_equals_1[i].m_hi, g_bc7_m5_equals_1[i].m_lo); + if ((i & 15) == 15) printf("\n"); + } +#endif + + for (uint32_t i = 0; i < NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES; i++) + { + uint32_t l = g_etc1_to_bc7_m5_selector_ranges[i].m_low; + uint32_t h = g_etc1_to_bc7_m5_selector_ranges[i].m_high; + g_etc1_to_bc7_m5_selector_range_index[l][h] = i; + } + + for (uint32_t i = 0; i < NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES; i++) + { + uint32_t l = g_etc1_to_bc7_m5a_selector_ranges[i].m_low; + uint32_t h = g_etc1_to_bc7_m5a_selector_ranges[i].m_high; + g_etc1_to_bc7_m5a_selector_range_index[l][h] = i; + } + } + + static void convert_etc1s_to_bc7_m5_color(void* pDst, const endpoint* pEndpoints, const selector* pSelector) + { + bc7_mode_5* pDst_block = static_cast(pDst); + + // First ensure the block is cleared to all 0's + static_cast(pDst)[0] = 0; + static_cast(pDst)[1] = 0; + + // Set alpha to 255 + pDst_block->m_lo.m_mode = 1 << 5; + pDst_block->m_lo.m_a0 = 255; + pDst_block->m_lo.m_a1_0 = 63; + pDst_block->m_hi.m_a1_1 = 3; + + const uint32_t low_selector = pSelector->m_lo_selector; + const uint32_t high_selector = pSelector->m_hi_selector; + + const uint32_t base_color_r = pEndpoints->m_color5.r; + const uint32_t base_color_g = pEndpoints->m_color5.g; + const uint32_t base_color_b = pEndpoints->m_color5.b; + const uint32_t inten_table = pEndpoints->m_inten5; 
+ + if (pSelector->m_num_unique_selectors == 1) + { + // Solid color block - use precomputed tables and set selectors to 1. + uint32_t r, g, b; + decoder_etc_block::get_block_color5(pEndpoints->m_color5, inten_table, low_selector, r, g, b); + + pDst_block->m_lo.m_r0 = g_bc7_m5_equals_1[r].m_lo; + pDst_block->m_lo.m_g0 = g_bc7_m5_equals_1[g].m_lo; + pDst_block->m_lo.m_b0 = g_bc7_m5_equals_1[b].m_lo; + + pDst_block->m_lo.m_r1 = g_bc7_m5_equals_1[r].m_hi; + pDst_block->m_lo.m_g1 = g_bc7_m5_equals_1[g].m_hi; + pDst_block->m_lo.m_b1 = g_bc7_m5_equals_1[b].m_hi; + + set_block_bits((uint8_t*)pDst, 0x2aaaaaab, 31, 66); + return; + } + else if (pSelector->m_num_unique_selectors == 2) + { + // Only one or two unique selectors, so just switch to block truncation coding (BTC) to avoid quality issues on extreme blocks. + color32 block_colors[4]; + + decoder_etc_block::get_block_colors5(block_colors, color32(base_color_r, base_color_g, base_color_b, 255), inten_table); + + const uint32_t r0 = block_colors[low_selector].r; + const uint32_t g0 = block_colors[low_selector].g; + const uint32_t b0 = block_colors[low_selector].b; + + const uint32_t r1 = block_colors[high_selector].r; + const uint32_t g1 = block_colors[high_selector].g; + const uint32_t b1 = block_colors[high_selector].b; + + pDst_block->m_lo.m_r0 = r0 >> 1; + pDst_block->m_lo.m_g0 = g0 >> 1; + pDst_block->m_lo.m_b0 = b0 >> 1; + + pDst_block->m_lo.m_r1 = r1 >> 1; + pDst_block->m_lo.m_g1 = g1 >> 1; + pDst_block->m_lo.m_b1 = b1 >> 1; + + uint32_t output_low_selector = 0, output_bit_offset = 0, output_bits = 0; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = pSelector->get_selector(x, y); + uint32_t os = (s == low_selector) ? 
output_low_selector : (3 ^ output_low_selector); + + uint32_t num_bits = 2; + + if ((x | y) == 0) + { + if (os & 2) + { + pDst_block->m_lo.m_r0 = r1 >> 1; + pDst_block->m_lo.m_g0 = g1 >> 1; + pDst_block->m_lo.m_b0 = b1 >> 1; + + pDst_block->m_lo.m_r1 = r0 >> 1; + pDst_block->m_lo.m_g1 = g0 >> 1; + pDst_block->m_lo.m_b1 = b0 >> 1; + + output_low_selector = 3; + os = 0; + } + + num_bits = 1; + } + + output_bits |= (os << output_bit_offset); + output_bit_offset += num_bits; + } + } + + set_block_bits((uint8_t*)pDst, output_bits, 31, 66); + return; + } + + const uint32_t selector_range_table = g_etc1_to_bc7_m5_selector_range_index[low_selector][high_selector]; + + //[32][8][RANGES][MAPPING] + const etc1_to_bc7_m5_solution* pTable_r = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_r) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS]; + const etc1_to_bc7_m5_solution* pTable_g = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_g) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS]; + const etc1_to_bc7_m5_solution* pTable_b = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_b) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS]; + + uint32_t best_err = UINT_MAX; + uint32_t best_mapping = 0; + + assert(NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS == 10); +#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } } + DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4); + DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9); +#undef DO_ITER + + const uint8_t* pSelectors_xlat = &g_etc1_to_bc7_m5_selector_mappings[best_mapping][0]; + + uint32_t s_inv = 0; + if 
(pSelectors_xlat[pSelector->get_selector(0, 0)] & 2) + { + pDst_block->m_lo.m_r0 = pTable_r[best_mapping].m_hi; + pDst_block->m_lo.m_g0 = pTable_g[best_mapping].m_hi; + pDst_block->m_lo.m_b0 = pTable_b[best_mapping].m_hi; + + pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_lo; + pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_lo; + pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_lo; + + s_inv = 3; + } + else + { + pDst_block->m_lo.m_r0 = pTable_r[best_mapping].m_lo; + pDst_block->m_lo.m_g0 = pTable_g[best_mapping].m_lo; + pDst_block->m_lo.m_b0 = pTable_b[best_mapping].m_lo; + + pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_hi; + pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_hi; + pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_hi; + } + + uint32_t output_bits = 0, output_bit_ofs = 0; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t s = pSelector->get_selector(x, y); + + const uint32_t os = pSelectors_xlat[s] ^ s_inv; + + output_bits |= (os << output_bit_ofs); + + output_bit_ofs += (((x | y) == 0) ? 
1 : 2); + } + } + + set_block_bits((uint8_t*)pDst, output_bits, 31, 66); + } + + static void convert_etc1s_to_bc7_m5_alpha(void* pDst, const endpoint* pEndpoints, const selector* pSelector) + { + bc7_mode_5* pDst_block = static_cast(pDst); + + const uint32_t low_selector = pSelector->m_lo_selector; + const uint32_t high_selector = pSelector->m_hi_selector; + + const uint32_t base_color_r = pEndpoints->m_color5.r; + const uint32_t inten_table = pEndpoints->m_inten5; + + if (pSelector->m_num_unique_selectors == 1) + { + uint32_t r; + decoder_etc_block::get_block_color5_r(pEndpoints->m_color5, inten_table, low_selector, r); + + pDst_block->m_lo.m_a0 = r; + pDst_block->m_lo.m_a1_0 = r & 63; + pDst_block->m_hi.m_a1_1 = r >> 6; + + return; + } + else if (pSelector->m_num_unique_selectors == 2) + { + // Only one or two unique selectors, so just switch to block truncation coding (BTC) to avoid quality issues on extreme blocks. + int block_colors[4]; + + decoder_etc_block::get_block_colors5_g(block_colors, pEndpoints->m_color5, inten_table); + + pDst_block->m_lo.m_a0 = block_colors[low_selector]; + pDst_block->m_lo.m_a1_0 = block_colors[high_selector] & 63; + pDst_block->m_hi.m_a1_1 = block_colors[high_selector] >> 6; + + uint32_t output_low_selector = 0, output_bit_offset = 0, output_bits = 0; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t s = pSelector->get_selector(x, y); + uint32_t os = (s == low_selector) ? 
output_low_selector : (3 ^ output_low_selector); + + uint32_t num_bits = 2; + + if ((x | y) == 0) + { + if (os & 2) + { + pDst_block->m_lo.m_a0 = block_colors[high_selector]; + pDst_block->m_lo.m_a1_0 = block_colors[low_selector] & 63; + pDst_block->m_hi.m_a1_1 = block_colors[low_selector] >> 6; + + output_low_selector = 3; + os = 0; + } + + num_bits = 1; + } + + output_bits |= (os << output_bit_offset); + output_bit_offset += num_bits; + } + } + + set_block_bits((uint8_t*)pDst, output_bits, 31, 97); + return; + } + + const uint32_t selector_range_table = g_etc1_to_bc7_m5a_selector_range_index[low_selector][high_selector]; + + const etc1_g_to_bc7_m5a_conversion* pTable = &g_etc1_g_to_bc7_m5a[inten_table * (32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES) + base_color_r * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES + selector_range_table]; + + pDst_block->m_lo.m_a0 = pTable->m_lo; + pDst_block->m_lo.m_a1_0 = pTable->m_hi & 63; + pDst_block->m_hi.m_a1_1 = pTable->m_hi >> 6; + + uint32_t output_bit_offset = 0, output_bits = 0, selector_trans = pTable->m_trans; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t s = pSelector->get_selector(x, y); + uint32_t os = (selector_trans >> (s * 2)) & 3; + + uint32_t num_bits = 2; + + if ((x | y) == 0) + { + if (os & 2) + { + pDst_block->m_lo.m_a0 = pTable->m_hi; + pDst_block->m_lo.m_a1_0 = pTable->m_lo & 63; + pDst_block->m_hi.m_a1_1 = pTable->m_lo >> 6; + + selector_trans ^= 0xFF; + os ^= 3; + } + + num_bits = 1; + } + + output_bits |= (os << output_bit_offset); + output_bit_offset += num_bits; + } + } + + set_block_bits((uint8_t*)pDst, output_bits, 31, 97); + } + + static inline vec3F rgb_to_ycocg(const vec3F& rgb) + { + return vec3F(rgb.dot(vec3F(0.25f, 0.5f, 0.25f)), rgb.dot(vec3F(0.5f, 0.0f, -0.5f)), rgb.dot(vec3F(-0.25f, 0.5f, -0.25f))); + } + + static inline vec2F rgb_to_cocg(const vec3F& rgb) + { + return vec2F(rgb.dot(vec3F(0.5f, 0.0f, -0.5f)), rgb.dot(vec3F(-0.25f, 0.5f, -0.25f))); + } + 
+ static inline vec3F ycocg_to_rgb(const vec3F& ycocg) + { + return vec3F(ycocg.dot(vec3F(1.0f, 1.0f, -1.0f)), ycocg.dot(vec3F(1.0f, 0.0f, 1.0f)), ycocg.dot(vec3F(1.0f, -1.0f, -1.0f))); + } + + static inline vec3F color32_to_vec3F(const color32& c) + { + return vec3F(c.r, c.g, c.b); + } + + static inline vec3F color5_to_ycocg(const endpoint& e) + { + const int r = (e.m_color5[0] << 3) | (e.m_color5[0] >> 2); + const int g = (e.m_color5[1] << 3) | (e.m_color5[1] >> 2); + const int b = (e.m_color5[2] << 3) | (e.m_color5[2] >> 2); + return rgb_to_ycocg(vec3F((float)r, (float)g, (float)b)); + } + + static inline vec2F color5_to_cocg(const endpoint& e) + { + const int r = (e.m_color5[0] << 3) | (e.m_color5[0] >> 2); + const int g = (e.m_color5[1] << 3) | (e.m_color5[1] >> 2); + const int b = (e.m_color5[2] << 3) | (e.m_color5[2] >> 2); + return rgb_to_cocg(vec3F((float)r, (float)g, (float)b)); + } + + static inline uint32_t bc7_7_to_8(uint32_t v) + { + assert(v < 128); + return (v << 1) | (v >> 6); + } + + static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) + { + assert(w < 4); + return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6; + } + + static inline vec2F get_endpoint_cocg_clamped(int bx, int by, const basisu::vector2D& decoded_endpoints, const endpoint* pEndpoints) + { + const uint32_t endpoint_index = decoded_endpoints.at_clamped(bx, by); + return color5_to_cocg(pEndpoints[endpoint_index]); + } + + static void chroma_filter_bc7_mode5(const basisu::vector2D& decoded_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t output_row_pitch_in_blocks_or_pixels, const endpoint *pEndpoints) + { + const bool hq_bc7_mode_5_encoder_mode = false; + + const int CHROMA_THRESH = 10; + + uint32_t total_filtered_blocks = 0; + BASISU_NOTE_UNUSED(total_filtered_blocks); + + for (int by = 0; by < (int)num_blocks_y; by++) + { + for (int bx = 0; bx < (int)num_blocks_x; bx++) + { + vec2F 
center_cocg(color5_to_cocg(pEndpoints[decoded_endpoints(bx, by)])); + + //bool filter_flag = false; + for (int dy = -1; dy <= 1; dy++) + { + const int oy = by + dy; + if ((oy < 0) || (oy >= (int)num_blocks_y)) + continue; + + for (int dx = -1; dx <= 1; dx++) + { + if ((dx | dy) == 0) + continue; + + const int ox = bx + dx; + if ((ox < 0) || (ox >= (int)num_blocks_x)) + continue; + + vec2F nearby_cocg(color5_to_cocg(pEndpoints[decoded_endpoints(ox, oy)])); + + float delta_co = fabsf(nearby_cocg[0] - center_cocg[0]); + float delta_cg = fabsf(nearby_cocg[1] - center_cocg[1]); + + if ((delta_co > CHROMA_THRESH) || (delta_cg > CHROMA_THRESH)) + { + //filter_flag = true; + goto do_filter; + } + + } // dx + } // dy + + continue; + + do_filter:; + + total_filtered_blocks++; + + bc7_mode_5* pDst_block = (bc7_mode_5*)(static_cast(pDst_blocks) + (bx + by * output_row_pitch_in_blocks_or_pixels) * sizeof(bc7_mode_5)); + + //memset(pDst_block, 0x80, 16); + + int lr = bc7_7_to_8(pDst_block->m_lo.m_r0); + int lg = bc7_7_to_8(pDst_block->m_lo.m_g0); + int lb = bc7_7_to_8(pDst_block->m_lo.m_b0); + + int hr = bc7_7_to_8(pDst_block->m_lo.m_r1); + int hg = bc7_7_to_8(pDst_block->m_lo.m_g1); + int hb = bc7_7_to_8(pDst_block->m_lo.m_b1); + + float y_vals[4]; + for (uint32_t i = 0; i < 4; i++) + { + int cr = bc7_interp2(lr, hr, i); + int cg = bc7_interp2(lg, hg, i); + int cb = bc7_interp2(lb, hb, i); + y_vals[i] = (float)cr * .25f + (float)cg * .5f + (float)cb * .25f; + } // i + + uint64_t sel_bits = pDst_block->m_hi_bits >> 2; + + float block_y_vals[16]; // [y][x] + float y_sum = 0.0f, y_sum_sq = 0.0f; + + for (uint32_t i = 0; i < 16; i++) + { + uint32_t sel = sel_bits & (i ? 3 : 1); + sel_bits >>= (i ? 2 : 1); + float y = y_vals[sel]; + block_y_vals[i] = y; + y_sum += y; + y_sum_sq += y * y; + + } // i + + const float S = 1.0f / 16.0f; + float y_var = (y_sum_sq * S) - basisu::squaref(y_sum * S); + + // Don't bother if the block is too smooth. 
+ const float Y_VAR_SKIP_THRESH = 3.0f; + if (y_var < Y_VAR_SKIP_THRESH) + continue; + + color32 block_to_pack[16]; + + for (int bpy = 0; bpy < 4; bpy++) + { + const int uby = by + ((bpy - 2) >> 2); + + for (int bpx = 0; bpx < 4; bpx++) + { + const float fx = ((float)((bpx + 2) & 3) + .5f) * (1.0f / 4.0f); + const float fy = ((float)((bpy + 2) & 3) + .5f) * (1.0f / 4.0f); + + const int ubx = bx + ((bpx - 2) >> 2); + + vec2F a(get_endpoint_cocg_clamped(ubx, uby, decoded_endpoints, pEndpoints)); + vec2F b(get_endpoint_cocg_clamped(ubx + 1, uby, decoded_endpoints, pEndpoints)); + vec2F c(get_endpoint_cocg_clamped(ubx, uby + 1, decoded_endpoints, pEndpoints)); + vec2F d(get_endpoint_cocg_clamped(ubx + 1, uby + 1, decoded_endpoints, pEndpoints)); + + assert((fx >= 0) && (fx <= 1.0f) && (fy >= 0) && (fy <= 1.0f)); + + // TODO: Could merge this into 4 muls on each corner by weights + vec2F ab = vec2F::lerp(a, b, fx); + vec2F cd = vec2F::lerp(c, d, fx); + vec2F f = vec2F::lerp(ab, cd, fy); + + vec3F final_ycocg(block_y_vals[bpx + bpy * 4], f[0], f[1]); + + vec3F final_conv(ycocg_to_rgb(final_ycocg)); + final_conv.clamp(0.0f, 255.0f); + + block_to_pack[bpx + bpy * 4].set_noclamp_rgba((int)(.5f + final_conv[0]), (int)(.5f + final_conv[1]), (int)(.5f + final_conv[2]), 255); + + } // x + } // y + + bc7_mode_5_encoder::encode_bc7_mode_5_block(pDst_block, block_to_pack, hq_bc7_mode_5_encoder_mode); + + } // bx + } // by + + //basisu::fmt_printf("Chroma thresh: {}, Total blocks to filter: {} out of {} {}\n", CHROMA_THRESH, total_filtered_blocks, num_blocks_x * num_blocks_y, (float)total_filtered_blocks * 100.0f / (num_blocks_x * num_blocks_y)); + } +#endif // BASISD_SUPPORT_BC7_MODE5 + +#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_UASTC + static const uint8_t g_etc2_eac_a8_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 }; +#endif + +#if BASISD_SUPPORT_ETC2_EAC_A8 + static void convert_etc1s_to_etc2_eac_a8(eac_block* pDst_block, const endpoint* pEndpoints, const selector* 
pSelector) + { + const uint32_t low_selector = pSelector->m_lo_selector; + const uint32_t high_selector = pSelector->m_hi_selector; + + const color32& base_color = pEndpoints->m_color5; + const uint32_t inten_table = pEndpoints->m_inten5; + + if (low_selector == high_selector) + { + uint32_t r; + decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r); + + // Constant alpha block + // Select table 13, use selector 4 (0), set multiplier to 1 and base color g + pDst_block->m_base = r; + pDst_block->m_table = 13; + pDst_block->m_multiplier = 1; + + // selectors are all 4's + memcpy(pDst_block->m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); + + return; + } + + uint32_t selector_range_table = 0; + for (selector_range_table = 0; selector_range_table < NUM_ETC2_EAC_SELECTOR_RANGES; selector_range_table++) + if ((low_selector == s_etc2_eac_selector_ranges[selector_range_table].m_low) && (high_selector == s_etc2_eac_selector_ranges[selector_range_table].m_high)) + break; + if (selector_range_table >= NUM_ETC2_EAC_SELECTOR_RANGES) + selector_range_table = 0; + + const etc1_g_to_eac_conversion* pTable_entry = &s_etc1_g_to_etc2_a8[base_color.r + inten_table * 32][selector_range_table]; + + pDst_block->m_base = pTable_entry->m_base; + pDst_block->m_table = pTable_entry->m_table_mul >> 4; + pDst_block->m_multiplier = pTable_entry->m_table_mul & 15; + + uint64_t selector_bits = 0; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = pSelector->get_selector(x, y); + + uint32_t ds = (pTable_entry->m_trans >> (s * 3)) & 7; + + const uint32_t dst_ofs = 45 - (y + x * 4) * 3; + selector_bits |= (static_cast(ds) << dst_ofs); + } + } + + pDst_block->set_selector_bits(selector_bits); + } +#endif // BASISD_SUPPORT_ETC2_EAC_A8 + +#if BASISD_SUPPORT_ETC2_EAC_RG11 + static const etc1_g_to_eac_conversion s_etc1_g_to_etc2_r11[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] = + { + 
{{0,1,3328},{0,1,3328},{0,16,457},{0,16,456}}, + {{0,226,3936},{0,226,3936},{0,17,424},{8,0,472}}, + {{6,178,4012},{6,178,4008},{0,146,501},{16,0,472}}, + {{14,178,4012},{14,178,4008},{8,146,501},{24,0,472}}, + {{23,178,4012},{23,178,4008},{17,146,501},{33,0,472}}, + {{31,178,4012},{31,178,4008},{25,146,501},{41,0,472}}, + {{39,178,4012},{39,178,4008},{33,146,501},{49,0,472}}, + {{47,178,4012},{47,178,4008},{41,146,501},{27,228,496}}, + {{56,178,4012},{56,178,4008},{50,146,501},{36,228,496}}, + {{64,178,4012},{64,178,4008},{58,146,501},{44,228,496}}, + {{72,178,4012},{72,178,4008},{66,146,501},{52,228,496}}, + {{80,178,4012},{80,178,4008},{74,146,501},{60,228,496}}, + {{89,178,4012},{89,178,4008},{83,146,501},{69,228,496}}, + {{97,178,4012},{97,178,4008},{91,146,501},{77,228,496}}, + {{105,178,4012},{105,178,4008},{99,146,501},{85,228,496}}, + {{113,178,4012},{113,178,4008},{107,146,501},{93,228,496}}, + {{122,178,4012},{122,178,4008},{116,146,501},{102,228,496}}, + {{130,178,4012},{130,178,4008},{124,146,501},{110,228,496}}, + {{138,178,4012},{138,178,4008},{132,146,501},{118,228,496}}, + {{146,178,4012},{146,178,4008},{140,146,501},{126,228,496}}, + {{155,178,4012},{155,178,4008},{149,146,501},{135,228,496}}, + {{163,178,4012},{163,178,4008},{157,146,501},{143,228,496}}, + {{171,178,4012},{171,178,4008},{165,146,501},{151,228,496}}, + {{179,178,4012},{179,178,4008},{173,146,501},{159,228,496}}, + {{188,178,4012},{188,178,4008},{182,146,501},{168,228,496}}, + {{196,178,4012},{196,178,4008},{190,146,501},{176,228,496}}, + {{204,178,4012},{204,178,4008},{198,146,501},{184,228,496}}, + {{212,178,4012},{212,178,4008},{206,146,501},{192,228,496}}, + {{221,178,4012},{221,178,4008},{215,146,501},{201,228,496}}, + {{229,178,4012},{229,178,4008},{223,146,501},{209,228,496}}, + {{235,66,4012},{221,100,4008},{231,146,501},{217,228,496}}, + {{211,102,4085},{254,32,4040},{211,102,501},{254,32,456}}, + {{0,2,3328},{0,2,3328},{0,1,320},{0,1,320}}, + 
{{7,162,3905},{7,162,3904},{0,17,480},{0,17,480}}, + {{15,162,3906},{15,162,3904},{1,117,352},{1,117,352}}, + {{23,162,3906},{23,162,3904},{5,34,500},{4,53,424}}, + {{32,162,3906},{32,162,3904},{14,34,500},{3,69,424}}, + {{40,162,3906},{40,162,3904},{22,34,500},{1,133,496}}, + {{48,162,3906},{48,162,3904},{30,34,500},{4,85,496}}, + {{56,162,3906},{56,162,3904},{38,34,500},{12,85,496}}, + {{65,162,3906},{65,162,3904},{47,34,500},{1,106,424}}, + {{73,162,3906},{73,162,3904},{55,34,500},{9,106,424}}, + {{81,162,3906},{81,162,3904},{63,34,500},{7,234,496}}, + {{89,162,3906},{89,162,3904},{71,34,500},{15,234,496}}, + {{98,162,3906},{98,162,3904},{80,34,500},{24,234,496}}, + {{106,162,3906},{106,162,3904},{88,34,500},{32,234,496}}, + {{114,162,3906},{114,162,3904},{96,34,500},{40,234,496}}, + {{122,162,3906},{122,162,3904},{104,34,500},{48,234,496}}, + {{131,162,3906},{131,162,3904},{113,34,500},{57,234,496}}, + {{139,162,3906},{139,162,3904},{121,34,500},{65,234,496}}, + {{147,162,3906},{147,162,3904},{129,34,500},{73,234,496}}, + {{155,162,3906},{155,162,3904},{137,34,500},{81,234,496}}, + {{164,162,3906},{164,162,3904},{146,34,500},{90,234,496}}, + {{172,162,3906},{172,162,3904},{154,34,500},{98,234,496}}, + {{180,162,3906},{180,162,3904},{162,34,500},{106,234,496}}, + {{188,162,3906},{188,162,3904},{170,34,500},{114,234,496}}, + {{197,162,3906},{197,162,3904},{179,34,500},{123,234,496}}, + {{205,162,3906},{205,162,3904},{187,34,500},{131,234,496}}, + {{213,162,3906},{213,162,3904},{195,34,500},{139,234,496}}, + {{221,162,3906},{221,162,3904},{203,34,500},{147,234,496}}, + {{230,162,3906},{230,162,3904},{212,34,500},{156,234,496}}, + {{238,162,3906},{174,106,4008},{220,34,500},{164,234,496}}, + {{240,178,4001},{182,106,4008},{228,34,500},{172,234,496}}, + {{166,108,4085},{115,31,4080},{166,108,501},{115,31,496}}, + {{1,68,3328},{1,68,3328},{0,1,384},{0,1,384}}, + {{1,51,3968},{1,51,3968},{0,2,384},{0,2,384}}, + {{21,18,3851},{21,18,3848},{1,50,488},{1,50,488}}, + 
{{26,195,3851},{29,18,3848},{0,67,488},{0,67,488}}, + {{35,195,3851},{38,18,3848},{12,115,488},{0,3,496}}, + {{43,195,3851},{46,18,3848},{20,115,488},{2,6,424}}, + {{51,195,3851},{54,18,3848},{36,66,482},{4,22,424}}, + {{59,195,3851},{62,18,3848},{44,66,482},{3,73,424}}, + {{68,195,3851},{71,18,3848},{53,66,482},{3,22,496}}, + {{76,195,3851},{79,18,3848},{61,66,482},{2,137,496}}, + {{84,195,3851},{87,18,3848},{69,66,482},{1,89,496}}, + {{92,195,3851},{95,18,3848},{77,66,482},{9,89,496}}, + {{101,195,3851},{104,18,3848},{86,66,482},{18,89,496}}, + {{109,195,3851},{112,18,3848},{94,66,482},{26,89,496}}, + {{117,195,3851},{120,18,3848},{102,66,482},{34,89,496}}, + {{125,195,3851},{128,18,3848},{110,66,482},{42,89,496}}, + {{134,195,3851},{137,18,3848},{119,66,482},{51,89,496}}, + {{141,195,3907},{145,18,3848},{127,66,482},{59,89,496}}, + {{149,195,3907},{153,18,3848},{135,66,482},{67,89,496}}, + {{157,195,3907},{161,18,3848},{143,66,482},{75,89,496}}, + {{166,195,3907},{170,18,3848},{152,66,482},{84,89,496}}, + {{174,195,3907},{178,18,3848},{160,66,482},{92,89,496}}, + {{182,195,3907},{186,18,3848},{168,66,482},{100,89,496}}, + {{190,195,3907},{194,18,3848},{176,66,482},{108,89,496}}, + {{199,195,3907},{203,18,3848},{185,66,482},{117,89,496}}, + {{207,195,3907},{211,18,3848},{193,66,482},{125,89,496}}, + {{215,195,3907},{219,18,3848},{201,66,482},{133,89,496}}, + {{223,195,3907},{227,18,3848},{209,66,482},{141,89,496}}, + {{232,195,3907},{168,89,4008},{218,66,482},{150,89,496}}, + {{236,18,3907},{176,89,4008},{226,66,482},{158,89,496}}, + {{158,90,4085},{103,31,4080},{158,90,501},{103,31,496}}, + {{166,90,4085},{111,31,4080},{166,90,501},{111,31,496}}, + {{0,70,3328},{0,70,3328},{0,17,448},{0,17,448}}, + {{0,117,3904},{0,117,3904},{0,35,384},{0,35,384}}, + {{13,165,3905},{13,165,3904},{2,211,480},{2,211,480}}, + {{21,165,3906},{21,165,3904},{1,51,488},{1,51,488}}, + {{30,165,3906},{30,165,3904},{7,61,352},{7,61,352}}, + 
{{38,165,3906},{38,165,3904},{2,125,352},{2,125,352}}, + {{46,165,3906},{46,165,3904},{1,37,500},{10,125,352}}, + {{54,165,3906},{54,165,3904},{9,37,500},{5,61,424}}, + {{63,165,3906},{63,165,3904},{18,37,500},{1,189,424}}, + {{71,165,3906},{71,165,3904},{26,37,500},{9,189,424}}, + {{79,165,3906},{79,165,3904},{34,37,500},{4,77,424}}, + {{87,165,3906},{87,165,3904},{42,37,500},{12,77,424}}, + {{96,165,3906},{96,165,3904},{51,37,500},{8,93,424}}, + {{104,165,3906},{104,165,3904},{59,37,500},{3,141,496}}, + {{112,165,3906},{112,165,3904},{68,37,500},{11,141,496}}, + {{120,165,3906},{120,165,3904},{76,37,500},{6,93,496}}, + {{129,165,3906},{129,165,3904},{85,37,500},{15,93,496}}, + {{70,254,4012},{137,165,3904},{93,37,500},{23,93,496}}, + {{145,165,3906},{145,165,3904},{101,37,500},{31,93,496}}, + {{86,254,4012},{153,165,3904},{109,37,500},{39,93,496}}, + {{163,165,3906},{162,165,3904},{118,37,500},{48,93,496}}, + {{171,165,3906},{170,165,3904},{126,37,500},{56,93,496}}, + {{179,165,3906},{178,165,3904},{134,37,500},{64,93,496}}, + {{187,165,3906},{187,165,3904},{142,37,500},{72,93,496}}, + {{196,165,3906},{196,165,3904},{151,37,500},{81,93,496}}, + {{204,165,3906},{204,165,3904},{159,37,500},{89,93,496}}, + {{212,165,3906},{136,77,4008},{167,37,500},{97,93,496}}, + {{220,165,3906},{131,93,4008},{175,37,500},{105,93,496}}, + {{214,181,4001},{140,93,4008},{184,37,500},{114,93,496}}, + {{222,181,4001},{148,93,4008},{192,37,500},{122,93,496}}, + {{115,95,4085},{99,31,4080},{115,95,501},{99,31,496}}, + {{123,95,4085},{107,31,4080},{123,95,501},{107,31,496}}, + {{0,102,3840},{0,102,3840},{0,18,384},{0,18,384}}, + {{5,167,3904},{5,167,3904},{0,13,256},{0,13,256}}, + {{4,54,3968},{4,54,3968},{1,67,448},{1,67,448}}, + {{30,198,3850},{30,198,3848},{0,3,480},{0,3,480}}, + {{39,198,3850},{39,198,3848},{3,52,488},{3,52,488}}, + {{47,198,3851},{47,198,3848},{3,4,488},{3,4,488}}, + {{55,198,3851},{55,198,3848},{1,70,488},{1,70,488}}, + 
{{53,167,3906},{63,198,3848},{3,22,488},{3,22,488}}, + {{62,167,3906},{72,198,3848},{24,118,488},{0,6,496}}, + {{70,167,3906},{80,198,3848},{32,118,488},{2,89,488}}, + {{78,167,3906},{88,198,3848},{40,118,488},{1,73,496}}, + {{86,167,3906},{96,198,3848},{48,118,488},{0,28,424}}, + {{95,167,3906},{105,198,3848},{57,118,488},{9,28,424}}, + {{103,167,3906},{113,198,3848},{65,118,488},{5,108,496}}, + {{111,167,3906},{121,198,3848},{73,118,488},{13,108,496}}, + {{119,167,3906},{129,198,3848},{81,118,488},{21,108,496}}, + {{128,167,3906},{138,198,3848},{90,118,488},{6,28,496}}, + {{136,167,3906},{146,198,3848},{98,118,488},{14,28,496}}, + {{145,167,3906},{154,198,3848},{106,118,488},{22,28,496}}, + {{153,167,3906},{162,198,3848},{114,118,488},{30,28,496}}, + {{162,167,3906},{171,198,3848},{123,118,488},{39,28,496}}, + {{170,167,3906},{179,198,3848},{131,118,488},{47,28,496}}, + {{178,167,3906},{187,198,3848},{139,118,488},{55,28,496}}, + {{186,167,3906},{195,198,3848},{147,118,488},{63,28,496}}, + {{194,167,3906},{120,12,4008},{156,118,488},{72,28,496}}, + {{206,198,3907},{116,28,4008},{164,118,488},{80,28,496}}, + {{214,198,3907},{124,28,4008},{172,118,488},{88,28,496}}, + {{222,198,3395},{132,28,4008},{180,118,488},{96,28,496}}, + {{207,134,4001},{141,28,4008},{189,118,488},{105,28,496}}, + {{95,30,4085},{86,31,4080},{95,30,501},{86,31,496}}, + {{103,30,4085},{94,31,4080},{103,30,501},{94,31,496}}, + {{111,30,4085},{102,31,4080},{111,30,501},{102,31,496}}, + {{0,104,3840},{0,104,3840},{0,18,448},{0,18,448}}, + {{4,39,3904},{4,39,3904},{0,4,384},{0,4,384}}, + {{0,56,3968},{0,56,3968},{0,84,448},{0,84,448}}, + {{6,110,3328},{6,110,3328},{0,20,448},{0,20,448}}, + {{41,200,3850},{41,200,3848},{1,4,480},{1,4,480}}, + {{49,200,3850},{49,200,3848},{1,8,416},{1,8,416}}, + {{57,200,3851},{57,200,3848},{1,38,488},{1,38,488}}, + {{65,200,3851},{65,200,3848},{1,120,488},{1,120,488}}, + {{74,200,3851},{74,200,3848},{2,72,488},{2,72,488}}, + 
{{68,6,3907},{82,200,3848},{2,24,488},{2,24,488}}, + {{77,6,3907},{90,200,3848},{26,120,488},{10,24,488}}, + {{97,63,3330},{98,200,3848},{34,120,488},{2,8,496}}, + {{106,63,3330},{107,200,3848},{43,120,488},{3,92,488}}, + {{114,63,3330},{115,200,3848},{51,120,488},{11,92,488}}, + {{122,63,3330},{123,200,3848},{59,120,488},{7,76,496}}, + {{130,63,3330},{131,200,3848},{67,120,488},{15,76,496}}, + {{139,63,3330},{140,200,3848},{76,120,488},{24,76,496}}, + {{147,63,3330},{148,200,3848},{84,120,488},{32,76,496}}, + {{155,63,3330},{156,200,3848},{92,120,488},{40,76,496}}, + {{164,63,3330},{164,200,3848},{100,120,488},{48,76,496}}, + {{173,63,3330},{173,200,3848},{109,120,488},{57,76,496}}, + {{184,6,3851},{181,200,3848},{117,120,488},{65,76,496}}, + {{192,6,3851},{133,28,3936},{125,120,488},{73,76,496}}, + {{189,200,3907},{141,28,3936},{133,120,488},{81,76,496}}, + {{198,200,3907},{138,108,4000},{142,120,488},{90,76,496}}, + {{206,200,3907},{146,108,4000},{150,120,488},{98,76,496}}, + {{214,200,3395},{154,108,4000},{158,120,488},{106,76,496}}, + {{190,136,4001},{162,108,4000},{166,120,488},{114,76,496}}, + {{123,30,4076},{87,15,4080},{123,30,492},{87,15,496}}, + {{117,110,4084},{80,31,4080},{117,110,500},{80,31,496}}, + {{125,110,4084},{88,31,4080},{125,110,500},{88,31,496}}, + {{133,110,4084},{96,31,4080},{133,110,500},{96,31,496}}, + {{9,56,3904},{9,56,3904},{0,67,448},{0,67,448}}, + {{1,8,3904},{1,8,3904},{1,84,448},{1,84,448}}, + {{1,124,3904},{1,124,3904},{0,39,384},{0,39,384}}, + {{9,124,3904},{9,124,3904},{1,4,448},{1,4,448}}, + {{6,76,3904},{6,76,3904},{0,70,448},{0,70,448}}, + {{62,6,3859},{62,6,3856},{2,38,480},{2,38,480}}, + {{70,6,3859},{70,6,3856},{5,43,416},{5,43,416}}, + {{78,6,3859},{78,6,3856},{2,11,416},{2,11,416}}, + {{87,6,3859},{87,6,3856},{0,171,488},{0,171,488}}, + {{67,8,3906},{95,6,3856},{8,171,488},{8,171,488}}, + {{75,8,3907},{103,6,3856},{5,123,488},{5,123,488}}, + {{83,8,3907},{111,6,3856},{2,75,488},{2,75,488}}, + 
{{92,8,3907},{120,6,3856},{0,27,488},{0,27,488}}, + {{100,8,3907},{128,6,3856},{8,27,488},{8,27,488}}, + {{120,106,3843},{136,6,3856},{99,6,387},{16,27,488}}, + {{128,106,3843},{144,6,3856},{107,6,387},{2,11,496}}, + {{137,106,3843},{153,6,3856},{117,6,387},{11,11,496}}, + {{145,106,3843},{161,6,3856},{125,6,387},{19,11,496}}, + {{163,8,3851},{137,43,3904},{133,6,387},{27,11,496}}, + {{171,8,3851},{145,43,3904},{141,6,387},{35,11,496}}, + {{180,8,3851},{110,11,4000},{150,6,387},{44,11,496}}, + {{188,8,3851},{118,11,4000},{158,6,387},{52,11,496}}, + {{172,72,3907},{126,11,4000},{166,6,387},{60,11,496}}, + {{174,6,3971},{134,11,4000},{174,6,387},{68,11,496}}, + {{183,6,3971},{143,11,4000},{183,6,387},{77,11,496}}, + {{191,6,3971},{151,11,4000},{191,6,387},{85,11,496}}, + {{199,6,3971},{159,11,4000},{199,6,387},{93,11,496}}, + {{92,12,4084},{69,15,4080},{92,12,500},{69,15,496}}, + {{101,12,4084},{78,15,4080},{101,12,500},{78,15,496}}, + {{110,12,4084},{86,15,4080},{110,12,500},{86,15,496}}, + {{118,12,4084},{79,31,4080},{118,12,500},{79,31,496}}, + {{126,12,4084},{87,31,4080},{126,12,500},{87,31,496}}, + {{71,8,3602},{71,8,3600},{2,21,384},{2,21,384}}, + {{79,8,3611},{79,8,3608},{0,69,448},{0,69,448}}, + {{87,8,3611},{87,8,3608},{0,23,384},{0,23,384}}, + {{95,8,3611},{95,8,3608},{1,5,448},{1,5,448}}, + {{104,8,3611},{104,8,3608},{0,88,448},{0,88,448}}, + {{112,8,3611},{112,8,3608},{0,72,448},{0,72,448}}, + {{120,8,3611},{121,8,3608},{36,21,458},{36,21,456}}, + {{133,47,3091},{129,8,3608},{44,21,458},{44,21,456}}, + {{142,47,3091},{138,8,3608},{53,21,459},{53,21,456}}, + {{98,12,3850},{98,12,3848},{61,21,459},{61,21,456}}, + {{106,12,3850},{106,12,3848},{10,92,480},{69,21,456}}, + {{114,12,3851},{114,12,3848},{18,92,480},{77,21,456}}, + {{123,12,3851},{123,12,3848},{3,44,488},{86,21,456}}, + {{95,12,3906},{95,12,3904},{11,44,488},{94,21,456}}, + {{103,12,3906},{103,12,3904},{19,44,488},{102,21,456}}, + {{111,12,3907},{111,12,3904},{27,44,489},{110,21,456}}, + 
{{120,12,3907},{120,12,3904},{36,44,489},{119,21,456}}, + {{128,12,3907},{128,12,3904},{44,44,489},{127,21,456}}, + {{136,12,3907},{136,12,3904},{52,44,489},{135,21,456}}, + {{144,12,3907},{144,12,3904},{60,44,490},{144,21,456}}, + {{153,12,3907},{153,12,3904},{69,44,490},{153,21,456}}, + {{161,12,3395},{149,188,3968},{77,44,490},{161,21,456}}, + {{169,12,3395},{199,21,3928},{85,44,490},{169,21,456}}, + {{113,95,4001},{202,69,3992},{125,8,483},{177,21,456}}, + {{122,95,4001},{201,21,3984},{134,8,483},{186,21,456}}, + {{143,8,4067},{209,21,3984},{142,8,483},{194,21,456}}, + {{151,8,4067},{47,15,4080},{151,8,483},{47,15,496}}, + {{159,8,4067},{55,15,4080},{159,8,483},{55,15,496}}, + {{168,8,4067},{64,15,4080},{168,8,483},{64,15,496}}, + {{160,40,4075},{72,15,4080},{160,40,491},{72,15,496}}, + {{168,40,4075},{80,15,4080},{168,40,491},{80,15,496}}, + {{144,8,4082},{88,15,4080},{144,8,498},{88,15,496}}, + }; + + static void convert_etc1s_to_etc2_eac_r11(eac_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector) + { + const uint32_t low_selector = pSelector->m_lo_selector; + const uint32_t high_selector = pSelector->m_hi_selector; + + const color32& base_color = pEndpoints->m_color5; + const uint32_t inten_table = pEndpoints->m_inten5; + + if (low_selector == high_selector) + { + uint32_t r; + decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r); + + // Constant alpha block + // Select table 13, use selector 4 (0), set multiplier to 1 and base color r + pDst_block->m_base = r; + pDst_block->m_table = 13; + pDst_block->m_multiplier = 1; + + // selectors are all 4's + static const uint8_t s_etc2_eac_r11_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 }; + memcpy(pDst_block->m_selectors, s_etc2_eac_r11_sel4, sizeof(s_etc2_eac_r11_sel4)); + + return; + } + + uint32_t selector_range_table = 0; + for (selector_range_table = 0; selector_range_table < NUM_ETC2_EAC_SELECTOR_RANGES; selector_range_table++) + if ((low_selector == 
s_etc2_eac_selector_ranges[selector_range_table].m_low) && (high_selector == s_etc2_eac_selector_ranges[selector_range_table].m_high)) + break; + if (selector_range_table >= NUM_ETC2_EAC_SELECTOR_RANGES) + selector_range_table = 0; + + const etc1_g_to_eac_conversion* pTable_entry = &s_etc1_g_to_etc2_r11[base_color.r + inten_table * 32][selector_range_table]; + + pDst_block->m_base = pTable_entry->m_base; + pDst_block->m_table = pTable_entry->m_table_mul >> 4; + pDst_block->m_multiplier = pTable_entry->m_table_mul & 15; + + uint64_t selector_bits = 0; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = pSelector->get_selector(x, y); + + uint32_t ds = (pTable_entry->m_trans >> (s * 3)) & 7; + + const uint32_t dst_ofs = 45 - (y + x * 4) * 3; + selector_bits |= (static_cast(ds) << dst_ofs); + } + } + + pDst_block->set_selector_bits(selector_bits); + } +#endif // BASISD_SUPPORT_ETC2_EAC_RG11 + +// ASTC + struct etc1_to_astc_solution + { + uint8_t m_lo; + uint8_t m_hi; + uint16_t m_err; + }; + +#if BASISD_SUPPORT_ASTC + static dxt_selector_range g_etc1_to_astc_selector_ranges[] = + { + { 0, 3 }, + + { 1, 3 }, + { 0, 2 }, + + { 1, 2 }, + + { 2, 3 }, + { 0, 1 }, + }; + + const uint32_t NUM_ETC1_TO_ASTC_SELECTOR_RANGES = sizeof(g_etc1_to_astc_selector_ranges) / sizeof(g_etc1_to_astc_selector_ranges[0]); + + static uint32_t g_etc1_to_astc_selector_range_index[4][4]; + + const uint32_t NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS = 10; + static const uint8_t g_etc1_to_astc_selector_mappings[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS][4] = + { + { 0, 0, 1, 1 }, + { 0, 0, 1, 2 }, + { 0, 0, 1, 3 }, + { 0, 0, 2, 3 }, + { 0, 1, 1, 1 }, + { 0, 1, 2, 2 }, + { 0, 1, 2, 3 }, + { 0, 2, 3, 3 }, + { 1, 2, 2, 2 }, + { 1, 2, 3, 3 }, + }; + + static const etc1_to_astc_solution g_etc1_to_astc[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] = { +#include "basisu_transcoder_tables_astc.inc" + }; + + // The best selector mapping to use 
given a base base+inten table and used selector range for converting grayscale data. + static uint8_t g_etc1_to_astc_best_grayscale_mapping[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES]; + +#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY + static const etc1_to_astc_solution g_etc1_to_astc_0_255[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] = { +#include "basisu_transcoder_tables_astc_0_255.inc" + }; + static uint8_t g_etc1_to_astc_best_grayscale_mapping_0_255[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES]; +#endif + + static uint32_t g_ise_to_unquant[48]; + +#if BASISD_WRITE_NEW_ASTC_TABLES + static void create_etc1_to_astc_conversion_table_0_47() + { + FILE* pFile = nullptr; + fopen_s(&pFile, "basisu_transcoder_tables_astc.inc", "w"); + + uint32_t n = 0; + + for (int inten = 0; inten < 8; inten++) + { + for (uint32_t g = 0; g < 32; g++) + { + color32 block_colors[4]; + decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten); + + for (uint32_t sr = 0; sr < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; sr++) + { + const uint32_t low_selector = g_etc1_to_astc_selector_ranges[sr].m_low; + const uint32_t high_selector = g_etc1_to_astc_selector_ranges[sr].m_high; + + uint32_t mapping_best_low[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + uint32_t mapping_best_high[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + uint64_t mapping_best_err[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + uint64_t highest_best_err = 0; + + for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++) + { + uint32_t best_lo = 0; + uint32_t best_hi = 0; + uint64_t best_err = UINT64_MAX; + + for (uint32_t hi = 0; hi <= 47; hi++) + { + for (uint32_t lo = 0; lo <= 47; lo++) + { + uint32_t colors[4]; + + for (uint32_t s = 0; s < 4; s++) + { + uint32_t s_scaled = s | (s << 2) | (s << 4); + if (s_scaled > 32) + s_scaled++; + + uint32_t c0 = g_ise_to_unquant[lo] | (g_ise_to_unquant[lo] << 8); + uint32_t c1 = g_ise_to_unquant[hi] | 
(g_ise_to_unquant[hi] << 8); + colors[s] = ((c0 * (64 - s_scaled) + c1 * s_scaled + 32) / 64) >> 8; + } + + uint64_t total_err = 0; + + for (uint32_t s = low_selector; s <= high_selector; s++) + { + int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]]; + + int err_scale = 1; + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // the low/high selectors which are clamping to either 0 or 255. + if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) + err_scale = 8; + + total_err += (err * err) * err_scale; + } + + if (total_err < best_err) + { + best_err = total_err; + best_lo = lo; + best_hi = hi; + } + } + } + + mapping_best_low[m] = best_lo; + mapping_best_high[m] = best_hi; + mapping_best_err[m] = best_err; + highest_best_err = basisu::maximum(highest_best_err, best_err); + + } // m + + for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++) + { + uint64_t err = mapping_best_err[m]; + + err = basisu::minimum(err, 0xFFFF); + + fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err); + + n++; + if ((n & 31) == 31) + fprintf(pFile, "\n"); + } // m + + } // sr + } // g + } // inten + + fclose(pFile); + } + + static void create_etc1_to_astc_conversion_table_0_255() + { + FILE* pFile = nullptr; + fopen_s(&pFile, "basisu_transcoder_tables_astc_0_255.inc", "w"); + + uint32_t n = 0; + + for (int inten = 0; inten < 8; inten++) + { + for (uint32_t g = 0; g < 32; g++) + { + color32 block_colors[4]; + decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten); + + for (uint32_t sr = 0; sr < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; sr++) + { + const uint32_t low_selector = g_etc1_to_astc_selector_ranges[sr].m_low; + const uint32_t high_selector = g_etc1_to_astc_selector_ranges[sr].m_high; + + uint32_t 
mapping_best_low[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + uint32_t mapping_best_high[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + uint64_t mapping_best_err[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + uint64_t highest_best_err = 0; + + for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++) + { + uint32_t best_lo = 0; + uint32_t best_hi = 0; + uint64_t best_err = UINT64_MAX; + + for (uint32_t hi = 0; hi <= 255; hi++) + { + for (uint32_t lo = 0; lo <= 255; lo++) + { + uint32_t colors[4]; + + for (uint32_t s = 0; s < 4; s++) + { + uint32_t s_scaled = s | (s << 2) | (s << 4); + if (s_scaled > 32) + s_scaled++; + + uint32_t c0 = lo | (lo << 8); + uint32_t c1 = hi | (hi << 8); + colors[s] = ((c0 * (64 - s_scaled) + c1 * s_scaled + 32) / 64) >> 8; + } + + uint64_t total_err = 0; + + for (uint32_t s = low_selector; s <= high_selector; s++) + { + int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]]; + + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // the low/high selectors which are clamping to either 0 or 255. 
+ int err_scale = 1; + if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) + err_scale = 8; + + total_err += (err * err) * err_scale; + } + + if (total_err < best_err) + { + best_err = total_err; + best_lo = lo; + best_hi = hi; + } + } + } + + mapping_best_low[m] = best_lo; + mapping_best_high[m] = best_hi; + mapping_best_err[m] = best_err; + highest_best_err = basisu::maximum(highest_best_err, best_err); + } // m + + for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++) + { + uint64_t err = mapping_best_err[m]; + + err = basisu::minimum(err, 0xFFFF); + + fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err); + + n++; + if ((n & 31) == 31) + fprintf(pFile, "\n"); + } // m + + } // sr + } // g + } // inten + + fclose(pFile); + } +#endif + +#endif + +#if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC + // Table encodes 5 trits to 8 output bits. 3^5 entries. + // Inverse of the trit bit manipulation process in https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding + static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39, + 43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154, + 131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202, + 208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 
123, 108, 109, 110, 224, + 225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159, + 191, 223, 124, 125, 126 }; + + // Extracts bits [low,high] + static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high) + { + return (bits >> low) & ((1 << (high - low + 1)) - 1); + } + + // Writes bits to output in an endian safe way + static inline void astc_set_bits(uint32_t* pOutput, int& bit_pos, uint32_t value, uint32_t total_bits) + { + uint8_t* pBytes = reinterpret_cast(pOutput); + + while (total_bits) + { + const uint32_t bits_to_write = basisu::minimum(total_bits, 8 - (bit_pos & 7)); + + pBytes[bit_pos >> 3] |= static_cast(value << (bit_pos & 7)); + + bit_pos += bits_to_write; + total_bits -= bits_to_write; + value >>= bits_to_write; + } + } + + // Encodes 5 values to output, usable for any range that uses trits and bits + static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n) + { + // First extract the trits and the bits from the 5 input values + int trits = 0, bits[5]; + const uint32_t bit_mask = (1 << n) - 1; + for (int i = 0; i < 5; i++) + { + static const int s_muls[5] = { 1, 3, 9, 27, 81 }; + + const int t = pValues[i] >> n; + + trits += t * s_muls[i]; + bits[i] = pValues[i] & bit_mask; + } + + // Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits. + // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding + + assert(trits < 243); + const int T = g_astc_trit_encode[trits]; + + // Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94. 
+ astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2); + + astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) | + (bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6); + } +#endif // #if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC + +#if BASISD_SUPPORT_ASTC + struct astc_block_params + { + // 2 groups of 5, but only a max of 8 are used (RRGGBBAA00) + uint8_t m_endpoints[10]; + uint8_t m_weights[32]; + }; + + // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2). + // We're always going to output blocks containing alpha, even if the input doesn't have alpha, for simplicity. + // Each block always has 4x4 weights, uses range 13 BISE encoding on the endpoints (0-47), and each weight ranges from 0-3. This encoding should be roughly equal in quality vs. BC1 for color. + // 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47. + // Note the input [0,47] endpoint values are not linear - they are encoded as outlined in the ASTC spec: + // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization + // 32 total weights, stored as 16 CA CA, each ranging from 0-3. 
+ static void astc_pack_block_cem_12_weight_range2(uint32_t *pOutput, const astc_block_params* pBlock) + { + uint8_t* pBytes = reinterpret_cast(pOutput); + + // Write constant block mode, color component selector, number of partitions, color endpoint mode + // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode + pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x01; pBytes[3] = 0x00; + pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0; + + pOutput[2] = 0; + pOutput[3] = 0; + + // Pack 8 endpoints (each ranging between [0,47]) using BISE starting at bit 17 + int bit_pos = 17; + astc_encode_trits(pOutput, pBlock->m_endpoints, bit_pos, 4); + astc_encode_trits(pOutput, pBlock->m_endpoints + 5, bit_pos, 4); + + // Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order. + + for (uint32_t i = 0; i < 32; i++) + { + static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 }; + const uint32_t ofs = 126 - (i * 2); + pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7)); + } + } + + // CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights + // This ASTC mode is basically block truncation coding (BTC) using 1-bit weights and 8-bit/component endpoints - very convenient. 
+ static void astc_pack_block_cem_12_weight_range0(uint32_t* pOutput, const astc_block_params* pBlock) + { + uint8_t* pBytes = reinterpret_cast(pOutput); + + // Write constant block mode, color component selector, number of partitions, color endpoint mode + // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode + pBytes[0] = 0x41; pBytes[1] = 0x84; pBytes[2] = 0x01; pBytes[3] = 0x00; + pOutput[1] = 0; + pBytes[8] = 0x00; pBytes[9] = 0x00; pBytes[10] = 0x00; pBytes[11] = 0xc0; + pOutput[3] = 0; + + // Pack 8 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17 + int bit_pos = 17; + for (uint32_t i = 0; i < 8; i++) + astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8); + + // Pack 32 1-bit weights, which are stored from the top down into the block in opposite bit order. + for (uint32_t i = 0; i < 32; i++) + { + const uint32_t ofs = 127 - i; + pBytes[ofs >> 3] |= (pBlock->m_weights[i] << (ofs & 7)); + } + } + +#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY + // Optional 8-bit endpoint packing functions. + + // CEM mode 4 (LDR Luminance+Alpha Direct), 8-bit endpoints, 2 bit weights + static void astc_pack_block_cem_4_weight_range2(uint32_t* pOutput, const astc_block_params* pBlock) + { + uint8_t* pBytes = reinterpret_cast(pOutput); + + // Write constant block mode, color component selector, number of partitions, color endpoint mode + // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode + pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x00; pBytes[3] = 0x00; + pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0; + + pOutput[2] = 0; + pOutput[3] = 0; + + // Pack 4 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17 + int bit_pos = 17; + for (uint32_t i = 0; i < 4; i++) + astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8); + + // Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order. 
+ for (uint32_t i = 0; i < 32; i++) + { + static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 }; + const uint32_t ofs = 126 - (i * 2); + pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7)); + } + } + + // CEM mode 8 (LDR RGB Direct), 8-bit endpoints, 2 bit weights + static void astc_pack_block_cem_8_weight_range2(uint32_t* pOutput, const astc_block_params* pBlock) + { + uint8_t* pBytes = reinterpret_cast(pOutput); + + // Write constant block mode, color component selector, number of partitions, color endpoint mode + // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode + pBytes[0] = 0x42; pBytes[1] = 0x00; pBytes[2] = 0x01; pBytes[3] = 0x00; + + pOutput[1] = 0; + pOutput[2] = 0; + pOutput[3] = 0; + + // Pack 6 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17 + int bit_pos = 17; + for (uint32_t i = 0; i < 6; i++) + astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8); + + // Pack 16 2-bit weights, which are stored from the top down into the block in opposite bit order. 
+ for (uint32_t i = 0; i < 16; i++) + { + static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 }; + const uint32_t ofs = 126 - (i * 2); + pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7)); + } + } +#endif + + // Optimal quantized [0,47] entry to use given [0,255] input + static uint8_t g_astc_single_color_encoding_0[256]; + + // Optimal quantized [0,47] low/high values given [0,255] input assuming a selector of 1 + static struct + { + uint8_t m_lo, m_hi; + } g_astc_single_color_encoding_1[256]; + + static void transcoder_init_astc() + { + for (uint32_t base_color = 0; base_color < 32; base_color++) + { + for (uint32_t inten_table = 0; inten_table < 8; inten_table++) + { + for (uint32_t range_index = 0; range_index < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; range_index++) + { + const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(inten_table * 32 + base_color) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + range_index * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + + uint32_t best_mapping = 0; + uint32_t best_err = UINT32_MAX; + for (uint32_t mapping_index = 0; mapping_index < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; mapping_index++) + { + if (pTable_g[mapping_index].m_err < best_err) + { + best_err = pTable_g[mapping_index].m_err; + best_mapping = mapping_index; + } + } + + g_etc1_to_astc_best_grayscale_mapping[base_color][inten_table][range_index] = static_cast(best_mapping); + } + } + } + +#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY + for (uint32_t base_color = 0; base_color < 32; base_color++) + { + for (uint32_t inten_table = 0; inten_table < 8; inten_table++) + { + for (uint32_t range_index = 0; range_index < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; range_index++) + { + const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + range_index * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + + uint32_t best_mapping = 0; + uint32_t 
best_err = UINT32_MAX; + for (uint32_t mapping_index = 0; mapping_index < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; mapping_index++) + { + if (pTable_g[mapping_index].m_err < best_err) + { + best_err = pTable_g[mapping_index].m_err; + best_mapping = mapping_index; + } + } + + g_etc1_to_astc_best_grayscale_mapping_0_255[base_color][inten_table][range_index] = static_cast(best_mapping); + } + } + } +#endif + + for (uint32_t i = 0; i < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; i++) + { + uint32_t l = g_etc1_to_astc_selector_ranges[i].m_low; + uint32_t h = g_etc1_to_astc_selector_ranges[i].m_high; + g_etc1_to_astc_selector_range_index[l][h] = i; + } + + // Endpoint dequantization, see: + // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization + for (uint32_t trit = 0; trit < 3; trit++) + { + for (uint32_t bit = 0; bit < 16; bit++) + { + const uint32_t A = (bit & 1) ? 511 : 0; + const uint32_t B = (bit >> 1) | ((bit >> 1) << 6); + const uint32_t C = 22; + const uint32_t D = trit; + + uint32_t unq = D * C + B; + unq = unq ^ A; + unq = (A & 0x80) | (unq >> 2); + + g_ise_to_unquant[bit | (trit << 4)] = unq; + } + } + + // Compute table used for optimal single color encoding. 
+ for (int i = 0; i < 256; i++) + { + int lowest_e = INT_MAX; + + for (int lo = 0; lo < 48; lo++) + { + for (int hi = 0; hi < 48; hi++) + { + const int lo_v = g_ise_to_unquant[lo]; + const int hi_v = g_ise_to_unquant[hi]; + + int l = lo_v | (lo_v << 8); + int h = hi_v | (hi_v << 8); + + int v = ((l * (64 - 21) + (h * 21) + 32) / 64) >> 8; + + int e = abs(v - i); + + if (e < lowest_e) + { + g_astc_single_color_encoding_1[i].m_hi = static_cast(hi); + g_astc_single_color_encoding_1[i].m_lo = static_cast(lo); + + lowest_e = e; + } + + } // hi + } // lo + } + + for (int i = 0; i < 256; i++) + { + int lowest_e = INT_MAX; + + for (int lo = 0; lo < 48; lo++) + { + const int lo_v = g_ise_to_unquant[lo]; + + int e = abs(lo_v - i); + + if (e < lowest_e) + { + g_astc_single_color_encoding_0[i] = static_cast(lo); + + lowest_e = e; + } + } // lo + } + } + + // Converts opaque or color+alpha ETC1S block to ASTC 4x4. + // This function tries to use the best ASTC mode given the block's actual contents. + static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector, + bool transcode_alpha, const endpoint *pEndpoint_codebook, const selector *pSelector_codebook) + { + astc_block_params blk; + + blk.m_endpoints[8] = 0; + blk.m_endpoints[9] = 0; + + int constant_alpha_val = 255; + int num_unique_alpha_selectors = 1; + + if (transcode_alpha) + { + const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]]; + + num_unique_alpha_selectors = alpha_selectors.m_num_unique_selectors; + + if (num_unique_alpha_selectors == 1) + { + const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]]; + + const color32& alpha_base_color = alpha_endpoint.m_color5; + const uint32_t alpha_inten_table = alpha_endpoint.m_inten5; + + int alpha_block_colors[4]; + decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table); + + constant_alpha_val = 
alpha_block_colors[alpha_selectors.m_lo_selector]; + } + } + + const color32& base_color = pEndpoints->m_color5; + const uint32_t inten_table = pEndpoints->m_inten5; + + const uint32_t low_selector = pSelector->m_lo_selector; + const uint32_t high_selector = pSelector->m_hi_selector; + + // Handle solid color or BTC blocks, which can always be encoded from ETC1S to ASTC losslessly. + if ((pSelector->m_num_unique_selectors == 1) && (num_unique_alpha_selectors == 1)) + { + // Both color and alpha are constant, write a solid color block and exit. + // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-void-extent-blocks + uint32_t r, g, b; + decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); + + uint32_t* pOutput = static_cast(pDst_block); + uint8_t* pBytes = reinterpret_cast(pDst_block); + + pBytes[0] = 0xfc; pBytes[1] = 0xfd; pBytes[2] = 0xff; pBytes[3] = 0xff; + + pOutput[1] = 0xffffffff; + pOutput[2] = 0; + pOutput[3] = 0; + + int bit_pos = 64; + astc_set_bits(pOutput, bit_pos, r | (r << 8), 16); + astc_set_bits(pOutput, bit_pos, g | (g << 8), 16); + astc_set_bits(pOutput, bit_pos, b | (b << 8), 16); + astc_set_bits(pOutput, bit_pos, constant_alpha_val | (constant_alpha_val << 8), 16); + + return; + } + else if ((pSelector->m_num_unique_selectors <= 2) && (num_unique_alpha_selectors <= 2)) + { + // Both color and alpha use <= 2 unique selectors each. + // Use block truncation coding, which is lossless with ASTC (8-bit endpoints, 1-bit weights). 
+ color32 block_colors[4]; + decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table); + + blk.m_endpoints[0] = block_colors[low_selector].r; + blk.m_endpoints[2] = block_colors[low_selector].g; + blk.m_endpoints[4] = block_colors[low_selector].b; + + blk.m_endpoints[1] = block_colors[high_selector].r; + blk.m_endpoints[3] = block_colors[high_selector].g; + blk.m_endpoints[5] = block_colors[high_selector].b; + + int s0 = blk.m_endpoints[0] + blk.m_endpoints[2] + blk.m_endpoints[4]; + int s1 = blk.m_endpoints[1] + blk.m_endpoints[3] + blk.m_endpoints[5]; + bool invert = false; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert = true; + } + + if (transcode_alpha) + { + const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]]; + const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]]; + + const color32& alpha_base_color = alpha_endpoint.m_color5; + const uint32_t alpha_inten_table = alpha_endpoint.m_inten5; + + const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector; + const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector; + + int alpha_block_colors[4]; + decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table); + + blk.m_endpoints[6] = static_cast(alpha_block_colors[alpha_low_selector]); + blk.m_endpoints[7] = static_cast(alpha_block_colors[alpha_high_selector]); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = alpha_selectors.get_selector(x, y); + s = (s == alpha_high_selector) ? 
1 : 0; + + blk.m_weights[(x + y * 4) * 2 + 1] = static_cast(s); + } // x + } // y + } + else + { + blk.m_endpoints[6] = 255; + blk.m_endpoints[7] = 255; + + for (uint32_t i = 0; i < 16; i++) + blk.m_weights[i * 2 + 1] = 0; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = pSelector->get_selector(x, y); + + s = (s == high_selector) ? 1 : 0; + + if (invert) + s = 1 - s; + + blk.m_weights[(x + y * 4) * 2] = static_cast(s); + } // x + } // y + + astc_pack_block_cem_12_weight_range0(reinterpret_cast(pDst_block), &blk); + + return; + } + + // Either alpha and/or color use > 2 unique selectors each, so we must do something more complex. + +#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY + // The optional higher quality modes use 8-bits endpoints vs. [0,47] endpoints. + + // If the block's base color is grayscale, all pixels are grayscale, so encode the block as Luminance+Alpha. + if ((base_color.r == base_color.g) && (base_color.r == base_color.b)) + { + if (transcode_alpha) + { + const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]]; + const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]]; + + const color32& alpha_base_color = alpha_endpoint.m_color5; + const uint32_t alpha_inten_table = alpha_endpoint.m_inten5; + + const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector; + const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector; + + if (num_unique_alpha_selectors <= 2) + { + // Simple alpha block with only 1 or 2 unique values, so use BTC. This is lossless. 
+ int alpha_block_colors[4]; + decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table); + + blk.m_endpoints[2] = static_cast(alpha_block_colors[alpha_low_selector]); + blk.m_endpoints[3] = static_cast(alpha_block_colors[alpha_high_selector]); + + for (uint32_t i = 0; i < 16; i++) + { + uint32_t s = alpha_selectors.get_selector(i & 3, i >> 2); + blk.m_weights[i * 2 + 1] = (s == alpha_high_selector) ? 3 : 0; + } + } + else + { + // Convert ETC1S alpha + const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector]; + + //[32][8][RANGES][MAPPING] + const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + + const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[alpha_base_color.g][alpha_inten_table][alpha_selector_range_table]; + + blk.m_endpoints[2] = pTable_g[best_mapping].m_lo; + blk.m_endpoints[3] = pTable_g[best_mapping].m_hi; + + const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0]; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = alpha_selectors.get_selector(x, y); + uint32_t as = pSelectors_xlat[s]; + + blk.m_weights[(x + y * 4) * 2 + 1] = static_cast(as); + } // x + } // y + } + } + else + { + // No alpha slice - set output alpha to all 255's + blk.m_endpoints[2] = 255; + blk.m_endpoints[3] = 255; + + for (uint32_t i = 0; i < 16; i++) + blk.m_weights[i * 2 + 1] = 0; + } + + if (pSelector->m_num_unique_selectors <= 2) + { + // Simple color block with only 1 or 2 unique values, so use BTC. This is lossless. 
+ int block_colors[4]; + decoder_etc_block::get_block_colors5_g(block_colors, base_color, inten_table); + + blk.m_endpoints[0] = static_cast(block_colors[low_selector]); + blk.m_endpoints[1] = static_cast(block_colors[high_selector]); + + for (uint32_t i = 0; i < 16; i++) + { + uint32_t s = pSelector->get_selector(i & 3, i >> 2); + blk.m_weights[i * 2] = (s == high_selector) ? 3 : 0; + } + } + else + { + // Convert ETC1S alpha + const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector]; + + //[32][8][RANGES][MAPPING] + const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + + const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[base_color.g][inten_table][selector_range_table]; + + blk.m_endpoints[0] = pTable_g[best_mapping].m_lo; + blk.m_endpoints[1] = pTable_g[best_mapping].m_hi; + + const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0]; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = pSelector->get_selector(x, y); + uint32_t as = pSelectors_xlat[s]; + + blk.m_weights[(x + y * 4) * 2] = static_cast(as); + } // x + } // y + } + + astc_pack_block_cem_4_weight_range2(reinterpret_cast(pDst_block), &blk); + return; + } + + // The block isn't grayscale and it uses > 2 unique selectors for opaque and/or alpha. + // Check for fully opaque blocks, if so use 8-bit endpoints for slightly higher opaque quality (higher than BC1, but lower than BC7 mode 6 opaque). 
+ if ((num_unique_alpha_selectors == 1) && (constant_alpha_val == 255)) + { + // Convert ETC1S color + const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector]; + + //[32][8][RANGES][MAPPING] + const etc1_to_astc_solution* pTable_r = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + const etc1_to_astc_solution* pTable_b = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + + uint32_t best_err = UINT_MAX; + uint32_t best_mapping = 0; + + assert(NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS == 10); +#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } } + DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4); + DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9); +#undef DO_ITER + + blk.m_endpoints[0] = pTable_r[best_mapping].m_lo; + blk.m_endpoints[1] = pTable_r[best_mapping].m_hi; + + blk.m_endpoints[2] = pTable_g[best_mapping].m_lo; + blk.m_endpoints[3] = pTable_g[best_mapping].m_hi; + + blk.m_endpoints[4] = pTable_b[best_mapping].m_lo; + blk.m_endpoints[5] = pTable_b[best_mapping].m_hi; + + int s0 = blk.m_endpoints[0] + blk.m_endpoints[2] + blk.m_endpoints[4]; + int s1 = blk.m_endpoints[1] + blk.m_endpoints[3] + blk.m_endpoints[5]; + bool invert = false; + + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + 
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert = true; + } + + const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0]; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = pSelector->get_selector(x, y); + uint32_t as = pSelectors_xlat[s]; + if (invert) + as = 3 - as; + + blk.m_weights[x + y * 4] = static_cast(as); + } // x + } // y + + // Now pack to ASTC + astc_pack_block_cem_8_weight_range2(reinterpret_cast(pDst_block), &blk); + return; + } +#endif //#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY + + // Nothing else worked, so fall back to CEM Mode 12 (LDR RGBA Direct), [0,47] endpoints, weight range 2 (2-bit weights), dual planes. + // This mode can handle everything, but at slightly less quality than BC1. + if (transcode_alpha) + { + const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]]; + const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]]; + + const color32& alpha_base_color = alpha_endpoint.m_color5; + const uint32_t alpha_inten_table = alpha_endpoint.m_inten5; + + const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector; + const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector; + + if (alpha_low_selector == alpha_high_selector) + { + // Solid alpha block - use precomputed tables. + int alpha_block_colors[4]; + decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table); + + const uint32_t g = alpha_block_colors[alpha_low_selector]; + + blk.m_endpoints[6] = g_astc_single_color_encoding_1[g].m_lo; + blk.m_endpoints[7] = g_astc_single_color_encoding_1[g].m_hi; + + for (uint32_t i = 0; i < 16; i++) + blk.m_weights[i * 2 + 1] = 1; + } + else if ((alpha_inten_table >= 7) && (alpha_selectors.m_num_unique_selectors == 2) && (alpha_low_selector == 0) && (alpha_high_selector == 3)) + { + // Handle outlier case where only the two outer colors are used with inten table 7. 
+ color32 alpha_block_colors[4]; + + decoder_etc_block::get_block_colors5(alpha_block_colors, alpha_base_color, alpha_inten_table); + + const uint32_t g0 = alpha_block_colors[0].g; + const uint32_t g1 = alpha_block_colors[3].g; + + blk.m_endpoints[6] = g_astc_single_color_encoding_0[g0]; + blk.m_endpoints[7] = g_astc_single_color_encoding_0[g1]; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = alpha_selectors.get_selector(x, y); + uint32_t as = (s == alpha_high_selector) ? 3 : 0; + + blk.m_weights[(x + y * 4) * 2 + 1] = static_cast(as); + } // x + } // y + } + else + { + // Convert ETC1S alpha + const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector]; + + //[32][8][RANGES][MAPPING] + const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + + const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping[alpha_base_color.g][alpha_inten_table][alpha_selector_range_table]; + + blk.m_endpoints[6] = pTable_g[best_mapping].m_lo; + blk.m_endpoints[7] = pTable_g[best_mapping].m_hi; + + const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0]; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = alpha_selectors.get_selector(x, y); + uint32_t as = pSelectors_xlat[s]; + + blk.m_weights[(x + y * 4) * 2 + 1] = static_cast(as); + } // x + } // y + } + } + else + { + // No alpha slice - set output alpha to all 255's + // 1 is 255 when dequantized + blk.m_endpoints[6] = 1; + blk.m_endpoints[7] = 1; + + for (uint32_t i = 0; i < 16; i++) + blk.m_weights[i * 2 + 1] = 0; + } + + if (low_selector == high_selector) + { + // Solid color block - use precomputed tables of optimal endpoints assuming selector weights are all 
1. + color32 block_colors[4]; + + decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table); + + const uint32_t r = block_colors[low_selector].r; + const uint32_t g = block_colors[low_selector].g; + const uint32_t b = block_colors[low_selector].b; + + blk.m_endpoints[0] = g_astc_single_color_encoding_1[r].m_lo; + blk.m_endpoints[1] = g_astc_single_color_encoding_1[r].m_hi; + + blk.m_endpoints[2] = g_astc_single_color_encoding_1[g].m_lo; + blk.m_endpoints[3] = g_astc_single_color_encoding_1[g].m_hi; + + blk.m_endpoints[4] = g_astc_single_color_encoding_1[b].m_lo; + blk.m_endpoints[5] = g_astc_single_color_encoding_1[b].m_hi; + + int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]]; + int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]]; + bool invert = false; + + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert = true; + } + + for (uint32_t i = 0; i < 16; i++) + blk.m_weights[i * 2] = invert ? 2 : 1; + } + else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3)) + { + // Handle outlier case where only the two outer colors are used with inten table 7. 
+ color32 block_colors[4]; + + decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table); + + const uint32_t r0 = block_colors[0].r; + const uint32_t g0 = block_colors[0].g; + const uint32_t b0 = block_colors[0].b; + + const uint32_t r1 = block_colors[3].r; + const uint32_t g1 = block_colors[3].g; + const uint32_t b1 = block_colors[3].b; + + blk.m_endpoints[0] = g_astc_single_color_encoding_0[r0]; + blk.m_endpoints[1] = g_astc_single_color_encoding_0[r1]; + + blk.m_endpoints[2] = g_astc_single_color_encoding_0[g0]; + blk.m_endpoints[3] = g_astc_single_color_encoding_0[g1]; + + blk.m_endpoints[4] = g_astc_single_color_encoding_0[b0]; + blk.m_endpoints[5] = g_astc_single_color_encoding_0[b1]; + + int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]]; + int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]]; + bool invert = false; + + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = pSelector->get_selector(x, y); + uint32_t as = (s == low_selector) ? 
0 : 3; + + if (invert) + as = 3 - as; + + blk.m_weights[(x + y * 4) * 2] = static_cast(as); + } // x + } // y + } + else + { + // Convert ETC1S color + const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector]; + + //[32][8][RANGES][MAPPING] + const etc1_to_astc_solution* pTable_r = &g_etc1_to_astc[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + const etc1_to_astc_solution* pTable_b = &g_etc1_to_astc[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; + + uint32_t best_err = UINT_MAX; + uint32_t best_mapping = 0; + + assert(NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS == 10); +#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } } + DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4); + DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9); +#undef DO_ITER + + blk.m_endpoints[0] = pTable_r[best_mapping].m_lo; + blk.m_endpoints[1] = pTable_r[best_mapping].m_hi; + + blk.m_endpoints[2] = pTable_g[best_mapping].m_lo; + blk.m_endpoints[3] = pTable_g[best_mapping].m_hi; + + blk.m_endpoints[4] = pTable_b[best_mapping].m_lo; + blk.m_endpoints[5] = pTable_b[best_mapping].m_hi; + + int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]]; + int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]]; + bool invert = false; 
+ + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert = true; + } + + const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0]; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = pSelector->get_selector(x, y); + uint32_t as = pSelectors_xlat[s]; + if (invert) + as = 3 - as; + + blk.m_weights[(x + y * 4) * 2] = static_cast(as); + } // x + } // y + } + + // Now pack to ASTC + astc_pack_block_cem_12_weight_range2(reinterpret_cast(pDst_block), &blk); + } +#endif + +#if BASISD_SUPPORT_ATC + // ATC and PVRTC2 both use these tables. + struct etc1s_to_atc_solution + { + uint8_t m_lo; + uint8_t m_hi; + uint16_t m_err; + }; + + static dxt_selector_range g_etc1s_to_atc_selector_ranges[] = + { + { 0, 3 }, + { 1, 3 }, + { 0, 2 }, + { 1, 2 }, + { 2, 3 }, + { 0, 1 }, + }; + + const uint32_t NUM_ETC1S_TO_ATC_SELECTOR_RANGES = sizeof(g_etc1s_to_atc_selector_ranges) / sizeof(g_etc1s_to_atc_selector_ranges[0]); + + static uint32_t g_etc1s_to_atc_selector_range_index[4][4]; + + const uint32_t NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS = 10; + static const uint8_t g_etc1s_to_atc_selector_mappings[NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS][4] = + { + { 0, 0, 1, 1 }, + { 0, 0, 1, 2 }, + { 0, 0, 1, 3 }, + { 0, 0, 2, 3 }, + { 0, 1, 1, 1 }, + { 0, 1, 2, 2 }, + { 0, 1, 2, 3 }, //6 - identity + { 0, 2, 3, 3 }, + { 1, 2, 2, 2 }, + { 1, 2, 3, 3 }, + }; + const uint32_t ATC_IDENTITY_SELECTOR_MAPPING_INDEX = 6; + +#if BASISD_SUPPORT_PVRTC2 + static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_45[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = { +#include "basisu_transcoder_tables_pvrtc2_45.inc" + }; + +#if 0 + static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_alpha_33[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = { +#include 
"basisu_transcoder_tables_pvrtc2_alpha_33.inc" + }; +#endif + +#endif + + static const etc1s_to_atc_solution g_etc1s_to_atc_55[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = { +#include "basisu_transcoder_tables_atc_55.inc" + }; + + static const etc1s_to_atc_solution g_etc1s_to_atc_56[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = { +#include "basisu_transcoder_tables_atc_56.inc" + }; + + struct atc_match_entry + { + uint8_t m_lo; + uint8_t m_hi; + }; + static atc_match_entry g_pvrtc2_match45_equals_1[256], g_atc_match55_equals_1[256], g_atc_match56_equals_1[256]; // selector 1 + static atc_match_entry g_pvrtc2_match4[256], g_atc_match5[256], g_atc_match6[256]; + + static void prepare_atc_single_color_table(atc_match_entry* pTable, int size0, int size1, int sel) + { + for (int i = 0; i < 256; i++) + { + int lowest_e = 256; + for (int lo = 0; lo < size0; lo++) + { + int lo_e = lo; + if (size0 == 16) + { + lo_e = (lo_e << 1) | (lo_e >> 3); + lo_e = (lo_e << 3) | (lo_e >> 2); + } + else if (size0 == 32) + lo_e = (lo_e << 3) | (lo_e >> 2); + else + lo_e = (lo_e << 2) | (lo_e >> 4); + + for (int hi = 0; hi < size1; hi++) + { + int hi_e = hi; + if (size1 == 16) + { + // This is only for PVRTC2 - expand to 5 then 8 + hi_e = (hi_e << 1) | (hi_e >> 3); + hi_e = (hi_e << 3) | (hi_e >> 2); + } + else if (size1 == 32) + hi_e = (hi_e << 3) | (hi_e >> 2); + else + hi_e = (hi_e << 2) | (hi_e >> 4); + + int e; + + if (sel == 1) + { + // Selector 1 + e = abs(((lo_e * 5 + hi_e * 3) / 8) - i); + } + else + { + assert(sel == 3); + + // Selector 3 + e = abs(hi_e - i); + } + + if (e < lowest_e) + { + pTable[i].m_lo = static_cast(lo); + pTable[i].m_hi = static_cast(hi); + + lowest_e = e; + } + + } // hi + } // lo + } // i + } + + static void transcoder_init_atc() + { + prepare_atc_single_color_table(g_pvrtc2_match45_equals_1, 16, 32, 1); + prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1); + 
prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1); + + prepare_atc_single_color_table(g_pvrtc2_match4, 1, 16, 3); + prepare_atc_single_color_table(g_atc_match5, 1, 32, 3); + prepare_atc_single_color_table(g_atc_match6, 1, 64, 3); + + for (uint32_t i = 0; i < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; i++) + { + uint32_t l = g_etc1s_to_atc_selector_ranges[i].m_low; + uint32_t h = g_etc1s_to_atc_selector_ranges[i].m_high; + g_etc1s_to_atc_selector_range_index[l][h] = i; + } + } + + struct atc_block + { + uint8_t m_lo[2]; + uint8_t m_hi[2]; + uint8_t m_sels[4]; + + void set_low_color(uint32_t r, uint32_t g, uint32_t b) + { + assert((r < 32) && (g < 32) && (b < 32)); + uint32_t x = (r << 10) | (g << 5) | b; + m_lo[0] = x & 0xFF; + m_lo[1] = (x >> 8) & 0xFF; + } + + void set_high_color(uint32_t r, uint32_t g, uint32_t b) + { + assert((r < 32) && (g < 64) && (b < 32)); + uint32_t x = (r << 11) | (g << 5) | b; + m_hi[0] = x & 0xFF; + m_hi[1] = (x >> 8) & 0xFF; + } + }; + + static void convert_etc1s_to_atc(void* pDst, const endpoint* pEndpoints, const selector* pSelector) + { + atc_block* pBlock = static_cast(pDst); + + const uint32_t low_selector = pSelector->m_lo_selector; + const uint32_t high_selector = pSelector->m_hi_selector; + + const color32& base_color = pEndpoints->m_color5; + const uint32_t inten_table = pEndpoints->m_inten5; + + if (low_selector == high_selector) + { + uint32_t r, g, b; + decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); + + pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match56_equals_1[g].m_lo, g_atc_match55_equals_1[b].m_lo); + pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match56_equals_1[g].m_hi, g_atc_match55_equals_1[b].m_hi); + + pBlock->m_sels[0] = 0x55; + pBlock->m_sels[1] = 0x55; + pBlock->m_sels[2] = 0x55; + pBlock->m_sels[3] = 0x55; + + return; + } + else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && 
(pSelector->m_hi_selector == 3)) + { + color32 block_colors[4]; + decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table); + + const uint32_t r0 = block_colors[0].r; + const uint32_t g0 = block_colors[0].g; + const uint32_t b0 = block_colors[0].b; + + const uint32_t r1 = block_colors[3].r; + const uint32_t g1 = block_colors[3].g; + const uint32_t b1 = block_colors[3].b; + + pBlock->set_low_color(g_atc_match5[r0].m_hi, g_atc_match5[g0].m_hi, g_atc_match5[b0].m_hi); + pBlock->set_high_color(g_atc_match5[r1].m_hi, g_atc_match6[g1].m_hi, g_atc_match5[b1].m_hi); + + pBlock->m_sels[0] = pSelector->m_selectors[0]; + pBlock->m_sels[1] = pSelector->m_selectors[1]; + pBlock->m_sels[2] = pSelector->m_selectors[2]; + pBlock->m_sels[3] = pSelector->m_selectors[3]; + + return; + } + + const uint32_t selector_range_table = g_etc1s_to_atc_selector_range_index[low_selector][high_selector]; + + //[32][8][RANGES][MAPPING] + const etc1s_to_atc_solution* pTable_r = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.r) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS]; + const etc1s_to_atc_solution* pTable_g = &g_etc1s_to_atc_56[(inten_table * 32 + base_color.g) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS]; + const etc1s_to_atc_solution* pTable_b = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.b) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS]; + + uint32_t best_err = UINT_MAX; + uint32_t best_mapping = 0; + + assert(NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS == 10); +#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } } + DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4); + DO_ITER(5); 
DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9); +#undef DO_ITER + + pBlock->set_low_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo); + pBlock->set_high_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi); + + if (ATC_IDENTITY_SELECTOR_MAPPING_INDEX == best_mapping) + { + pBlock->m_sels[0] = pSelector->m_selectors[0]; + pBlock->m_sels[1] = pSelector->m_selectors[1]; + pBlock->m_sels[2] = pSelector->m_selectors[2]; + pBlock->m_sels[3] = pSelector->m_selectors[3]; + } + else + { + const uint8_t* pSelectors_xlat = &g_etc1s_to_atc_selector_mappings[best_mapping][0]; + + const uint32_t sel_bits0 = pSelector->m_selectors[0]; + const uint32_t sel_bits1 = pSelector->m_selectors[1]; + const uint32_t sel_bits2 = pSelector->m_selectors[2]; + const uint32_t sel_bits3 = pSelector->m_selectors[3]; + + uint32_t atc_sels0 = 0, atc_sels1 = 0, atc_sels2 = 0, atc_sels3 = 0; + +#define DO_X(x) { \ + const uint32_t x_shift = (x) * 2; \ + atc_sels0 |= (pSelectors_xlat[(sel_bits0 >> x_shift) & 3] << x_shift); \ + atc_sels1 |= (pSelectors_xlat[(sel_bits1 >> x_shift) & 3] << x_shift); \ + atc_sels2 |= (pSelectors_xlat[(sel_bits2 >> x_shift) & 3] << x_shift); \ + atc_sels3 |= (pSelectors_xlat[(sel_bits3 >> x_shift) & 3] << x_shift); } + + DO_X(0); + DO_X(1); + DO_X(2); + DO_X(3); +#undef DO_X + + pBlock->m_sels[0] = (uint8_t)atc_sels0; + pBlock->m_sels[1] = (uint8_t)atc_sels1; + pBlock->m_sels[2] = (uint8_t)atc_sels2; + pBlock->m_sels[3] = (uint8_t)atc_sels3; + } + } + +#if BASISD_WRITE_NEW_ATC_TABLES + static void create_etc1s_to_atc_conversion_tables() + { + // ATC 55 + FILE* pFile = nullptr; + fopen_s(&pFile, "basisu_transcoder_tables_atc_55.inc", "w"); + + uint32_t n = 0; + + for (int inten = 0; inten < 8; inten++) + { + for (uint32_t g = 0; g < 32; g++) + { + color32 block_colors[4]; + decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), 
false), inten); + + for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++) + { + const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low; + const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high; + + for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++) + { + uint32_t best_lo = 0; + uint32_t best_hi = 0; + uint64_t best_err = UINT64_MAX; + + for (uint32_t hi = 0; hi <= 31; hi++) + { + for (uint32_t lo = 0; lo <= 31; lo++) + { + uint32_t colors[4]; + + colors[0] = (lo << 3) | (lo >> 2); + colors[3] = (hi << 3) | (hi >> 2); + + colors[1] = (colors[0] * 5 + colors[3] * 3) / 8; + colors[2] = (colors[3] * 5 + colors[0] * 3) / 8; + + uint64_t total_err = 0; + + for (uint32_t s = low_selector; s <= high_selector; s++) + { + int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; + + int err_scale = 1; + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // the low/high selectors which are clamping to either 0 or 255. 
+ if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) + err_scale = 5; + + total_err += (err * err) * err_scale; + } + + if (total_err < best_err) + { + best_err = total_err; + best_lo = lo; + best_hi = hi; + } + } + } + + //assert(best_err <= 0xFFFF); + best_err = basisu::minimum(best_err, 0xFFFF); + + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err); + n++; + if ((n & 31) == 31) + fprintf(pFile, "\n"); + } // m + } // sr + } // g + } // inten + + fclose(pFile); + pFile = nullptr; + + // ATC 56 + fopen_s(&pFile, "basisu_transcoder_tables_atc_56.inc", "w"); + + n = 0; + + for (int inten = 0; inten < 8; inten++) + { + for (uint32_t g = 0; g < 32; g++) + { + color32 block_colors[4]; + decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten); + + for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++) + { + const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low; + const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high; + + for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++) + { + uint32_t best_lo = 0; + uint32_t best_hi = 0; + uint64_t best_err = UINT64_MAX; + + for (uint32_t hi = 0; hi <= 63; hi++) + { + for (uint32_t lo = 0; lo <= 31; lo++) + { + uint32_t colors[4]; + + colors[0] = (lo << 3) | (lo >> 2); + colors[3] = (hi << 2) | (hi >> 4); + + colors[1] = (colors[0] * 5 + colors[3] * 3) / 8; + colors[2] = (colors[3] * 5 + colors[0] * 3) / 8; + + uint64_t total_err = 0; + + for (uint32_t s = low_selector; s <= high_selector; s++) + { + int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; + + int err_scale = 1; + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // the low/high selectors which are clamping to either 0 or 255. 
+ if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) + err_scale = 5; + + total_err += (err * err) * err_scale; + } + + if (total_err < best_err) + { + best_err = total_err; + best_lo = lo; + best_hi = hi; + } + } + } + + //assert(best_err <= 0xFFFF); + best_err = basisu::minimum(best_err, 0xFFFF); + + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err); + n++; + if ((n & 31) == 31) + fprintf(pFile, "\n"); + } // m + } // sr + } // g + } // inten + + fclose(pFile); + + // PVRTC2 45 + fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_45.inc", "w"); + + n = 0; + + for (int inten = 0; inten < 8; inten++) + { + for (uint32_t g = 0; g < 32; g++) + { + color32 block_colors[4]; + decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten); + + for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++) + { + const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low; + const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high; + + for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++) + { + uint32_t best_lo = 0; + uint32_t best_hi = 0; + uint64_t best_err = UINT64_MAX; + + for (uint32_t hi = 0; hi <= 31; hi++) + { + for (uint32_t lo = 0; lo <= 15; lo++) + { + uint32_t colors[4]; + + colors[0] = (lo << 1) | (lo >> 3); + colors[0] = (colors[0] << 3) | (colors[0] >> 2); + + colors[3] = (hi << 3) | (hi >> 2); + + colors[1] = (colors[0] * 5 + colors[3] * 3) / 8; + colors[2] = (colors[3] * 5 + colors[0] * 3) / 8; + + uint64_t total_err = 0; + + for (uint32_t s = low_selector; s <= high_selector; s++) + { + int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; + + int err_scale = 1; + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. 
In this extreme case, it's likely the encoder is trying to strongly favor + // the low/high selectors which are clamping to either 0 or 255. + if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) + err_scale = 5; + + total_err += (err * err) * err_scale; + } + + if (total_err < best_err) + { + best_err = total_err; + best_lo = lo; + best_hi = hi; + } + } + } + + //assert(best_err <= 0xFFFF); + best_err = basisu::minimum(best_err, 0xFFFF); + + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err); + n++; + if ((n & 31) == 31) + fprintf(pFile, "\n"); + } // m + } // sr + } // g + } // inten + + fclose(pFile); + +#if 0 + // PVRTC2 34 + fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_34.inc", "w"); + + n = 0; + + for (int inten = 0; inten < 8; inten++) + { + for (uint32_t g = 0; g < 32; g++) + { + color32 block_colors[4]; + decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten); + + for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++) + { + const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low; + const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high; + + for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++) + { + uint32_t best_lo = 0; + uint32_t best_hi = 0; + uint64_t best_err = UINT64_MAX; + + for (uint32_t hi = 0; hi <= 15; hi++) + { + for (uint32_t lo = 0; lo <= 7; lo++) + { + uint32_t colors[4]; + + colors[0] = (lo << 2) | (lo >> 1); + colors[0] = (colors[0] << 3) | (colors[0] >> 2); + + colors[3] = (hi << 1) | (hi >> 3); + colors[3] = (colors[3] << 3) | (colors[3] >> 2); + + colors[1] = (colors[0] * 5 + colors[3] * 3) / 8; + colors[2] = (colors[3] * 5 + colors[0] * 3) / 8; + + uint64_t total_err = 0; + + for (uint32_t s = low_selector; s <= high_selector; s++) + { + int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; + + int err_scale = 1; + // Special 
case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // the low/high selectors which are clamping to either 0 or 255. + if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) + err_scale = 5; + + total_err += (err * err) * err_scale; + } + + if (total_err < best_err) + { + best_err = total_err; + best_lo = lo; + best_hi = hi; + } + } + } + + //assert(best_err <= 0xFFFF); + best_err = basisu::minimum(best_err, 0xFFFF); + + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err); + n++; + if ((n & 31) == 31) + fprintf(pFile, "\n"); + } // m + } // sr + } // g + } // inten + + fclose(pFile); +#endif +#if 0 + // PVRTC2 44 + fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_44.inc", "w"); + + n = 0; + + for (int inten = 0; inten < 8; inten++) + { + for (uint32_t g = 0; g < 32; g++) + { + color32 block_colors[4]; + decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten); + + for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++) + { + const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low; + const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high; + + for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++) + { + uint32_t best_lo = 0; + uint32_t best_hi = 0; + uint64_t best_err = UINT64_MAX; + + for (uint32_t hi = 0; hi <= 15; hi++) + { + for (uint32_t lo = 0; lo <= 15; lo++) + { + uint32_t colors[4]; + + colors[0] = (lo << 1) | (lo >> 3); + colors[0] = (colors[0] << 3) | (colors[0] >> 2); + + colors[3] = (hi << 1) | (hi >> 3); + colors[3] = (colors[3] << 3) | (colors[3] >> 2); + + colors[1] = (colors[0] * 5 + colors[3] * 3) / 8; + colors[2] = (colors[3] * 5 + colors[0] * 3) / 8; + + uint64_t total_err = 0; + + for (uint32_t s = low_selector; s <= high_selector; s++) + { + int err = block_colors[s].g 
- colors[g_etc1s_to_atc_selector_mappings[m][s]]; + + int err_scale = 1; + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // the low/high selectors which are clamping to either 0 or 255. + if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) + err_scale = 5; + + total_err += (err * err) * err_scale; + } + + if (total_err < best_err) + { + best_err = total_err; + best_lo = lo; + best_hi = hi; + } + } + } + + //assert(best_err <= 0xFFFF); + best_err = basisu::minimum(best_err, 0xFFFF); + + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err); + n++; + if ((n & 31) == 31) + fprintf(pFile, "\n"); + } // m + } // sr + } // g + } // inten + + fclose(pFile); +#endif + + // PVRTC2 alpha 33 + fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_alpha_33.inc", "w"); + + n = 0; + + for (int inten = 0; inten < 8; inten++) + { + for (uint32_t g = 0; g < 32; g++) + { + color32 block_colors[4]; + decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten); + + for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++) + { + const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low; + const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high; + + for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++) + { + uint32_t best_lo = 0; + uint32_t best_hi = 0; + uint64_t best_err = UINT64_MAX; + + for (uint32_t hi = 0; hi <= 7; hi++) + { + for (uint32_t lo = 0; lo <= 7; lo++) + { + uint32_t colors[4]; + + colors[0] = (lo << 1); + colors[0] = (colors[0] << 4) | colors[0]; + + colors[3] = (hi << 1) | 1; + colors[3] = (colors[3] << 4) | colors[3]; + + colors[1] = (colors[0] * 5 + colors[3] * 3) / 8; + colors[2] = (colors[3] * 5 + colors[0] * 3) / 8; + + uint64_t total_err = 0; + + for (uint32_t s = low_selector; s 
<= high_selector; s++) + { + int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; + + int err_scale = 1; + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // the low/high selectors which are clamping to either 0 or 255. + if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) + err_scale = 5; + + total_err += (err * err) * err_scale; + } + + if (total_err < best_err) + { + best_err = total_err; + best_lo = lo; + best_hi = hi; + } + } + } + + //assert(best_err <= 0xFFFF); + best_err = basisu::minimum(best_err, 0xFFFF); + + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err); + n++; + if ((n & 31) == 31) + fprintf(pFile, "\n"); + } // m + } // sr + } // g + } // inten + + fclose(pFile); + } +#endif // BASISD_WRITE_NEW_ATC_TABLES + +#endif // BASISD_SUPPORT_ATC + +#if BASISD_SUPPORT_PVRTC2 + struct pvrtc2_block + { + uint8_t m_modulation[4]; + + union + { + union + { + // Opaque mode: RGB colora=554 and colorb=555 + struct + { + uint32_t m_mod_flag : 1; + uint32_t m_blue_a : 4; + uint32_t m_green_a : 5; + uint32_t m_red_a : 5; + uint32_t m_hard_flag : 1; + uint32_t m_blue_b : 5; + uint32_t m_green_b : 5; + uint32_t m_red_b : 5; + uint32_t m_opaque_flag : 1; + + } m_opaque_color_data; + + // Transparent mode: RGBA colora=4433 and colorb=4443 + struct + { + uint32_t m_mod_flag : 1; + uint32_t m_blue_a : 3; + uint32_t m_green_a : 4; + uint32_t m_red_a : 4; + uint32_t m_alpha_a : 3; + uint32_t m_hard_flag : 1; + uint32_t m_blue_b : 4; + uint32_t m_green_b : 4; + uint32_t m_red_b : 4; + uint32_t m_alpha_b : 3; + uint32_t m_opaque_flag : 1; + + } m_trans_color_data; + }; + + uint32_t m_color_data_bits; + }; + + // 554 + void set_low_color(uint32_t r, uint32_t g, uint32_t b) + { + assert((r < 32) && (g < 32) && (b < 16)); + m_opaque_color_data.m_red_a = r; + 
m_opaque_color_data.m_green_a = g; + m_opaque_color_data.m_blue_a = b; + } + + // 555 + void set_high_color(uint32_t r, uint32_t g, uint32_t b) + { + assert((r < 32) && (g < 32) && (b < 32)); + m_opaque_color_data.m_red_b = r; + m_opaque_color_data.m_green_b = g; + m_opaque_color_data.m_blue_b = b; + } + + // 4433 + void set_trans_low_color(uint32_t r, uint32_t g, uint32_t b, uint32_t a) + { + assert((r < 16) && (g < 16) && (b < 8) && (a < 8)); + m_trans_color_data.m_red_a = r; + m_trans_color_data.m_green_a = g; + m_trans_color_data.m_blue_a = b; + m_trans_color_data.m_alpha_a = a; + } + + // 4443 + void set_trans_high_color(uint32_t r, uint32_t g, uint32_t b, uint32_t a) + { + assert((r < 16) && (g < 16) && (b < 16) && (a < 8)); + m_trans_color_data.m_red_b = r; + m_trans_color_data.m_green_b = g; + m_trans_color_data.m_blue_b = b; + m_trans_color_data.m_alpha_b = a; + } + }; + + static struct + { + uint8_t m_l, m_h; + } g_pvrtc2_trans_match34[256]; + + static struct + { + uint8_t m_l, m_h; + } g_pvrtc2_trans_match44[256]; + + static struct + { + uint8_t m_l, m_h; + } g_pvrtc2_alpha_match33[256]; + + static struct + { + uint8_t m_l, m_h; + } g_pvrtc2_alpha_match33_0[256]; + + static struct + { + uint8_t m_l, m_h; + } g_pvrtc2_alpha_match33_3[256]; + + // PVRTC2 can be forced to look like a slightly weaker variant of ATC/BC1, so that's what we do here for simplicity. 
+ static void convert_etc1s_to_pvrtc2_rgb(void* pDst, const endpoint* pEndpoints, const selector* pSelector) + { + pvrtc2_block* pBlock = static_cast(pDst); + + pBlock->m_opaque_color_data.m_hard_flag = 1; + pBlock->m_opaque_color_data.m_mod_flag = 0; + pBlock->m_opaque_color_data.m_opaque_flag = 1; + + const uint32_t low_selector = pSelector->m_lo_selector; + const uint32_t high_selector = pSelector->m_hi_selector; + + const color32& base_color = pEndpoints->m_color5; + const uint32_t inten_table = pEndpoints->m_inten5; + + if (low_selector == high_selector) + { + uint32_t r, g, b; + decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); + + pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match55_equals_1[g].m_lo, g_pvrtc2_match45_equals_1[b].m_lo); + pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match55_equals_1[g].m_hi, g_pvrtc2_match45_equals_1[b].m_hi); + + pBlock->m_modulation[0] = 0x55; + pBlock->m_modulation[1] = 0x55; + pBlock->m_modulation[2] = 0x55; + pBlock->m_modulation[3] = 0x55; + + return; + } + else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3)) + { + color32 block_colors[4]; + decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table); + + const uint32_t r0 = block_colors[0].r; + const uint32_t g0 = block_colors[0].g; + const uint32_t b0 = block_colors[0].b; + + const uint32_t r1 = block_colors[3].r; + const uint32_t g1 = block_colors[3].g; + const uint32_t b1 = block_colors[3].b; + + pBlock->set_low_color(g_atc_match5[r0].m_hi, g_atc_match5[g0].m_hi, g_pvrtc2_match4[b0].m_hi); + pBlock->set_high_color(g_atc_match5[r1].m_hi, g_atc_match5[g1].m_hi, g_atc_match5[b1].m_hi); + + pBlock->m_modulation[0] = pSelector->m_selectors[0]; + pBlock->m_modulation[1] = pSelector->m_selectors[1]; + pBlock->m_modulation[2] = pSelector->m_selectors[2]; + pBlock->m_modulation[3] = 
pSelector->m_selectors[3]; + + return; + } + + const uint32_t selector_range_table = g_etc1s_to_atc_selector_range_index[low_selector][high_selector]; + + //[32][8][RANGES][MAPPING] + const etc1s_to_atc_solution* pTable_r = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.r) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS]; + const etc1s_to_atc_solution* pTable_g = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.g) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS]; + const etc1s_to_atc_solution* pTable_b = &g_etc1s_to_pvrtc2_45[(inten_table * 32 + base_color.b) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS]; + + uint32_t best_err = UINT_MAX; + uint32_t best_mapping = 0; + + assert(NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS == 10); +#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } } + DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4); + DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9); +#undef DO_ITER + + pBlock->set_low_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo); + pBlock->set_high_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi); + + if (ATC_IDENTITY_SELECTOR_MAPPING_INDEX == best_mapping) + { + pBlock->m_modulation[0] = pSelector->m_selectors[0]; + pBlock->m_modulation[1] = pSelector->m_selectors[1]; + pBlock->m_modulation[2] = pSelector->m_selectors[2]; + pBlock->m_modulation[3] = pSelector->m_selectors[3]; + } + else + { + // TODO: We could make this faster using several precomputed 256 entry tables, like ETC1S->BC1 does. 
+ const uint8_t* pSelectors_xlat = &g_etc1s_to_atc_selector_mappings[best_mapping][0]; + + const uint32_t sel_bits0 = pSelector->m_selectors[0]; + const uint32_t sel_bits1 = pSelector->m_selectors[1]; + const uint32_t sel_bits2 = pSelector->m_selectors[2]; + const uint32_t sel_bits3 = pSelector->m_selectors[3]; + + uint32_t sels0 = 0, sels1 = 0, sels2 = 0, sels3 = 0; + +#define DO_X(x) { \ + const uint32_t x_shift = (x) * 2; \ + sels0 |= (pSelectors_xlat[(sel_bits0 >> x_shift) & 3] << x_shift); \ + sels1 |= (pSelectors_xlat[(sel_bits1 >> x_shift) & 3] << x_shift); \ + sels2 |= (pSelectors_xlat[(sel_bits2 >> x_shift) & 3] << x_shift); \ + sels3 |= (pSelectors_xlat[(sel_bits3 >> x_shift) & 3] << x_shift); } + + DO_X(0); + DO_X(1); + DO_X(2); + DO_X(3); +#undef DO_X + + pBlock->m_modulation[0] = (uint8_t)sels0; + pBlock->m_modulation[1] = (uint8_t)sels1; + pBlock->m_modulation[2] = (uint8_t)sels2; + pBlock->m_modulation[3] = (uint8_t)sels3; + } + } + + static inline vec4F* vec4F_set_scalar(vec4F* pV, float x) { pV->c[0] = x; pV->c[1] = x; pV->c[2] = x; pV->c[3] = x; return pV; } + static inline vec4F* vec4F_set(vec4F* pV, float x, float y, float z, float w) { pV->c[0] = x; pV->c[1] = y; pV->c[2] = z; pV->c[3] = w; return pV; } + static inline vec4F* vec4F_saturate_in_place(vec4F* pV) { pV->c[0] = saturate(pV->c[0]); pV->c[1] = saturate(pV->c[1]); pV->c[2] = saturate(pV->c[2]); pV->c[3] = saturate(pV->c[3]); return pV; } + static inline vec4F vec4F_saturate(const vec4F* pV) { vec4F res; res.c[0] = saturate(pV->c[0]); res.c[1] = saturate(pV->c[1]); res.c[2] = saturate(pV->c[2]); res.c[3] = saturate(pV->c[3]); return res; } + static inline vec4F vec4F_from_color(const color32* pC) { vec4F res; vec4F_set(&res, pC->c[0], pC->c[1], pC->c[2], pC->c[3]); return res; } + static inline vec4F vec4F_add(const vec4F* pLHS, const vec4F* pRHS) { vec4F res; vec4F_set(&res, pLHS->c[0] + pRHS->c[0], pLHS->c[1] + pRHS->c[1], pLHS->c[2] + pRHS->c[2], pLHS->c[3] + pRHS->c[3]); return res; 
} + static inline vec4F vec4F_sub(const vec4F* pLHS, const vec4F* pRHS) { vec4F res; vec4F_set(&res, pLHS->c[0] - pRHS->c[0], pLHS->c[1] - pRHS->c[1], pLHS->c[2] - pRHS->c[2], pLHS->c[3] - pRHS->c[3]); return res; } + static inline float vec4F_dot(const vec4F* pLHS, const vec4F* pRHS) { return pLHS->c[0] * pRHS->c[0] + pLHS->c[1] * pRHS->c[1] + pLHS->c[2] * pRHS->c[2] + pLHS->c[3] * pRHS->c[3]; } + static inline vec4F vec4F_mul(const vec4F* pLHS, float s) { vec4F res; vec4F_set(&res, pLHS->c[0] * s, pLHS->c[1] * s, pLHS->c[2] * s, pLHS->c[3] * s); return res; } + static inline vec4F* vec4F_normalize_in_place(vec4F* pV) { float s = pV->c[0] * pV->c[0] + pV->c[1] * pV->c[1] + pV->c[2] * pV->c[2] + pV->c[3] * pV->c[3]; if (s != 0.0f) { s = 1.0f / sqrtf(s); pV->c[0] *= s; pV->c[1] *= s; pV->c[2] *= s; pV->c[3] *= s; } return pV; } + + static color32 convert_rgba_5554_to_8888(const color32& col) + { + return color32((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), (col[3] << 4) | col[3]); + } + + static inline int sq(int x) { return x * x; } + + // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0. + // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! + // And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it. 
+ static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook) + { + pvrtc2_block* pBlock = static_cast(pDst); + + const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pBlock)[0]]; + const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pBlock)[1]]; + + pBlock->m_opaque_color_data.m_hard_flag = 1; + pBlock->m_opaque_color_data.m_mod_flag = 0; + pBlock->m_opaque_color_data.m_opaque_flag = 0; + + const int num_unique_alpha_selectors = alpha_selectors.m_num_unique_selectors; + + const color32& alpha_base_color = alpha_endpoint.m_color5; + const uint32_t alpha_inten_table = alpha_endpoint.m_inten5; + + int constant_alpha_val = -1; + + int alpha_block_colors[4]; + decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table); + + if (num_unique_alpha_selectors == 1) + { + constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector]; + } + else + { + constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector]; + + for (uint32_t i = alpha_selectors.m_lo_selector + 1; i <= alpha_selectors.m_hi_selector; i++) + { + if (constant_alpha_val != alpha_block_colors[i]) + { + constant_alpha_val = -1; + break; + } + } + } + + if (constant_alpha_val >= 250) + { + // It's opaque enough, so don't bother trying to encode it as an alpha block. + convert_etc1s_to_pvrtc2_rgb(pDst, pEndpoints, pSelector); + return; + } + + const color32& base_color = pEndpoints->m_color5; + const uint32_t inten_table = pEndpoints->m_inten5; + + const uint32_t low_selector = pSelector->m_lo_selector; + const uint32_t high_selector = pSelector->m_hi_selector; + + const int num_unique_color_selectors = pSelector->m_num_unique_selectors; + + // We need to reencode the block at the pixel level, unfortunately, from two ETC1S planes. 
+ // Do 4D incremental PCA, project all pixels to this hyperline, then quantize to packed endpoints and compute the modulation values. + const int br = (base_color.r << 3) | (base_color.r >> 2); + const int bg = (base_color.g << 3) | (base_color.g >> 2); + const int bb = (base_color.b << 3) | (base_color.b >> 2); + + color32 block_cols[4]; + for (uint32_t i = 0; i < 4; i++) + { + const int ci = g_etc1_inten_tables[inten_table][i]; + block_cols[i].set_clamped(br + ci, bg + ci, bb + ci, alpha_block_colors[i]); + } + + bool solid_color_block = true; + if (num_unique_color_selectors > 1) + { + for (uint32_t i = low_selector + 1; i <= high_selector; i++) + { + if ((block_cols[low_selector].r != block_cols[i].r) || (block_cols[low_selector].g != block_cols[i].g) || (block_cols[low_selector].b != block_cols[i].b)) + { + solid_color_block = false; + break; + } + } + } + + if ((solid_color_block) && (constant_alpha_val >= 0)) + { + // Constant color/alpha block. + // This is more complex than it may seem because of the way color and alpha are packed in PVRTC2. We need to evaluate mod0, mod1 and mod3 encodings to find the best one. + uint32_t r, g, b; + decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); + + // Mod 0 + uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255; + uint32_t la0 = g_pvrtc2_alpha_match33_0[constant_alpha_val].m_l; + + uint32_t cr0 = (lr0 << 1) | (lr0 >> 3); + uint32_t cg0 = (lg0 << 1) | (lg0 >> 3); + uint32_t cb0 = (lb0 << 2) | (lb0 >> 1); + uint32_t ca0 = (la0 << 1); + + cr0 = (cr0 << 3) | (cr0 >> 2); + cg0 = (cg0 << 3) | (cg0 >> 2); + cb0 = (cb0 << 3) | (cb0 >> 2); + ca0 = (ca0 << 4) | ca0; + + uint32_t err0 = sq(cr0 - r) + sq(cg0 - g) + sq(cb0 - b) + sq(ca0 - constant_alpha_val) * 2; + + // If the alpha is < 3 or so we're kinda screwed. It's better to have some RGB error than it is to turn a 100% transparent area slightly opaque. 
+ if ((err0 == 0) || (constant_alpha_val < 3)) + { + pBlock->set_trans_low_color(lr0, lg0, lb0, la0); + pBlock->set_trans_high_color(0, 0, 0, 0); + + pBlock->m_modulation[0] = 0; + pBlock->m_modulation[1] = 0; + pBlock->m_modulation[2] = 0; + pBlock->m_modulation[3] = 0; + return; + } + + // Mod 3 + uint32_t lr3 = (r * 15 + 128) / 255, lg3 = (g * 15 + 128) / 255, lb3 = (b * 15 + 128) / 255; + uint32_t la3 = g_pvrtc2_alpha_match33_3[constant_alpha_val].m_l; + + uint32_t cr3 = (lr3 << 1) | (lr3 >> 3); + uint32_t cg3 = (lg3 << 1) | (lg3 >> 3); + uint32_t cb3 = (lb3 << 1) | (lb3 >> 3); + uint32_t ca3 = (la3 << 1) | 1; + + cr3 = (cr3 << 3) | (cr3 >> 2); + cg3 = (cg3 << 3) | (cg3 >> 2); + cb3 = (cb3 << 3) | (cb3 >> 2); + ca3 = (ca3 << 4) | ca3; + + uint32_t err3 = sq(cr3 - r) + sq(cg3 - g) + sq(cb3 - b) + sq(ca3 - constant_alpha_val) * 2; + + // Mod 1 + uint32_t lr1 = g_pvrtc2_trans_match44[r].m_l, lg1 = g_pvrtc2_trans_match44[g].m_l, lb1 = g_pvrtc2_trans_match34[b].m_l; + uint32_t hr1 = g_pvrtc2_trans_match44[r].m_h, hg1 = g_pvrtc2_trans_match44[g].m_h, hb1 = g_pvrtc2_trans_match34[b].m_h; + uint32_t la1 = g_pvrtc2_alpha_match33[constant_alpha_val].m_l, ha1 = g_pvrtc2_alpha_match33[constant_alpha_val].m_h; + + uint32_t clr1 = (lr1 << 1) | (lr1 >> 3); + uint32_t clg1 = (lg1 << 1) | (lg1 >> 3); + uint32_t clb1 = (lb1 << 2) | (lb1 >> 1); + uint32_t cla1 = (la1 << 1); + + clr1 = (clr1 << 3) | (clr1 >> 2); + clg1 = (clg1 << 3) | (clg1 >> 2); + clb1 = (clb1 << 3) | (clb1 >> 2); + cla1 = (cla1 << 4) | cla1; + + uint32_t chr1 = (hr1 << 1) | (hr1 >> 3); + uint32_t chg1 = (hg1 << 1) | (hg1 >> 3); + uint32_t chb1 = (hb1 << 1) | (hb1 >> 3); + uint32_t cha1 = (ha1 << 1) | 1; + + chr1 = (chr1 << 3) | (chr1 >> 2); + chg1 = (chg1 << 3) | (chg1 >> 2); + chb1 = (chb1 << 3) | (chb1 >> 2); + cha1 = (cha1 << 4) | cha1; + + uint32_t r1 = (clr1 * 5 + chr1 * 3) / 8; + uint32_t g1 = (clg1 * 5 + chg1 * 3) / 8; + uint32_t b1 = (clb1 * 5 + chb1 * 3) / 8; + uint32_t a1 = (cla1 * 5 + cha1 * 3) / 8; 
+ + uint32_t err1 = sq(r1 - r) + sq(g1 - g) + sq(b1 - b) + sq(a1 - constant_alpha_val) * 2; + + if ((err1 < err0) && (err1 < err3)) + { + pBlock->set_trans_low_color(lr1, lg1, lb1, la1); + pBlock->set_trans_high_color(hr1, hg1, hb1, ha1); + + pBlock->m_modulation[0] = 0x55; + pBlock->m_modulation[1] = 0x55; + pBlock->m_modulation[2] = 0x55; + pBlock->m_modulation[3] = 0x55; + } + else if (err0 < err3) + { + pBlock->set_trans_low_color(lr0, lg0, lb0, la0); + pBlock->set_trans_high_color(0, 0, 0, 0); + + pBlock->m_modulation[0] = 0; + pBlock->m_modulation[1] = 0; + pBlock->m_modulation[2] = 0; + pBlock->m_modulation[3] = 0; + } + else + { + pBlock->set_trans_low_color(0, 0, 0, 0); + pBlock->set_trans_high_color(lr3, lg3, lb3, la3); + + pBlock->m_modulation[0] = 0xFF; + pBlock->m_modulation[1] = 0xFF; + pBlock->m_modulation[2] = 0xFF; + pBlock->m_modulation[3] = 0xFF; + } + + return; + } + + // It's a complex block with non-solid color and/or alpha pixels. + vec4F minColor, maxColor; + + if (solid_color_block) + { + // It's a solid color block. + uint32_t low_a = block_cols[alpha_selectors.m_lo_selector].a; + uint32_t high_a = block_cols[alpha_selectors.m_hi_selector].a; + + const float S = 1.0f / 255.0f; + vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, low_a * S); + vec4F_set(&maxColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, high_a * S); + } + else if (constant_alpha_val >= 0) + { + // It's a solid alpha block. 
+ const float S = 1.0f / 255.0f; + vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, constant_alpha_val * S); + vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, constant_alpha_val * S); + } + // See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis). + // To keep quality up we need to use full 4D PCA in this case. + else if ((block_cols[low_selector].c[0] == 0) || (block_cols[high_selector].c[0] == 255) || + (block_cols[low_selector].c[1] == 0) || (block_cols[high_selector].c[1] == 255) || + (block_cols[low_selector].c[2] == 0) || (block_cols[high_selector].c[2] == 255) || + (block_cols[alpha_selectors.m_lo_selector].c[3] == 0) || (block_cols[alpha_selectors.m_hi_selector].c[3] == 255)) + { + // Find principle component of RGBA colors treated as 4D vectors. + color32 pixels[16]; + + uint32_t sum_r = 0, sum_g = 0, sum_b = 0, sum_a = 0; + for (uint32_t i = 0; i < 16; i++) + { + color32 rgb(block_cols[pSelector->get_selector(i & 3, i >> 2)]); + uint32_t a = block_cols[alpha_selectors.get_selector(i & 3, i >> 2)].a; + + pixels[i].set(rgb.r, rgb.g, rgb.b, a); + + sum_r += rgb.r; + sum_g += rgb.g; + sum_b += rgb.b; + sum_a += a; + } + + vec4F meanColor; + vec4F_set(&meanColor, (float)sum_r, (float)sum_g, (float)sum_b, (float)sum_a); + vec4F meanColorScaled = vec4F_mul(&meanColor, 1.0f / 16.0f); + + meanColor = vec4F_mul(&meanColor, 1.0f / (float)(16.0f * 255.0f)); + vec4F_saturate_in_place(&meanColor); + + vec4F axis; + vec4F_set_scalar(&axis, 0.0f); + // Why this incremental method? Because it's stable and predictable. Covar+power method can require a lot of iterations to converge in 4D. 
+ for (uint32_t i = 0; i < 16; i++) + { + vec4F color = vec4F_from_color(&pixels[i]); + color = vec4F_sub(&color, &meanColorScaled); + vec4F a = vec4F_mul(&color, color.c[0]); + vec4F b = vec4F_mul(&color, color.c[1]); + vec4F c = vec4F_mul(&color, color.c[2]); + vec4F d = vec4F_mul(&color, color.c[3]); + vec4F n = i ? axis : color; + vec4F_normalize_in_place(&n); + axis.c[0] += vec4F_dot(&a, &n); + axis.c[1] += vec4F_dot(&b, &n); + axis.c[2] += vec4F_dot(&c, &n); + axis.c[3] += vec4F_dot(&d, &n); + } + + vec4F_normalize_in_place(&axis); + + if (vec4F_dot(&axis, &axis) < .5f) + vec4F_set_scalar(&axis, .5f); + + float l = 1e+9f, h = -1e+9f; + + for (uint32_t i = 0; i < 16; i++) + { + vec4F color = vec4F_from_color(&pixels[i]); + + vec4F q = vec4F_sub(&color, &meanColorScaled); + float d = vec4F_dot(&q, &axis); + + l = basisu::minimum(l, d); + h = basisu::maximum(h, d); + } + + l *= (1.0f / 255.0f); + h *= (1.0f / 255.0f); + + vec4F b0 = vec4F_mul(&axis, l); + vec4F b1 = vec4F_mul(&axis, h); + vec4F c0 = vec4F_add(&meanColor, &b0); + vec4F c1 = vec4F_add(&meanColor, &b1); + minColor = vec4F_saturate(&c0); + maxColor = vec4F_saturate(&c1); + if (minColor.c[3] > maxColor.c[3]) + { + // VS 2019 release Code Generator issue + //std::swap(minColor, maxColor); + + float a = minColor.c[0], b = minColor.c[1], c = minColor.c[2], d = minColor.c[3]; + minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3]; + minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3]; + maxColor.c[0] = a; maxColor.c[1] = b; maxColor.c[2] = c; maxColor.c[3] = d; + } + } + else + { + // We know the RGB axis is luma, because it's an ETC1S block and none of the block colors got clamped. So we only need to use 2D PCA. 
+ // We project each LA vector onto two 2D lines with axes (1,1) and (1,-1) and find the largest projection to determine if axis A is flipped relative to L. + uint32_t block_cols_l[4], block_cols_a[4]; + for (uint32_t i = 0; i < 4; i++) + { + block_cols_l[i] = block_cols[i].r + block_cols[i].g + block_cols[i].b; + block_cols_a[i] = block_cols[i].a * 3; + } + + int p0_min = INT_MAX, p0_max = INT_MIN; + int p1_min = INT_MAX, p1_max = INT_MIN; + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t cs = pSelector->m_selectors[y]; + const uint32_t as = alpha_selectors.m_selectors[y]; + + { + const int l = block_cols_l[cs & 3]; + const int a = block_cols_a[as & 3]; + const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0); + const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1); + } + { + const int l = block_cols_l[(cs >> 2) & 3]; + const int a = block_cols_a[(as >> 2) & 3]; + const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0); + const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1); + } + { + const int l = block_cols_l[(cs >> 4) & 3]; + const int a = block_cols_a[(as >> 4) & 3]; + const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0); + const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1); + } + { + const int l = block_cols_l[cs >> 6]; + const int a = block_cols_a[as >> 6]; + const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0); + const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1); + } + } + + int dist0 = p0_max - p0_min; + int dist1 = p1_max - p1_min; + + const float S = 1.0f / 255.0f; + + vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, 
block_cols[alpha_selectors.m_lo_selector].a * S); + vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, block_cols[alpha_selectors.m_hi_selector].a * S); + + // See if the A component of the principle axis is flipped relative to L. If so, we need to flip either RGB or A bounds. + if (dist1 > dist0) + { + std::swap(minColor.c[0], maxColor.c[0]); + std::swap(minColor.c[1], maxColor.c[1]); + std::swap(minColor.c[2], maxColor.c[2]); + } + } + + // 4433 4443 + color32 trialMinColor, trialMaxColor; + + trialMinColor.set_clamped((int)(minColor.c[0] * 15.0f + .5f), (int)(minColor.c[1] * 15.0f + .5f), (int)(minColor.c[2] * 7.0f + .5f), (int)(minColor.c[3] * 7.0f + .5f)); + trialMaxColor.set_clamped((int)(maxColor.c[0] * 15.0f + .5f), (int)(maxColor.c[1] * 15.0f + .5f), (int)(maxColor.c[2] * 15.0f + .5f), (int)(maxColor.c[3] * 7.0f + .5f)); + + pBlock->set_trans_low_color(trialMinColor.r, trialMinColor.g, trialMinColor.b, trialMinColor.a); + pBlock->set_trans_high_color(trialMaxColor.r, trialMaxColor.g, trialMaxColor.b, trialMaxColor.a); + + color32 color_a((trialMinColor.r << 1) | (trialMinColor.r >> 3), (trialMinColor.g << 1) | (trialMinColor.g >> 3), (trialMinColor.b << 2) | (trialMinColor.b >> 1), trialMinColor.a << 1); + color32 color_b((trialMaxColor.r << 1) | (trialMaxColor.r >> 3), (trialMaxColor.g << 1) | (trialMaxColor.g >> 3), (trialMaxColor.b << 1) | (trialMaxColor.b >> 3), (trialMaxColor.a << 1) | 1); + + color32 color0(convert_rgba_5554_to_8888(color_a)); + color32 color3(convert_rgba_5554_to_8888(color_b)); + + const int lr = color0.r; + const int lg = color0.g; + const int lb = color0.b; + const int la = color0.a; + + const int axis_r = color3.r - lr; + const int axis_g = color3.g - lg; + const int axis_b = color3.b - lb; + const int axis_a = color3.a - la; + const int len_a = (axis_r * axis_r) + (axis_g * axis_g) + (axis_b * axis_b) + (axis_a * axis_a); + + const int thresh01 = (len_a * 3) / 
16; + const int thresh12 = len_a >> 1; + const int thresh23 = (len_a * 13) / 16; + + if ((axis_r | axis_g | axis_b) == 0) + { + int ca_sel[4]; + + for (uint32_t i = 0; i < 4; i++) + { + int ca = (block_cols[i].a - la) * axis_a; + ca_sel[i] = (ca >= thresh23) + (ca >= thresh12) + (ca >= thresh01); + } + + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t a_sels = alpha_selectors.m_selectors[y]; + + uint32_t sel = ca_sel[a_sels & 3] | (ca_sel[(a_sels >> 2) & 3] << 2) | (ca_sel[(a_sels >> 4) & 3] << 4) | (ca_sel[a_sels >> 6] << 6); + + pBlock->m_modulation[y] = (uint8_t)sel; + } + } + else + { + int cy[4], ca[4]; + + for (uint32_t i = 0; i < 4; i++) + { + cy[i] = (block_cols[i].r - lr) * axis_r + (block_cols[i].g - lg) * axis_g + (block_cols[i].b - lb) * axis_b; + ca[i] = (block_cols[i].a - la) * axis_a; + } + + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t c_sels = pSelector->m_selectors[y]; + const uint32_t a_sels = alpha_selectors.m_selectors[y]; + + const int d0 = cy[c_sels & 3] + ca[a_sels & 3]; + const int d1 = cy[(c_sels >> 2) & 3] + ca[(a_sels >> 2) & 3]; + const int d2 = cy[(c_sels >> 4) & 3] + ca[(a_sels >> 4) & 3]; + const int d3 = cy[c_sels >> 6] + ca[a_sels >> 6]; + + uint32_t sel = ((d0 >= thresh23) + (d0 >= thresh12) + (d0 >= thresh01)) | + (((d1 >= thresh23) + (d1 >= thresh12) + (d1 >= thresh01)) << 2) | + (((d2 >= thresh23) + (d2 >= thresh12) + (d2 >= thresh01)) << 4) | + (((d3 >= thresh23) + (d3 >= thresh12) + (d3 >= thresh01)) << 6); + + pBlock->m_modulation[y] = (uint8_t)sel; + } + } + } + + static void transcoder_init_pvrtc2() + { + for (uint32_t v = 0; v < 256; v++) + { + int best_l = 0, best_h = 0, lowest_err = INT_MAX; + + for (uint32_t l = 0; l < 8; l++) + { + uint32_t le = (l << 1); + le = (le << 4) | le; + + for (uint32_t h = 0; h < 8; h++) + { + uint32_t he = (h << 1) | 1; + he = (he << 4) | he; + + uint32_t m = (le * 5 + he * 3) / 8; + + int err = (int)labs((int)v - (int)m); + if (err < lowest_err) + { + lowest_err = err; + 
best_l = l; + best_h = h; + } + } + } + + g_pvrtc2_alpha_match33[v].m_l = (uint8_t)best_l; + g_pvrtc2_alpha_match33[v].m_h = (uint8_t)best_h; + } + + for (uint32_t v = 0; v < 256; v++) + { + int best_l = 0, best_h = 0, lowest_err = INT_MAX; + + for (uint32_t l = 0; l < 8; l++) + { + uint32_t le = (l << 1); + le = (le << 4) | le; + + int err = (int)labs((int)v - (int)le); + if (err < lowest_err) + { + lowest_err = err; + best_l = l; + best_h = l; + } + } + + g_pvrtc2_alpha_match33_0[v].m_l = (uint8_t)best_l; + g_pvrtc2_alpha_match33_0[v].m_h = (uint8_t)best_h; + } + + for (uint32_t v = 0; v < 256; v++) + { + int best_l = 0, best_h = 0, lowest_err = INT_MAX; + + for (uint32_t h = 0; h < 8; h++) + { + uint32_t he = (h << 1) | 1; + he = (he << 4) | he; + + int err = (int)labs((int)v - (int)he); + if (err < lowest_err) + { + lowest_err = err; + best_l = h; + best_h = h; + } + } + + g_pvrtc2_alpha_match33_3[v].m_l = (uint8_t)best_l; + g_pvrtc2_alpha_match33_3[v].m_h = (uint8_t)best_h; + } + + for (uint32_t v = 0; v < 256; v++) + { + int best_l = 0, best_h = 0, lowest_err = INT_MAX; + + for (uint32_t l = 0; l < 8; l++) + { + uint32_t le = (l << 2) | (l >> 1); + le = (le << 3) | (le >> 2); + + for (uint32_t h = 0; h < 16; h++) + { + uint32_t he = (h << 1) | (h >> 3); + he = (he << 3) | (he >> 2); + + uint32_t m = (le * 5 + he * 3) / 8; + + int err = (int)labs((int)v - (int)m); + if (err < lowest_err) + { + lowest_err = err; + best_l = l; + best_h = h; + } + } + } + + g_pvrtc2_trans_match34[v].m_l = (uint8_t)best_l; + g_pvrtc2_trans_match34[v].m_h = (uint8_t)best_h; + } + + for (uint32_t v = 0; v < 256; v++) + { + int best_l = 0, best_h = 0, lowest_err = INT_MAX; + + for (uint32_t l = 0; l < 16; l++) + { + uint32_t le = (l << 1) | (l >> 3); + le = (le << 3) | (le >> 2); + + for (uint32_t h = 0; h < 16; h++) + { + uint32_t he = (h << 1) | (h >> 3); + he = (he << 3) | (he >> 2); + + uint32_t m = (le * 5 + he * 3) / 8; + + int err = (int)labs((int)v - (int)m); + if (err < 
lowest_err) + { + lowest_err = err; + best_l = l; + best_h = h; + } + } + } + + g_pvrtc2_trans_match44[v].m_l = (uint8_t)best_l; + g_pvrtc2_trans_match44[v].m_h = (uint8_t)best_h; + } + } +#endif // BASISD_SUPPORT_PVRTC2 + + //------------------------------------------------------------------------------------------------ + + // BC7 mode 5 RGB encoder + +#if BASISD_SUPPORT_BC7_MODE5 + namespace bc7_mode_5_encoder + { + static float g_mode5_rgba_midpoints[128]; + + void encode_bc7_mode5_init() + { + // Mode 5 endpoint midpoints + for (uint32_t i = 0; i < 128; i++) + { + uint32_t vl = (i << 1); + vl |= (vl >> 7); + float lo = vl / 255.0f; + + uint32_t vh = basisu::minimumi(127, i + 1) << 1; + vh |= (vh >> 7); + float hi = vh / 255.0f; + + if (i == 127) + g_mode5_rgba_midpoints[i] = 1e+15f; + else + g_mode5_rgba_midpoints[i] = (lo + hi) / 2.0f; + } + } + + static inline uint32_t from_7(uint32_t v) + { + assert(v < 128); + return (v << 1) | (v >> 6); + } + + static inline int to_7(float c) + { + assert((c >= 0) && (c <= 1.0f)); + + int vl = (int)(c * 127.0f); + vl += (c > g_mode5_rgba_midpoints[vl]); + return clampi(vl, 0, 127); + } + + static inline int to_7(int c8) + { + assert((c8 >= 0) && (c8 <= 255)); + + float c = (float)c8 * (1.0f / 255.0f); + + int vl = (int)(c * 127.0f); + vl += (c > g_mode5_rgba_midpoints[vl]); + return clampi(vl, 0, 127); + } + + // This is usable with ASTC as well, which uses the same 2-bit interpolation weights. 
+ static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) + { + assert(w < 4); + return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6; + } + + static void eval_weights( + const color32 *pPixels, uint8_t* pWeights, + int lr, int lg, int lb, + int hr, int hg, int hb) + { + lr = from_7(lr); lg = from_7(lg); lb = from_7(lb); + hr = from_7(hr); hg = from_7(hg); hb = from_7(hb); + + int cr[4], cg[4], cb[4]; + for (uint32_t i = 0; i < 4; i++) + { + cr[i] = (uint8_t)bc7_interp2(lr, hr, i); + cg[i] = (uint8_t)bc7_interp2(lg, hg, i); + cb[i] = (uint8_t)bc7_interp2(lb, hb, i); + } + +#if 0 + for (uint32_t i = 0; i < 16; i++) + { + const int pr = pPixels[i].r, pg = pPixels[i].g, pb = pPixels[i].b; + + uint32_t best_err = UINT32_MAX; + uint32_t best_idx = 0; + for (uint32_t j = 0; j < 4; j++) + { + uint32_t e = square(pr - cr[j]) + square(pg - cg[j]) + square(pb - cb[j]); + if (e < best_err) + { + best_err = e; + best_idx = j; + } + + pWeights[i] = (uint8_t)best_idx; + } + } // i +#else + int ar = cr[3] - cr[0], ag = cg[3] - cg[0], ab = cb[3] - cb[0]; + + int dots[4]; + for (uint32_t i = 0; i < 4; i++) + dots[i] = (int)cr[i] * ar + (int)cg[i] * ag + (int)cb[i] * ab; + + // seems very rare in LDR, so rare that it doesn't matter + //assert(dots[0] <= dots[1]); + //assert(dots[1] <= dots[2]); + //assert(dots[2] <= dots[3]); + + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; + + ar *= 2; ag *= 2; ab *= 2; + + for (uint32_t i = 0; i < 16; i += 4) + { + const int d0 = pPixels[i + 0].r * ar + pPixels[i + 0].g * ag + pPixels[i + 0].b * ab; + const int d1 = pPixels[i + 1].r * ar + pPixels[i + 1].g * ag + pPixels[i + 1].b * ab; + const int d2 = pPixels[i + 2].r * ar + pPixels[i + 2].g * ag + pPixels[i + 2].b * ab; + const int d3 = pPixels[i + 3].r * ar + pPixels[i + 3].g * ag + pPixels[i + 3].b * ab; + + pWeights[i + 0] = (d0 > t0) + (d0 >= t1) + (d0 >= t2); + pWeights[i + 1] = (d1 > t0) + (d1 >= t1) + (d1 >= 
t2); + pWeights[i + 2] = (d2 > t0) + (d2 >= t1) + (d2 >= t2); + pWeights[i + 3] = (d3 > t0) + (d3 >= t1) + (d3 >= t2); + } +#endif + } + + static void pack_bc7_mode5_rgb_block( + bc7_mode_5* pDst_block, + int lr, int lg, int lb, int hr, int hg, int hb, + const uint8_t* pWeights) + { + assert((lr >= 0) && (lr <= 127)); + assert((lg >= 0) && (lg <= 127)); + assert((lb >= 0) && (lb <= 127)); + assert((hr >= 0) && (hr <= 127)); + assert((hg >= 0) && (hg <= 127)); + assert((hb >= 0) && (hb <= 127)); + + pDst_block->m_lo_bits = 0; + + uint8_t weight_inv = 0; + if (pWeights[0] & 2) + { + std::swap(lr, hr); + std::swap(lg, hg); + std::swap(lb, hb); + weight_inv = 3; + } + assert((pWeights[0] ^ weight_inv) <= 1); + + pDst_block->m_lo.m_mode = 32; + pDst_block->m_lo.m_r0 = lr; + pDst_block->m_lo.m_r1 = hr; + pDst_block->m_lo.m_g0 = lg; + pDst_block->m_lo.m_g1 = hg; + pDst_block->m_lo.m_b0 = lb; + pDst_block->m_lo.m_b1 = hb; + + pDst_block->m_lo.m_a0 = 255; + pDst_block->m_lo.m_a1_0 = 63; + + uint64_t sel_bits = 3; + uint32_t cur_ofs = 2; + for (uint32_t i = 0; i < 16; i++) + { + assert(pWeights[i] <= 3); + sel_bits |= ((uint64_t)(weight_inv ^ pWeights[i])) << cur_ofs; + cur_ofs += (i ? 2 : 1); + } + + pDst_block->m_hi_bits = sel_bits; + } + + // This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w)) + // where w is [0,1/3,2/3,1]. 9 is the perfect multiplier. 
+ static const uint32_t g_weight_vals4[4] = { 0x000009, 0x010204, 0x040201, 0x090000 }; + + static inline bool compute_least_squares_endpoints4_rgb( + const color32 *pColors, const uint8_t* pSelectors, + int& lr, int& lg, int& lb, int& hr, int& hg, int& hb, + int total_r, int total_g, int total_b) + { + uint32_t uq00_r = 0, uq00_g = 0, uq00_b = 0; + uint32_t weight_accum = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint8_t r = pColors[i].r, g = pColors[i].g, b = pColors[i].b; + const uint8_t sel = pSelectors[i]; + + weight_accum += g_weight_vals4[sel]; + uq00_r += sel * r; + uq00_g += sel * g; + uq00_b += sel * b; + } + + int q10_r = total_r * 3 - uq00_r; + int q10_g = total_g * 3 - uq00_g; + int q10_b = total_b * 3 - uq00_b; + + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + return false; + + det = (3.0f / 255.0f) / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + float fhr = basisu::clamp(iz00 * (float)uq00_r + iz01 * q10_r, 0.0f, 1.0f); + float flr = basisu::clamp(iz10 * (float)uq00_r + iz11 * q10_r, 0.0f, 1.0f); + + float fhg = basisu::clamp(iz00 * (float)uq00_g + iz01 * q10_g, 0.0f, 1.0f); + float flg = basisu::clamp(iz10 * (float)uq00_g + iz11 * q10_g, 0.0f, 1.0f); + + float fhb = basisu::clamp(iz00 * (float)uq00_b + iz01 * q10_b, 0.0f, 1.0f); + float flb = basisu::clamp(iz10 * (float)uq00_b + iz11 * q10_b, 0.0f, 1.0f); + + lr = to_7(flr); lg = to_7(flg); lb = to_7(flb); + hr = to_7(fhr); hg = to_7(fhg); hb = to_7(fhb); + + return true; + } + + void encode_bc7_mode_5_block(void* pDst_block, color32* pPixels, bool hq_mode) + { + assert(g_mode5_rgba_midpoints[1]); + + int total_r = 0, total_g = 0, total_b = 0; + + int min_r = 255, min_g = 255, min_b = 255; + int max_r = 0, max_g = 0, max_b = 0; + + 
for (uint32_t i = 0; i < 16; i++) + { + int r = pPixels[i].r, g = pPixels[i].g, b = pPixels[i].b; + + total_r += r; total_g += g; total_b += b; + + min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); + max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b); + } + + if ((min_r == max_r) && (min_g == max_g) && (min_b == max_b)) + { + const int lr = g_bc7_m5_equals_1[min_r].m_lo, lg = g_bc7_m5_equals_1[min_g].m_lo, lb = g_bc7_m5_equals_1[min_b].m_lo; + const int hr = g_bc7_m5_equals_1[min_r].m_hi, hg = g_bc7_m5_equals_1[min_g].m_hi, hb = g_bc7_m5_equals_1[min_b].m_hi; + uint8_t solid_weights[16]; + memset(solid_weights, 1, 16); + pack_bc7_mode5_rgb_block((bc7_mode_5*)pDst_block, lr, lg, lb, hr, hg, hb, solid_weights); + return; + } + + int mean_r = (total_r + 8) >> 4, mean_g = (total_g + 8) >> 4, mean_b = (total_b + 8) >> 4; + + // covar rows are: + // 0, 1, 2 + // 1, 3, 4 + // 2, 4, 5 + int icov[6] = { 0, 0, 0, 0, 0, 0 }; + + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pPixels[i].r - mean_r; + int g = (int)pPixels[i].g - mean_g; + int b = (int)pPixels[i].b - mean_b; + icov[0] += r * r; icov[1] += r * g; icov[2] += r * b; + icov[3] += g * g; icov[4] += g * b; + icov[5] += b * b; + } + + int block_max_var = basisu::maximum(icov[0], icov[3], icov[5]); // not divided by 16, i.e. 
scaled by 16 + + // TODO: Tune this + const int32_t SIMPLE_BLOCK_THRESH = 10 * 16; + + if ((!hq_mode) && (block_max_var < SIMPLE_BLOCK_THRESH)) + { + const int L = 16, H = 239; + + int lr = to_7(lerp_8bit(min_r, max_r, L)); + int lg = to_7(lerp_8bit(min_g, max_g, L)); + int lb = to_7(lerp_8bit(min_b, max_b, L)); + + int hr = to_7(lerp_8bit(min_r, max_r, H)); + int hg = to_7(lerp_8bit(min_g, max_g, H)); + int hb = to_7(lerp_8bit(min_b, max_b, H)); + + uint8_t cur_weights[16]; + eval_weights(pPixels, cur_weights, lr, lg, lb, hr, hg, hb); + + pack_bc7_mode5_rgb_block((bc7_mode_5*)pDst_block, lr, lg, lb, hr, hg, hb, cur_weights); + return; + } + + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = (float)icov[i]; + + const float sc = 1.0f / (float)block_max_var; + const float wx = sc * cov[0], wy = sc * cov[3], wz = sc * cov[5]; + + const float alt_xr = cov[0] * wx + cov[1] * wy + cov[2] * wz; + const float alt_xg = cov[1] * wx + cov[3] * wy + cov[4] * wz; + const float alt_xb = cov[2] * wx + cov[4] * wy + cov[5] * wz; + + int saxis_r = 306, saxis_g = 601, saxis_b = 117; + + float k = basisu::maximum(fabsf(alt_xr), fabsf(alt_xg), fabsf(alt_xb)); + if (fabs(k) >= basisu::SMALL_FLOAT_VAL) + { + float m = 2048.0f / k; + saxis_r = (int)(alt_xr * m); + saxis_g = (int)(alt_xg * m); + saxis_b = (int)(alt_xb * m); + } + + saxis_r = (int)((uint32_t)saxis_r << 4U); + saxis_g = (int)((uint32_t)saxis_g << 4U); + saxis_b = (int)((uint32_t)saxis_b << 4U); + + int low_dot = INT_MAX, high_dot = INT_MIN; + + for (uint32_t i = 0; i < 16; i += 4) + { + int dot0 = ((pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b) & ~0xF) + i; + int dot1 = ((pPixels[i + 1].r * saxis_r + pPixels[i + 1].g * saxis_g + pPixels[i + 1].b * saxis_b) & ~0xF) + i + 1; + int dot2 = ((pPixels[i + 2].r * saxis_r + pPixels[i + 2].g * saxis_g + pPixels[i + 2].b * saxis_b) & ~0xF) + i + 2; + int dot3 = ((pPixels[i + 3].r * saxis_r + pPixels[i + 3].g * saxis_g + pPixels[i + 3].b * 
saxis_b) & ~0xF) + i + 3; + + int min_d01 = basisu::minimum(dot0, dot1); + int max_d01 = basisu::maximum(dot0, dot1); + + int min_d23 = basisu::minimum(dot2, dot3); + int max_d23 = basisu::maximum(dot2, dot3); + + int min_d = basisu::minimum(min_d01, min_d23); + int max_d = basisu::maximum(max_d01, max_d23); + + low_dot = basisu::minimum(low_dot, min_d); + high_dot = basisu::maximum(high_dot, max_d); + } + int low_c = low_dot & 15; + int high_c = high_dot & 15; + + int lr = to_7(pPixels[low_c].r), lg = to_7(pPixels[low_c].g), lb = to_7(pPixels[low_c].b); + int hr = to_7(pPixels[high_c].r), hg = to_7(pPixels[high_c].g), hb = to_7(pPixels[high_c].b); + + uint8_t cur_weights[16]; + eval_weights(pPixels, cur_weights, lr, lg, lb, hr, hg, hb); + + if (compute_least_squares_endpoints4_rgb( + pPixels, cur_weights, + lr, lg, lb, hr, hg, hb, + total_r, total_g, total_b)) + { + eval_weights(pPixels, cur_weights, lr, lg, lb, hr, hg, hb); + } + +#if 0 + lr = 0; lg = 0; lb = 0; + hr = 0; hg = 0; hb = 0; +#endif + + pack_bc7_mode5_rgb_block((bc7_mode_5*)pDst_block, lr, lg, lb, hr, hg, hb, cur_weights); + } + + } // namespace bc7_mode_5_encoder + +#endif // BASISD_SUPPORT_BC7_MODE5 + + //------------------------------------------------------------------------------------------------ + + basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder() : + m_pGlobal_codebook(nullptr), + m_selector_history_buf_size(0) + { + } + + bool basisu_lowlevel_etc1s_transcoder::decode_palettes( + uint32_t num_endpoints, const uint8_t* pEndpoints_data, uint32_t endpoints_data_size, + uint32_t num_selectors, const uint8_t* pSelectors_data, uint32_t selectors_data_size) + { + if (m_pGlobal_codebook) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 11\n"); + return false; + } + bitwise_decoder sym_codec; + + huffman_decoding_table color5_delta_model0, color5_delta_model1, color5_delta_model2, inten_delta_model; + + if (!sym_codec.init(pEndpoints_data, 
endpoints_data_size)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 0\n"); + return false; + } + + if (!sym_codec.read_huffman_table(color5_delta_model0)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1\n"); + return false; + } + + if (!sym_codec.read_huffman_table(color5_delta_model1)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1a\n"); + return false; + } + + if (!sym_codec.read_huffman_table(color5_delta_model2)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2a\n"); + return false; + } + + if (!sym_codec.read_huffman_table(inten_delta_model)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n"); + return false; + } + + if (!color5_delta_model0.is_valid() || !color5_delta_model1.is_valid() || !color5_delta_model2.is_valid() || !inten_delta_model.is_valid()) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n"); + return false; + } + + const bool endpoints_are_grayscale = sym_codec.get_bits(1) != 0; + + m_local_endpoints.resize(num_endpoints); + + color32 prev_color5(16, 16, 16, 0); + uint32_t prev_inten = 0; + + for (uint32_t i = 0; i < num_endpoints; i++) + { + uint32_t inten_delta = sym_codec.decode_huffman(inten_delta_model); + m_local_endpoints[i].m_inten5 = static_cast((inten_delta + prev_inten) & 7); + prev_inten = m_local_endpoints[i].m_inten5; + + for (uint32_t c = 0; c < (endpoints_are_grayscale ? 
1U : 3U); c++) + { + int delta; + if (prev_color5[c] <= basist::COLOR5_PAL0_PREV_HI) + delta = sym_codec.decode_huffman(color5_delta_model0); + else if (prev_color5[c] <= basist::COLOR5_PAL1_PREV_HI) + delta = sym_codec.decode_huffman(color5_delta_model1); + else + delta = sym_codec.decode_huffman(color5_delta_model2); + + int v = (prev_color5[c] + delta) & 31; + + m_local_endpoints[i].m_color5[c] = static_cast(v); + + prev_color5[c] = static_cast(v); + } + + if (endpoints_are_grayscale) + { + m_local_endpoints[i].m_color5[1] = m_local_endpoints[i].m_color5[0]; + m_local_endpoints[i].m_color5[2] = m_local_endpoints[i].m_color5[0]; + } + } + + sym_codec.stop(); + + m_local_selectors.resize(num_selectors); + + if (!sym_codec.init(pSelectors_data, selectors_data_size)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 5\n"); + return false; + } + + basist::huffman_decoding_table delta_selector_pal_model; + + const bool used_global_selector_cb = (sym_codec.get_bits(1) == 1); + + if (used_global_selector_cb) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: global selector codebooks are unsupported\n"); + return false; + } + else + { + const bool used_hybrid_selector_cb = (sym_codec.get_bits(1) == 1); + + if (used_hybrid_selector_cb) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: hybrid global selector codebooks are unsupported\n"); + return false; + } + + const bool used_raw_encoding = (sym_codec.get_bits(1) == 1); + + if (used_raw_encoding) + { + for (uint32_t i = 0; i < num_selectors; i++) + { + for (uint32_t j = 0; j < 4; j++) + { + uint32_t cur_byte = sym_codec.get_bits(8); + + for (uint32_t k = 0; k < 4; k++) + m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); + } + + m_local_selectors[i].init_flags(); + } + } + else + { + if (!sym_codec.read_huffman_table(delta_selector_pal_model)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: 
fail 10\n"); + return false; + } + + if ((num_selectors > 1) && (!delta_selector_pal_model.is_valid())) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 10a\n"); + return false; + } + + uint8_t prev_bytes[4] = { 0, 0, 0, 0 }; + + for (uint32_t i = 0; i < num_selectors; i++) + { + if (!i) + { + for (uint32_t j = 0; j < 4; j++) + { + uint32_t cur_byte = sym_codec.get_bits(8); + prev_bytes[j] = static_cast(cur_byte); + + for (uint32_t k = 0; k < 4; k++) + m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); + } + m_local_selectors[i].init_flags(); + continue; + } + + for (uint32_t j = 0; j < 4; j++) + { + int delta_byte = sym_codec.decode_huffman(delta_selector_pal_model); + + uint32_t cur_byte = delta_byte ^ prev_bytes[j]; + prev_bytes[j] = static_cast(cur_byte); + + for (uint32_t k = 0; k < 4; k++) + m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); + } + m_local_selectors[i].init_flags(); + } + } + } + + sym_codec.stop(); + + return true; + } + + bool basisu_lowlevel_etc1s_transcoder::decode_tables(const uint8_t* pTable_data, uint32_t table_data_size) + { + basist::bitwise_decoder sym_codec; + if (!sym_codec.init(pTable_data, table_data_size)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 0\n"); + return false; + } + + if (!sym_codec.read_huffman_table(m_endpoint_pred_model)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1\n"); + return false; + } + + if (m_endpoint_pred_model.get_code_sizes().size() == 0) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1a\n"); + return false; + } + + if (!sym_codec.read_huffman_table(m_delta_endpoint_model)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2\n"); + return false; + } + + if (m_delta_endpoint_model.get_code_sizes().size() == 0) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2a\n"); + return 
false; + } + + if (!sym_codec.read_huffman_table(m_selector_model)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3\n"); + return false; + } + + if (m_selector_model.get_code_sizes().size() == 0) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3a\n"); + return false; + } + + if (!sym_codec.read_huffman_table(m_selector_history_buf_rle_model)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4\n"); + return false; + } + + if (m_selector_history_buf_rle_model.get_code_sizes().size() == 0) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4a\n"); + return false; + } + + m_selector_history_buf_size = sym_codec.get_bits(13); + // Check for bogus values. + if (!m_selector_history_buf_size) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 5\n"); + return false; + } + + sym_codec.stop(); + + return true; + } + + bool basisu_lowlevel_etc1s_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, bool transcode_alpha, void *pAlpha_blocks, uint32_t output_rows_in_pixels, uint32_t decode_flags) + { + // 'pDst_blocks' unused when disabling *all* hardware transcode options + // (and 'bc1_allow_threecolor_blocks' when disabling DXT) + BASISU_NOTE_UNUSED(pDst_blocks); + BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks); + BASISU_NOTE_UNUSED(transcode_alpha); + BASISU_NOTE_UNUSED(pAlpha_blocks); + + assert(g_transcoder_initialized); + if (!g_transcoder_initialized) + { + 
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: Transcoder not globally initialized.\n"); + return false; + } + + if (!pState) + pState = &m_def_state; + + const uint32_t total_blocks = num_blocks_x * num_blocks_y; + + if (!output_row_pitch_in_blocks_or_pixels) + { + if (basis_block_format_is_uncompressed(fmt)) + output_row_pitch_in_blocks_or_pixels = orig_width; + else + { + if (fmt == block_format::cFXT1_RGB) + output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8; + else + output_row_pitch_in_blocks_or_pixels = num_blocks_x; + } + } + + if (basis_block_format_is_uncompressed(fmt)) + { + if (!output_rows_in_pixels) + output_rows_in_pixels = orig_height; + } + + basisu::vector* pPrev_frame_indices = nullptr; + if (is_video) + { + // TODO: Add check to make sure the caller hasn't tried skipping past p-frames + //const bool alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; + //const uint32_t level_index = slice_desc.m_level_index; + + if (level_index >= basisu_transcoder_state::cMaxPrevFrameLevels) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: unsupported level_index\n"); + return false; + } + + pPrev_frame_indices = &pState->m_prev_frame_indices[is_alpha_slice][level_index]; + if (pPrev_frame_indices->size() < total_blocks) + pPrev_frame_indices->resize(total_blocks); + } + + basist::bitwise_decoder sym_codec; + + if (!sym_codec.init(pImage_data, image_data_size)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: sym_codec.init failed\n"); + return false; + } + + approx_move_to_front selector_history_buf(m_selector_history_buf_size); + + uint32_t cur_selector_rle_count = 0; + + decoder_etc_block block; + memset(&block, 0, sizeof(block)); + + //block.set_flip_bit(true); + // Setting the flip bit to false to be compatible with the Khronos KDFS. 
+ block.set_flip_bit(false); + + block.set_diff_bit(true); + + // Important: This MUST be freed before this function returns. + void* pPVRTC_work_mem = nullptr; + uint32_t* pPVRTC_endpoints = nullptr; + if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA)) + { + pPVRTC_work_mem = malloc(num_blocks_x * num_blocks_y * (sizeof(decoder_etc_block) + sizeof(uint32_t))); + if (!pPVRTC_work_mem) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: malloc failed\n"); + return false; + } + pPVRTC_endpoints = (uint32_t*)&((decoder_etc_block*)pPVRTC_work_mem)[num_blocks_x * num_blocks_y]; + } + + if (pState->m_block_endpoint_preds[0].size() < num_blocks_x) + { + pState->m_block_endpoint_preds[0].resize(num_blocks_x); + pState->m_block_endpoint_preds[1].resize(num_blocks_x); + } + + uint32_t cur_pred_bits = 0; + int prev_endpoint_pred_sym = 0; + int endpoint_pred_repeat_count = 0; + uint32_t prev_endpoint_index = 0; + const endpoint_vec& endpoints = m_pGlobal_codebook ? m_pGlobal_codebook->m_local_endpoints : m_local_endpoints; + const selector_vec& selectors = m_pGlobal_codebook ? 
m_pGlobal_codebook->m_local_selectors : m_local_selectors; + if (!endpoints.size() || !selectors.size()) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: global codebooks must be unpacked first\n"); + + if (pPVRTC_work_mem) + free(pPVRTC_work_mem); + + return false; + } + + const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = (uint32_t)selectors.size(); + const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = m_selector_history_buf_size + SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX; + +#if BASISD_SUPPORT_BC7_MODE5 + const bool bc7_chroma_filtering = ((decode_flags & cDecodeFlagsNoETC1SChromaFiltering) == 0) && + ((fmt == block_format::cBC7_M5_COLOR) || (fmt == block_format::cBC7)); + + basisu::vector2D decoded_endpoints; + if (bc7_chroma_filtering) + { + if (!decoded_endpoints.try_resize(num_blocks_x, num_blocks_y)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: allocation failed\n"); + + if (pPVRTC_work_mem) + free(pPVRTC_work_mem); + + return false; + } + } +#endif + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + const uint32_t cur_block_endpoint_pred_array = block_y & 1; + + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + // Decode endpoint index predictor symbols + if ((block_x & 1) == 0) + { + if ((block_y & 1) == 0) + { + if (endpoint_pred_repeat_count) + { + endpoint_pred_repeat_count--; + cur_pred_bits = prev_endpoint_pred_sym; + } + else + { + cur_pred_bits = sym_codec.decode_huffman(m_endpoint_pred_model); + if (cur_pred_bits == ENDPOINT_PRED_REPEAT_LAST_SYMBOL) + { + endpoint_pred_repeat_count = sym_codec.decode_vlc(ENDPOINT_PRED_COUNT_VLC_BITS) + ENDPOINT_PRED_MIN_REPEAT_COUNT - 1; + + cur_pred_bits = prev_endpoint_pred_sym; + } + else + { + prev_endpoint_pred_sym = cur_pred_bits; + } + } + + pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x].m_pred_bits = (uint8_t)(cur_pred_bits >> 4); + } + else + { + cur_pred_bits = 
pState->m_block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_pred_bits; + } + } + + // Decode endpoint index + uint32_t endpoint_index, selector_index = 0; + + const uint32_t pred = cur_pred_bits & 3; + cur_pred_bits >>= 2; + + if (pred == 0) + { + // Left + if (!block_x) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (0)\n"); + if (pPVRTC_work_mem) + free(pPVRTC_work_mem); + return false; + } + + endpoint_index = prev_endpoint_index; + } + else if (pred == 1) + { + // Upper + if (!block_y) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (1)\n"); + if (pPVRTC_work_mem) + free(pPVRTC_work_mem); + return false; + } + + endpoint_index = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x].m_endpoint_index; + } + else if (pred == 2) + { + if (is_video) + { + assert(pred == CR_ENDPOINT_PRED_INDEX); + endpoint_index = (*pPrev_frame_indices)[block_x + block_y * num_blocks_x]; + selector_index = endpoint_index >> 16; + endpoint_index &= 0xFFFFU; + } + else + { + // Upper left + if ((!block_x) || (!block_y)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (2)\n"); + if (pPVRTC_work_mem) + free(pPVRTC_work_mem); + return false; + } + + endpoint_index = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x - 1].m_endpoint_index; + } + } + else + { + // Decode and apply delta + const uint32_t delta_sym = sym_codec.decode_huffman(m_delta_endpoint_model); + + endpoint_index = delta_sym + prev_endpoint_index; + if (endpoint_index >= endpoints.size()) + endpoint_index -= (int)endpoints.size(); + } + + pState->m_block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_endpoint_index = (uint16_t)endpoint_index; + + prev_endpoint_index = endpoint_index; + + // Decode selector index + if ((!is_video) || (pred != CR_ENDPOINT_PRED_INDEX)) + { + int selector_sym; + if 
(cur_selector_rle_count > 0) + { + cur_selector_rle_count--; + + selector_sym = (int)selectors.size(); + } + else + { + selector_sym = sym_codec.decode_huffman(m_selector_model); + + if (selector_sym == static_cast(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX)) + { + int run_sym = sym_codec.decode_huffman(m_selector_history_buf_rle_model); + + if (run_sym == (SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1)) + cur_selector_rle_count = sym_codec.decode_vlc(7) + SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH; + else + cur_selector_rle_count = run_sym + SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH; + + if (cur_selector_rle_count > total_blocks) + { + // The file is corrupted or we've got a bug. + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (3)\n"); + if (pPVRTC_work_mem) + free(pPVRTC_work_mem); + return false; + } + + selector_sym = (int)selectors.size(); + + cur_selector_rle_count--; + } + } + + if (selector_sym >= (int)selectors.size()) + { + assert(m_selector_history_buf_size > 0); + + int history_buf_index = selector_sym - (int)selectors.size(); + + if (history_buf_index >= (int)selector_history_buf.size()) + { + // The file is corrupted or we've got a bug. + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (4)\n"); + if (pPVRTC_work_mem) + free(pPVRTC_work_mem); + return false; + } + + selector_index = selector_history_buf[history_buf_index]; + + if (history_buf_index != 0) + selector_history_buf.use(history_buf_index); + } + else + { + selector_index = selector_sym; + + if (m_selector_history_buf_size) + selector_history_buf.add(selector_index); + } + } + + if ((endpoint_index >= endpoints.size()) || (selector_index >= selectors.size())) + { + // The file is corrupted or we've got a bug. 
+ BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (5)\n"); + if (pPVRTC_work_mem) + free(pPVRTC_work_mem); + return false; + } + + if (is_video) + (*pPrev_frame_indices)[block_x + block_y * num_blocks_x] = endpoint_index | (selector_index << 16); + +#if BASISD_ENABLE_DEBUG_FLAGS + if ((g_debug_flags & cDebugFlagVisCRs) && ((fmt == block_format::cETC1) || (fmt == block_format::cBC1))) + { + if ((is_video) && (pred == 2)) + { + decoder_etc_block* pDst_block = reinterpret_cast(static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes); + memset(pDst_block, 0xFF, 8); + continue; + } + } +#endif + + const endpoint* pEndpoints = &endpoints[endpoint_index]; + const selector* pSelector = &selectors[selector_index]; + + switch (fmt) + { + case block_format::cETC1: + { + decoder_etc_block* pDst_block = reinterpret_cast(static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes); + + block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false)); + block.set_inten_table(0, pEndpoints->m_inten5); + block.set_inten_table(1, pEndpoints->m_inten5); + + pDst_block->m_uint32[0] = block.m_uint32[0]; + pDst_block->set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]); + + break; + } + case block_format::cBC1: + { +#if BASISD_SUPPORT_DXT1 + void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; +#if BASISD_ENABLE_DEBUG_FLAGS + if (g_debug_flags & (cDebugFlagVisBC1Sels | cDebugFlagVisBC1Endpoints)) + convert_etc1s_to_dxt1_vis(static_cast(pDst_block), pEndpoints, pSelector, bc1_allow_threecolor_blocks); + else +#endif + convert_etc1s_to_dxt1(static_cast(pDst_block), pEndpoints, pSelector, bc1_allow_threecolor_blocks); +#else + assert(0); +#endif 
+ break; + } + case block_format::cBC4: + { +#if BASISD_SUPPORT_DXT5A + void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; + convert_etc1s_to_dxt5a(static_cast(pDst_block), pEndpoints, pSelector); +#else + assert(0); +#endif + break; + } + case block_format::cPVRTC1_4_RGB: + { +#if BASISD_SUPPORT_PVRTC1 + block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false)); + block.set_inten_table(0, pEndpoints->m_inten5); + block.set_inten_table(1, pEndpoints->m_inten5); + block.set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]); + + ((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block; + + const color32& base_color = pEndpoints->m_color5; + const uint32_t inten_table = pEndpoints->m_inten5; + + const uint32_t low_selector = pSelector->m_lo_selector; + const uint32_t high_selector = pSelector->m_hi_selector; + + // Get block's RGB bounding box + color32 block_colors[2]; + decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector); + + assert(block_colors[0][0] <= block_colors[1][0]); + assert(block_colors[0][1] <= block_colors[1][1]); + assert(block_colors[0][2] <= block_colors[1][2]); + + // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates. 
+ pvrtc4_block temp; + temp.set_opaque_endpoint_floor(0, block_colors[0]); + temp.set_opaque_endpoint_ceil(1, block_colors[1]); + + pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints; +#else + assert(0); +#endif + + break; + } + case block_format::cPVRTC1_4_RGBA: + { +#if BASISD_SUPPORT_PVRTC1 + assert(pAlpha_blocks); + + block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false)); + block.set_inten_table(0, pEndpoints->m_inten5); + block.set_inten_table(1, pEndpoints->m_inten5); + block.set_raw_selector_bits(pSelector->m_selectors[0], pSelector->m_selectors[1], pSelector->m_selectors[2], pSelector->m_selectors[3]); + + ((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block; + + // Get block's RGBA bounding box + const color32& base_color = pEndpoints->m_color5; + const uint32_t inten_table = pEndpoints->m_inten5; + const uint32_t low_selector = pSelector->m_lo_selector; + const uint32_t high_selector = pSelector->m_hi_selector; + color32 block_colors[2]; + decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector); + + assert(block_colors[0][0] <= block_colors[1][0]); + assert(block_colors[0][1] <= block_colors[1][1]); + assert(block_colors[0][2] <= block_colors[1][2]); + + const uint16_t* pAlpha_block = reinterpret_cast(static_cast(pAlpha_blocks) + (block_x + block_y * num_blocks_x) * sizeof(uint32_t)); + + const endpoint* pAlpha_endpoints = &endpoints[pAlpha_block[0]]; + const selector* pAlpha_selector = &selectors[pAlpha_block[1]]; + + const color32& alpha_base_color = pAlpha_endpoints->m_color5; + const uint32_t alpha_inten_table = pAlpha_endpoints->m_inten5; + const uint32_t alpha_low_selector = pAlpha_selector->m_lo_selector; + const uint32_t alpha_high_selector = pAlpha_selector->m_hi_selector; + uint32_t alpha_block_colors[2]; + decoder_etc_block::get_block_colors5_bounds_g(alpha_block_colors, alpha_base_color, alpha_inten_table, 
alpha_low_selector, alpha_high_selector); + assert(alpha_block_colors[0] <= alpha_block_colors[1]); + block_colors[0].a = (uint8_t)alpha_block_colors[0]; + block_colors[1].a = (uint8_t)alpha_block_colors[1]; + + // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates. + pvrtc4_block temp; + temp.set_endpoint_floor(0, block_colors[0]); + temp.set_endpoint_ceil(1, block_colors[1]); + + pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints; +#else + assert(0); +#endif + + break; + } + case block_format::cBC7: // for more consistency with UASTC + case block_format::cBC7_M5_COLOR: + { +#if BASISD_SUPPORT_BC7_MODE5 + if (bc7_chroma_filtering) + { + assert(endpoint_index <= UINT16_MAX); + decoded_endpoints(block_x, block_y) = (uint16_t)endpoint_index; + } + + void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; + convert_etc1s_to_bc7_m5_color(pDst_block, pEndpoints, pSelector); +#else + assert(0); +#endif + break; + } + case block_format::cBC7_M5_ALPHA: + { +#if BASISD_SUPPORT_BC7_MODE5 + void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; + convert_etc1s_to_bc7_m5_alpha(pDst_block, pEndpoints, pSelector); +#else + assert(0); +#endif + break; + } + case block_format::cETC2_EAC_A8: + { +#if BASISD_SUPPORT_ETC2_EAC_A8 + void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; + convert_etc1s_to_etc2_eac_a8(static_cast(pDst_block), pEndpoints, pSelector); +#else + assert(0); +#endif + break; + } + case block_format::cASTC_4x4: + { +#if BASISD_SUPPORT_ASTC + void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; + convert_etc1s_to_astc_4x4(pDst_block, pEndpoints, pSelector, 
transcode_alpha, &endpoints[0], &selectors[0]); +#else + assert(0); +#endif + break; + } + case block_format::cATC_RGB: + { +#if BASISD_SUPPORT_ATC + void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; + convert_etc1s_to_atc(pDst_block, pEndpoints, pSelector); +#else + assert(0); +#endif + break; + } + case block_format::cFXT1_RGB: + { +#if BASISD_SUPPORT_FXT1 + const uint32_t fxt1_block_x = block_x >> 1; + const uint32_t fxt1_block_y = block_y; + const uint32_t fxt1_subblock = block_x & 1; + + void* pDst_block = static_cast(pDst_blocks) + (fxt1_block_x + fxt1_block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; + + convert_etc1s_to_fxt1(pDst_block, pEndpoints, pSelector, fxt1_subblock); +#else + assert(0); +#endif + break; + } + case block_format::cPVRTC2_4_RGB: + { +#if BASISD_SUPPORT_PVRTC2 + void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; + convert_etc1s_to_pvrtc2_rgb(pDst_block, pEndpoints, pSelector); +#endif + break; + } + case block_format::cPVRTC2_4_RGBA: + { +#if BASISD_SUPPORT_PVRTC2 + assert(transcode_alpha); + + void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; + + convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &endpoints[0], &selectors[0]); +#endif + break; + } + case block_format::cIndices: + { + uint16_t* pDst_block = reinterpret_cast(static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes); + pDst_block[0] = static_cast(endpoint_index); + pDst_block[1] = static_cast(selector_index); + break; + } + case block_format::cA32: + { + assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast(pDst_blocks) + 
(block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); + + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); + + int colors[4]; + decoder_etc_block::get_block_colors5_g(colors, pEndpoints->m_color5, pEndpoints->m_inten5); + + if (max_x == 4) + { + for (uint32_t y = 0; y < max_y; y++) + { + const uint32_t s = pSelector->m_selectors[y]; + + pDst_pixels[3] = static_cast(colors[s & 3]); + pDst_pixels[3+4] = static_cast(colors[(s >> 2) & 3]); + pDst_pixels[3+8] = static_cast(colors[(s >> 4) & 3]); + pDst_pixels[3+12] = static_cast(colors[(s >> 6) & 3]); + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t); + } + } + else + { + for (uint32_t y = 0; y < max_y; y++) + { + const uint32_t s = pSelector->m_selectors[y]; + + for (uint32_t x = 0; x < max_x; x++) + pDst_pixels[3 + 4 * x] = static_cast(colors[(s >> (x * 2)) & 3]); + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t); + } + } + + break; + } + case block_format::cRGB32: + { + assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); + + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); + + color32 colors[4]; + decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); + + for (uint32_t y = 0; y < max_y; y++) + { + const uint32_t s = pSelector->m_selectors[y]; + + for (uint32_t x = 0; x < max_x; x++) + { + const color32& c = colors[(s >> (x * 2)) & 3]; + + pDst_pixels[0 + 4 * x] = c.r; + pDst_pixels[1 + 4 * x] = c.g; + pDst_pixels[2 + 4 * x] = c.b; + } + + pDst_pixels += 
output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t); + } + + break; + } + case block_format::cRGBA32: + { + assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); + + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); + + color32 colors[4]; + decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); + + for (uint32_t y = 0; y < max_y; y++) + { + const uint32_t s = pSelector->m_selectors[y]; + + for (uint32_t x = 0; x < max_x; x++) + { + const color32& c = colors[(s >> (x * 2)) & 3]; + + pDst_pixels[0 + 4 * x] = c.r; + pDst_pixels[1 + 4 * x] = c.g; + pDst_pixels[2 + 4 * x] = c.b; + pDst_pixels[3 + 4 * x] = 255; + } + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t); + } + + break; + } + case block_format::cRGB565: + case block_format::cBGR565: + { + assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); + + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); + + color32 colors[4]; + decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); + + uint16_t packed_colors[4]; + if (fmt == block_format::cRGB565) + { + for (uint32_t i = 0; i < 4; i++) + { + packed_colors[i] = static_cast((mul_8(colors[i].r, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].b, 31)); + if (BASISD_IS_BIG_ENDIAN) + packed_colors[i] = byteswap_uint16(packed_colors[i]); + } + } + else + { + for (uint32_t i = 0; i 
< 4; i++) + { + packed_colors[i] = static_cast((mul_8(colors[i].b, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].r, 31)); + if (BASISD_IS_BIG_ENDIAN) + packed_colors[i] = byteswap_uint16(packed_colors[i]); + } + } + + for (uint32_t y = 0; y < max_y; y++) + { + const uint32_t s = pSelector->m_selectors[y]; + + for (uint32_t x = 0; x < max_x; x++) + reinterpret_cast(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3]; + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); + } + + break; + } + case block_format::cRGBA4444_COLOR: + { + assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); + + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); + + color32 colors[4]; + decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); + + uint16_t packed_colors[4]; + for (uint32_t i = 0; i < 4; i++) + { + packed_colors[i] = static_cast((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4)); + } + + for (uint32_t y = 0; y < max_y; y++) + { + const uint32_t s = pSelector->m_selectors[y]; + + for (uint32_t x = 0; x < max_x; x++) + { + uint16_t cur = reinterpret_cast(pDst_pixels)[x]; + if (BASISD_IS_BIG_ENDIAN) + cur = byteswap_uint16(cur); + + cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3]; + + if (BASISD_IS_BIG_ENDIAN) + cur = byteswap_uint16(cur); + + reinterpret_cast(pDst_pixels)[x] = cur; + } + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); + } + + break; + } + case block_format::cRGBA4444_COLOR_OPAQUE: + { + assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + 
block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); + + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); + + color32 colors[4]; + decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); + + uint16_t packed_colors[4]; + for (uint32_t i = 0; i < 4; i++) + { + packed_colors[i] = static_cast((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4) | 0xF); + if (BASISD_IS_BIG_ENDIAN) + packed_colors[i] = byteswap_uint16(packed_colors[i]); + } + + for (uint32_t y = 0; y < max_y; y++) + { + const uint32_t s = pSelector->m_selectors[y]; + + for (uint32_t x = 0; x < max_x; x++) + reinterpret_cast(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3]; + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); + } + + break; + } + case block_format::cRGBA4444_ALPHA: + { + assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); + + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); + + color32 colors[4]; + decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); + + uint16_t packed_colors[4]; + for (uint32_t i = 0; i < 4; i++) + { + packed_colors[i] = mul_8(colors[i].g, 15); + if (BASISD_IS_BIG_ENDIAN) + packed_colors[i] = byteswap_uint16(packed_colors[i]); + } + + for (uint32_t y = 0; y < max_y; y++) + { + const uint32_t s = pSelector->m_selectors[y]; + + for (uint32_t x = 0; x < max_x; x++) + { + reinterpret_cast(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3]; + } + + pDst_pixels += 
output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); + } + + break; + } + case block_format::cETC2_EAC_R11: + { +#if BASISD_SUPPORT_ETC2_EAC_RG11 + void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; + convert_etc1s_to_etc2_eac_r11(static_cast(pDst_block), pEndpoints, pSelector); +#else + assert(0); +#endif + break; + } + default: + { + assert(0); + break; + } + } + + } // block_x + + } // block_y + + if (endpoint_pred_repeat_count != 0) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: endpoint_pred_repeat_count != 0. The file is corrupted or this is a bug\n"); + + if (pPVRTC_work_mem) + free(pPVRTC_work_mem); + + return false; + } + + //assert(endpoint_pred_repeat_count == 0); + +#if BASISD_SUPPORT_PVRTC1 + // PVRTC post process - create per-pixel modulation values. + if (fmt == block_format::cPVRTC1_4_RGB) + fixup_pvrtc1_4_modulation_rgb((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y); + else if (fmt == block_format::cPVRTC1_4_RGBA) + fixup_pvrtc1_4_modulation_rgba((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y, pAlpha_blocks, &endpoints[0], &selectors[0]); +#endif // BASISD_SUPPORT_PVRTC1 + +#if BASISD_SUPPORT_BC7_MODE5 + if (bc7_chroma_filtering) + { + chroma_filter_bc7_mode5(decoded_endpoints, pDst_blocks, num_blocks_x, num_blocks_y, output_row_pitch_in_blocks_or_pixels, &endpoints[0]); + } +#endif + + if (pPVRTC_work_mem) + free(pPVRTC_work_mem); + + return true; + } + + bool basis_validate_output_buffer_size( + basis_tex_format source_format, + transcoder_texture_format target_format, + uint32_t output_blocks_buf_size_in_blocks_or_pixels, + uint32_t orig_width, uint32_t orig_height, + uint32_t output_row_pitch_in_blocks_or_pixels, + uint32_t output_rows_in_pixels) + { + BASISU_NOTE_UNUSED(source_format); + + if 
(basis_transcoder_format_is_uncompressed(target_format)) + { + // Assume the output buffer is orig_width by orig_height + if (!output_row_pitch_in_blocks_or_pixels) + output_row_pitch_in_blocks_or_pixels = orig_width; + + if (!output_rows_in_pixels) + output_rows_in_pixels = orig_height; + + // Now make sure the output buffer is large enough, or we'll overwrite memory. + if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)) + { + BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n"); + return false; + } + } + else + { + const uint32_t dst_block_width = basis_get_block_width(target_format); + const uint32_t dst_block_height = basis_get_block_height(target_format); + //const uint32_t bytes_per_block = basis_get_bytes_per_block_or_pixel(target_format); + + // Take into account the destination format's block width/height. + const uint32_t num_dst_blocks_x = (orig_width + dst_block_width - 1) / dst_block_width; + const uint32_t num_dst_blocks_y = (orig_height + dst_block_height - 1) / dst_block_height; + const uint32_t total_dst_blocks = num_dst_blocks_x * num_dst_blocks_y; + + assert(total_dst_blocks); + + // Note this only computes the # of blocks we will write during transcoding, but for PVRTC1 OpenGL may require more for very small textures. + // basis_compute_transcoded_image_size_in_bytes() may return larger buffers. 
+ if (output_blocks_buf_size_in_blocks_or_pixels < total_dst_blocks) + { + BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels is too small\n"); + return false; + } + } + + return true; + } + + uint32_t basis_compute_transcoded_image_size_in_bytes(transcoder_texture_format target_format, uint32_t orig_width, uint32_t orig_height) + { + assert(orig_width && orig_height); + + const uint32_t dst_block_width = basis_get_block_width(target_format); + const uint32_t dst_block_height = basis_get_block_height(target_format); + + if (basis_transcoder_format_is_uncompressed(target_format)) + { + // Uncompressed formats are just plain raster images. + const uint32_t bytes_per_pixel = basis_get_uncompressed_bytes_per_pixel(target_format); + const uint32_t bytes_per_line = orig_width * bytes_per_pixel; + const uint32_t bytes_per_slice = bytes_per_line * orig_height; + return bytes_per_slice; + } + + // Compressed formats are 2D arrays of blocks. + const uint32_t bytes_per_block = basis_get_bytes_per_block_or_pixel(target_format); + + if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA)) + { + // For PVRTC1, Basis only writes (or requires) total_blocks * bytes_per_block. But GL requires extra padding for very small textures: + // https://www.khronos.org/registry/OpenGL/extensions/IMG/IMG_texture_compression_pvrtc.txt + const uint32_t width = (orig_width + 3) & ~3; + const uint32_t height = (orig_height + 3) & ~3; + const uint32_t size_in_bytes = (std::max(8U, width) * std::max(8U, height) * 4 + 7) / 8; + return size_in_bytes; + } + + // Take into account the destination format's block width/height. 
+ const uint32_t num_dst_blocks_x = (orig_width + dst_block_width - 1) / dst_block_width; + const uint32_t num_dst_blocks_y = (orig_height + dst_block_height - 1) / dst_block_height; + const uint32_t total_dst_blocks = num_dst_blocks_x * num_dst_blocks_y; + + assert(total_dst_blocks); + + return total_dst_blocks * bytes_per_block; + } + + bool basisu_lowlevel_etc1s_transcoder::transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t rgb_offset, uint32_t rgb_length, uint64_t alpha_offset, uint32_t alpha_length, + uint32_t decode_flags, + bool basis_file_has_alpha_slices, + bool is_video, + uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, + uint32_t output_rows_in_pixels) + { + if (((uint64_t)rgb_offset + rgb_length) > (uint64_t)compressed_data_length) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (color)\n"); + return false; + } + + if (alpha_length) + { + if (((uint64_t)alpha_offset + alpha_length) > (uint64_t)compressed_data_length) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (alpha)\n"); + return false; + } + } + else + { + assert(!basis_file_has_alpha_slices); + } + + if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA)) + { + if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4))) + { + // PVRTC1 only supports power of 2 dimensions + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n"); + return false; + } + } + + if ((target_format == 
transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices)) + { + // Switch to PVRTC1 RGB if the input doesn't have alpha. + target_format = transcoder_texture_format::cTFPVRTC1_4_RGB; + } + + const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); + const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; + + if (!basis_validate_output_buffer_size(basis_tex_format::cETC1S, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output buffer size too small\n"); + return false; + } + + bool status = false; + + const uint8_t* pData = pCompressed_data + rgb_offset; + uint32_t data_len = rgb_length; + bool is_alpha_slice = false; + + // If the caller wants us to transcode the mip level's alpha data, then use the next slice. 
+ if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) + { + pData = pCompressed_data + alpha_offset; + data_len = alpha_length; + is_alpha_slice = true; + } + + switch (target_format) + { + case transcoder_texture_format::cTFETC1_RGB: + { + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC1_RGB: + { +#if !BASISD_SUPPORT_DXT1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC1/DXT1 unsupported\n"); + return false; +#else + // status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC1, bytes_per_block_or_pixel, true, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC1 failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFBC4_R: + { +#if !BASISD_SUPPORT_DXT5A + 
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC4/DXT5A unsupported\n"); + return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC4 failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFPVRTC1_4_RGB: + { +#if !BASISD_SUPPORT_PVRTC1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n"); + return false; +#else + // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?) 
+ //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGB failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + { +#if !BASISD_SUPPORT_PVRTC1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n"); + return false; +#else + assert(basis_file_has_alpha_slices); + assert(alpha_length); + + // Temp buffer to hold alpha block endpoint/selector indices + basisu::vector temp_block_indices(total_slice_blocks); + + // First transcode alpha data to temp buffer + //status = transcode_slice(pData, data_size, slice_index + 1, &temp_block_indices[0], total_slice_blocks, block_format::cIndices, sizeof(uint32_t), decode_flags, pSlice_descs[slice_index].m_num_blocks_x, pState); + status = transcode_slice(&temp_block_indices[0], num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, num_blocks_x, pState, false, nullptr, 0, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (0)\n"); + } + else + { + // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?) 
+ //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, &temp_block_indices[0]); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, &temp_block_indices[0], 0, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (1)\n"); + } + } + + break; +#endif + } + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + { +#if !BASISD_SUPPORT_BC7_MODE5 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC7 unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + // We used to support transcoding just alpha to BC7 - but is that useful at all? + + // First transcode the color slice. The cBC7_M5_COLOR transcoder will output opaque mode 5 blocks. + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_COLOR, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC7_M5_COLOR, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + + if ((status) && (basis_file_has_alpha_slices)) + { + // Now transcode the alpha slice. The cBC7_M5_ALPHA transcoder will now change the opaque mode 5 blocks to blocks with alpha. 
+ //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_ALPHA, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC7_M5_ALPHA, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + } + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC7 failed (0)\n"); + } + + break; +#endif + } + case transcoder_texture_format::cTFETC2_RGBA: + { +#if !BASISD_SUPPORT_ETC2_EAC_A8 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ETC2 EAC A8 unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + + if (basis_file_has_alpha_slices) + { + // First decode the alpha data + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_A8, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + } + else + { + //write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels); + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cETC2_EAC_A8, 16, 
output_row_pitch_in_blocks_or_pixels); + status = true; + } + + if (status) + { + // Now decode the color data + //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 A failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFBC3_RGBA: + { +#if !BASISD_SUPPORT_DXT1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT1 unsupported\n"); + return false; +#elif !BASISD_SUPPORT_DXT5A + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + + // First decode the alpha data + if (basis_file_has_alpha_slices) + { + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + } + else + { + 
basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } + + if (status) + { + // Now decode the color data. Forbid 3 color blocks, which aren't allowed in BC3. + //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, 16, decode_flags | cDecodeFlagsBC1ForbidThreeColorBlocks, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 A failed\n"); + } + + break; +#endif + } + case transcoder_texture_format::cTFBC5_RG: + { +#if !BASISD_SUPPORT_DXT5A + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + + //bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + // uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + // basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0); + + // Decode 
the R data (actually the green channel of the color data slice in the basis file) + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (status) + { + if (basis_file_has_alpha_slices) + { + // Decode the G data (actually the green channel of the alpha data slice in the basis file) + //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 1 failed\n"); + } + } + else + { + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 channel 0 failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFASTC_4x4_RGBA: + { +#if !BASISD_SUPPORT_ASTC + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ASTC unsupported\n"); 
+ return false; +#else + assert(bytes_per_block_or_pixel == 16); + + if (basis_file_has_alpha_slices) + { + // First decode the alpha data to the output (we're using the output texture as a temp buffer here). + //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (status) + { + // Now decode the color data and transcode to ASTC. The transcoder function will read the alpha selector data from the output texture as it converts and + // transcode both the alpha and color data at the same time to ASTC. 
+ //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels, decode_flags); + } + } + else + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ASTC failed (0)\n"); + } + + break; +#endif + } + case transcoder_texture_format::cTFATC_RGB: + { +#if !BASISD_SUPPORT_ATC + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n"); + return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, 
output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC_RGB failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFATC_RGBA: + { +#if !BASISD_SUPPORT_ATC + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n"); + return false; +#elif !BASISD_SUPPORT_DXT5A + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + + // First decode the alpha data + if (basis_file_has_alpha_slices) + { + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + } + else + { + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } + + if (status) + { + //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + 
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC A failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFPVRTC2_4_RGB: + { +#if !BASISD_SUPPORT_PVRTC2 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n"); + return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGB failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + { +#if !BASISD_SUPPORT_PVRTC2 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n"); + return false; +#else + if (basis_file_has_alpha_slices) + { + // First decode the alpha data to the output (we're using the output texture as a temp buffer here). 
+ //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to failed\n"); + } + else + { + // Now decode the color data and transcode to PVRTC2 RGBA. + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels, decode_flags); + } + } + else + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + + if (!status) + { + 
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGBA failed\n"); + } + + break; +#endif + } + case transcoder_texture_format::cTFRGBA32: + { + // Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. + + // First decode the alpha data + if (basis_file_has_alpha_slices) + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cA32, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + else + status = true; + + if (status) + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? 
block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 A failed\n"); + } + + break; + } + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + { + // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. + + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (fmt == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, (target_format == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGB565 RGB failed\n"); + } + + break; + } + case transcoder_texture_format::cTFRGBA4444: + { + // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. 
+ + // First decode the alpha data + if (basis_file_has_alpha_slices) + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + else + status = true; + + if (status) + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? 
block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 A failed\n"); + } + + break; + } + case transcoder_texture_format::cTFFXT1_RGB: + { +#if !BASISD_SUPPORT_FXT1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: FXT1 unsupported\n"); + return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cFXT1_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cFXT1_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to FXT1_RGB failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFETC2_EAC_R11: + { +#if !BASISD_SUPPORT_ETC2_EAC_RG11 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n"); + return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, 
block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 failed\n"); + } + + break; +#endif + } + case transcoder_texture_format::cTFETC2_EAC_RG11: + { +#if !BASISD_SUPPORT_ETC2_EAC_RG11 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + + if (basis_file_has_alpha_slices) + { + // First decode the alpha data to G + //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + } + else + { + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cETC2_EAC_R11, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } + + if (status) + { + // Now decode the color data to R + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, false, level_index, 
orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 R failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 G failed\n"); + } + + break; +#endif + } + default: + { + assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: Invalid fmt\n"); + break; + } + } + + return status; + } + + //------------------------------------------------------------------------------------------------ + + basisu_lowlevel_uastc_ldr_4x4_transcoder::basisu_lowlevel_uastc_ldr_4x4_transcoder() + { + } + + bool basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice( + void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, + const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags) + { + BASISU_NOTE_UNUSED(pState); + BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks); + + assert(g_transcoder_initialized); + if (!g_transcoder_initialized) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice: Transcoder not globally initialized.\n"); + return false; + } + +#if BASISD_SUPPORT_UASTC + const uint32_t total_blocks = num_blocks_x * num_blocks_y; + + if (!output_row_pitch_in_blocks_or_pixels) + { + if (basis_block_format_is_uncompressed(fmt)) + output_row_pitch_in_blocks_or_pixels = orig_width; + else + { + if (fmt == block_format::cFXT1_RGB) + output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8; + else + 
output_row_pitch_in_blocks_or_pixels = num_blocks_x; + } + } + + if (basis_block_format_is_uncompressed(fmt)) + { + if (!output_rows_in_pixels) + output_rows_in_pixels = orig_height; + } + + uint32_t total_expected_block_bytes = sizeof(uastc_block) * total_blocks; + if (image_data_size < total_expected_block_bytes) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n"); + return false; + } + + const uastc_block* pSource_block = reinterpret_cast<const uastc_block*>(pImage_data); + + const bool high_quality = (decode_flags & cDecodeFlagsHighQuality) != 0; + const bool from_alpha = has_alpha && (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; + + bool status = false; + if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA)) + { + if (fmt == block_format::cPVRTC1_4_RGBA) + transcode_uastc_to_pvrtc1_4_rgba((const uastc_block*)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality); + else + transcode_uastc_to_pvrtc1_4_rgb((const uastc_block *)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality, from_alpha); + } + else + { + for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y) + { + void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; + + for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t *)pDst_block + output_block_or_pixel_stride_in_bytes) + { + switch (fmt) + { + case block_format::cUASTC_4x4: + { + memcpy(pDst_block, pSource_block, sizeof(uastc_block)); + status = true; + break; + } + case block_format::cETC1: + { + if (from_alpha) + status = transcode_uastc_to_etc1(*pSource_block, pDst_block, 3); + else + status = transcode_uastc_to_etc1(*pSource_block, pDst_block); + break; + } + case block_format::cETC2_RGBA: + { + status = 
transcode_uastc_to_etc2_rgba(*pSource_block, pDst_block); + break; + } + case block_format::cBC1: + { + status = transcode_uastc_to_bc1(*pSource_block, pDst_block, high_quality); + break; + } + case block_format::cBC3: + { + status = transcode_uastc_to_bc3(*pSource_block, pDst_block, high_quality); + break; + } + case block_format::cBC4: + { + if (channel0 < 0) + channel0 = 0; + status = transcode_uastc_to_bc4(*pSource_block, pDst_block, high_quality, channel0); + break; + } + case block_format::cBC5: + { + if (channel0 < 0) + channel0 = 0; + if (channel1 < 0) + channel1 = 3; + status = transcode_uastc_to_bc5(*pSource_block, pDst_block, high_quality, channel0, channel1); + break; + } + case block_format::cBC7: + case block_format::cBC7_M5_COLOR: // for consistency with ETC1S + { + status = transcode_uastc_to_bc7(*pSource_block, pDst_block); + break; + } + case block_format::cASTC_4x4: + { + status = transcode_uastc_to_astc(*pSource_block, pDst_block); + break; + } + case block_format::cETC2_EAC_R11: + { + if (channel0 < 0) + channel0 = 0; + status = transcode_uastc_to_etc2_eac_r11(*pSource_block, pDst_block, high_quality, channel0); + break; + } + case block_format::cETC2_EAC_RG11: + { + if (channel0 < 0) + channel0 = 0; + if (channel1 < 0) + channel1 = 3; + status = transcode_uastc_to_etc2_eac_rg11(*pSource_block, pDst_block, high_quality, channel0, channel1); + break; + } + case block_format::cRGBA32: + { + color32 block_pixels[4][4]; + status = unpack_uastc(*pSource_block, (color32 *)block_pixels, false); + + assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); + + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + for 
(uint32_t x = 0; x < max_x; x++) + { + const color32& c = block_pixels[y][x]; + + pDst_pixels[0 + 4 * x] = c.r; + pDst_pixels[1 + 4 * x] = c.g; + pDst_pixels[2 + 4 * x] = c.b; + pDst_pixels[3 + 4 * x] = c.a; + } + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t); + } + + break; + } + case block_format::cRGB565: + case block_format::cBGR565: + { + color32 block_pixels[4][4]; + status = unpack_uastc(*pSource_block, (color32*)block_pixels, false); + + assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); + + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + const color32& c = block_pixels[y][x]; + + const uint16_t packed = (fmt == block_format::cRGB565) ? 
static_cast<uint16_t>((mul_8(c.r, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.b, 31)) : + static_cast<uint16_t>((mul_8(c.b, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.r, 31)); + + pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF); + pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF); + } + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); + } + + break; + } + case block_format::cRGBA4444: + { + color32 block_pixels[4][4]; + status = unpack_uastc(*pSource_block, (color32*)block_pixels, false); + + assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); + + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + const color32& c = block_pixels[y][x]; + + const uint16_t packed = static_cast<uint16_t>((mul_8(c.r, 15) << 12) | (mul_8(c.g, 15) << 8) | (mul_8(c.b, 15) << 4) | mul_8(c.a, 15)); + + pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF); + pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF); + } + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); + } + break; + } + default: + assert(0); + break; + + } + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice: Transcoder failed to unpack a UASTC block - this is a bug, or the data was corrupted\n"); + return false; + } + + } // block_x + + } // block_y + } + + return true; +#else + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice: UASTC is unsupported\n"); + + BASISU_NOTE_UNUSED(decode_flags); + BASISU_NOTE_UNUSED(channel0); + BASISU_NOTE_UNUSED(channel1); + BASISU_NOTE_UNUSED(output_rows_in_pixels); + 
BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels); + BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes); + BASISU_NOTE_UNUSED(fmt); + BASISU_NOTE_UNUSED(image_data_size); + BASISU_NOTE_UNUSED(pImage_data); + BASISU_NOTE_UNUSED(num_blocks_x); + BASISU_NOTE_UNUSED(num_blocks_y); + BASISU_NOTE_UNUSED(pDst_blocks); + + return false; +#endif + } + + bool basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags, + bool has_alpha, + bool is_video, + uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, + uint32_t output_rows_in_pixels, + int channel0, int channel1) + { + BASISU_NOTE_UNUSED(is_video); + BASISU_NOTE_UNUSED(level_index); + + if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: source data buffer too small\n"); + return false; + } + + if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA)) + { + if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4))) + { + // PVRTC1 only supports power of 2 dimensions + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n"); + return false; + } + } + + if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!has_alpha)) + { + // Switch to PVRTC1 RGB if the input doesn't have alpha. 
+ target_format = transcoder_texture_format::cTFPVRTC1_4_RGB; + } + + const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); + //const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; + + if (!basis_validate_output_buffer_size(basis_tex_format::cUASTC4x4, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: output buffer size too small\n"); + return false; + } + + bool status = false; + + // UASTC4x4 + switch (target_format) + { + case transcoder_texture_format::cTFETC1_RGB: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC1, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); + } + break; + } + case transcoder_texture_format::cTFETC2_RGBA: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_RGBA, + bytes_per_block_or_pixel, false, 
has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to ETC2 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC1_RGB: + { + // TODO: ETC1S allows BC1 from alpha channel. That doesn't seem actually useful, though. + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC1, + bytes_per_block_or_pixel, true, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC1 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC3_RGBA: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC3, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC3, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC3 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC4_R: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, 
output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + // nullptr, 0, + // ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC4, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC4 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC5_RG: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC5, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + // nullptr, 0, + // 0, 3); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC5, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + 0, 3, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC5 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC7, + 
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC7 failed\n"); + } + break; + } + case transcoder_texture_format::cTFPVRTC1_4_RGB: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGB, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to PVRTC1 RGB 4bpp failed\n"); + } + break; + } + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGBA, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to PVRTC1 RGBA 4bpp failed\n"); + } + break; + } + case transcoder_texture_format::cTFASTC_4x4_RGBA: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, 
output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_4x4, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to ASTC 4x4 failed\n"); + } + break; + } + case transcoder_texture_format::cTFATC_RGB: + case transcoder_texture_format::cTFATC_RGBA: + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->ATC currently unsupported\n"); + return false; + } + case transcoder_texture_format::cTFFXT1_RGB: + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->FXT1 currently unsupported\n"); + return false; + } + case transcoder_texture_format::cTFPVRTC2_4_RGB: + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n"); + return false; + } + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n"); + return false; + } + case transcoder_texture_format::cTFETC2_EAC_R11: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + // nullptr, 0, + // ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 
3 : 0); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_R11, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to EAC R11 failed\n"); + } + break; + } + case transcoder_texture_format::cTFETC2_EAC_RG11: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_RG11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + // nullptr, 0, + // 0, 3); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_RG11, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + 0, 3, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_basisu_lowlevel_uastc_ldr_4x4_transcodertranscoder::transcode_image: transcode_slice() to EAC RG11 failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGBA32: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA32, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA32, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + 
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to RGBA32 failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGB565: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGB565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB565, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to RGB565 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBGR565: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBGR565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBGR565, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to RGB565 failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGBA4444: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + 
slice_offset, slice_length, block_format::cRGBA4444, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to RGBA4444 failed\n"); + } + break; + } + default: + { + assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: Invalid format\n"); + break; + } + } + + return status; + } + + //------------------------------------------------------------------------------------------------ + // UASTC HDR 4x4 + + basisu_lowlevel_uastc_hdr_4x4_transcoder::basisu_lowlevel_uastc_hdr_4x4_transcoder() + { + } + + bool basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice( + void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, + const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags) + { + BASISU_NOTE_UNUSED(pState); + BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks); + BASISU_NOTE_UNUSED(has_alpha); + BASISU_NOTE_UNUSED(channel0); + BASISU_NOTE_UNUSED(channel1); + BASISU_NOTE_UNUSED(decode_flags); + BASISU_NOTE_UNUSED(orig_width); + BASISU_NOTE_UNUSED(orig_height); + + assert(g_transcoder_initialized); + if (!g_transcoder_initialized) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: Transcoder not globally initialized.\n"); + return false; + } + +#if BASISD_SUPPORT_UASTC_HDR + const uint32_t total_blocks = num_blocks_x * num_blocks_y; + + if (!output_row_pitch_in_blocks_or_pixels) + { + if (basis_block_format_is_uncompressed(fmt)) + 
output_row_pitch_in_blocks_or_pixels = orig_width; + else + output_row_pitch_in_blocks_or_pixels = num_blocks_x; + } + + if (basis_block_format_is_uncompressed(fmt)) + { + if (!output_rows_in_pixels) + output_rows_in_pixels = orig_height; + } + + uint32_t total_expected_block_bytes = sizeof(astc_blk) * total_blocks; + if (image_data_size < total_expected_block_bytes) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n"); + return false; + } + + const astc_blk* pSource_block = reinterpret_cast(pImage_data); + + bool status = false; + + // TODO: Optimize pure memcpy() case. + + for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y) + { + void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; + + for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes) + { + switch (fmt) + { + case block_format::cUASTC_HDR_4x4: + case block_format::cASTC_HDR_4x4: + { + // Nothing to do, UASTC HDR 4x4 is just ASTC. 
+ memcpy(pDst_block, pSource_block, sizeof(uastc_block)); + status = true; + break; + } + case block_format::cBC6H: + { + status = astc_hdr_transcode_to_bc6h(*pSource_block, *(bc6h_block *)pDst_block); + break; + } + case block_format::cRGB_9E5: + { + astc_helpers::log_astc_block log_blk; + status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4); + if (status) + { + uint32_t* pDst_pixels = reinterpret_cast( + static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t) + ); + + uint32_t blk_texels[4][4]; + + status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeRGB9E5); + + if (status) + { + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x); + + pDst_pixels += output_row_pitch_in_blocks_or_pixels; + } // y + } + } + + break; + } + case block_format::cRGBA_HALF: + { + astc_helpers::log_astc_block log_blk; + status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4); + if (status) + { + half_float* pDst_pixels = reinterpret_cast( + static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4 + ); + + half_float blk_texels[4][4][4]; + status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16); + + if (status) + { + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + pDst_pixels[0 + 4 * x] = blk_texels[y][x][0]; + pDst_pixels[1 + 4 * x] = blk_texels[y][x][1]; + pDst_pixels[2 + 4 * x] = 
blk_texels[y][x][2]; + pDst_pixels[3 + 4 * x] = blk_texels[y][x][3]; + } // x + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4; + } // y + } + } + + break; + } + case block_format::cRGB_HALF: + { + astc_helpers:: log_astc_block log_blk; + status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4); + if (status) + { + half_float* pDst_pixels = + reinterpret_cast(static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3); + + half_float blk_texels[4][4][4]; + status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16); + if (status) + { + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + pDst_pixels[0 + 3 * x] = blk_texels[y][x][0]; + pDst_pixels[1 + 3 * x] = blk_texels[y][x][1]; + pDst_pixels[2 + 3 * x] = blk_texels[y][x][2]; + } // x + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3; + } // y + } + } + + break; + } + default: + assert(0); + break; + + } + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: Transcoder failed to unpack a UASTC HDR block - this is a bug, or the data was corrupted\n"); + return false; + } + + } // block_x + + } // block_y + + return true; +#else + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: UASTC_HDR is unsupported\n"); + + BASISU_NOTE_UNUSED(decode_flags); + BASISU_NOTE_UNUSED(channel0); + BASISU_NOTE_UNUSED(channel1); + BASISU_NOTE_UNUSED(output_rows_in_pixels); + BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels); + BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes); + BASISU_NOTE_UNUSED(fmt); + BASISU_NOTE_UNUSED(image_data_size); + BASISU_NOTE_UNUSED(pImage_data); + 
BASISU_NOTE_UNUSED(num_blocks_x); + BASISU_NOTE_UNUSED(num_blocks_y); + BASISU_NOTE_UNUSED(pDst_blocks); + + return false; +#endif + } + + bool basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags, + bool has_alpha, + bool is_video, + uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, + uint32_t output_rows_in_pixels, + int channel0, int channel1) + { + BASISU_NOTE_UNUSED(is_video); + BASISU_NOTE_UNUSED(level_index); + BASISU_NOTE_UNUSED(decode_flags); + + if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: source data buffer too small\n"); + return false; + } + + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); + //const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; + + if (!basis_validate_output_buffer_size(basis_tex_format::cUASTC_HDR_4x4, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: output buffer size too small\n"); + return false; + } + + bool status = false; + + switch (target_format) + { + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_4x4, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, 
output_rows_in_pixels, channel0, channel1, decode_flags); + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC6H: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to BC6H failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGB_HALF: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGBA_HALF: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGB_9E5: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5, + 
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n"); + } + break; + } + default: + { + assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: Invalid format\n"); + break; + } + } + + return status; + } + + //------------------------------------------------------------------------------------------------ + // ASTC 6x6 HDR + + basisu_lowlevel_astc_hdr_6x6_transcoder::basisu_lowlevel_astc_hdr_6x6_transcoder() + { + } + + // num_blocks_x/num_blocks_y are source 6x6 blocks + bool basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice( + void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, + const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags) + { + BASISU_NOTE_UNUSED(pState); + BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks); + BASISU_NOTE_UNUSED(has_alpha); + BASISU_NOTE_UNUSED(channel0); + BASISU_NOTE_UNUSED(channel1); + BASISU_NOTE_UNUSED(decode_flags); + BASISU_NOTE_UNUSED(orig_width); + BASISU_NOTE_UNUSED(orig_height); + + assert(g_transcoder_initialized); + if (!g_transcoder_initialized) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder not globally initialized.\n"); + return false; + } + +#if BASISD_SUPPORT_UASTC_HDR + const uint32_t total_src_blocks = num_blocks_x * num_blocks_y; + + const uint32_t output_block_width = get_block_width(fmt); + //const uint32_t 
output_block_height = get_block_height(fmt); + + if (!output_row_pitch_in_blocks_or_pixels) + { + if (basis_block_format_is_uncompressed(fmt)) + output_row_pitch_in_blocks_or_pixels = orig_width; + else + output_row_pitch_in_blocks_or_pixels = (orig_width + output_block_width - 1) / output_block_width; + } + + if (basis_block_format_is_uncompressed(fmt)) + { + if (!output_rows_in_pixels) + output_rows_in_pixels = orig_height; + } + + uint32_t total_expected_block_bytes = sizeof(astc_blk) * total_src_blocks; + if (image_data_size < total_expected_block_bytes) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n"); + return false; + } + + const astc_blk* pSource_block = reinterpret_cast(pImage_data); + + bool status = false; + + half_float unpacked_blocks[12][12][3]; // [y][x][c] + + assert(((orig_width + 5) / 6) == num_blocks_x); + assert(((orig_height + 5) / 6) == num_blocks_y); + + if (fmt == block_format::cBC6H) + { + const uint32_t num_dst_blocks_x = (orig_width + 3) / 4; + const uint32_t num_dst_blocks_y = (orig_height + 3) / 4; + + if (!output_row_pitch_in_blocks_or_pixels) + { + output_row_pitch_in_blocks_or_pixels = num_dst_blocks_x; + } + else if (output_row_pitch_in_blocks_or_pixels < num_dst_blocks_x) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: output_row_pitch_in_blocks_or_pixels is too low\n"); + return false; + } + + if (output_block_or_pixel_stride_in_bytes != sizeof(bc6h_block)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: invalid output_block_or_pixel_stride_in_bytes\n"); + return false; + } + + fast_bc6h_params bc6h_enc_params; + const bool hq_flag = (decode_flags & cDecodeFlagsHighQuality) != 0; + bc6h_enc_params.m_max_2subset_pats_to_try = hq_flag ? 
1 : 0; + + for (uint32_t src_block_y = 0; src_block_y < num_blocks_y; src_block_y += 2) + { + const uint32_t num_inner_blocks_y = basisu::minimum(2, num_blocks_y - src_block_y); + + for (uint32_t src_block_x = 0; src_block_x < num_blocks_x; src_block_x += 2) + { + const uint32_t num_inner_blocks_x = basisu::minimum(2, num_blocks_x - src_block_x); + + for (uint32_t iy = 0; iy < num_inner_blocks_y; iy++) + { + for (uint32_t ix = 0; ix < num_inner_blocks_x; ix++) + { + const astc_blk* pS = pSource_block + (src_block_y + iy) * num_blocks_x + (src_block_x + ix); + + half_float blk_texels[6][6][4]; + + astc_helpers::log_astc_block log_blk; + status = astc_helpers::unpack_block(pS, log_blk, 6, 6); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); + return false; + } + + status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); + return false; + } + + for (uint32_t y = 0; y < 6; y++) + { + for (uint32_t x = 0; x < 6; x++) + { + unpacked_blocks[iy * 6 + y][ix * 6 + x][0] = blk_texels[y][x][0]; + unpacked_blocks[iy * 6 + y][ix * 6 + x][1] = blk_texels[y][x][1]; + unpacked_blocks[iy * 6 + y][ix * 6 + x][2] = blk_texels[y][x][2]; + + } // x + } // y + + } // ix + + } // iy + + const uint32_t dst_x = src_block_x * 6; + assert((dst_x & 3) == 0); + const uint32_t dst_block_x = dst_x >> 2; + + const uint32_t dst_y = src_block_y * 6; + assert((dst_y & 3) == 0); + const uint32_t dst_block_y = dst_y >> 2; + + const uint32_t num_inner_dst_blocks_x = basisu::minimum(3, num_dst_blocks_x - dst_block_x); + const uint32_t num_inner_dst_blocks_y = basisu::minimum(3, num_dst_blocks_y - dst_block_y); + + for (uint32_t dy = 0; 
dy < num_inner_dst_blocks_y; dy++) + { + for (uint32_t dx = 0; dx < num_inner_dst_blocks_x; dx++) + { + bc6h_block* pDst_block = (bc6h_block*)pDst_blocks + (dst_block_x + dx) + (dst_block_y + dy) * output_row_pitch_in_blocks_or_pixels; + + half_float src_pixels[4][4][3]; // [y][x][c] + + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t src_pixel_y = basisu::minimum(dy * 4 + y, num_inner_blocks_y * 6 - 1); + + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t src_pixel_x = basisu::minimum(dx * 4 + x, num_inner_blocks_x * 6 - 1); + + assert((src_pixel_y < 12) && (src_pixel_x < 12)); + + src_pixels[y][x][0] = unpacked_blocks[src_pixel_y][src_pixel_x][0]; + src_pixels[y][x][1] = unpacked_blocks[src_pixel_y][src_pixel_x][1]; + src_pixels[y][x][2] = unpacked_blocks[src_pixel_y][src_pixel_x][2]; + + } // x + } // y + + astc_6x6_hdr::fast_encode_bc6h(&src_pixels[0][0][0], pDst_block, bc6h_enc_params); + + } // dx + } // dy + + } // block_x + + } // block_y + + status = true; + } + else + { + for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y) + { + void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; + + for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes) + { + switch (fmt) + { + case block_format::cASTC_HDR_6x6: + { + // Nothing to do, ASTC HDR 6x6 is just ASTC. 
+ // TODO: Optimize this copy + memcpy(pDst_block, pSource_block, sizeof(astc_helpers::astc_block)); + status = true; + break; + } + case block_format::cRGB_9E5: + { + astc_helpers::log_astc_block log_blk; + status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6); + if (status) + { + uint32_t* pDst_pixels = reinterpret_cast( + static_cast(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t) + ); + + uint32_t blk_texels[6][6]; + + status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeRGB9E5); + + if (status) + { + const uint32_t max_x = basisu::minimum(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6); + const uint32_t max_y = basisu::minimum(6, (int)output_rows_in_pixels - (int)block_y * 6); + + for (uint32_t y = 0; y < max_y; y++) + { + memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x); + + pDst_pixels += output_row_pitch_in_blocks_or_pixels; + } // y + } + } + + break; + } + case block_format::cRGBA_HALF: + { + astc_helpers::log_astc_block log_blk; + status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6); + if (status) + { + half_float* pDst_pixels = reinterpret_cast( + static_cast(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4 + ); + + half_float blk_texels[6][6][4]; + status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16); + + if (status) + { + const uint32_t max_x = basisu::minimum(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6); + const uint32_t max_y = basisu::minimum(6, (int)output_rows_in_pixels - (int)block_y * 6); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + pDst_pixels[0 + 4 * x] = blk_texels[y][x][0]; + pDst_pixels[1 + 4 * x] = blk_texels[y][x][1]; + pDst_pixels[2 + 4 * x] = blk_texels[y][x][2]; + pDst_pixels[3 + 4 * x] = blk_texels[y][x][3]; + } // x + + 
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4; + } // y + } + } + + break; + } + case block_format::cRGB_HALF: + { + astc_helpers::log_astc_block log_blk; + status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6); + if (status) + { + half_float* pDst_pixels = + reinterpret_cast(static_cast(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3); + + half_float blk_texels[6][6][4]; + status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16); + if (status) + { + const uint32_t max_x = basisu::minimum(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6); + const uint32_t max_y = basisu::minimum(6, (int)output_rows_in_pixels - (int)block_y * 6); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + pDst_pixels[0 + 3 * x] = blk_texels[y][x][0]; + pDst_pixels[1 + 3 * x] = blk_texels[y][x][1]; + pDst_pixels[2 + 3 * x] = blk_texels[y][x][2]; + } // x + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3; + } // y + } + } + + break; + } + default: + assert(0); + break; + + } + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); + return false; + } + + } // block_x + + } // block_y + } + + return true; +#else + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: ASTC HDR is unsupported\n"); + + BASISU_NOTE_UNUSED(decode_flags); + BASISU_NOTE_UNUSED(channel0); + BASISU_NOTE_UNUSED(channel1); + BASISU_NOTE_UNUSED(output_rows_in_pixels); + BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels); + BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes); + BASISU_NOTE_UNUSED(fmt); + BASISU_NOTE_UNUSED(image_data_size); + BASISU_NOTE_UNUSED(pImage_data); + BASISU_NOTE_UNUSED(num_blocks_x); + BASISU_NOTE_UNUSED(num_blocks_y); + 
BASISU_NOTE_UNUSED(pDst_blocks); + + return false; +#endif + } + + bool basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags, + bool has_alpha, + bool is_video, + uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, + uint32_t output_rows_in_pixels, + int channel0, int channel1) + { + BASISU_NOTE_UNUSED(is_video); + BASISU_NOTE_UNUSED(level_index); + BASISU_NOTE_UNUSED(decode_flags); + + if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: source data buffer too small\n"); + return false; + } + + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); + //const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; + + if (!basis_validate_output_buffer_size(basis_tex_format::cASTC_HDR_6x6, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: output buffer size too small\n"); + return false; + } + + bool status = false; + + switch (target_format) + { + case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_6x6, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + + if (!status) + 
{ + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC6H: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to BC6H failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGB_HALF: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGBA_HALF: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGB_9E5: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, 
output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n"); + } + break; + } + default: + { + assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: Invalid format\n"); + break; + } + } + + return status; + } + + //------------------------------------------------------------------------------------------------ + // ASTC 6x6 HDR intermediate + + basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder() + { + } + + // num_blocks_x/num_blocks_y are source 6x6 blocks + bool basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice( + void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, + const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags) + { + BASISU_NOTE_UNUSED(pState); + BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks); + BASISU_NOTE_UNUSED(has_alpha); + BASISU_NOTE_UNUSED(channel0); + BASISU_NOTE_UNUSED(channel1); + BASISU_NOTE_UNUSED(decode_flags); + BASISU_NOTE_UNUSED(orig_width); + BASISU_NOTE_UNUSED(orig_height); + + assert(g_transcoder_initialized); + if (!g_transcoder_initialized) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder not globally initialized.\n"); + return false; + } + +#if BASISD_SUPPORT_UASTC_HDR + + // TODO: Optimize this + + basisu::vector2D decoded_blocks; + uint32_t dec_width = 0, dec_height = 0; + bool dec_status = astc_6x6_hdr::decode_6x6_hdr(pImage_data, 
image_data_size, decoded_blocks, dec_width, dec_height); + if (!dec_status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: decode_6x6_hdr() failed.\n"); + return false; + } + + if ((dec_width != orig_width) || (dec_height != orig_height) || + (decoded_blocks.get_width() != num_blocks_x) || (decoded_blocks.get_height() != num_blocks_y)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: unexpected decoded width/height\n"); + return false; + } + + //const uint32_t total_src_blocks = num_blocks_x * num_blocks_y; + + const uint32_t output_block_width = get_block_width(fmt); + //const uint32_t output_block_height = get_block_height(fmt); + + if (!output_row_pitch_in_blocks_or_pixels) + { + if (basis_block_format_is_uncompressed(fmt)) + output_row_pitch_in_blocks_or_pixels = orig_width; + else + output_row_pitch_in_blocks_or_pixels = (orig_width + output_block_width - 1) / output_block_width; + } + + if (basis_block_format_is_uncompressed(fmt)) + { + if (!output_rows_in_pixels) + output_rows_in_pixels = orig_height; + } + + const astc_blk* pSource_block = (const astc_blk *)decoded_blocks.get_ptr(); + + bool status = false; + + half_float unpacked_blocks[12][12][3]; // [y][x][c] + + assert(((orig_width + 5) / 6) == num_blocks_x); + assert(((orig_height + 5) / 6) == num_blocks_y); + + if (fmt == block_format::cBC6H) + { + const uint32_t num_dst_blocks_x = (orig_width + 3) / 4; + const uint32_t num_dst_blocks_y = (orig_height + 3) / 4; + + if (!output_row_pitch_in_blocks_or_pixels) + { + output_row_pitch_in_blocks_or_pixels = num_dst_blocks_x; + } + else if (output_row_pitch_in_blocks_or_pixels < num_dst_blocks_x) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: output_row_pitch_in_blocks_or_pixels is too low\n"); + return false; + } + + if (output_block_or_pixel_stride_in_bytes != sizeof(bc6h_block)) + { + 
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: invalid output_block_or_pixel_stride_in_bytes\n"); + return false; + } + + fast_bc6h_params bc6h_enc_params; + const bool hq_flag = (decode_flags & cDecodeFlagsHighQuality) != 0; + bc6h_enc_params.m_max_2subset_pats_to_try = hq_flag ? 1 : 0; + + for (uint32_t src_block_y = 0; src_block_y < num_blocks_y; src_block_y += 2) + { + const uint32_t num_inner_blocks_y = basisu::minimum(2, num_blocks_y - src_block_y); + + for (uint32_t src_block_x = 0; src_block_x < num_blocks_x; src_block_x += 2) + { + const uint32_t num_inner_blocks_x = basisu::minimum(2, num_blocks_x - src_block_x); + + for (uint32_t iy = 0; iy < num_inner_blocks_y; iy++) + { + for (uint32_t ix = 0; ix < num_inner_blocks_x; ix++) + { + const astc_blk* pS = pSource_block + (src_block_y + iy) * num_blocks_x + (src_block_x + ix); + + half_float blk_texels[6][6][4]; + + astc_helpers::log_astc_block log_blk; + status = astc_helpers::unpack_block(pS, log_blk, 6, 6); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); + return false; + } + + status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); + return false; + } + + for (uint32_t y = 0; y < 6; y++) + { + for (uint32_t x = 0; x < 6; x++) + { + unpacked_blocks[iy * 6 + y][ix * 6 + x][0] = blk_texels[y][x][0]; + unpacked_blocks[iy * 6 + y][ix * 6 + x][1] = blk_texels[y][x][1]; + unpacked_blocks[iy * 6 + y][ix * 6 + x][2] = blk_texels[y][x][2]; + } // x + } // y + + } // ix + + } // iy + + const uint32_t dst_x = src_block_x * 6; + assert((dst_x & 3) == 0); + const uint32_t 
dst_block_x = dst_x >> 2; + + const uint32_t dst_y = src_block_y * 6; + assert((dst_y & 3) == 0); + const uint32_t dst_block_y = dst_y >> 2; + + const uint32_t num_inner_dst_blocks_x = basisu::minimum(3, num_dst_blocks_x - dst_block_x); + const uint32_t num_inner_dst_blocks_y = basisu::minimum(3, num_dst_blocks_y - dst_block_y); + + for (uint32_t dy = 0; dy < num_inner_dst_blocks_y; dy++) + { + for (uint32_t dx = 0; dx < num_inner_dst_blocks_x; dx++) + { + bc6h_block* pDst_block = (bc6h_block*)pDst_blocks + (dst_block_x + dx) + (dst_block_y + dy) * output_row_pitch_in_blocks_or_pixels; + + half_float src_pixels[4][4][3]; // [y][x][c] + + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t src_pixel_y = basisu::minimum(dy * 4 + y, num_inner_blocks_y * 6 - 1); + + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t src_pixel_x = basisu::minimum(dx * 4 + x, num_inner_blocks_x * 6 - 1); + + assert((src_pixel_y < 12) && (src_pixel_x < 12)); + + src_pixels[y][x][0] = unpacked_blocks[src_pixel_y][src_pixel_x][0]; + src_pixels[y][x][1] = unpacked_blocks[src_pixel_y][src_pixel_x][1]; + src_pixels[y][x][2] = unpacked_blocks[src_pixel_y][src_pixel_x][2]; + + } // x + } // y + + astc_6x6_hdr::fast_encode_bc6h(&src_pixels[0][0][0], pDst_block, bc6h_enc_params); + + } // dx + } // dy + + } // block_x + + } // block_y + + status = true; + } + else + { + for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y) + { + void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; + + for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes) + { + switch (fmt) + { + case block_format::cASTC_HDR_6x6: + { + // Nothing to do, ASTC HDR 6x6 is just ASTC. 
+ // TODO: Optimize this copy + memcpy(pDst_block, pSource_block, sizeof(astc_helpers::astc_block)); + status = true; + break; + } + case block_format::cRGB_9E5: + { + astc_helpers::log_astc_block log_blk; + status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6); + if (status) + { + uint32_t* pDst_pixels = reinterpret_cast( + static_cast(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t) + ); + + uint32_t blk_texels[6][6]; + + status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeRGB9E5); + + if (status) + { + const uint32_t max_x = basisu::minimum(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6); + const uint32_t max_y = basisu::minimum(6, (int)output_rows_in_pixels - (int)block_y * 6); + + for (uint32_t y = 0; y < max_y; y++) + { + memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x); + + pDst_pixels += output_row_pitch_in_blocks_or_pixels; + } // y + } + } + + break; + } + case block_format::cRGBA_HALF: + { + astc_helpers::log_astc_block log_blk; + status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6); + if (status) + { + half_float* pDst_pixels = reinterpret_cast( + static_cast(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4 + ); + + half_float blk_texels[6][6][4]; + status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16); + + if (status) + { + const uint32_t max_x = basisu::minimum(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6); + const uint32_t max_y = basisu::minimum(6, (int)output_rows_in_pixels - (int)block_y * 6); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + pDst_pixels[0 + 4 * x] = blk_texels[y][x][0]; + pDst_pixels[1 + 4 * x] = blk_texels[y][x][1]; + pDst_pixels[2 + 4 * x] = blk_texels[y][x][2]; + pDst_pixels[3 + 4 * x] = blk_texels[y][x][3]; + } // x + + 
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4; + } // y + } + } + + break; + } + case block_format::cRGB_HALF: + { + astc_helpers::log_astc_block log_blk; + status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6); + if (status) + { + half_float* pDst_pixels = + reinterpret_cast(static_cast(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3); + + half_float blk_texels[6][6][4]; + status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16); + if (status) + { + const uint32_t max_x = basisu::minimum(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6); + const uint32_t max_y = basisu::minimum(6, (int)output_rows_in_pixels - (int)block_y * 6); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + pDst_pixels[0 + 3 * x] = blk_texels[y][x][0]; + pDst_pixels[1 + 3 * x] = blk_texels[y][x][1]; + pDst_pixels[2 + 3 * x] = blk_texels[y][x][2]; + } // x + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3; + } // y + } + } + + break; + } + default: + assert(0); + break; + + } + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); + return false; + } + + } // block_x + + } // block_y + } + + return true; +#else + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: ASTC HDR is unsupported\n"); + + BASISU_NOTE_UNUSED(decode_flags); + BASISU_NOTE_UNUSED(channel0); + BASISU_NOTE_UNUSED(channel1); + BASISU_NOTE_UNUSED(output_rows_in_pixels); + BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels); + BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes); + BASISU_NOTE_UNUSED(fmt); + BASISU_NOTE_UNUSED(image_data_size); + BASISU_NOTE_UNUSED(pImage_data); + BASISU_NOTE_UNUSED(num_blocks_x); + BASISU_NOTE_UNUSED(num_blocks_y); + 
BASISU_NOTE_UNUSED(pDst_blocks); + + return false; +#endif + } + + bool basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags, + bool has_alpha, + bool is_video, + uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, + uint32_t output_rows_in_pixels, + int channel0, int channel1) + { + BASISU_NOTE_UNUSED(is_video); + BASISU_NOTE_UNUSED(level_index); + BASISU_NOTE_UNUSED(decode_flags); + + if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: source data buffer too small\n"); + return false; + } + + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); + //const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; + + if (!basis_validate_output_buffer_size(basis_tex_format::cASTC_HDR_6x6, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: output buffer size too small\n"); + return false; + } + + bool status = false; + + switch (target_format) + { + case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_6x6, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, 
channel1, decode_flags); + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC6H: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to BC6H failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGB_HALF: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGBA_HALF: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGB_9E5: + { + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, 
block_format::cRGB_9E5, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1 , decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n"); + } + break; + } + default: + { + assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: Invalid format\n"); + break; + } + } + + return status; + } + + //------------------------------------------------------------------------------------------------ + + basisu_transcoder::basisu_transcoder() : + m_ready_to_transcode(false) + { + } + + bool basisu_transcoder::validate_file_checksums(const void* pData, uint32_t data_size, bool full_validation) const + { + if (!validate_header(pData, data_size)) + return false; + + const basis_file_header* pHeader = reinterpret_cast(pData); + +#if !BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS + if (crc16(&pHeader->m_data_size, sizeof(basis_file_header) - BASISU_OFFSETOF(basis_file_header, m_data_size), 0) != pHeader->m_header_crc16) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header CRC check failed\n"); + return false; + } + + if (full_validation) + { + if (crc16(reinterpret_cast(pData) + sizeof(basis_file_header), pHeader->m_data_size, 0) != pHeader->m_data_crc16) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: data CRC check failed\n"); + return false; + } + } +#endif + + return true; + } + + bool basisu_transcoder::validate_header_quick(const void* pData, uint32_t data_size) const + { + if (data_size <= sizeof(basis_file_header)) + return false; + + const basis_file_header* pHeader = reinterpret_cast(pData); + + if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header))) + { + 
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n"); + return false; + } + + uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size; + if (data_size < expected_file_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: source buffer is too small\n"); + return false; + } + + if ((!pHeader->m_total_slices) || (!pHeader->m_total_images)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header is invalid\n"); + return false; + } + + if ((pHeader->m_slice_desc_file_ofs >= data_size) || + ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices)) + ) + { + BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is corrupted\n"); + return false; + } + + return true; + } + + bool basisu_transcoder::validate_header(const void* pData, uint32_t data_size) const + { + if (data_size <= sizeof(basis_file_header)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is too small\n"); + return false; + } + + const basis_file_header* pHeader = reinterpret_cast(pData); + + if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header))) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n"); + return false; + } + + uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size; + if (data_size < expected_file_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is too small, or header is corrupted\n"); + return false; + } + + if ((!pHeader->m_total_images) || (!pHeader->m_total_slices)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (total images or slices are 0)\n"); + return 
false; + } + + if (pHeader->m_total_images > pHeader->m_total_slices) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (too many images)\n"); + return false; + } + + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + { + if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) + { + if (pHeader->m_total_slices & 1) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid alpha .basis file\n"); + return false; + } + } + + // This flag dates back to pre-Basis Universal, when .basis supported full ETC1 too. + if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n"); + return false; + } + } + else + { + if ((pHeader->m_flags & cBASISHeaderFlagETC1S) != 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n"); + return false; + } + } + + if ((pHeader->m_slice_desc_file_ofs >= data_size) || + ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices)) + ) + { + BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is corrupted\n"); + return false; + } + + return true; + } + + basis_texture_type basisu_transcoder::get_texture_type(const void* pData, uint32_t data_size) const + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_texture_type: header validation failed\n"); + return cBASISTexType2DArray; + } + + const basis_file_header* pHeader = static_cast(pData); + + basis_texture_type btt = static_cast(static_cast(pHeader->m_tex_type)); + + if (btt >= cBASISTexTypeTotal) + { + BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header's texture type field is invalid\n"); + return cBASISTexType2DArray; + } + + return btt; + } + + bool basisu_transcoder::get_userdata(const void* pData, uint32_t data_size, uint32_t& userdata0, 
uint32_t& userdata1) const + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_userdata: header validation failed\n"); + return false; + } + + const basis_file_header* pHeader = static_cast(pData); + + userdata0 = pHeader->m_userdata0; + userdata1 = pHeader->m_userdata1; + return true; + } + + uint32_t basisu_transcoder::get_total_images(const void* pData, uint32_t data_size) const + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header validation failed\n"); + return 0; + } + + const basis_file_header* pHeader = static_cast(pData); + + return pHeader->m_total_images; + } + + basis_tex_format basisu_transcoder::get_basis_tex_format(const void* pData, uint32_t data_size) const + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_basis_tex_format: header validation failed\n"); + return basis_tex_format::cETC1S; + } + + const basis_file_header* pHeader = static_cast(pData); + + return (basis_tex_format)(uint32_t)pHeader->m_tex_format; + } + + bool basisu_transcoder::get_image_info(const void* pData, uint32_t data_size, basisu_image_info& image_info, uint32_t image_index) const + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: header validation failed\n"); + return false; + } + + int slice_index = find_first_slice_index(pData, data_size, image_index, 0); + if (slice_index < 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid slice index\n"); + return false; + } + + const basis_file_header* pHeader = static_cast(pData); + + if (image_index >= pHeader->m_total_images) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n"); + return false; + } + + const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); + + uint32_t total_levels = 1; + for (uint32_t 
i = slice_index + 1; i < pHeader->m_total_slices; i++) + if (pSlice_descs[i].m_image_index == image_index) + total_levels = basisu::maximum(total_levels, pSlice_descs[i].m_level_index + 1); + else + break; + + if (total_levels > 16) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n"); + return false; + } + + const basis_slice_desc& slice_desc = pSlice_descs[slice_index]; + + image_info.m_image_index = image_index; + image_info.m_total_levels = total_levels; + + image_info.m_alpha_flag = false; + + // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + else + image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; + + image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0; + + const uint32_t block_width = basis_tex_format_get_block_width((basis_tex_format)((uint32_t)pHeader->m_tex_format)); + const uint32_t block_height = basis_tex_format_get_block_height((basis_tex_format)((uint32_t)pHeader->m_tex_format)); + + image_info.m_width = slice_desc.m_num_blocks_x * block_width; + image_info.m_height = slice_desc.m_num_blocks_y * block_height; + image_info.m_orig_width = slice_desc.m_orig_width; + image_info.m_orig_height = slice_desc.m_orig_height; + image_info.m_num_blocks_x = slice_desc.m_num_blocks_x; + image_info.m_num_blocks_y = slice_desc.m_num_blocks_y; + image_info.m_block_width = block_width; + image_info.m_block_height = block_height; + image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y; + image_info.m_first_slice_index = slice_index; + + return true; + } + + uint32_t basisu_transcoder::get_total_image_levels(const void* pData, uint32_t data_size, uint32_t image_index) const + { + if (!validate_header_quick(pData, data_size)) + { 
+ BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: header validation failed\n"); + return false; + } + + int slice_index = find_first_slice_index(pData, data_size, image_index, 0); + if (slice_index < 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: failed finding slice\n"); + return false; + } + + const basis_file_header* pHeader = static_cast(pData); + + if (image_index >= pHeader->m_total_images) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image_index\n"); + return false; + } + + const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); + + uint32_t total_levels = 1; + for (uint32_t i = slice_index + 1; i < pHeader->m_total_slices; i++) + if (pSlice_descs[i].m_image_index == image_index) + total_levels = basisu::maximum(total_levels, pSlice_descs[i].m_level_index + 1); + else + break; + + const uint32_t cMaxSupportedLevels = 16; + if (total_levels > cMaxSupportedLevels) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image levels!\n"); + return false; + } + + return total_levels; + } + + bool basisu_transcoder::get_image_level_desc(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t& orig_width, uint32_t& orig_height, uint32_t& total_blocks) const + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: header validation failed\n"); + return false; + } + + int slice_index = find_first_slice_index(pData, data_size, image_index, level_index); + if (slice_index < 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: failed finding slice\n"); + return false; + } + + const basis_file_header* pHeader = static_cast(pData); + + if (image_index >= pHeader->m_total_images) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: invalid image_index\n"); + return false; + } + + const 
basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); + + const basis_slice_desc& slice_desc = pSlice_descs[slice_index]; + + orig_width = slice_desc.m_orig_width; + orig_height = slice_desc.m_orig_height; + total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; + + return true; + } + + bool basisu_transcoder::get_image_level_info(const void* pData, uint32_t data_size, basisu_image_level_info& image_info, uint32_t image_index, uint32_t level_index) const + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: validate_file_checksums failed\n"); + return false; + } + + int slice_index = find_first_slice_index(pData, data_size, image_index, level_index); + if (slice_index < 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: failed finding slice\n"); + return false; + } + + const basis_file_header* pHeader = static_cast(pData); + + if (image_index >= pHeader->m_total_images) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: invalid image_index\n"); + return false; + } + + const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); + + const basis_slice_desc& slice_desc = pSlice_descs[slice_index]; + + image_info.m_image_index = image_index; + image_info.m_level_index = level_index; + + // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. 
+ if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + else + image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; + + const uint32_t block_width = basis_tex_format_get_block_width((basis_tex_format)((uint32_t)pHeader->m_tex_format)); + const uint32_t block_height = basis_tex_format_get_block_height((basis_tex_format)((uint32_t)pHeader->m_tex_format)); + + image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0; + image_info.m_width = slice_desc.m_num_blocks_x * block_width; + image_info.m_height = slice_desc.m_num_blocks_y * block_height; + image_info.m_orig_width = slice_desc.m_orig_width; + image_info.m_orig_height = slice_desc.m_orig_height; + image_info.m_block_width = block_width; + image_info.m_block_height = block_height; + image_info.m_num_blocks_x = slice_desc.m_num_blocks_x; + image_info.m_num_blocks_y = slice_desc.m_num_blocks_y; + image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y; + image_info.m_first_slice_index = slice_index; + + image_info.m_rgb_file_ofs = slice_desc.m_file_ofs; + image_info.m_rgb_file_len = slice_desc.m_file_size; + image_info.m_alpha_file_ofs = 0; + image_info.m_alpha_file_len = 0; + + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + { + if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) + { + assert((slice_index + 1) < (int)pHeader->m_total_slices); + image_info.m_alpha_file_ofs = pSlice_descs[slice_index + 1].m_file_ofs; + image_info.m_alpha_file_len = pSlice_descs[slice_index + 1].m_file_size; + } + } + + return true; + } + + bool basisu_transcoder::get_file_info(const void* pData, uint32_t data_size, basisu_file_info& file_info) const + { + if (!validate_file_checksums(pData, data_size, false)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: validate_file_checksums failed\n"); + return false; + } + + const 
basis_file_header* pHeader = static_cast(pData); + const basis_slice_desc* pSlice_descs = reinterpret_cast(static_cast(pData) + pHeader->m_slice_desc_file_ofs); + + file_info.m_version = pHeader->m_ver; + + file_info.m_total_header_size = sizeof(basis_file_header) + pHeader->m_total_slices * sizeof(basis_slice_desc); + + file_info.m_total_selectors = pHeader->m_total_selectors; + file_info.m_selector_codebook_ofs = pHeader->m_selector_cb_file_ofs; + file_info.m_selector_codebook_size = pHeader->m_selector_cb_file_size; + + file_info.m_total_endpoints = pHeader->m_total_endpoints; + file_info.m_endpoint_codebook_ofs = pHeader->m_endpoint_cb_file_ofs; + file_info.m_endpoint_codebook_size = pHeader->m_endpoint_cb_file_size; + + file_info.m_tables_ofs = pHeader->m_tables_file_ofs; + file_info.m_tables_size = pHeader->m_tables_file_size; + + file_info.m_tex_format = static_cast(static_cast(pHeader->m_tex_format)); + + file_info.m_etc1s = (pHeader->m_tex_format == (int)basis_tex_format::cETC1S); + + file_info.m_y_flipped = (pHeader->m_flags & cBASISHeaderFlagYFlipped) != 0; + file_info.m_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + + const uint32_t total_slices = pHeader->m_total_slices; + + file_info.m_slice_info.resize(total_slices); + + file_info.m_slices_size = 0; + + file_info.m_tex_type = static_cast(static_cast(pHeader->m_tex_type)); + + if (file_info.m_tex_type > cBASISTexTypeTotal) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: invalid texture type, file is corrupted\n"); + return false; + } + + file_info.m_us_per_frame = pHeader->m_us_per_frame; + file_info.m_userdata0 = pHeader->m_userdata0; + file_info.m_userdata1 = pHeader->m_userdata1; + + file_info.m_image_mipmap_levels.resize(0); + file_info.m_image_mipmap_levels.resize(pHeader->m_total_images); + + file_info.m_total_images = pHeader->m_total_images; + + const uint32_t block_width = 
basis_tex_format_get_block_width((basis_tex_format)((uint32_t)pHeader->m_tex_format)); + const uint32_t block_height = basis_tex_format_get_block_height((basis_tex_format)((uint32_t)pHeader->m_tex_format)); + file_info.m_block_width = block_width; + file_info.m_block_height = block_height; + + for (uint32_t i = 0; i < total_slices; i++) + { + file_info.m_slices_size += pSlice_descs[i].m_file_size; + + basisu_slice_info& slice_info = file_info.m_slice_info[i]; + + slice_info.m_orig_width = pSlice_descs[i].m_orig_width; + slice_info.m_orig_height = pSlice_descs[i].m_orig_height; + slice_info.m_width = pSlice_descs[i].m_num_blocks_x * block_width; + slice_info.m_height = pSlice_descs[i].m_num_blocks_y * block_height; + slice_info.m_num_blocks_x = pSlice_descs[i].m_num_blocks_x; + slice_info.m_num_blocks_y = pSlice_descs[i].m_num_blocks_y; + slice_info.m_block_width = block_width; + slice_info.m_block_height = block_height; + slice_info.m_total_blocks = slice_info.m_num_blocks_x * slice_info.m_num_blocks_y; + slice_info.m_compressed_size = pSlice_descs[i].m_file_size; + slice_info.m_slice_index = i; + slice_info.m_image_index = pSlice_descs[i].m_image_index; + slice_info.m_level_index = pSlice_descs[i].m_level_index; + slice_info.m_unpacked_slice_crc16 = pSlice_descs[i].m_slice_data_crc16; + slice_info.m_alpha_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsHasAlpha) != 0; + slice_info.m_iframe_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsFrameIsIFrame) != 0; + + if (pSlice_descs[i].m_image_index >= pHeader->m_total_images) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice desc's image index is invalid\n"); + return false; + } + + file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] = basisu::maximum(file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index], pSlice_descs[i].m_level_index + 1); + + if (file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] > 16) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice 
mipmap level is invalid\n"); + return false; + } + } + + return true; + } + + bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size) + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: header validation failed\n"); + return false; + } + + const basis_file_header* pHeader = reinterpret_cast(pData); + const uint8_t* pDataU8 = static_cast(pData); + + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + { + if (m_lowlevel_etc1s_decoder.m_local_endpoints.size()) + { + m_lowlevel_etc1s_decoder.clear(); + } + + if (pHeader->m_flags & cBASISHeaderFlagUsesGlobalCodebook) + { + if (!m_lowlevel_etc1s_decoder.get_global_codebooks()) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: File uses global codebooks, but set_global_codebooks() has not been called\n"); + return false; + } + if (!m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size()) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebooks must be unpacked first by calling start_transcoding()\n"); + return false; + } + if ((m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size() != pHeader->m_total_endpoints) || + (m_lowlevel_etc1s_decoder.get_global_codebooks()->get_selectors().size() != pHeader->m_total_selectors)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebook size mismatch (wrong codebooks for file).\n"); + return false; + } + if (!pHeader->m_tables_file_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (2)\n"); + return false; + } + if (pHeader->m_tables_file_ofs > data_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (4)\n"); + return false; + } + if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or 
passed in buffer too small (5)\n"); + return false; + } + } + else + { + if (!pHeader->m_endpoint_cb_file_size || !pHeader->m_selector_cb_file_size || !pHeader->m_tables_file_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (0)\n"); + return false; + } + + if ((pHeader->m_endpoint_cb_file_ofs > data_size) || (pHeader->m_selector_cb_file_ofs > data_size) || (pHeader->m_tables_file_ofs > data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (1)\n"); + return false; + } + + if (pHeader->m_endpoint_cb_file_size > (data_size - pHeader->m_endpoint_cb_file_ofs)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (2)\n"); + return false; + } + + if (pHeader->m_selector_cb_file_size > (data_size - pHeader->m_selector_cb_file_ofs)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n"); + return false; + } + + if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n"); + return false; + } + + if (!m_lowlevel_etc1s_decoder.decode_palettes( + pHeader->m_total_endpoints, pDataU8 + pHeader->m_endpoint_cb_file_ofs, pHeader->m_endpoint_cb_file_size, + pHeader->m_total_selectors, pDataU8 + pHeader->m_selector_cb_file_ofs, pHeader->m_selector_cb_file_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_palettes failed\n"); + return false; + } + } + + if (!m_lowlevel_etc1s_decoder.decode_tables(pDataU8 + pHeader->m_tables_file_ofs, pHeader->m_tables_file_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_tables failed\n"); + return false; + } + } + else + { + // Nothing special to do for UASTC/UASTC HDR. 
+ if (m_lowlevel_etc1s_decoder.m_local_endpoints.size()) + { + m_lowlevel_etc1s_decoder.clear(); + } + } + + m_ready_to_transcode = true; + + return true; + } + + bool basisu_transcoder::stop_transcoding() + { + m_lowlevel_etc1s_decoder.clear(); + + m_ready_to_transcode = false; + + return true; + } + + bool basisu_transcoder::transcode_slice(const void* pData, uint32_t data_size, uint32_t slice_index, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, void *pAlpha_blocks, uint32_t output_rows_in_pixels, int channel0, int channel1) const + { + if (!m_ready_to_transcode) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: must call start_transcoding first\n"); + return false; + } + + if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2) + { + // TODO: Not yet supported + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n"); + return false; + } + + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: header validation failed\n"); + return false; + } + + const basis_file_header* pHeader = reinterpret_cast(pData); + + const uint8_t* pDataU8 = static_cast(pData); + + if (slice_index >= pHeader->m_total_slices) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: slice_index >= pHeader->m_total_slices\n"); + return false; + } + + const basis_slice_desc& slice_desc = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_index]; + + if (basis_block_format_is_uncompressed(fmt)) + { + // Assume the output buffer is orig_width by orig_height + if (!output_row_pitch_in_blocks_or_pixels) + output_row_pitch_in_blocks_or_pixels = slice_desc.m_orig_width; + + if (!output_rows_in_pixels) + output_rows_in_pixels = slice_desc.m_orig_height; + + // 
Now make sure the output buffer is large enough, or we'll overwrite memory. + if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n"); + return false; + } + } + else if (fmt == block_format::cFXT1_RGB) + { + const uint32_t num_blocks_fxt1_x = (slice_desc.m_orig_width + 7) / 8; + const uint32_t num_blocks_fxt1_y = (slice_desc.m_orig_height + 3) / 4; + const uint32_t total_blocks_fxt1 = num_blocks_fxt1_x * num_blocks_fxt1_y; + + if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n"); + return false; + } + } + else if (fmt == block_format::cASTC_HDR_6x6) + { + const uint32_t num_blocks_6x6_x = (slice_desc.m_orig_width + 5) / 6; + const uint32_t num_blocks_6x6_y = (slice_desc.m_orig_height + 5) / 6; + const uint32_t total_blocks_6x6 = num_blocks_6x6_x * num_blocks_6x6_y; + + if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_6x6) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_6x6\n"); + return false; + } + } + else + { + // must be a 4x4 pixel block format + const uint32_t num_blocks_4x4_x = (slice_desc.m_orig_width + 3) / 4; + const uint32_t num_blocks_4x4_y = (slice_desc.m_orig_height + 3) / 4; + const uint32_t total_4x4_blocks = num_blocks_4x4_x * num_blocks_4x4_y; + + if (output_blocks_buf_size_in_blocks_or_pixels < total_4x4_blocks) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks\n"); + return false; + } + } + + if ((pHeader->m_tex_format == (uint32_t)basis_tex_format::cETC1S) || (pHeader->m_tex_format == (uint32_t)basis_tex_format::cUASTC4x4)) 
+ { + if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA)) + { + if ((!basisu::is_pow2(slice_desc.m_num_blocks_x * 4)) || (!basisu::is_pow2(slice_desc.m_num_blocks_y * 4))) + { + // PVRTC1 only supports power of 2 dimensions + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: PVRTC1 only supports power of 2 dimensions\n"); + return false; + } + } + } + + if (slice_desc.m_file_ofs > data_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_ofs, or passed in buffer too small\n"); + return false; + } + + const uint32_t data_size_left = data_size - slice_desc.m_file_ofs; + if (data_size_left < slice_desc.m_file_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_size, or passed in buffer too small\n"); + return false; + } + + if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6) + { + return m_lowlevel_astc_6x6_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, + pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, + fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, + output_rows_in_pixels, channel0, channel1, decode_flags); + } + else if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE) + { + return m_lowlevel_astc_6x6_hdr_intermediate_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, + pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, + fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, + output_rows_in_pixels, channel0, channel1, decode_flags); + } + else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4) + { + return 
m_lowlevel_uastc_4x4_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, + pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, + fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, + output_rows_in_pixels, channel0, channel1, decode_flags); + } + else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) + { + return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, + pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, + fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, + output_rows_in_pixels, channel0, channel1, decode_flags); + } + else + { + return m_lowlevel_etc1s_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, + pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, + fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, + (decode_flags & cDecodeFlagsOutputHasAlphaIndices) != 0, pAlpha_blocks, output_rows_in_pixels); + } + } + + int basisu_transcoder::find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const + { + BASISU_NOTE_UNUSED(data_size); + + const basis_file_header* pHeader = reinterpret_cast(pData); + const uint8_t* pDataU8 = static_cast(pData); + + // For very large basis files this search could be painful + // TODO: Binary search this + for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++) + { + const basis_slice_desc& slice_desc = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_iter]; + if ((slice_desc.m_image_index == 
image_index) && (slice_desc.m_level_index == level_index)) + return slice_iter; + } + + BASISU_DEVEL_ERROR("basisu_transcoder::find_first_slice_index: didn't find slice\n"); + + return -1; + } + + int basisu_transcoder::find_slice(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: header validation failed\n"); + return false; + } + + const basis_file_header* pHeader = reinterpret_cast(pData); + const uint8_t* pDataU8 = static_cast(pData); + const basis_slice_desc* pSlice_descs = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs); + + // For very large basis files this search could be painful + // TODO: Binary search this + for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++) + { + const basis_slice_desc& slice_desc = pSlice_descs[slice_iter]; + if ((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index)) + { + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + { + const bool slice_alpha = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; + if (slice_alpha == alpha_data) + return slice_iter; + } + else + { + return slice_iter; + } + } + } + + BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: didn't find slice\n"); + + return -1; + } + + void basisu_transcoder::write_opaque_alpha_blocks( + uint32_t num_blocks_x, uint32_t num_blocks_y, + void* pOutput_blocks, block_format fmt, + uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels) + { + // 'num_blocks_y', 'pOutput_blocks' & 'block_stride_in_bytes' unused + // when disabling BASISD_SUPPORT_ETC2_EAC_A8 *and* BASISD_SUPPORT_DXT5A + BASISU_NOTE_UNUSED(num_blocks_y); + BASISU_NOTE_UNUSED(pOutput_blocks); + BASISU_NOTE_UNUSED(block_stride_in_bytes); + + if (!output_row_pitch_in_blocks_or_pixels) + output_row_pitch_in_blocks_or_pixels = num_blocks_x; + + if 
((fmt == block_format::cETC2_EAC_A8) || (fmt == block_format::cETC2_EAC_R11)) + { +#if BASISD_SUPPORT_ETC2_EAC_A8 + eac_block blk; + blk.m_base = 255; + blk.m_multiplier = 1; + blk.m_table = 13; + + // Selectors are all 4's + memcpy(&blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); + + for (uint32_t y = 0; y < num_blocks_y; y++) + { + uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes; + for (uint32_t x = 0; x < num_blocks_x; x++) + { + memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk)); + dst_ofs += block_stride_in_bytes; + } + } +#endif + } + else if (fmt == block_format::cBC4) + { +#if BASISD_SUPPORT_DXT5A + dxt5a_block blk; + blk.m_endpoints[0] = 255; + blk.m_endpoints[1] = 255; + memset(blk.m_selectors, 0, sizeof(blk.m_selectors)); + + for (uint32_t y = 0; y < num_blocks_y; y++) + { + uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes; + for (uint32_t x = 0; x < num_blocks_x; x++) + { + memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk)); + dst_ofs += block_stride_in_bytes; + } + } +#endif + } + } + + bool basisu_transcoder::transcode_image_level( + const void* pData, uint32_t data_size, + uint32_t image_index, uint32_t level_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + transcoder_texture_format fmt, + uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state *pState, uint32_t output_rows_in_pixels) const + { + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(fmt); + + if (!m_ready_to_transcode) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: must call start_transcoding() first\n"); + return false; + } + + //const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; + + if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2) + { + 
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n"); + // TODO: Not yet supported + return false; + } + + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: header validation failed\n"); + return false; + } + + const basis_file_header* pHeader = reinterpret_cast(pData); + + const uint8_t* pDataU8 = static_cast(pData); + + const basis_slice_desc* pSlice_descs = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs); + + const bool basis_file_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + + int slice_index = find_first_slice_index(pData, data_size, image_index, level_index); + if (slice_index < 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: failed finding slice index\n"); + // Unable to find the requested image/level + return false; + } + + if ((fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices)) + { + // Switch to PVRTC1 RGB if the input doesn't have alpha. + fmt = transcoder_texture_format::cTFPVRTC1_4_RGB; + } + + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + { + if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsHasAlpha) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has out of order alpha slice\n"); + + // The first slice shouldn't have alpha data in a properly formed basis file + return false; + } + + if (basis_file_has_alpha_slices) + { + // The alpha data should immediately follow the color data, and have the same resolution. 
+ if ((slice_index + 1U) >= pHeader->m_total_slices) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice\n"); + // basis file is missing the alpha slice + return false; + } + + // Basic sanity checks + if ((pSlice_descs[slice_index + 1].m_flags & cSliceDescFlagsHasAlpha) == 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice (flag check)\n"); + // This slice should have alpha data + return false; + } + + if ((pSlice_descs[slice_index].m_num_blocks_x != pSlice_descs[slice_index + 1].m_num_blocks_x) || (pSlice_descs[slice_index].m_num_blocks_y != pSlice_descs[slice_index + 1].m_num_blocks_y)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file slice dimensions bad\n"); + // Alpha slice should have been the same res as the color slice + return false; + } + } + } + + bool status = false; + + if ((pHeader->m_tex_format == (int)basis_tex_format::cETC1S) || (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)) + { + // Only do this on 4x4 LDR formats that supports transcoding to PVRTC1. + const uint32_t total_slice_blocks = pSlice_descs[slice_index].m_num_blocks_x * pSlice_descs[slice_index].m_num_blocks_y; + + if (((fmt == transcoder_texture_format::cTFPVRTC1_4_RGB) || (fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA)) && (output_blocks_buf_size_in_blocks_or_pixels > total_slice_blocks)) + { + // The transcoder doesn't write beyond total_slice_blocks, so we need to clear the rest ourselves. + // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. + // However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block_or_pixel. This is all the transcoder actually writes to memory. 
+ memset(static_cast(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel); + } + } + + if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6) + { + const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; + + // Use the container independent image transcode method. + status = m_lowlevel_astc_6x6_hdr_decoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, + pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, + decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + } + else if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE) + { + const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; + + // Use the container independent image transcode method. + status = m_lowlevel_astc_6x6_hdr_intermediate_decoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, + pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, + decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + } + else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4) + { + const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; + + // Use the container independent image transcode method. 
+ status = m_lowlevel_uastc_4x4_hdr_decoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, + pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, + decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + } + else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) + { + const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; + + // Use the container independent image transcode method. + status = m_lowlevel_uastc_decoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, + pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, + decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + } + else + { + // ETC1S + const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; + const basis_slice_desc* pAlpha_slice_desc = basis_file_has_alpha_slices ? &pSlice_descs[slice_index + 1] : nullptr; + + assert((pSlice_desc->m_flags & cSliceDescFlagsHasAlpha) == 0); + + if (pAlpha_slice_desc) + { + // Basic sanity checks + assert((pAlpha_slice_desc->m_flags & cSliceDescFlagsHasAlpha) != 0); + assert(pSlice_desc->m_num_blocks_x == pAlpha_slice_desc->m_num_blocks_x); + assert(pSlice_desc->m_num_blocks_y == pAlpha_slice_desc->m_num_blocks_y); + assert(pSlice_desc->m_level_index == pAlpha_slice_desc->m_level_index); + } + + // Use the container independent image transcode method. 
+ status = m_lowlevel_etc1s_decoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t *)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, + pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, + (pAlpha_slice_desc != nullptr) ? (uint32_t)pAlpha_slice_desc->m_file_ofs : 0U, (pAlpha_slice_desc != nullptr) ? (uint32_t)pAlpha_slice_desc->m_file_size : 0U, + decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + + } // if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning false\n"); + } + else + { + //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n"); + } + + return status; + } + + uint32_t basis_get_bytes_per_block_or_pixel(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFETC1_RGB: + case transcoder_texture_format::cTFBC1_RGB: + case transcoder_texture_format::cTFBC4_R: + case transcoder_texture_format::cTFPVRTC1_4_RGB: + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + case transcoder_texture_format::cTFATC_RGB: + case transcoder_texture_format::cTFPVRTC2_4_RGB: + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + case transcoder_texture_format::cTFETC2_EAC_R11: + return 8; + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + case transcoder_texture_format::cTFBC6H: + case transcoder_texture_format::cTFETC2_RGBA: + case transcoder_texture_format::cTFBC3_RGBA: + case transcoder_texture_format::cTFBC5_RG: + case transcoder_texture_format::cTFASTC_4x4_RGBA: + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: + case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: + case 
transcoder_texture_format::cTFATC_RGBA: + case transcoder_texture_format::cTFFXT1_RGB: + case transcoder_texture_format::cTFETC2_EAC_RG11: + return 16; + case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGB_9E5: + return sizeof(uint32_t); + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + case transcoder_texture_format::cTFRGBA4444: + return sizeof(uint16_t); + case transcoder_texture_format::cTFRGB_HALF: + return sizeof(half_float) * 3; + case transcoder_texture_format::cTFRGBA_HALF: + return sizeof(half_float) * 4; + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); + break; + } + return 0; + } + + const char* basis_get_format_name(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFETC1_RGB: return "ETC1_RGB"; + case transcoder_texture_format::cTFBC1_RGB: return "BC1_RGB"; + case transcoder_texture_format::cTFBC4_R: return "BC4_R"; + case transcoder_texture_format::cTFPVRTC1_4_RGB: return "PVRTC1_4_RGB"; + case transcoder_texture_format::cTFPVRTC1_4_RGBA: return "PVRTC1_4_RGBA"; + case transcoder_texture_format::cTFBC7_RGBA: return "BC7_RGBA"; + case transcoder_texture_format::cTFBC7_ALT: return "BC7_RGBA"; + case transcoder_texture_format::cTFETC2_RGBA: return "ETC2_RGBA"; + case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA"; + case transcoder_texture_format::cTFBC5_RG: return "BC5_RG"; + case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA"; + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return "ASTC_HDR_4X4_RGBA"; + case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: return "ASTC_HDR_6X6_RGBA"; + case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB"; + case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA"; + case transcoder_texture_format::cTFRGBA32: return "RGBA32"; + case transcoder_texture_format::cTFRGB565: return "RGB565"; + case 
transcoder_texture_format::cTFBGR565: return "BGR565"; + case transcoder_texture_format::cTFRGBA4444: return "RGBA4444"; + case transcoder_texture_format::cTFRGBA_HALF: return "RGBA_HALF"; + case transcoder_texture_format::cTFRGB_9E5: return "RGB_9E5"; + case transcoder_texture_format::cTFRGB_HALF: return "RGB_HALF"; + case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB"; + case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB"; + case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA"; + case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11"; + case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11"; + case transcoder_texture_format::cTFBC6H: return "BC6H"; + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); + break; + } + return ""; + } + + const char* basis_get_block_format_name(block_format fmt) + { + switch (fmt) + { + case block_format::cETC1: return "ETC1"; + case block_format::cBC1: return "BC1"; + case block_format::cPVRTC1_4_RGB: return "PVRTC1_4_RGB"; + case block_format::cPVRTC1_4_RGBA: return "PVRTC1_4_RGBA"; + case block_format::cBC7: return "BC7"; + case block_format::cETC2_RGBA: return "ETC2_RGBA"; + case block_format::cBC3: return "BC3"; + case block_format::cASTC_4x4: return "ASTC_4x4"; + case block_format::cATC_RGB: return "ATC_RGB"; + case block_format::cRGBA32: return "RGBA32"; + case block_format::cRGB565: return "RGB565"; + case block_format::cBGR565: return "BGR565"; + case block_format::cRGBA4444: return "RGBA4444"; + case block_format::cRGBA_HALF: return "RGBA_HALF"; + case block_format::cRGB_HALF: return "RGB_HALF"; + case block_format::cRGB_9E5: return "RGB_9E5"; + case block_format::cUASTC_4x4: return "UASTC_4x4"; + case block_format::cUASTC_HDR_4x4: return "UASTC_HDR_4x4"; + case block_format::cBC6H: return "BC6H"; + case block_format::cASTC_HDR_4x4: return "ASTC_HDR_4x4"; + case block_format::cASTC_HDR_6x6: return 
"ASTC_HDR_6x6"; + case block_format::cFXT1_RGB: return "FXT1_RGB"; + case block_format::cPVRTC2_4_RGB: return "PVRTC2_4_RGB"; + case block_format::cPVRTC2_4_RGBA: return "PVRTC2_4_RGBA"; + case block_format::cETC2_EAC_R11: return "ETC2_EAC_R11"; + case block_format::cETC2_EAC_RG11: return "ETC2_EAC_RG11"; + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); + break; + } + return ""; + } + + const char* basis_get_texture_type_name(basis_texture_type tex_type) + { + switch (tex_type) + { + case cBASISTexType2D: return "2D"; + case cBASISTexType2DArray: return "2D array"; + case cBASISTexTypeCubemapArray: return "cubemap array"; + case cBASISTexTypeVideoFrames: return "video"; + case cBASISTexTypeVolume: return "3D"; + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_texture_type_name: Invalid tex_type\n"); + break; + } + return ""; + } + + bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt) + { + // TODO: Technically ASTC HDR does support alpha, but our ASTC HDR encoders don't yet support it. Unsure what to do here. 
+ switch (fmt) + { + case transcoder_texture_format::cTFETC2_RGBA: + case transcoder_texture_format::cTFBC3_RGBA: + case transcoder_texture_format::cTFASTC_4x4_RGBA: + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: // technically this ASTC HDR format supports alpha, but we currently don't exploit that in our encoders + case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: // technically this ASTC HDR format supports alpha, but we currently don't exploit that in our encoders + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + case transcoder_texture_format::cTFATC_RGBA: + case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGBA4444: + case transcoder_texture_format::cTFRGBA_HALF: + return true; + default: + break; + } + return false; + } + + bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: + case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: + case transcoder_texture_format::cTFBC6H: + case transcoder_texture_format::cTFRGBA_HALF: + case transcoder_texture_format::cTFRGB_HALF: + case transcoder_texture_format::cTFRGB_9E5: + return true; + default: + break; + } + return false; + } + + basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFETC1_RGB: return basisu::texture_format::cETC1; + case transcoder_texture_format::cTFBC1_RGB: return basisu::texture_format::cBC1; + case transcoder_texture_format::cTFBC4_R: return basisu::texture_format::cBC4; + case transcoder_texture_format::cTFPVRTC1_4_RGB: return basisu::texture_format::cPVRTC1_4_RGB; + case transcoder_texture_format::cTFPVRTC1_4_RGBA: return basisu::texture_format::cPVRTC1_4_RGBA; + case transcoder_texture_format::cTFBC7_RGBA: return 
basisu::texture_format::cBC7; + case transcoder_texture_format::cTFBC7_ALT: return basisu::texture_format::cBC7; + case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA; + case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3; + case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5; + case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC_LDR_4x4; + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return basisu::texture_format::cASTC_HDR_4x4; + case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: return basisu::texture_format::cASTC_HDR_6x6; + case transcoder_texture_format::cTFBC6H: return basisu::texture_format::cBC6HUnsigned; + case transcoder_texture_format::cTFATC_RGB: return basisu::texture_format::cATC_RGB; + case transcoder_texture_format::cTFATC_RGBA: return basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA; + case transcoder_texture_format::cTFRGBA32: return basisu::texture_format::cRGBA32; + case transcoder_texture_format::cTFRGB565: return basisu::texture_format::cRGB565; + case transcoder_texture_format::cTFBGR565: return basisu::texture_format::cBGR565; + case transcoder_texture_format::cTFRGBA4444: return basisu::texture_format::cRGBA4444; + case transcoder_texture_format::cTFRGBA_HALF: return basisu::texture_format::cRGBA_HALF; + case transcoder_texture_format::cTFRGB_9E5: return basisu::texture_format::cRGB_9E5; + case transcoder_texture_format::cTFRGB_HALF: return basisu::texture_format::cRGB_HALF; + case transcoder_texture_format::cTFFXT1_RGB: return basisu::texture_format::cFXT1_RGB; + case transcoder_texture_format::cTFPVRTC2_4_RGB: return basisu::texture_format::cPVRTC2_4_RGBA; + case transcoder_texture_format::cTFPVRTC2_4_RGBA: return basisu::texture_format::cPVRTC2_4_RGBA; + case transcoder_texture_format::cTFETC2_EAC_R11: return basisu::texture_format::cETC2_R11_EAC; + case 
transcoder_texture_format::cTFETC2_EAC_RG11: return basisu::texture_format::cETC2_RG11_EAC; + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); + break; + } + return basisu::texture_format::cInvalidTextureFormat; + } + + bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type) + { + switch (tex_type) + { + case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + case transcoder_texture_format::cTFRGBA4444: + case transcoder_texture_format::cTFRGB_HALF: + case transcoder_texture_format::cTFRGBA_HALF: + case transcoder_texture_format::cTFRGB_9E5: + return true; + default: + break; + } + return false; + } + + bool basis_block_format_is_uncompressed(block_format blk_fmt) + { + switch (blk_fmt) + { + case block_format::cRGB32: + case block_format::cRGBA32: + case block_format::cA32: + case block_format::cRGB565: + case block_format::cBGR565: + case block_format::cRGBA4444: + case block_format::cRGBA4444_COLOR: + case block_format::cRGBA4444_ALPHA: + case block_format::cRGBA4444_COLOR_OPAQUE: + case block_format::cRGBA_HALF: + case block_format::cRGB_HALF: + case block_format::cRGB_9E5: + return true; + default: + break; + } + return false; + } + + uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGB_9E5: + return sizeof(uint32_t); + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + case transcoder_texture_format::cTFRGBA4444: + return sizeof(uint16_t); + case transcoder_texture_format::cTFRGB_HALF: + return sizeof(half_float) * 3; + case transcoder_texture_format::cTFRGBA_HALF: + return sizeof(half_float) * 4; + default: + break; + } + return 0; + } + + uint32_t basis_get_block_width(transcoder_texture_format tex_type) + { + switch (tex_type) + { + case 
transcoder_texture_format::cTFFXT1_RGB: + return 8; + case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: + return 6; + default: + break; + } + return 4; + } + + uint32_t basis_get_block_height(transcoder_texture_format tex_type) + { + switch (tex_type) + { + case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: + return 6; + default: + break; + } + return 4; + } + + uint32_t basis_tex_format_get_block_width(basis_tex_format fmt) + { + switch (fmt) + { + case basis_tex_format::cASTC_HDR_6x6: + case basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: + return 6; + default: + break; + } + return 4; + } + + uint32_t basis_tex_format_get_block_height(basis_tex_format fmt) + { + switch (fmt) + { + case basis_tex_format::cASTC_HDR_6x6: + case basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: + return 6; + default: + break; + } + return 4; + } + + bool basis_tex_format_is_hdr(basis_tex_format fmt) + { + switch (fmt) + { + case basis_tex_format::cUASTC_HDR_4x4: + case basis_tex_format::cASTC_HDR_6x6: + case basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: + return true; + default: + break; + } + return false; + } + + bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt) + { + if ((fmt == basis_tex_format::cASTC_HDR_6x6) || (fmt == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)) + { + // RDO UASTC HDR 6x6, or our custom intermediate format +#if BASISD_SUPPORT_UASTC_HDR + switch (tex_type) + { + case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: + case transcoder_texture_format::cTFBC6H: + case transcoder_texture_format::cTFRGBA_HALF: + case transcoder_texture_format::cTFRGB_HALF: + case transcoder_texture_format::cTFRGB_9E5: + return true; + default: + break; + } +#endif + } + else if (fmt == basis_tex_format::cUASTC_HDR_4x4) + { + // UASTC HDR 4x4 +#if BASISD_SUPPORT_UASTC_HDR + switch (tex_type) + { + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: + case transcoder_texture_format::cTFBC6H: + case transcoder_texture_format::cTFRGBA_HALF: + 
case transcoder_texture_format::cTFRGB_HALF: + case transcoder_texture_format::cTFRGB_9E5: + return true; + default: + break; + } +#endif + } + else if (fmt == basis_tex_format::cUASTC4x4) + { + // UASTC LDR 4x4 +#if BASISD_SUPPORT_UASTC + switch (tex_type) + { + // These niche formats aren't currently supported for UASTC - everything else is. + case transcoder_texture_format::cTFPVRTC2_4_RGB: + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + case transcoder_texture_format::cTFATC_RGB: + case transcoder_texture_format::cTFATC_RGBA: + case transcoder_texture_format::cTFFXT1_RGB: + // UASTC LDR doesn't support transcoding to HDR formats + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: + case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: + case transcoder_texture_format::cTFBC6H: + case transcoder_texture_format::cTFRGBA_HALF: + case transcoder_texture_format::cTFRGB_HALF: + case transcoder_texture_format::cTFRGB_9E5: + return false; + default: + return true; + } +#endif + } + else + { + // ETC1S + switch (tex_type) + { + // ETC1 and uncompressed are always supported. 
+ case transcoder_texture_format::cTFETC1_RGB: + case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + case transcoder_texture_format::cTFRGBA4444: + return true; +#if BASISD_SUPPORT_DXT1 + case transcoder_texture_format::cTFBC1_RGB: + return true; +#endif +#if BASISD_SUPPORT_DXT5A + case transcoder_texture_format::cTFBC4_R: + case transcoder_texture_format::cTFBC5_RG: + return true; +#endif +#if BASISD_SUPPORT_DXT1 && BASISD_SUPPORT_DXT5A + case transcoder_texture_format::cTFBC3_RGBA: + return true; +#endif +#if BASISD_SUPPORT_PVRTC1 + case transcoder_texture_format::cTFPVRTC1_4_RGB: + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + return true; +#endif +#if BASISD_SUPPORT_BC7_MODE5 + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + return true; +#endif +#if BASISD_SUPPORT_ETC2_EAC_A8 + case transcoder_texture_format::cTFETC2_RGBA: + return true; +#endif +#if BASISD_SUPPORT_ASTC + case transcoder_texture_format::cTFASTC_4x4_RGBA: + return true; +#endif +#if BASISD_SUPPORT_ATC + case transcoder_texture_format::cTFATC_RGB: + case transcoder_texture_format::cTFATC_RGBA: + return true; +#endif +#if BASISD_SUPPORT_FXT1 + case transcoder_texture_format::cTFFXT1_RGB: + return true; +#endif +#if BASISD_SUPPORT_PVRTC2 + case transcoder_texture_format::cTFPVRTC2_4_RGB: + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + return true; +#endif +#if BASISD_SUPPORT_ETC2_EAC_RG11 + case transcoder_texture_format::cTFETC2_EAC_R11: + case transcoder_texture_format::cTFETC2_EAC_RG11: + return true; +#endif + default: + break; + } + } + + return false; + } + + // ------------------------------------------------------------------------------------------------------ + // UASTC LDR 4x4 + // ------------------------------------------------------------------------------------------------------ + +#if BASISD_SUPPORT_UASTC + const astc_bc7_common_partition2_desc 
g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2] = + { + { 0, 28, false }, { 1, 20, false }, { 2, 16, true }, { 3, 29, false }, + { 4, 91, true }, { 5, 9, false }, { 6, 107, true }, { 7, 72, true }, + { 8, 149, false }, { 9, 204, true }, { 10, 50, false }, { 11, 114, true }, + { 12, 496, true }, { 13, 17, true }, { 14, 78, false }, { 15, 39, true }, + { 17, 252, true }, { 18, 828, true }, { 19, 43, false }, { 20, 156, false }, + { 21, 116, false }, { 22, 210, true }, { 23, 476, true }, { 24, 273, false }, + { 25, 684, true }, { 26, 359, false }, { 29, 246, true }, { 32, 195, true }, + { 33, 694, true }, { 52, 524, true } + }; + + const bc73_astc2_common_partition_desc g_bc7_3_astc2_common_partitions[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS] = + { + { 10, 36, 4 }, { 11, 48, 4 }, { 0, 61, 3 }, { 2, 137, 4 }, + { 8, 161, 5 }, { 13, 183, 4 }, { 1, 226, 2 }, { 33, 281, 2 }, + { 40, 302, 3 }, { 20, 307, 4 }, { 21, 479, 0 }, { 58, 495, 3 }, + { 3, 593, 0 }, { 32, 594, 2 }, { 59, 605, 1 }, { 34, 799, 3 }, + { 20, 812, 1 }, { 14, 988, 4 }, { 31, 993, 3 } + }; + + const astc_bc7_common_partition3_desc g_astc_bc7_common_partitions3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3] = + { + { 4, 260, 0 }, { 8, 74, 5 }, { 9, 32, 5 }, { 10, 156, 2 }, + { 11, 183, 2 }, { 12, 15, 0 }, { 13, 745, 4 }, { 20, 0, 1 }, + { 35, 335, 1 }, { 36, 902, 5 }, { 57, 254, 0 } + }; + + const uint8_t g_astc_to_bc7_partition_index_perm_tables[6][3] = { { 0, 1, 2 }, { 1, 2, 0 }, { 2, 0, 1 }, { 2, 1, 0 }, { 0, 2, 1 }, { 1, 0, 2 } }; + + const uint8_t g_bc7_to_astc_partition_index_perm_tables[6][3] = { { 0, 1, 2 }, { 2, 0, 1 }, { 1, 2, 0 }, { 2, 1, 0 }, { 0, 2, 1 }, { 1, 0, 2 } }; + + uint32_t bc7_convert_partition_index_3_to_2(uint32_t p, uint32_t k) + { + assert(k < 6); + switch (k >> 1) + { + case 0: + if (p <= 1) + p = 0; + else + p = 1; + break; + case 1: + if (p == 0) + p = 0; + else + p = 1; + break; + case 2: + if ((p == 0) || (p == 2)) + p = 0; + else + p = 1; + break; + } + if (k & 1) + p = 1 - 
p; + return p; + } + + static const uint8_t g_zero_pattern[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + const uint8_t g_astc_bc7_patterns2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][16] = + { + { 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1 }, { 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1 }, { 1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0 }, { 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1 }, + { 1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0 }, { 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1 }, { 1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0 }, + { 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1 }, { 1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0 }, + { 1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0 }, + { 1,0,0,0,1,1,1,0,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,1 }, { 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0 }, { 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0 }, + { 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1 }, { 1,0,0,0,1,1,0,0,1,1,0,0,1,1,1,0 }, { 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0 }, + { 1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1 }, { 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0 }, { 1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1 }, { 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0 }, + { 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0 }, { 1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0 } + }; + + const uint8_t g_astc_bc7_patterns3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][16] = + { + { 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2 }, { 1,1,1,1,1,1,1,1,0,0,0,0,2,2,2,2 }, { 1,1,1,1,0,0,0,0,0,0,0,0,2,2,2,2 }, { 1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0 }, + { 1,1,2,0,1,1,2,0,1,1,2,0,1,1,2,0 }, { 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2 }, { 0,2,1,1,0,2,1,1,0,2,1,1,0,2,1,1 }, { 2,0,0,0,2,0,0,0,2,1,1,1,2,1,1,1 }, + { 2,0,1,2,2,0,1,2,2,0,1,2,2,0,1,2 }, { 1,1,1,1,0,0,0,0,2,2,2,2,1,1,1,1 }, { 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2 } + }; + + const uint8_t g_bc7_3_astc2_patterns2[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][16] = + { + { 0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0 }, { 
1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1 }, + { 1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1 }, { 0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0 }, { 0,0,0,1,0,0,1,1,1,1,1,1,1,1,1,1 }, { 0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1 }, + { 1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0 }, { 0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0 }, { 1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0 }, + { 0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0 }, { 1,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0 }, + { 1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0 }, { 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0 }, { 1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 } + }; + + const uint8_t g_astc_bc7_pattern2_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][3] = + { + { 0, 2 }, { 0, 3 }, { 1, 0 }, { 0, 3 }, { 7, 0 }, { 0, 2 }, { 3, 0 }, { 7, 0 }, + { 0, 11 }, { 2, 0 }, { 0, 7 }, { 11, 0 }, { 3, 0 }, { 8, 0 }, { 0, 4 }, { 12, 0 }, + { 1, 0 }, { 8, 0 }, { 0, 1 }, { 0, 2 }, { 0, 4 }, { 8, 0 }, { 1, 0 }, { 0, 2 }, + { 4, 0 }, { 0, 1 }, { 4, 0 }, { 1, 0 }, { 4, 0 }, { 1, 0 } + }; + + const uint8_t g_astc_bc7_pattern3_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][3] = + { + { 0, 8, 10 }, { 8, 0, 12 }, { 4, 0, 12 }, { 8, 0, 4 }, { 3, 0, 2 }, { 0, 1, 3 }, { 0, 2, 1 }, { 1, 9, 0 }, { 1, 2, 0 }, { 4, 0, 8 }, { 0, 6, 2 } + }; + + const uint8_t g_bc7_3_astc2_patterns2_anchors[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][3] = + { + { 0, 4 }, { 0, 2 }, { 2, 0 }, { 0, 7 }, { 8, 0 }, { 0, 1 }, { 0, 3 }, { 0, 1 }, { 2, 0 }, { 0, 1 }, { 0, 8 }, { 2, 0 }, { 0, 1 }, { 0, 7 }, { 12, 0 }, { 2, 0 }, { 9, 0 }, { 0, 2 }, { 4, 0 } + }; + + const uint32_t g_uastc_mode_huff_codes[TOTAL_UASTC_MODES + 1][2] = + { + { 0x1, 4 }, + { 0x35, 6 }, + { 0x1D, 5 }, + { 0x3, 5 }, + + { 0x13, 5 }, + { 0xB, 5 }, + { 0x1B, 5 }, + { 0x7, 5 }, + + { 0x17, 5 }, + { 0xF, 5 }, + { 0x2, 3 }, + { 0x0, 2 }, + + { 0x6, 3 }, + { 0x1F, 5 }, + { 0xD, 5 }, + { 0x5, 7 }, + + { 0x15, 6 }, + { 0x25, 6 }, + { 0x9, 4 }, + { 0x45, 7 } // future expansion + }; + + // If 
g_uastc_mode_huff_codes[] changes this table must be updated! + static const uint8_t g_uastc_huff_modes[128] = + { + 11,0,10,3,11,15,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,16,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,17,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,1,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11, + 19,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,16,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,17,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,1,12,8,11,18,10,6,11,2,12,13 + }; + + const uint8_t g_uastc_mode_weight_bits[TOTAL_UASTC_MODES] = { 4, 2, 3, 2, 2, 3, 2, 2, 0, 2, 4, 2, 3, 1, 2, 4, 2, 2, 5 }; + const uint8_t g_uastc_mode_weight_ranges[TOTAL_UASTC_MODES] = { 8, 2, 5, 2, 2, 5, 2, 2, 0, 2, 8, 2, 5, 0, 2, 8, 2, 2, 11 }; + const uint8_t g_uastc_mode_endpoint_ranges[TOTAL_UASTC_MODES] = { 19, 20, 8, 7, 12, 20, 18, 12, 0, 8, 13, 13, 19, 20, 20, 20, 20, 20, 11 }; + const uint8_t g_uastc_mode_subsets[TOTAL_UASTC_MODES] = { 1, 1, 2, 3, 2, 1, 1, 2, 0, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1 }; + const uint8_t g_uastc_mode_planes[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 2, 1, 0, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1 }; + const uint8_t g_uastc_mode_comps[TOTAL_UASTC_MODES] = { 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 3 }; + const uint8_t g_uastc_mode_has_etc1_bias[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 }; + const uint8_t g_uastc_mode_has_bc1_hint0[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; + const uint8_t g_uastc_mode_has_bc1_hint1[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 }; + const uint8_t g_uastc_mode_cem[TOTAL_UASTC_MODES] = { 8, 8, 8, 8, 8, 8, 8, 8, 0, 12, 12, 12, 12, 12, 12, 4, 4, 4, 8 }; + const uint8_t g_uastc_mode_has_alpha[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 }; + const uint8_t g_uastc_mode_is_la[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0 }; + const uint8_t 
g_uastc_mode_total_hint_bits[TOTAL_UASTC_MODES] = { 15, 15, 15, 15, 15, 15, 15, 15, 0, 23, 17, 17, 17, 23, 23, 23, 23, 23, 15 }; + + // bits, trits, quints + const int g_astc_bise_range_table[TOTAL_ASTC_RANGES][3] = + { + { 1, 0, 0 }, // 0-1 0 + { 0, 1, 0 }, // 0-2 1 + { 2, 0, 0 }, // 0-3 2 + { 0, 0, 1 }, // 0-4 3 + + { 1, 1, 0 }, // 0-5 4 + { 3, 0, 0 }, // 0-7 5 + { 1, 0, 1 }, // 0-9 6 + { 2, 1, 0 }, // 0-11 7 + + { 4, 0, 0 }, // 0-15 8 + { 2, 0, 1 }, // 0-19 9 + { 3, 1, 0 }, // 0-23 10 + { 5, 0, 0 }, // 0-31 11 + + { 3, 0, 1 }, // 0-39 12 + { 4, 1, 0 }, // 0-47 13 + { 6, 0, 0 }, // 0-63 14 + { 4, 0, 1 }, // 0-79 15 + + { 5, 1, 0 }, // 0-95 16 + { 7, 0, 0 }, // 0-127 17 + { 5, 0, 1 }, // 0-159 18 + { 6, 1, 0 }, // 0-191 19 + + { 8, 0, 0 }, // 0-255 20 + }; + + int astc_get_levels(int range) + { + assert(range < (int)BC7ENC_TOTAL_ASTC_RANGES); + return (1 + 2 * g_astc_bise_range_table[range][1] + 4 * g_astc_bise_range_table[range][2]) << g_astc_bise_range_table[range][0]; + } + + // g_astc_unquant[] is the inverse of g_astc_sorted_order_unquant[] + astc_quant_bin g_astc_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [ASTC encoded endpoint index] + + // Taken right from the ASTC spec. 
+ static struct + { + const char* m_pB_str; + uint32_t m_c; + } g_astc_endpoint_unquant_params[BC7ENC_TOTAL_ASTC_RANGES] = + { + { "", 0 }, + { "", 0 }, + { "", 0 }, + { "", 0 }, + { "000000000", 204, }, // 0-5 + { "", 0 }, + { "000000000", 113, }, // 0-9 + { "b000b0bb0", 93 }, // 0-11 + { "", 0 }, + { "b0000bb00", 54 }, // 0-19 + { "cb000cbcb", 44 }, // 0-23 + { "", 0 }, + { "cb0000cbc", 26 }, // 0-39 + { "dcb000dcb", 22 }, // 0-47 + { "", 0 }, + { "dcb0000dc", 13 }, // 0-79 + { "edcb000ed", 11 }, // 0-95 + { "", 0 }, + { "edcb0000e", 6 }, // 0-159 + { "fedcb000f", 5 }, // 0-191 + { "", 0 }, + }; + + bool astc_is_valid_endpoint_range(uint32_t range) + { + if ((g_astc_bise_range_table[range][1] == 0) && (g_astc_bise_range_table[range][2] == 0)) + return true; + + return g_astc_endpoint_unquant_params[range].m_c != 0; + } + + uint32_t unquant_astc_endpoint(uint32_t packed_bits, uint32_t packed_trits, uint32_t packed_quints, uint32_t range) + { + assert(range < BC7ENC_TOTAL_ASTC_RANGES); + + const uint32_t bits = g_astc_bise_range_table[range][0]; + const uint32_t trits = g_astc_bise_range_table[range][1]; + const uint32_t quints = g_astc_bise_range_table[range][2]; + + uint32_t val = 0; + if ((!trits) && (!quints)) + { + assert(!packed_trits && !packed_quints); + + int bits_left = 8; + while (bits_left > 0) + { + uint32_t v = packed_bits; + + int n = basisu::minimumi(bits_left, bits); + if (n < (int)bits) + v >>= (bits - n); + + assert(v < (1U << n)); + + val |= (v << (bits_left - n)); + bits_left -= n; + } + } + else + { + const uint32_t A = (packed_bits & 1) ? 511 : 0; + const uint32_t C = g_astc_endpoint_unquant_params[range].m_c; + const uint32_t D = trits ? 
packed_trits : packed_quints; + + assert(C); + + uint32_t B = 0; + for (uint32_t i = 0; i < 9; i++) + { + B <<= 1; + + char c = g_astc_endpoint_unquant_params[range].m_pB_str[i]; + if (c != '0') + { + c -= 'a'; + B |= ((packed_bits >> c) & 1); + } + } + + val = D * C + B; + val = val ^ A; + val = (A & 0x80) | (val >> 2); + } + + return val; + } + + uint32_t unquant_astc_endpoint_val(uint32_t packed_val, uint32_t range) + { + assert(range < BC7ENC_TOTAL_ASTC_RANGES); + assert(packed_val < (uint32_t)astc_get_levels(range)); + + const uint32_t bits = g_astc_bise_range_table[range][0]; + const uint32_t trits = g_astc_bise_range_table[range][1]; + const uint32_t quints = g_astc_bise_range_table[range][2]; + + if ((!trits) && (!quints)) + return unquant_astc_endpoint(packed_val, 0, 0, range); + else if (trits) + return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), packed_val >> bits, 0, range); + else + return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), 0, packed_val >> bits, range); + } + + // BC7 - Various BC7 tables/helpers + const uint32_t g_bc7_weights1[2] = { 0, 64 }; + const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 }; + const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; + const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + const uint32_t g_astc_weights4[16] = { 0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64 }; + const uint32_t g_astc_weights5[32] = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64 }; + const uint32_t g_astc_weights_3levels[3] = { 0, 32, 64 }; + + const uint8_t g_bc7_partition1[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; + + const uint8_t g_bc7_partition2[64 * 16] = + { + 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1, 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1, 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1, 
0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1, 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, + 0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1, 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0, 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0, 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1, + 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0, 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0, 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0, 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0, 0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0, 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0, + 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1, 0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, 0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0, 0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, 0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0, 0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, 0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1, 0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1, + 0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, 0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0, 0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0, 0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0, 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1, 0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1, 0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0, + 0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0, 0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0, 0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0, 0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0, 0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0, + 0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1, 0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1, 0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1, 0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1, 0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0, 0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0, 0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1 + }; + + const uint8_t g_bc7_partition3[64 * 16] = + { + 
0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2, 0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1, 0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1, 0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2, 0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2, 0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1, 0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1, + 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2, 0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2, 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2, 0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2, 0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2, 0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0, + 0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2, 0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0, 0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2, 0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1, 0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2, 0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1, 0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2, 0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0, + 0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0, 0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2, 0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0, 0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1, 0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2, 0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2, 0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1, 0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1, + 0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2, 0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1, 0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2, 0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0, 0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0, 0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0, 0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0, 0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1, + 0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1, 0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1, 0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2, 0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1, 0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1, 0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1, 0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1, + 0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2, 0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1, 0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2, 0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2, 0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2, 0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2, 0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2, + 0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2, 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2, 0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2, 0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2, 
0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1, 0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2, 0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2, 0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0, + }; + + const uint8_t g_bc7_table_anchor_index_second_subset[64] = { 15,15,15,15,15,15,15,15, 15,15,15,15,15,15,15,15, 15, 2, 8, 2, 2, 8, 8,15, 2, 8, 2, 2, 8, 8, 2, 2, 15,15, 6, 8, 2, 8,15,15, 2, 8, 2, 2, 2,15,15, 6, 6, 2, 6, 8,15,15, 2, 2, 15,15,15,15,15, 2, 2,15 }; + + const uint8_t g_bc7_table_anchor_index_third_subset_1[64] = + { + 3, 3,15,15, 8, 3,15,15, 8, 8, 6, 6, 6, 5, 3, 3, 3, 3, 8,15, 3, 3, 6,10, 5, 8, 8, 6, 8, 5,15,15, 8,15, 3, 5, 6,10, 8,15, 15, 3,15, 5,15,15,15,15, 3,15, 5, 5, 5, 8, 5,10, 5,10, 8,13,15,12, 3, 3 + }; + + const uint8_t g_bc7_table_anchor_index_third_subset_2[64] = + { + 15, 8, 8, 3,15,15, 3, 8, 15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, 15,15,15,15, 3,15,15, 8 + }; + + const uint8_t g_bc7_num_subsets[8] = { 3, 2, 3, 2, 1, 1, 1, 2 }; + const uint8_t g_bc7_partition_bits[8] = { 4, 6, 6, 6, 0, 0, 0, 6 }; + const uint8_t g_bc7_color_index_bitcount[8] = { 3, 3, 2, 2, 2, 2, 4, 2 }; + + const uint8_t g_bc7_mode_has_p_bits[8] = { 1, 1, 0, 1, 0, 0, 1, 1 }; + const uint8_t g_bc7_mode_has_shared_p_bits[8] = { 0, 1, 0, 0, 0, 0, 0, 0 }; + const uint8_t g_bc7_color_precision_table[8] = { 4, 6, 5, 7, 5, 7, 7, 5 }; + const int8_t g_bc7_alpha_precision_table[8] = { 0, 0, 0, 0, 6, 8, 7, 5 }; + + const uint8_t g_bc7_alpha_index_bitcount[8] = { 0, 0, 0, 0, 3, 2, 4, 2 }; + + endpoint_err g_bc7_mode_6_optimal_endpoints[256][2]; // [c][pbit] + endpoint_err g_bc7_mode_5_optimal_endpoints[256]; // [c] + + static inline void bc7_set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t* pCur_ofs) + { + assert((num_bits <= 32) && (val < (1ULL << num_bits))); + while (num_bits) + { + const uint32_t n = basisu::minimumu(8 - (*pCur_ofs & 7), num_bits); + pBytes[*pCur_ofs >> 3] |= (uint8_t)(val << (*pCur_ofs & 7)); + 
val >>= n; + num_bits -= n; + *pCur_ofs += n; + } + assert(*pCur_ofs <= 128); + } + + // TODO: Optimize this. + void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults) + { + const uint32_t best_mode = pResults->m_mode; + + const uint32_t total_subsets = g_bc7_num_subsets[best_mode]; + const uint32_t total_partitions = 1 << g_bc7_partition_bits[best_mode]; + //const uint32_t num_rotations = 1 << g_bc7_rotation_bits[best_mode]; + //const uint32_t num_index_selectors = (best_mode == 4) ? 2 : 1; + + const uint8_t* pPartition; + if (total_subsets == 1) + pPartition = &g_bc7_partition1[0]; + else if (total_subsets == 2) + pPartition = &g_bc7_partition2[pResults->m_partition * 16]; + else + pPartition = &g_bc7_partition3[pResults->m_partition * 16]; + + uint8_t color_selectors[16]; + memcpy(color_selectors, pResults->m_selectors, 16); + + uint8_t alpha_selectors[16]; + memcpy(alpha_selectors, pResults->m_alpha_selectors, 16); + + color_quad_u8 low[3], high[3]; + memcpy(low, pResults->m_low, sizeof(low)); + memcpy(high, pResults->m_high, sizeof(high)); + + uint32_t pbits[3][2]; + memcpy(pbits, pResults->m_pbits, sizeof(pbits)); + + int anchor[3] = { -1, -1, -1 }; + + for (uint32_t k = 0; k < total_subsets; k++) + { + uint32_t anchor_index = 0; + if (k) + { + if ((total_subsets == 3) && (k == 1)) + anchor_index = g_bc7_table_anchor_index_third_subset_1[pResults->m_partition]; + else if ((total_subsets == 3) && (k == 2)) + anchor_index = g_bc7_table_anchor_index_third_subset_2[pResults->m_partition]; + else + anchor_index = g_bc7_table_anchor_index_second_subset[pResults->m_partition]; + } + + anchor[k] = anchor_index; + + const uint32_t color_index_bits = get_bc7_color_index_size(best_mode, pResults->m_index_selector); + const uint32_t num_color_indices = 1 << color_index_bits; + + if (color_selectors[anchor_index] & (num_color_indices >> 1)) + { + for (uint32_t i = 0; i < 16; i++) + if (pPartition[i] == k) + color_selectors[i] = 
(uint8_t)((num_color_indices - 1) - color_selectors[i]); + + if (get_bc7_mode_has_seperate_alpha_selectors(best_mode)) + { + for (uint32_t q = 0; q < 3; q++) + { + uint8_t t = low[k].m_c[q]; + low[k].m_c[q] = high[k].m_c[q]; + high[k].m_c[q] = t; + } + } + else + { + color_quad_u8 tmp = low[k]; + low[k] = high[k]; + high[k] = tmp; + } + + if (!g_bc7_mode_has_shared_p_bits[best_mode]) + { + uint32_t t = pbits[k][0]; + pbits[k][0] = pbits[k][1]; + pbits[k][1] = t; + } + } + + if (get_bc7_mode_has_seperate_alpha_selectors(best_mode)) + { + const uint32_t alpha_index_bits = get_bc7_alpha_index_size(best_mode, pResults->m_index_selector); + const uint32_t num_alpha_indices = 1 << alpha_index_bits; + + if (alpha_selectors[anchor_index] & (num_alpha_indices >> 1)) + { + for (uint32_t i = 0; i < 16; i++) + if (pPartition[i] == k) + alpha_selectors[i] = (uint8_t)((num_alpha_indices - 1) - alpha_selectors[i]); + + uint8_t t = low[k].m_c[3]; + low[k].m_c[3] = high[k].m_c[3]; + high[k].m_c[3] = t; + } + } + } + + uint8_t* pBlock_bytes = (uint8_t*)(pBlock); + memset(pBlock_bytes, 0, BC7ENC_BLOCK_SIZE); + + uint32_t cur_bit_ofs = 0; + bc7_set_block_bits(pBlock_bytes, 1 << best_mode, best_mode + 1, &cur_bit_ofs); + + if ((best_mode == 4) || (best_mode == 5)) + bc7_set_block_bits(pBlock_bytes, pResults->m_rotation, 2, &cur_bit_ofs); + + if (best_mode == 4) + bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector, 1, &cur_bit_ofs); + + if (total_partitions > 1) + bc7_set_block_bits(pBlock_bytes, pResults->m_partition, (total_partitions == 64) ? 6 : 4, &cur_bit_ofs); + + const uint32_t total_comps = (best_mode >= 4) ? 4 : 3; + for (uint32_t comp = 0; comp < total_comps; comp++) + { + for (uint32_t subset = 0; subset < total_subsets; subset++) + { + bc7_set_block_bits(pBlock_bytes, low[subset].m_c[comp], (comp == 3) ? 
g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); + bc7_set_block_bits(pBlock_bytes, high[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); + } + } + + if (g_bc7_mode_has_p_bits[best_mode]) + { + for (uint32_t subset = 0; subset < total_subsets; subset++) + { + bc7_set_block_bits(pBlock_bytes, pbits[subset][0], 1, &cur_bit_ofs); + if (!g_bc7_mode_has_shared_p_bits[best_mode]) + bc7_set_block_bits(pBlock_bytes, pbits[subset][1], 1, &cur_bit_ofs); + } + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + int idx = x + y * 4; + + uint32_t n = pResults->m_index_selector ? get_bc7_alpha_index_size(best_mode, pResults->m_index_selector) : get_bc7_color_index_size(best_mode, pResults->m_index_selector); + + if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2])) + n--; + + bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? alpha_selectors[idx] : color_selectors[idx], n, &cur_bit_ofs); + } + } + + if (get_bc7_mode_has_seperate_alpha_selectors(best_mode)) + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + int idx = x + y * 4; + + uint32_t n = pResults->m_index_selector ? get_bc7_color_index_size(best_mode, pResults->m_index_selector) : get_bc7_alpha_index_size(best_mode, pResults->m_index_selector); + + if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2])) + n--; + + bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? 
color_selectors[idx] : alpha_selectors[idx], n, &cur_bit_ofs); + } + } + } + + assert(cur_bit_ofs == 128); + } + + // ASTC + static inline void astc_set_bits_1_to_9(uint32_t* pDst, int& bit_offset, uint32_t code, uint32_t codesize) + { + uint8_t* pBuf = reinterpret_cast(pDst); + + assert(codesize <= 9); + if (codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t val = code << byte_bit_offset; + + uint32_t index = bit_offset >> 3; + pBuf[index] |= (uint8_t)val; + + if (codesize > (8 - byte_bit_offset)) + pBuf[index + 1] |= (uint8_t)(val >> 8); + + bit_offset += codesize; + } + } + + void pack_astc_solid_block(void* pDst_block, const color32& color) + { + uint32_t r = color[0], g = color[1], b = color[2]; + uint32_t a = color[3]; + + uint32_t* pOutput = static_cast(pDst_block); + uint8_t* pBytes = reinterpret_cast(pDst_block); + + pBytes[0] = 0xfc; pBytes[1] = 0xfd; pBytes[2] = 0xff; pBytes[3] = 0xff; + + pOutput[1] = 0xffffffff; + pOutput[2] = 0; + pOutput[3] = 0; + + int bit_pos = 64; + astc_set_bits(reinterpret_cast(pDst_block), bit_pos, r | (r << 8), 16); + astc_set_bits(reinterpret_cast(pDst_block), bit_pos, g | (g << 8), 16); + astc_set_bits(reinterpret_cast(pDst_block), bit_pos, b | (b << 8), 16); + astc_set_bits(reinterpret_cast(pDst_block), bit_pos, a | (a << 8), 16); + } + + // See 23.21 https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_partition_pattern_generation +#ifdef _DEBUG + static inline uint32_t astc_hash52(uint32_t v) + { + uint32_t p = v; + p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4; + p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3; + p ^= p << 6; p ^= p >> 17; + return p; + } + + int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block) + { + if (small_block) + { + x <<= 1; y <<= 1; z <<= 1; + } + seed += (partitioncount - 1) * 1024; + uint32_t rnum = astc_hash52(seed); + uint8_t seed1 = rnum & 0xF; + uint8_t seed2 = (rnum >> 4) & 0xF; + 
uint8_t seed3 = (rnum >> 8) & 0xF; + uint8_t seed4 = (rnum >> 12) & 0xF; + uint8_t seed5 = (rnum >> 16) & 0xF; + uint8_t seed6 = (rnum >> 20) & 0xF; + uint8_t seed7 = (rnum >> 24) & 0xF; + uint8_t seed8 = (rnum >> 28) & 0xF; + uint8_t seed9 = (rnum >> 18) & 0xF; + uint8_t seed10 = (rnum >> 22) & 0xF; + uint8_t seed11 = (rnum >> 26) & 0xF; + uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; + + seed1 *= seed1; seed2 *= seed2; + seed3 *= seed3; seed4 *= seed4; + seed5 *= seed5; seed6 *= seed6; + seed7 *= seed7; seed8 *= seed8; + seed9 *= seed9; seed10 *= seed10; + seed11 *= seed11; seed12 *= seed12; + + int sh1, sh2, sh3; + if (seed & 1) + { + sh1 = (seed & 2 ? 4 : 5); sh2 = (partitioncount == 3 ? 6 : 5); + } + else + { + sh1 = (partitioncount == 3 ? 6 : 5); sh2 = (seed & 2 ? 4 : 5); + } + sh3 = (seed & 0x10) ? sh1 : sh2; + + seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2; + seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2; + seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3; + + int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); + int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); + int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); + int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); + + a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F; + + if (partitioncount < 4) d = 0; + if (partitioncount < 3) c = 0; + + if (a >= b && a >= c && a >= d) + return 0; + else if (b >= c && b >= d) + return 1; + else if (c >= d) + return 2; + else + return 3; + } +#endif + + static const uint8_t g_astc_quint_encode[125] = + { + 0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57, + 58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104, + 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 
125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54, + 126, 127, 94, 95, 62, 39, 47, 55, 63, 31 + }; + + // Encodes 3 values to output, usable for any range that uses quints and bits + static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n) + { + // First extract the quints and the bits from the 3 input values + int quints = 0, bits[3]; + const uint32_t bit_mask = (1 << n) - 1; + for (int i = 0; i < 3; i++) + { + static const int s_muls[3] = { 1, 5, 25 }; + + const int t = pValues[i] >> n; + + quints += t * s_muls[i]; + bits[i] = pValues[i] & bit_mask; + } + + // Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits. + // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding + + assert(quints < 125); + const int T = g_astc_quint_encode[quints]; + + // Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96. + astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) | + (bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3); + } + + // Packs values using ASTC's BISE to output buffer. + static void astc_pack_bise(uint32_t* pDst, const uint8_t* pSrc_vals, int bit_pos, int num_vals, int range) + { + uint32_t temp[5] = { 0, 0, 0, 0, 0 }; + + const int num_bits = g_astc_bise_range_table[range][0]; + + int group_size = 0; + if (g_astc_bise_range_table[range][1]) + group_size = 5; + else if (g_astc_bise_range_table[range][2]) + group_size = 3; + + if (group_size) + { + // Range has trits or quints - pack each group of 5 or 3 values + const int total_groups = (group_size == 5) ? 
((num_vals + 4) / 5) : ((num_vals + 2) / 3); + + for (int group_index = 0; group_index < total_groups; group_index++) + { + uint8_t vals[5] = { 0, 0, 0, 0, 0 }; + + const int limit = basisu::minimum(group_size, num_vals - group_index * group_size); + for (int i = 0; i < limit; i++) + vals[i] = pSrc_vals[group_index * group_size + i]; + + if (group_size == 5) + astc_encode_trits(temp, vals, bit_pos, num_bits); + else + astc_encode_quints(temp, vals, bit_pos, num_bits); + } + } + else + { + for (int i = 0; i < num_vals; i++) + astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits); + } + + pDst[0] |= temp[0]; pDst[1] |= temp[1]; + pDst[2] |= temp[2]; pDst[3] |= temp[3]; + } + + const uint32_t ASTC_BLOCK_MODE_BITS = 11; + const uint32_t ASTC_PART_BITS = 2; + const uint32_t ASTC_CEM_BITS = 4; + const uint32_t ASTC_PARTITION_INDEX_BITS = 10; + const uint32_t ASTC_CCS_BITS = 2; + + const uint32_t g_uastc_mode_astc_block_mode[TOTAL_UASTC_MODES] = { 0x242, 0x42, 0x53, 0x42, 0x42, 0x53, 0x442, 0x42, 0, 0x42, 0x242, 0x442, 0x53, 0x441, 0x42, 0x242, 0x42, 0x442, 0x253 }; + + bool pack_astc_block(uint32_t* pDst, const astc_block_desc* pBlock, uint32_t uastc_mode) + { + assert(uastc_mode < TOTAL_UASTC_MODES); + uint8_t* pDst_bytes = reinterpret_cast(pDst); + + const int total_weights = pBlock->m_dual_plane ? 
32 : 16; + + // Set mode bits - see Table 146-147 + uint32_t mode = g_uastc_mode_astc_block_mode[uastc_mode]; + pDst_bytes[0] = (uint8_t)mode; + pDst_bytes[1] = (uint8_t)(mode >> 8); + + memset(pDst_bytes + 2, 0, 16 - 2); + + int bit_pos = ASTC_BLOCK_MODE_BITS; + + // We only support 1-5 bit weight indices + assert(!g_astc_bise_range_table[pBlock->m_weight_range][1] && !g_astc_bise_range_table[pBlock->m_weight_range][2]); + const int bits_per_weight = g_astc_bise_range_table[pBlock->m_weight_range][0]; + + // See table 143 - PART + astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_subsets - 1, ASTC_PART_BITS); + + if (pBlock->m_subsets == 1) + astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_cem, ASTC_CEM_BITS); + else + { + // See table 145 + astc_set_bits(pDst, bit_pos, pBlock->m_partition_seed, ASTC_PARTITION_INDEX_BITS); + + // Table 150 - we assume all CEM's are equal, so write 2 0's along with the CEM + astc_set_bits_1_to_9(pDst, bit_pos, (pBlock->m_cem << 2) & 63, ASTC_CEM_BITS + 2); + } + + if (pBlock->m_dual_plane) + { + const int total_weight_bits = total_weights * bits_per_weight; + + // See Illegal Encodings 23.24 + // https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_illegal_encodings + assert((total_weight_bits >= 24) && (total_weight_bits <= 96)); + + int ccs_bit_pos = 128 - total_weight_bits - ASTC_CCS_BITS; + astc_set_bits_1_to_9(pDst, ccs_bit_pos, pBlock->m_ccs, ASTC_CCS_BITS); + } + + const int num_cem_pairs = (1 + (pBlock->m_cem >> 2)) * pBlock->m_subsets; + assert(num_cem_pairs <= 9); + + astc_pack_bise(pDst, pBlock->m_endpoints, bit_pos, num_cem_pairs * 2, g_uastc_mode_endpoint_ranges[uastc_mode]); + + // Write the weight bits in reverse bit order. 
+ switch (bits_per_weight) + { + case 1: + { + const uint32_t N = 1; + for (int i = 0; i < total_weights; i++) + { + const uint32_t ofs = 128 - N - i; + assert((ofs >> 3) < 16); + pDst_bytes[ofs >> 3] |= (pBlock->m_weights[i] << (ofs & 7)); + } + break; + } + case 2: + { + const uint32_t N = 2; + for (int i = 0; i < total_weights; i++) + { + static const uint8_t s_reverse_bits2[4] = { 0, 2, 1, 3 }; + const uint32_t ofs = 128 - N - (i * N); + assert((ofs >> 3) < 16); + pDst_bytes[ofs >> 3] |= (s_reverse_bits2[pBlock->m_weights[i]] << (ofs & 7)); + } + break; + } + case 3: + { + const uint32_t N = 3; + for (int i = 0; i < total_weights; i++) + { + static const uint8_t s_reverse_bits3[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; + + const uint32_t ofs = 128 - N - (i * N); + const uint32_t rev = s_reverse_bits3[pBlock->m_weights[i]] << (ofs & 7); + + uint32_t index = ofs >> 3; + assert(index < 16); + pDst_bytes[index++] |= rev & 0xFF; + if (index < 16) + pDst_bytes[index++] |= (rev >> 8); + } + break; + } + case 4: + { + const uint32_t N = 4; + for (int i = 0; i < total_weights; i++) + { + static const uint8_t s_reverse_bits4[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; + const int ofs = 128 - N - (i * N); + assert(ofs >= 0 && (ofs >> 3) < 16); + pDst_bytes[ofs >> 3] |= (s_reverse_bits4[pBlock->m_weights[i]] << (ofs & 7)); + } + break; + } + case 5: + { + const uint32_t N = 5; + for (int i = 0; i < total_weights; i++) + { + static const uint8_t s_reverse_bits5[32] = { 0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30, 1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31 }; + + const uint32_t ofs = 128 - N - (i * N); + const uint32_t rev = s_reverse_bits5[pBlock->m_weights[i]] << (ofs & 7); + + uint32_t index = ofs >> 3; + assert(index < 16); + pDst_bytes[index++] |= rev & 0xFF; + if (index < 16) + pDst_bytes[index++] |= (rev >> 8); + } + + break; + } + default: + assert(0); + break; + } + + return true; + } + + const uint8_t* 
get_anchor_indices(uint32_t subsets, uint32_t mode, uint32_t common_pattern, const uint8_t*& pPartition_pattern) + { + const uint8_t* pSubset_anchor_indices = g_zero_pattern; + pPartition_pattern = g_zero_pattern; + + if (subsets >= 2) + { + if (subsets == 3) + { + pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0]; + pSubset_anchor_indices = &g_astc_bc7_pattern3_anchors[common_pattern][0]; + } + else if (mode == 7) + { + pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0]; + pSubset_anchor_indices = &g_bc7_3_astc2_patterns2_anchors[common_pattern][0]; + } + else + { + pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0]; + pSubset_anchor_indices = &g_astc_bc7_pattern2_anchors[common_pattern][0]; + } + } + + return pSubset_anchor_indices; + } + + static inline uint32_t read_bit(const uint8_t* pBuf, uint32_t& bit_offset) + { + uint32_t byte_bits = pBuf[bit_offset >> 3] >> (bit_offset & 7); + bit_offset += 1; + return byte_bits & 1; + } + + static inline uint32_t read_bits1_to_9(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 9); + if (!codesize) + return 0; + + if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS) || (bit_offset >= 112)) + { + const uint8_t* pBytes = &pBuf[bit_offset >> 3U]; + + uint32_t byte_bit_offset = bit_offset & 7U; + + uint32_t bits = pBytes[0] >> byte_bit_offset; + uint32_t bits_read = basisu::minimum(codesize, 8 - byte_bit_offset); + + uint32_t bits_remaining = codesize - bits_read; + if (bits_remaining) + bits |= ((uint32_t)pBytes[1]) << bits_read; + + bit_offset += codesize; + + return bits & ((1U << codesize) - 1U); + } + + uint32_t byte_bit_offset = bit_offset & 7U; + const uint16_t w = *(const uint16_t *)(&pBuf[bit_offset >> 3U]); + bit_offset += codesize; + return (w >> byte_bit_offset) & ((1U << codesize) - 1U); + } + + inline uint64_t read_bits64(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 64U); + uint64_t 
bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7U; + uint32_t bits_to_read = basisu::minimum(codesize - total_bits, 8U - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3U] >> byte_bit_offset; + byte_bits &= ((1U << bits_to_read) - 1U); + + bits |= ((uint64_t)(byte_bits) << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; + } + + static inline uint32_t read_bits1_to_9_fst(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 9); + if (!codesize) + return 0; + assert(bit_offset < 112); + + if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS)) + { + const uint8_t* pBytes = &pBuf[bit_offset >> 3U]; + + uint32_t byte_bit_offset = bit_offset & 7U; + + uint32_t bits = pBytes[0] >> byte_bit_offset; + uint32_t bits_read = basisu::minimum(codesize, 8 - byte_bit_offset); + + uint32_t bits_remaining = codesize - bits_read; + if (bits_remaining) + bits |= ((uint32_t)pBytes[1]) << bits_read; + + bit_offset += codesize; + + return bits & ((1U << codesize) - 1U); + } + else + { + uint32_t byte_bit_offset = bit_offset & 7U; + const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]); + bit_offset += codesize; + return (w >> byte_bit_offset) & ((1U << codesize) - 1U); + } + } + + bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints) + { + //memset(&unpacked, 0, sizeof(unpacked)); + +#if 0 + uint8_t table[128]; + memset(table, 0xFF, sizeof(table)); + + { + for (uint32_t mode = 0; mode <= TOTAL_UASTC_MODES; mode++) + { + const uint32_t code = g_uastc_mode_huff_codes[mode][0]; + const uint32_t codesize = g_uastc_mode_huff_codes[mode][1]; + + table[code] = mode; + + uint32_t bits_left = 7 - codesize; + for (uint32_t i = 0; i < (1 << bits_left); i++) + table[code | (i << codesize)] = mode; + } + + for (uint32_t i = 0; i < 128; i++) + printf("%u,", 
table[i]); + exit(0); + } +#endif + + const int mode = g_uastc_huff_modes[blk.m_bytes[0] & 127]; + if (mode >= (int)TOTAL_UASTC_MODES) + return false; + + unpacked.m_mode = mode; + unpacked.m_common_pattern = 0; + + uint32_t bit_ofs = g_uastc_mode_huff_codes[mode][1]; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + unpacked.m_solid_color.r = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + unpacked.m_solid_color.g = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + unpacked.m_solid_color.b = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + unpacked.m_solid_color.a = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + + if (read_hints) + { + unpacked.m_etc1_flip = false; + unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0; + unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3); + unpacked.m_etc1_inten1 = 0; + unpacked.m_etc1_selector = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2); + unpacked.m_etc1_r = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + unpacked.m_etc1_g = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + unpacked.m_etc1_b = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + unpacked.m_etc1_bias = 0; + unpacked.m_etc2_hints = 0; + } + + return true; + } + + if (read_hints) + { + if (g_uastc_mode_has_bc1_hint0[mode]) + unpacked.m_bc1_hint0 = read_bit(blk.m_bytes, bit_ofs) != 0; + else + unpacked.m_bc1_hint0 = false; + + if (g_uastc_mode_has_bc1_hint1[mode]) + unpacked.m_bc1_hint1 = read_bit(blk.m_bytes, bit_ofs) != 0; + else + unpacked.m_bc1_hint1 = false; + + unpacked.m_etc1_flip = read_bit(blk.m_bytes, bit_ofs) != 0; + unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0; + unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3); + unpacked.m_etc1_inten1 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3); + + if (g_uastc_mode_has_etc1_bias[mode]) + unpacked.m_etc1_bias = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, 
bit_ofs, 5); + else + unpacked.m_etc1_bias = 0; + + if (g_uastc_mode_has_alpha[mode]) + { + unpacked.m_etc2_hints = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + //assert(unpacked.m_etc2_hints > 0); + } + else + unpacked.m_etc2_hints = 0; + } + else + bit_ofs += g_uastc_mode_total_hint_bits[mode]; + + uint32_t subsets = 1; + switch (mode) + { + case 2: + case 4: + case 7: + case 9: + case 16: + unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + subsets = 2; + break; + case 3: + unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 4); + subsets = 3; + break; + default: + break; + } + + uint32_t part_seed = 0; + switch (mode) + { + case 2: + case 4: + case 9: + case 16: + if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS2) + return false; + + part_seed = g_astc_bc7_common_partitions2[unpacked.m_common_pattern].m_astc; + break; + case 3: + if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS3) + return false; + + part_seed = g_astc_bc7_common_partitions3[unpacked.m_common_pattern].m_astc; + break; + case 7: + if (unpacked.m_common_pattern >= TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS) + return false; + + part_seed = g_bc7_3_astc2_common_partitions[unpacked.m_common_pattern].m_astc2; + break; + default: + break; + } + + uint32_t total_planes = 1; + switch (mode) + { + case 6: + case 11: + case 13: + unpacked.m_astc.m_ccs = (int)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2); + total_planes = 2; + break; + case 17: + unpacked.m_astc.m_ccs = 3; + total_planes = 2; + break; + default: + break; + } + + unpacked.m_astc.m_dual_plane = (total_planes == 2); + + unpacked.m_astc.m_subsets = subsets; + unpacked.m_astc.m_partition_seed = part_seed; + + const uint32_t total_comps = g_uastc_mode_comps[mode]; + + const uint32_t weight_bits = g_uastc_mode_weight_bits[mode]; + + unpacked.m_astc.m_weight_range = g_uastc_mode_weight_ranges[mode]; + + const uint32_t total_values = total_comps * 2 
* subsets; + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode]; + + const uint32_t cem = g_uastc_mode_cem[mode]; + unpacked.m_astc.m_cem = cem; + + const uint32_t ep_bits = g_astc_bise_range_table[endpoint_range][0]; + const uint32_t ep_trits = g_astc_bise_range_table[endpoint_range][1]; + const uint32_t ep_quints = g_astc_bise_range_table[endpoint_range][2]; + + uint32_t total_tqs = 0; + uint32_t bundle_size = 0, mul = 0; + if (ep_trits) + { + total_tqs = (total_values + 4) / 5; + bundle_size = 5; + mul = 3; + } + else if (ep_quints) + { + total_tqs = (total_values + 2) / 3; + bundle_size = 3; + mul = 5; + } + + uint32_t tq_values[8]; + for (uint32_t i = 0; i < total_tqs; i++) + { + uint32_t num_bits = ep_trits ? 8 : 7; + if (i == (total_tqs - 1)) + { + uint32_t num_remaining = total_values - (total_tqs - 1) * bundle_size; + if (ep_trits) + { + switch (num_remaining) + { + case 1: num_bits = 2; break; + case 2: num_bits = 4; break; + case 3: num_bits = 5; break; + case 4: num_bits = 7; break; + default: break; + } + } + else if (ep_quints) + { + switch (num_remaining) + { + case 1: num_bits = 3; break; + case 2: num_bits = 5; break; + default: break; + } + } + } + + tq_values[i] = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, num_bits); + } // i + + uint32_t accum = 0; + uint32_t accum_remaining = 0; + uint32_t next_tq_index = 0; + + for (uint32_t i = 0; i < total_values; i++) + { + uint32_t value = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, ep_bits); + + if (total_tqs) + { + if (!accum_remaining) + { + assert(next_tq_index < total_tqs); + accum = tq_values[next_tq_index++]; + accum_remaining = bundle_size; + } + + // TODO: Optimize with tables + uint32_t v = accum % mul; + accum /= mul; + accum_remaining--; + + value |= (v << ep_bits); + } + + unpacked.m_astc.m_endpoints[i] = (uint8_t)value; + } + + const uint8_t* pPartition_pattern; + const uint8_t* pSubset_anchor_indices = get_anchor_indices(subsets, mode, 
unpacked.m_common_pattern, pPartition_pattern); + +#ifdef _DEBUG + for (uint32_t i = 0; i < 16; i++) + assert(pPartition_pattern[i] == astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true)); + + for (uint32_t subset_index = 0; subset_index < subsets; subset_index++) + { + uint32_t anchor_index = 0; + + for (uint32_t i = 0; i < 16; i++) + { + if (pPartition_pattern[i] == subset_index) + { + anchor_index = i; + break; + } + } + + assert(pSubset_anchor_indices[subset_index] == anchor_index); + } +#endif + +#if 0 + const uint32_t total_planes_shift = total_planes - 1; + for (uint32_t i = 0; i < 16 * total_planes; i++) + { + uint32_t num_bits = weight_bits; + for (uint32_t s = 0; s < subsets; s++) + { + if (pSubset_anchor_indices[s] == (i >> total_planes_shift)) + { + num_bits--; + break; + } + } + + unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, num_bits); + } +#endif + + if (mode == 18) + { + // Mode 18 is the only mode with more than 64 weight bits. + for (uint32_t i = 0; i < 16; i++) + unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, i ? weight_bits : (weight_bits - 1)); + } + else + { + // All other modes have <= 64 weight bits. + uint64_t bits; + + // Read the weight bits + if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS)) + bits = read_bits64(blk.m_bytes, bit_ofs, basisu::minimum(64, 128 - (int)bit_ofs)); + else + { + bits = blk.m_dwords[2]; + bits |= (((uint64_t)blk.m_dwords[3]) << 32U); + + if (bit_ofs >= 64U) + bits >>= (bit_ofs - 64U); + else + { + assert(bit_ofs >= 56U); + + uint32_t bits_needed = 64U - bit_ofs; + bits <<= bits_needed; + bits |= (blk.m_bytes[7] >> (8U - bits_needed)); + } + } + + bit_ofs = 0; + + const uint32_t mask = (1U << weight_bits) - 1U; + const uint32_t anchor_mask = (1U << (weight_bits - 1U)) - 1U; + + if (total_planes == 2) + { + // Dual plane modes always have a single subset, and the first 2 weights are anchors. 
+ + unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); + bit_ofs += (weight_bits - 1); + + unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); + bit_ofs += (weight_bits - 1); + + for (uint32_t i = 2; i < 32; i++) + { + unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask); + bit_ofs += weight_bits; + } + } + else + { + if (subsets == 1) + { + // Specialize the single subset case. + if (weight_bits == 4) + { + assert(bit_ofs == 0); + + // Specialize the most common case: 4-bit weights. + unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits) & 7); + unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> 3) & 15); + unpacked.m_astc.m_weights[2] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 1)) & 15); + unpacked.m_astc.m_weights[3] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 2)) & 15); + + unpacked.m_astc.m_weights[4] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 3)) & 15); + unpacked.m_astc.m_weights[5] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 4)) & 15); + unpacked.m_astc.m_weights[6] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 5)) & 15); + unpacked.m_astc.m_weights[7] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 6)) & 15); + + unpacked.m_astc.m_weights[8] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 7)) & 15); + unpacked.m_astc.m_weights[9] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 8)) & 15); + unpacked.m_astc.m_weights[10] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 9)) & 15); + unpacked.m_astc.m_weights[11] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 10)) & 15); + + unpacked.m_astc.m_weights[12] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 11)) & 15); + unpacked.m_astc.m_weights[13] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 12)) & 15); + unpacked.m_astc.m_weights[14] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 13)) & 15); + unpacked.m_astc.m_weights[15] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 14)) & 15); + } + else + { + // First weight is always an anchor. 
+ unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); + bit_ofs += (weight_bits - 1); + + for (uint32_t i = 1; i < 16; i++) + { + unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask); + bit_ofs += weight_bits; + } + } + } + else + { + const uint32_t a0 = pSubset_anchor_indices[0], a1 = pSubset_anchor_indices[1], a2 = pSubset_anchor_indices[2]; + + for (uint32_t i = 0; i < 16; i++) + { + if ((i == a0) || (i == a1) || (i == a2)) + { + unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); + bit_ofs += (weight_bits - 1); + } + else + { + unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask); + bit_ofs += weight_bits; + } + } + } + } + } + + if ((blue_contract_check) && (total_comps >= 3)) + { + // We only need to disable ASTC Blue Contraction when we'll be packing to ASTC. The other transcoders don't care. + bool invert_subset[3] = { false, false, false }; + bool any_flag = false; + + for (uint32_t subset_index = 0; subset_index < subsets; subset_index++) + { + const int s0 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 0]].m_unquant + + g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 2]].m_unquant + + g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 4]].m_unquant; + + const int s1 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 1]].m_unquant + + g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 3]].m_unquant + + g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 5]].m_unquant; + + if (s1 < s0) + { + for (uint32_t c = 0; c < total_comps; c++) + std::swap(unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 0], unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 1]); + + 
invert_subset[subset_index] = true; + any_flag = true; + } + } + + if (any_flag) + { + const uint32_t weight_mask = (1 << weight_bits) - 1; + + for (uint32_t i = 0; i < 16; i++) + { + uint32_t subset = pPartition_pattern[i]; + + if (invert_subset[subset]) + { + unpacked.m_astc.m_weights[i * total_planes] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes]); + + if (total_planes == 2) + unpacked.m_astc.m_weights[i * total_planes + 1] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes + 1]); + } + } + } + } + + return true; + } + + static const uint32_t* g_astc_weight_tables[6] = { nullptr, g_bc7_weights1, g_bc7_weights2, g_bc7_weights3, g_astc_weights4, g_astc_weights5 }; + + bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb) + { + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = solid_color; + return true; + } + + color32 endpoints[3][2]; + + const uint32_t total_subsets = g_uastc_mode_subsets[mode]; + const uint32_t total_comps = basisu::minimum(4U, g_uastc_mode_comps[mode]); + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode]; + const uint32_t total_planes = g_uastc_mode_planes[mode]; + const uint32_t weight_bits = g_uastc_mode_weight_bits[mode]; + const uint32_t weight_levels = 1 << weight_bits; + + for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++) + { + if (total_comps == 2) + { + const uint32_t ll = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 0]].m_unquant; + const uint32_t lh = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 1]].m_unquant; + + const uint32_t al = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 1 * 2 + 0]].m_unquant; + const uint32_t ah = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * 
total_comps * 2 + 1 * 2 + 1]].m_unquant; + + endpoints[subset_index][0].set_noclamp_rgba(ll, ll, ll, al); + endpoints[subset_index][1].set_noclamp_rgba(lh, lh, lh, ah); + } + else + { + for (uint32_t comp_index = 0; comp_index < total_comps; comp_index++) + { + endpoints[subset_index][0][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 0]].m_unquant; + endpoints[subset_index][1][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 1]].m_unquant; + } + for (uint32_t comp_index = total_comps; comp_index < 4; comp_index++) + { + endpoints[subset_index][0][comp_index] = 255; + endpoints[subset_index][1][comp_index] = 255; + } + } + } + + color32 block_colors[3][32]; + + const uint32_t* pWeights = g_astc_weight_tables[weight_bits]; + + for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++) + { + for (uint32_t l = 0; l < weight_levels; l++) + { + if (total_comps == 2) + { + const uint8_t lc = (uint8_t)astc_interpolate(endpoints[subset_index][0][0], endpoints[subset_index][1][0], pWeights[l], srgb); + const uint8_t ac = (uint8_t)astc_interpolate(endpoints[subset_index][0][3], endpoints[subset_index][1][3], pWeights[l], srgb); + + block_colors[subset_index][l].set(lc, lc, lc, ac); + } + else + { + uint32_t comp_index; + for (comp_index = 0; comp_index < total_comps; comp_index++) + block_colors[subset_index][l][comp_index] = (uint8_t)astc_interpolate(endpoints[subset_index][0][comp_index], endpoints[subset_index][1][comp_index], pWeights[l], srgb); + + for (; comp_index < 4; comp_index++) + block_colors[subset_index][l][comp_index] = 255; + } + } + } + + const uint8_t* pPartition_pattern = g_zero_pattern; + + if (total_subsets >= 2) + { + if (total_subsets == 3) + pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0]; + else if (mode == 7) + pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0]; + else + 
pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0]; + +#ifdef _DEBUG + for (uint32_t i = 0; i < 16; i++) + { + assert(pPartition_pattern[i] == (uint8_t)astc_compute_texel_partition(astc.m_partition_seed, i & 3, i >> 2, 0, total_subsets, true)); + } +#endif + } + + if (total_planes == 1) + { + if (total_subsets == 1) + { + for (uint32_t i = 0; i < 16; i++) + { + assert(astc.m_weights[i] < weight_levels); + pPixels[i] = block_colors[0][astc.m_weights[i]]; + } + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + assert(astc.m_weights[i] < weight_levels); + pPixels[i] = block_colors[pPartition_pattern[i]][astc.m_weights[i]]; + } + } + } + else + { + assert(total_subsets == 1); + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = 0; // pPartition_pattern[i]; + + const uint32_t weight_index0 = astc.m_weights[i * 2]; + const uint32_t weight_index1 = astc.m_weights[i * 2 + 1]; + + assert(weight_index0 < weight_levels && weight_index1 < weight_levels); + + color32& c = pPixels[i]; + for (uint32_t comp = 0; comp < 4; comp++) + { + if ((int)comp == astc.m_ccs) + c[comp] = block_colors[subset_index][weight_index1][comp]; + else + c[comp] = block_colors[subset_index][weight_index0][comp]; + } + } + } + + return true; + } + + bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb) + { + return unpack_uastc(unpacked_blk.m_mode, unpacked_blk.m_common_pattern, unpacked_blk.m_solid_color, unpacked_blk.m_astc, pPixels, srgb); + } + + bool unpack_uastc(const uastc_block& blk, color32* pPixels, bool srgb) + { + unpacked_uastc_block unpacked_blk; + + if (!unpack_uastc(blk, unpacked_blk, false, false)) + return false; + + return unpack_uastc(unpacked_blk, pPixels, srgb); + } + + // Determines the best shared pbits to use to encode xl/xh + static void determine_shared_pbits( + uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4], + color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t 
best_pbits[2]) + { + const uint32_t total_bits = comp_bits + 1; + assert(total_bits >= 4 && total_bits <= 8); + + const int iscalep = (1 << total_bits) - 1; + const float scalep = (float)iscalep; + + float best_err = 1e+9f; + + for (int p = 0; p < 2; p++) + { + color_quad_u8 xMinColor, xMaxColor; + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_quad_u8 scaledLow, scaledHigh; + + for (uint32_t i = 0; i < 4; i++) + { + scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits)); + scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits); + assert(scaledLow.m_c[i] <= 255); + + scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits)); + scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits); + assert(scaledHigh.m_c[i] <= 255); + } + + float err = 0; + for (uint32_t i = 0; i < total_comps; i++) + err += basisu::squaref((scaledLow.m_c[i] / 255.0f) - xl[i]) + basisu::squaref((scaledHigh.m_c[i] / 255.0f) - xh[i]); + + if (err < best_err) + { + best_err = err; + best_pbits[0] = p; + best_pbits[1] = p; + for (uint32_t j = 0; j < 4; j++) + { + bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1; + bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1; + } + } + } + } + + // Determines the best unique pbits to use to encode xl/xh + static void determine_unique_pbits( + uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4], + color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2]) + { + const uint32_t total_bits = comp_bits + 1; + const int iscalep = (1 << total_bits) - 1; + const float scalep = (float)iscalep; + + float best_err0 = 1e+9f; + float best_err1 = 1e+9f; + + for (int p = 0; p < 2; p++) + { + color_quad_u8 xMinColor, xMaxColor; + + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * 
scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_quad_u8 scaledLow, scaledHigh; + for (uint32_t i = 0; i < 4; i++) + { + scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits)); + scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits); + assert(scaledLow.m_c[i] <= 255); + + scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits)); + scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits); + assert(scaledHigh.m_c[i] <= 255); + } + + float err0 = 0, err1 = 0; + for (uint32_t i = 0; i < total_comps; i++) + { + err0 += basisu::squaref(scaledLow.m_c[i] - xl[i] * 255.0f); + err1 += basisu::squaref(scaledHigh.m_c[i] - xh[i] * 255.0f); + } + + if (err0 < best_err0) + { + best_err0 = err0; + best_pbits[0] = p; + + bestMinColor.m_c[0] = xMinColor.m_c[0] >> 1; + bestMinColor.m_c[1] = xMinColor.m_c[1] >> 1; + bestMinColor.m_c[2] = xMinColor.m_c[2] >> 1; + bestMinColor.m_c[3] = xMinColor.m_c[3] >> 1; + } + + if (err1 < best_err1) + { + best_err1 = err1; + best_pbits[1] = p; + + bestMaxColor.m_c[0] = xMaxColor.m_c[0] >> 1; + bestMaxColor.m_c[1] = xMaxColor.m_c[1] >> 1; + bestMaxColor.m_c[2] = xMaxColor.m_c[2] >> 1; + bestMaxColor.m_c[3] = xMaxColor.m_c[3] >> 1; + } + } + } + + bool transcode_uastc_to_astc(const uastc_block& src_blk, void* pDst) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, true, false)) + return false; + + bool success = false; + if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + pack_astc_solid_block(pDst, unpacked_src_blk.m_solid_color); + success = true; + } + else + { + success = pack_astc_block(static_cast(pDst), &unpacked_src_blk.m_astc, unpacked_src_blk.m_mode); + } + + return success; + } + + bool transcode_uastc_to_bc7(const unpacked_uastc_block& unpacked_src_blk, bc7_optimization_results& dst_blk) + { + memset(&dst_blk, 0, sizeof(dst_blk)); + + const 
uint32_t mode = unpacked_src_blk.m_mode; + + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode]; + const uint32_t total_comps = g_uastc_mode_comps[mode]; + + switch (mode) + { + case 0: + case 5: + case 10: + case 12: + case 14: + case 15: + case 18: + { + // MODE 0: DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 19 (192) - BC7 MODE6 RGB + // MODE 5: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6 RGB + // MODE 10 DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE6 + // MODE 12: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 19 (192) - BC7 MODE6 + // MODE 14: DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6 + // MODE 18: DualPlane: 0, WeightRange : 11 (32), Subsets : 1, CEM : 8, EndpointRange : 11 (32) - BC7 MODE6 + // MODE 15: DualPlane: 0, WeightRange : 8 (16), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE6 + dst_blk.m_mode = 6; + + float xl[4], xh[4]; + if (total_comps == 2) + { + xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f; + xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f; + + xl[1] = xl[0]; + xh[1] = xh[0]; + + xl[2] = xl[0]; + xh[2] = xh[0]; + + xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f; + xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f; + } + else + { + xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f; + xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f; + xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4]].m_unquant / 255.0f; + + xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f; + xh[1] = 
g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f; + xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5]].m_unquant / 255.0f; + + if (total_comps == 4) + { + xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6]].m_unquant / 255.0f; + xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7]].m_unquant / 255.0f; + } + else + { + xl[3] = 1.0f; + xh[3] = 1.0f; + } + } + + uint32_t best_pbits[2]; + color_quad_u8 bestMinColor, bestMaxColor; + determine_unique_pbits((total_comps == 2) ? 4 : total_comps, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + dst_blk.m_low[0] = bestMinColor; + dst_blk.m_high[0] = bestMaxColor; + + if (total_comps == 3) + { + dst_blk.m_low[0].m_c[3] = 127; + dst_blk.m_high[0].m_c[3] = 127; + } + + dst_blk.m_pbits[0][0] = best_pbits[0]; + dst_blk.m_pbits[0][1] = best_pbits[1]; + + if (mode == 18) + { + const uint8_t s_bc7_5_to_4[32] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 9, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 }; + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = s_bc7_5_to_4[unpacked_src_blk.m_astc.m_weights[i]]; + } + else if (mode == 14) + { + const uint8_t s_bc7_2_to_4[4] = { 0, 5, 10, 15 }; + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = s_bc7_2_to_4[unpacked_src_blk.m_astc.m_weights[i]]; + } + else if ((mode == 5) || (mode == 12)) + { + const uint8_t s_bc7_3_to_4[8] = { 0, 2, 4, 6, 9, 11, 13, 15 }; + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = s_bc7_3_to_4[unpacked_src_blk.m_astc.m_weights[i]]; + } + else + { + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + } + + break; + } + case 1: + { + // DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE3 + // Mode 1 uses endpoint range 20 - no need to use ASTC dequant tables. 
+ dst_blk.m_mode = 3; + + float xl[4], xh[4]; + xl[0] = unpacked_src_blk.m_astc.m_endpoints[0] / 255.0f; + xl[1] = unpacked_src_blk.m_astc.m_endpoints[2] / 255.0f; + xl[2] = unpacked_src_blk.m_astc.m_endpoints[4] / 255.0f; + xl[3] = 1.0f; + + xh[0] = unpacked_src_blk.m_astc.m_endpoints[1] / 255.0f; + xh[1] = unpacked_src_blk.m_astc.m_endpoints[3] / 255.0f; + xh[2] = unpacked_src_blk.m_astc.m_endpoints[5] / 255.0f; + xh[3] = 1.0f; + + uint32_t best_pbits[2]; + color_quad_u8 bestMinColor, bestMaxColor; + memset(&bestMinColor, 0, sizeof(bestMinColor)); + memset(&bestMaxColor, 0, sizeof(bestMaxColor)); + determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + for (uint32_t i = 0; i < 3; i++) + { + dst_blk.m_low[0].m_c[i] = bestMinColor.m_c[i]; + dst_blk.m_high[0].m_c[i] = bestMaxColor.m_c[i]; + dst_blk.m_low[1].m_c[i] = bestMinColor.m_c[i]; + dst_blk.m_high[1].m_c[i] = bestMaxColor.m_c[i]; + } + dst_blk.m_pbits[0][0] = best_pbits[0]; + dst_blk.m_pbits[0][1] = best_pbits[1]; + dst_blk.m_pbits[1][0] = best_pbits[0]; + dst_blk.m_pbits[1][1] = best_pbits[1]; + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + case 2: + { + // 2. 
DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1 + dst_blk.m_mode = 1; + dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; + + const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert; + + float xl[4], xh[4]; + xl[3] = 1.0f; + xh[3] = 1.0f; + + for (uint32_t subset = 0; subset < 2; subset++) + { + for (uint32_t i = 0; i < 3; i++) + { + uint32_t v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6]; + v = (v << 4) | v; + xl[i] = v / 255.0f; + + v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1]; + v = (v << 4) | v; + xh[i] = v / 255.0f; + } + + uint32_t best_pbits[2] = { 0, 0 }; + color_quad_u8 bestMinColor, bestMaxColor; + memset(&bestMinColor, 0, sizeof(bestMinColor)); + memset(&bestMaxColor, 0, sizeof(bestMaxColor)); + determine_shared_pbits(3, 6, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + const uint32_t bc7_subset_index = invert_partition ? 
(1 - subset) : subset; + + for (uint32_t i = 0; i < 3; i++) + { + dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i]; + dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i]; + } + + dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0]; + } // subset + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + case 3: + { + // DualPlane: 0, WeightRange : 2 (4), Subsets : 3, EndpointRange : 7 (12) - BC7 MODE2 + dst_blk.m_mode = 2; + dst_blk.m_partition = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_bc7; + + const uint32_t perm = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_astc_to_bc7_perm; + + for (uint32_t subset = 0; subset < 3; subset++) + { + for (uint32_t comp = 0; comp < 3; comp++) + { + uint32_t lo = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 0 + subset * 6]].m_unquant; + uint32_t hi = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 1 + subset * 6]].m_unquant; + + // TODO: I think this can be improved by using tables like Basis Universal does with ETC1S conversion. + lo = (lo * 31 + 127) / 255; + hi = (hi * 31 + 127) / 255; + + const uint32_t bc7_subset_index = g_astc_to_bc7_partition_index_perm_tables[perm][subset]; + + dst_blk.m_low[bc7_subset_index].m_c[comp] = (uint8_t)lo; + dst_blk.m_high[bc7_subset_index].m_c[comp] = (uint8_t)hi; + } + } + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + case 4: + { + // 4. 
DualPlane: 0, WeightRange: 2 (4), Subsets: 2, EndpointRange: 12 (40) - BC7 MODE3 + dst_blk.m_mode = 3; + dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; + + const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert; + + float xl[4], xh[4]; + xl[3] = 1.0f; + xh[3] = 1.0f; + + for (uint32_t subset = 0; subset < 2; subset++) + { + for (uint32_t i = 0; i < 3; i++) + { + xl[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6]].m_unquant / 255.0f; + xh[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1]].m_unquant / 255.0f; + } + + uint32_t best_pbits[2] = { 0, 0 }; + color_quad_u8 bestMinColor, bestMaxColor; + memset(&bestMinColor, 0, sizeof(bestMinColor)); + memset(&bestMaxColor, 0, sizeof(bestMaxColor)); + determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + const uint32_t bc7_subset_index = invert_partition ? 
(1 - subset) : subset; + + for (uint32_t i = 0; i < 3; i++) + { + dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i]; + dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i]; + } + dst_blk.m_low[bc7_subset_index].m_c[3] = 127; + dst_blk.m_high[bc7_subset_index].m_c[3] = 127; + + dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0]; + dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1]; + + } // subset + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + case 6: + case 11: + case 13: + case 17: + { + // MODE 6: DualPlane: 1, WeightRange : 2 (4), Subsets : 1, EndpointRange : 18 (160) - BC7 MODE5 RGB + // MODE 11: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE5 + // MODE 13: DualPlane: 1, WeightRange: 0 (2), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE5 + // MODE 17: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE5 + dst_blk.m_mode = 5; + dst_blk.m_rotation = (unpacked_src_blk.m_astc.m_ccs + 1) & 3; + + if (total_comps == 2) + { + assert(unpacked_src_blk.m_astc.m_ccs == 3); + + dst_blk.m_low->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant * 127 + 127) / 255); + dst_blk.m_high->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant * 127 + 127) / 255); + + dst_blk.m_low->m_c[1] = dst_blk.m_low->m_c[0]; + dst_blk.m_high->m_c[1] = dst_blk.m_high->m_c[0]; + + dst_blk.m_low->m_c[2] = dst_blk.m_low->m_c[0]; + dst_blk.m_high->m_c[2] = dst_blk.m_high->m_c[0]; + + dst_blk.m_low->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant); + dst_blk.m_high->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant); + } + else + { + for (uint32_t astc_comp = 0; astc_comp < 4; astc_comp++) + { + uint32_t bc7_comp = astc_comp; 
+ // ASTC and BC7 handle dual plane component rotations differently: + // ASTC: 2nd plane separately interpolates the CCS channel. + // BC7: 2nd plane channel is swapped with alpha, 2nd plane controls alpha interpolation, then we swap alpha with the desired channel. + if (astc_comp == (uint32_t)unpacked_src_blk.m_astc.m_ccs) + bc7_comp = 3; + else if (astc_comp == 3) + bc7_comp = unpacked_src_blk.m_astc.m_ccs; + + uint32_t l = 255, h = 255; + if (astc_comp < total_comps) + { + l = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 0]].m_unquant; + h = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 1]].m_unquant; + } + + if (bc7_comp < 3) + { + l = (l * 127 + 127) / 255; + h = (h * 127 + 127) / 255; + } + + dst_blk.m_low->m_c[bc7_comp] = (uint8_t)l; + dst_blk.m_high->m_c[bc7_comp] = (uint8_t)h; + } + } + + if (mode == 13) + { + for (uint32_t i = 0; i < 16; i++) + { + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2] ? 3 : 0; + dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1] ? 
3 : 0; + } + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2]; + dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1]; + } + } + + break; + } + case 7: + { + // DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 12 (40) - BC7 MODE2 + dst_blk.m_mode = 2; + dst_blk.m_partition = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].m_bc73; + + const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].k; + + for (uint32_t bc7_part = 0; bc7_part < 3; bc7_part++) + { + const uint32_t astc_part = bc7_convert_partition_index_3_to_2(bc7_part, common_pattern_k); + + for (uint32_t c = 0; c < 3; c++) + { + dst_blk.m_low[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 0 + astc_part * 6]].m_unquant * 31 + 127) / 255; + dst_blk.m_high[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 1 + astc_part * 6]].m_unquant * 31 + 127) / 255; + } + } + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + case UASTC_MODE_INDEX_SOLID_COLOR: + { + // Void-Extent: Solid Color RGBA (BC7 MODE5 or MODE6) + const color32& solid_color = unpacked_src_blk.m_solid_color; + + uint32_t best_err0 = g_bc7_mode_6_optimal_endpoints[solid_color.r][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][0].m_error + + g_bc7_mode_6_optimal_endpoints[solid_color.b][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][0].m_error; + + uint32_t best_err1 = g_bc7_mode_6_optimal_endpoints[solid_color.r][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][1].m_error + + g_bc7_mode_6_optimal_endpoints[solid_color.b][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][1].m_error; + + if (best_err0 > 0 && best_err1 > 0) + { + dst_blk.m_mode = 5; + + for (uint32_t c = 0; c < 3; c++) 
+ { + dst_blk.m_low[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_lo; + dst_blk.m_high[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_hi; + } + + memset(dst_blk.m_selectors, BC7ENC_MODE_5_OPTIMAL_INDEX, 16); + + dst_blk.m_low[0].m_c[3] = solid_color.c[3]; + dst_blk.m_high[0].m_c[3] = solid_color.c[3]; + + //memset(dst_blk.m_alpha_selectors, 0, 16); + } + else + { + dst_blk.m_mode = 6; + + uint32_t best_p = 0; + if (best_err1 < best_err0) + best_p = 1; + + for (uint32_t c = 0; c < 4; c++) + { + dst_blk.m_low[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_lo; + dst_blk.m_high[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_hi; + } + + dst_blk.m_pbits[0][0] = best_p; + dst_blk.m_pbits[0][1] = best_p; + memset(dst_blk.m_selectors, BC7ENC_MODE_6_OPTIMAL_INDEX, 16); + } + + break; + } + case 9: + case 16: + { + // 9. DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE7 + // 16. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE7 + + dst_blk.m_mode = 7; + dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; + + const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert; + + for (uint32_t astc_subset = 0; astc_subset < 2; astc_subset++) + { + float xl[4], xh[4]; + + if (total_comps == 2) + { + xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 4]].m_unquant / 255.0f; + xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 4]].m_unquant / 255.0f; + + xl[1] = xl[0]; + xh[1] = xh[0]; + + xl[2] = xl[0]; + xh[2] = xh[0]; + + xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 4]].m_unquant / 255.0f; + xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 4]].m_unquant / 255.0f; 
+ } + else + { + xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 8]].m_unquant / 255.0f; + xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 8]].m_unquant / 255.0f; + xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4 + astc_subset * 8]].m_unquant / 255.0f; + xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6 + astc_subset * 8]].m_unquant / 255.0f; + + xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 8]].m_unquant / 255.0f; + xh[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 8]].m_unquant / 255.0f; + xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5 + astc_subset * 8]].m_unquant / 255.0f; + xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7 + astc_subset * 8]].m_unquant / 255.0f; + } + + uint32_t best_pbits[2] = { 0, 0 }; + color_quad_u8 bestMinColor, bestMaxColor; + memset(&bestMinColor, 0, sizeof(bestMinColor)); + memset(&bestMaxColor, 0, sizeof(bestMaxColor)); + determine_unique_pbits(4, 5, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + const uint32_t bc7_subset_index = invert_partition ? 
(1 - astc_subset) : astc_subset; + + dst_blk.m_low[bc7_subset_index] = bestMinColor; + dst_blk.m_high[bc7_subset_index] = bestMaxColor; + + dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0]; + dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1]; + } // astc_subset + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + default: + return false; + } + + return true; + } + + bool transcode_uastc_to_bc7(const uastc_block& src_blk, bc7_optimization_results& dst_blk) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false, false)) + return false; + + return transcode_uastc_to_bc7(unpacked_src_blk, dst_blk); + } + + bool transcode_uastc_to_bc7(const uastc_block& src_blk, void* pDst) + { + bc7_optimization_results temp; + if (!transcode_uastc_to_bc7(src_blk, temp)) + return false; + + encode_bc7_block(pDst, &temp); + return true; + } + + color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock) + { + color32 result; + + for (uint32_t c = 0; c < 3; c++) + { + static const int s_divs[3] = { 1, 3, 9 }; + + int delta = 0; + + switch (bias) + { + case 2: delta = subblock ? 0 : ((c == 0) ? -1 : 0); break; + case 5: delta = subblock ? 0 : ((c == 1) ? -1 : 0); break; + case 6: delta = subblock ? 0 : ((c == 2) ? -1 : 0); break; + + case 7: delta = subblock ? 0 : ((c == 0) ? 1 : 0); break; + case 11: delta = subblock ? 0 : ((c == 1) ? 1 : 0); break; + case 15: delta = subblock ? 0 : ((c == 2) ? 1 : 0); break; + + case 18: delta = subblock ? ((c == 0) ? -1 : 0) : 0; break; + case 19: delta = subblock ? ((c == 1) ? -1 : 0) : 0; break; + case 20: delta = subblock ? ((c == 2) ? -1 : 0) : 0; break; + + case 21: delta = subblock ? ((c == 0) ? 1 : 0) : 0; break; + case 24: delta = subblock ? ((c == 1) ? 1 : 0) : 0; break; + case 8: delta = subblock ? ((c == 2) ? 
1 : 0) : 0; break; + + case 10: delta = -2; break; + + case 27: delta = subblock ? 0 : -1; break; + case 28: delta = subblock ? -1 : 1; break; + case 29: delta = subblock ? 1 : 0; break; + case 30: delta = subblock ? -1 : 0; break; + case 31: delta = subblock ? 0 : 1; break; + + default: + delta = ((bias / s_divs[c]) % 3) - 1; + break; + } + + int v = block_color[c]; + if (v == 0) + { + if (delta == -2) + v += 3; + else + v += delta + 1; + } + else if (v == (int)limit) + { + v += (delta - 1); + } + else + { + v += delta; + if ((v < 0) || (v > (int)limit)) + v = (v - delta) - delta; + } + + assert(v >= 0); + assert(v <= (int)limit); + + result[c] = (uint8_t)v; + } + + return result; + } + + static void etc1_determine_selectors(decoder_etc_block& dst_blk, const color32* pSource_pixels, uint32_t first_subblock, uint32_t last_subblock) + { + static const uint8_t s_tran[4] = { 1, 0, 2, 3 }; + + uint16_t l_bitmask = 0; + uint16_t h_bitmask = 0; + + for (uint32_t subblock = first_subblock; subblock < last_subblock; subblock++) + { + color32 block_colors[4]; + dst_blk.get_block_colors(block_colors, subblock); + + uint32_t block_y[4]; + for (uint32_t i = 0; i < 4; i++) + block_y[i] = block_colors[i][0] * 54 + block_colors[i][1] * 183 + block_colors[i][2] * 19; + + const uint32_t block_y01 = block_y[0] + block_y[1]; + const uint32_t block_y12 = block_y[1] + block_y[2]; + const uint32_t block_y23 = block_y[2] + block_y[3]; + + // X0 X0 X0 X0 X1 X1 X1 X1 X2 X2 X2 X2 X3 X3 X3 X3 + // Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 + + if (dst_blk.get_flip_bit()) + { + uint32_t ofs = subblock * 2; + + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const color32& c = pSource_pixels[x + (subblock * 2 + y) * 4]; + const uint32_t l = c[0] * 108 + c[1] * 366 + c[2] * 38; + + uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + 
+ ofs = (int)ofs + 1 - 4 * 4; + } + } + else + { + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + const color32& c = pSource_pixels[subblock * 2 + x + y * 4]; + const uint32_t l = c[0] * 108 + c[1] * 366 + c[2] * 38; + + uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } + } + } + + dst_blk.m_bytes[7] = (uint8_t)(l_bitmask); + dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8); + dst_blk.m_bytes[5] = (uint8_t)(h_bitmask); + dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8); + } + + static const uint8_t s_etc1_solid_selectors[4][4] = { { 255, 255, 255, 255 }, { 255, 255, 0, 0 }, { 0, 0, 0, 0 }, {0, 0, 255, 255 } }; + + struct etc_coord2 + { + uint8_t m_x, m_y; + }; + + // [flip][subblock][pixel_index] + const etc_coord2 g_etc1_pixel_coords[2][2][8] = + { + { + { + { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, + { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } + }, + { + { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, + { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } + } + }, + { + { + { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } + }, + { + { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } + }, + } + }; + + void transcode_uastc_to_etc1(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst) + { + decoder_etc_block& dst_blk = *static_cast(pDst); + + if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + dst_blk.m_bytes[3] = (uint8_t)((unpacked_src_blk.m_etc1_diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten0 << 2)); + + if (unpacked_src_blk.m_etc1_diff) + { + dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r << 3); + dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g << 3); + dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b << 3); + } + else + { + 
dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r | (unpacked_src_blk.m_etc1_r << 4)); + dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g | (unpacked_src_blk.m_etc1_g << 4)); + dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b | (unpacked_src_blk.m_etc1_b << 4)); + } + + memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[unpacked_src_blk.m_etc1_selector][0], 4); + + return; + } + + const bool flip = unpacked_src_blk.m_etc1_flip != 0; + const bool diff = unpacked_src_blk.m_etc1_diff != 0; + + dst_blk.m_bytes[3] = (uint8_t)((int)flip | (diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten1 << 2)); + + const uint32_t limit = diff ? 31 : 15; + + color32 block_colors[2]; + + for (uint32_t subset = 0; subset < 2; subset++) + { + uint32_t avg_color[3]; + memset(avg_color, 0, sizeof(avg_color)); + + for (uint32_t j = 0; j < 8; j++) + { + const etc_coord2& c = g_etc1_pixel_coords[flip][subset][j]; + + avg_color[0] += block_pixels[c.m_y][c.m_x].r; + avg_color[1] += block_pixels[c.m_y][c.m_x].g; + avg_color[2] += block_pixels[c.m_y][c.m_x].b; + } // j + + block_colors[subset][0] = (uint8_t)((avg_color[0] * limit + 1020) / (8 * 255)); + block_colors[subset][1] = (uint8_t)((avg_color[1] * limit + 1020) / (8 * 255)); + block_colors[subset][2] = (uint8_t)((avg_color[2] * limit + 1020) / (8 * 255)); + block_colors[subset][3] = 0; + + if (g_uastc_mode_has_etc1_bias[unpacked_src_blk.m_mode]) + { + block_colors[subset] = apply_etc1_bias(block_colors[subset], unpacked_src_blk.m_etc1_bias, limit, subset); + } + + } // subset + + if (diff) + { + int dr = block_colors[1].r - block_colors[0].r; + int dg = block_colors[1].g - block_colors[0].g; + int db = block_colors[1].b - block_colors[0].b; + + dr = basisu::clamp(dr, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + dg = basisu::clamp(dg, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + db = basisu::clamp(db, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + + if (dr < 0) dr += 8; + if (dg < 0) dg += 8; 
+ if (db < 0) db += 8; + + dst_blk.m_bytes[0] = (uint8_t)((block_colors[0].r << 3) | dr); + dst_blk.m_bytes[1] = (uint8_t)((block_colors[0].g << 3) | dg); + dst_blk.m_bytes[2] = (uint8_t)((block_colors[0].b << 3) | db); + } + else + { + dst_blk.m_bytes[0] = (uint8_t)(block_colors[1].r | (block_colors[0].r << 4)); + dst_blk.m_bytes[1] = (uint8_t)(block_colors[1].g | (block_colors[0].g << 4)); + dst_blk.m_bytes[2] = (uint8_t)(block_colors[1].b | (block_colors[0].b << 4)); + } + + etc1_determine_selectors(dst_blk, &block_pixels[0][0], 0, 2); + } + + bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + color32 block_pixels[4][4]; + if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR) + { + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + } + + transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, pDst); + + return true; + } + + static inline int gray_distance2(const uint8_t c, int y) + { + int gray_dist = (int)c - y; + return gray_dist * gray_dist; + } + + static bool pack_etc1_y_estimate_flipped(const uint8_t* pSrc_pixels, + int& upper_avg, int& lower_avg, int& left_avg, int& right_avg) + { + int sums[2][2]; + +#define GET_XY(x, y) pSrc_pixels[(x) + ((y) * 4)] + + sums[0][0] = GET_XY(0, 0) + GET_XY(0, 1) + GET_XY(1, 0) + GET_XY(1, 1); + sums[1][0] = GET_XY(2, 0) + GET_XY(2, 1) + GET_XY(3, 0) + GET_XY(3, 1); + sums[0][1] = GET_XY(0, 2) + GET_XY(0, 3) + GET_XY(1, 2) + GET_XY(1, 3); + sums[1][1] = GET_XY(2, 2) + GET_XY(2, 3) + GET_XY(3, 2) + GET_XY(3, 3); + + upper_avg = (sums[0][0] + sums[1][0] + 4) / 8; + lower_avg = (sums[0][1] + sums[1][1] + 4) / 8; + left_avg = (sums[0][0] + sums[0][1] + 4) / 8; + right_avg = (sums[1][0] + sums[1][1] + 4) / 8; + +#undef GET_XY +#define GET_XY(x, y, a) gray_distance2(pSrc_pixels[(x) + ((y) * 4)], a) + + int 
upper_gray_dist = 0, lower_gray_dist = 0, left_gray_dist = 0, right_gray_dist = 0; + for (uint32_t i = 0; i < 4; i++) + { + for (uint32_t j = 0; j < 2; j++) + { + upper_gray_dist += GET_XY(i, j, upper_avg); + lower_gray_dist += GET_XY(i, 2 + j, lower_avg); + left_gray_dist += GET_XY(j, i, left_avg); + right_gray_dist += GET_XY(2 + j, i, right_avg); + } + } + +#undef GET_XY + + int upper_lower_sum = upper_gray_dist + lower_gray_dist; + int left_right_sum = left_gray_dist + right_gray_dist; + + return upper_lower_sum < left_right_sum; + } + + // Base Sel Table + // XXXXX XX XXX + static const uint16_t g_etc1_y_solid_block_configs[256] = + { + 0,781,64,161,260,192,33,131,96,320,65,162,261,193,34,291,97,224,66,163,262,194,35,549,98,4,67,653,164,195,523,36,99,5,578,68,165,353,196,37,135,100,324,69,166,354,197,38,295,101,228,70,167, + 355,198,39,553,102,8,71,608,168,199,527,40,103,9,582,72,169,357,200,41,139,104,328,73,170,358,201,42,299,105,232,74,171,359,202,43,557,106,12,75,612,172,203,531,44,107,13,586,76,173,361, + 204,45,143,108,332,77,174,362,205,46,303,109,236,78,175,363,206,47,561,110,16,79,616,176,207,535,48,111,17,590,80,177,365,208,49,147,112,336,81,178,366,209,50,307,113,240,82,179,367,210, + 51,565,114,20,83,620,180,211,539,52,115,21,594,84,181,369,212,53,151,116,340,85,182,370,213,54,311,117,244,86,183,371,214,55,569,118,24,87,624,184,215,543,56,119,25,598,88,185,373,216,57, + 155,120,344,89,186,374,217,58,315,121,248,90,187,375,218,59,573,122,28,91,628,188,219,754,60,123,29,602,92,189,377,220,61,159,124,348,93,190,378,221,62,319,125,252,94,191,379,222,63,882,126 + }; + + // individual + // table base sel0 sel1 sel2 sel3 + static const uint16_t g_etc1_y_solid_block_4i_configs[256] = + { + 0xA000,0xA800,0x540B,0xAA01,0xAA01,0xFE00,0xFF00,0xFF00,0x8,0x5515,0x5509,0x5509,0xAA03,0x5508,0x5508,0x9508,0xA508,0xA908,0xAA08,0x5513,0xAA09,0xAA09,0xAA05,0xFF08,0xFF08,0x10,0x551D,0x5511,0x5511, + 
0xAA0B,0x5510,0x5510,0x9510,0xA510,0xA910,0xAA10,0x551B,0xAA11,0xAA11,0xAA0D,0xFF10,0xFF10,0x18,0x5525,0x5519,0x5519,0xAA13,0x5518,0x5518,0x9518,0xA518,0xA918,0xAA18,0x5523,0xAA19,0xAA19,0xAA15, + 0xFF18,0xFF18,0x20,0x552D,0x5521,0x5521,0xAA1B,0x5520,0x5520,0x9520,0xA520,0xA920,0xAA20,0x552B,0xAA21,0xAA21,0xAA1D,0xFF20,0xFF20,0x28,0x5535,0x5529,0x5529,0xAA23,0x5528,0x5528,0x9528,0xA528,0xA928, + 0xAA28,0x5533,0xAA29,0xAA29,0xAA25,0xFF28,0xFF28,0x30,0x553D,0x5531,0x5531,0xAA2B,0x5530,0x5530,0x9530,0xA530,0xA930,0xAA30,0x553B,0xAA31,0xAA31,0xAA2D,0xFF30,0xFF30,0x38,0x5545,0x5539,0x5539,0xAA33, + 0x5538,0x5538,0x9538,0xA538,0xA938,0xAA38,0x5543,0xAA39,0xAA39,0xAA35,0xFF38,0xFF38,0x40,0x554D,0x5541,0x5541,0xAA3B,0x5540,0x5540,0x9540,0xA540,0xA940,0xAA40,0x554B,0xAA41,0xAA41,0xAA3D,0xFF40,0xFF40, + 0x48,0x5555,0x5549,0x5549,0xAA43,0x5548,0x5548,0x9548,0xA548,0xA948,0xAA48,0x5553,0xAA49,0xAA49,0xAA45,0xFF48,0xFF48,0x50,0x555D,0x5551,0x5551,0xAA4B,0x5550,0x5550,0x9550,0xA550,0xA950,0xAA50,0x555B, + 0xAA51,0xAA51,0xAA4D,0xFF50,0xFF50,0x58,0x5565,0x5559,0x5559,0xAA53,0x5558,0x5558,0x9558,0xA558,0xA958,0xAA58,0x5563,0xAA59,0xAA59,0xAA55,0xFF58,0xFF58,0x60,0x556D,0x5561,0x5561,0xAA5B,0x5560,0x5560, + 0x9560,0xA560,0xA960,0xAA60,0x556B,0xAA61,0xAA61,0xAA5D,0xFF60,0xFF60,0x68,0x5575,0x5569,0x5569,0xAA63,0x5568,0x5568,0x9568,0xA568,0xA968,0xAA68,0x5573,0xAA69,0xAA69,0xAA65,0xFF68,0xFF68,0x70,0x557D, + 0x5571,0x5571,0xAA6B,0x5570,0x5570,0x9570,0xA570,0xA970,0xAA70,0x557B,0xAA71,0xAA71,0xAA6D,0xFF70,0xFF70,0x78,0x78,0x5579,0x5579,0xAA73,0x5578,0x9578,0x2578,0xE6E,0x278 + }; + + static const uint16_t g_etc1_y_solid_block_2i_configs[256] = + { + 0x416,0x800,0xA00,0x50B,0xA01,0xA01,0xF00,0xF00,0xF00,0x8,0x515,0x509,0x509,0xA03,0x508,0x508,0xF01,0xF01,0xA08,0xA08,0x513,0xA09,0xA09,0xA05,0xF08,0xF08,0x10,0x51D,0x511,0x511,0xA0B,0x510,0x510,0xF09, + 
0xF09,0xA10,0xA10,0x51B,0xA11,0xA11,0xA0D,0xF10,0xF10,0x18,0x525,0x519,0x519,0xA13,0x518,0x518,0xF11,0xF11,0xA18,0xA18,0x523,0xA19,0xA19,0xA15,0xF18,0xF18,0x20,0x52D,0x521,0x521,0xA1B,0x520,0x520,0xF19, + 0xF19,0xA20,0xA20,0x52B,0xA21,0xA21,0xA1D,0xF20,0xF20,0x28,0x535,0x529,0x529,0xA23,0x528,0x528,0xF21,0xF21,0xA28,0xA28,0x533,0xA29,0xA29,0xA25,0xF28,0xF28,0x30,0x53D,0x531,0x531,0xA2B,0x530,0x530,0xF29, + 0xF29,0xA30,0xA30,0x53B,0xA31,0xA31,0xA2D,0xF30,0xF30,0x38,0x545,0x539,0x539,0xA33,0x538,0x538,0xF31,0xF31,0xA38,0xA38,0x543,0xA39,0xA39,0xA35,0xF38,0xF38,0x40,0x54D,0x541,0x541,0xA3B,0x540,0x540,0xF39, + 0xF39,0xA40,0xA40,0x54B,0xA41,0xA41,0xA3D,0xF40,0xF40,0x48,0x555,0x549,0x549,0xA43,0x548,0x548,0xF41,0xF41,0xA48,0xA48,0x553,0xA49,0xA49,0xA45,0xF48,0xF48,0x50,0x55D,0x551,0x551,0xA4B,0x550,0x550,0xF49, + 0xF49,0xA50,0xA50,0x55B,0xA51,0xA51,0xA4D,0xF50,0xF50,0x58,0x565,0x559,0x559,0xA53,0x558,0x558,0xF51,0xF51,0xA58,0xA58,0x563,0xA59,0xA59,0xA55,0xF58,0xF58,0x60,0x56D,0x561,0x561,0xA5B,0x560,0x560,0xF59, + 0xF59,0xA60,0xA60,0x56B,0xA61,0xA61,0xA5D,0xF60,0xF60,0x68,0x575,0x569,0x569,0xA63,0x568,0x568,0xF61,0xF61,0xA68,0xA68,0x573,0xA69,0xA69,0xA65,0xF68,0xF68,0x70,0x57D,0x571,0x571,0xA6B,0x570,0x570,0xF69, + 0xF69,0xA70,0xA70,0x57B,0xA71,0xA71,0xA6D,0xF70,0xF70,0x78,0x78,0x579,0x579,0xA73,0x578,0x578,0xE6E,0x278 + }; + + static const uint16_t g_etc1_y_solid_block_1i_configs[256] = + { + 0x0,0x116,0x200,0x200,0x10B,0x201,0x201,0x300,0x300,0x8,0x115,0x109,0x109,0x203,0x108,0x108,0x114,0x301,0x204,0x208,0x208,0x113,0x209,0x209,0x205,0x308,0x10,0x11D,0x111,0x111,0x20B,0x110,0x110,0x11C,0x309, + 0x20C,0x210,0x210,0x11B,0x211,0x211,0x20D,0x310,0x18,0x125,0x119,0x119,0x213,0x118,0x118,0x124,0x311,0x214,0x218,0x218,0x123,0x219,0x219,0x215,0x318,0x20,0x12D,0x121,0x121,0x21B,0x120,0x120,0x12C,0x319,0x21C, + 
0x220,0x220,0x12B,0x221,0x221,0x21D,0x320,0x28,0x135,0x129,0x129,0x223,0x128,0x128,0x134,0x321,0x224,0x228,0x228,0x133,0x229,0x229,0x225,0x328,0x30,0x13D,0x131,0x131,0x22B,0x130,0x130,0x13C,0x329,0x22C,0x230, + 0x230,0x13B,0x231,0x231,0x22D,0x330,0x38,0x145,0x139,0x139,0x233,0x138,0x138,0x144,0x331,0x234,0x238,0x238,0x143,0x239,0x239,0x235,0x338,0x40,0x14D,0x141,0x141,0x23B,0x140,0x140,0x14C,0x339,0x23C,0x240,0x240, + 0x14B,0x241,0x241,0x23D,0x340,0x48,0x155,0x149,0x149,0x243,0x148,0x148,0x154,0x341,0x244,0x248,0x248,0x153,0x249,0x249,0x245,0x348,0x50,0x15D,0x151,0x151,0x24B,0x150,0x150,0x15C,0x349,0x24C,0x250,0x250,0x15B, + 0x251,0x251,0x24D,0x350,0x58,0x165,0x159,0x159,0x253,0x158,0x158,0x164,0x351,0x254,0x258,0x258,0x163,0x259,0x259,0x255,0x358,0x60,0x16D,0x161,0x161,0x25B,0x160,0x160,0x16C,0x359,0x25C,0x260,0x260,0x16B,0x261, + 0x261,0x25D,0x360,0x68,0x175,0x169,0x169,0x263,0x168,0x168,0x174,0x361,0x264,0x268,0x268,0x173,0x269,0x269,0x265,0x368,0x70,0x17D,0x171,0x171,0x26B,0x170,0x170,0x17C,0x369,0x26C,0x270,0x270,0x17B,0x271,0x271, + 0x26D,0x370,0x78,0x78,0x179,0x179,0x273,0x178,0x178,0x26E,0x278 + }; + + // We don't have any useful hints to accelerate single channel ETC1, so we need to real-time encode from scratch. + bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst, uint32_t channel) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + +#if 0 + for (uint32_t individ = 0; individ < 2; individ++) + { + uint32_t overall_error = 0; + + for (uint32_t c = 0; c < 256; c++) + { + uint32_t best_err = UINT32_MAX; + uint32_t best_individ = 0; + uint32_t best_base = 0; + uint32_t best_sels[4] = { 0,0,0,0 }; + uint32_t best_table = 0; + + const uint32_t limit = individ ? 
16 : 32; + + for (uint32_t table = 0; table < 8; table++) + { + for (uint32_t base = 0; base < limit; base++) + { + uint32_t total_e = 0; + uint32_t sels[4] = { 0,0,0,0 }; + + const uint32_t N = 4; + for (uint32_t i = 0; i < basisu::minimum(N, (256 - c)); i++) + { + uint32_t best_sel_e = UINT32_MAX; + uint32_t best_sel = 0; + + for (uint32_t sel = 0; sel < 4; sel++) + { + int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2)); + val = clamp255(val + g_etc1_inten_tables[table][sel]); + + int e = iabs(val - clamp255(c + i)); + if (e < best_sel_e) + { + best_sel_e = e; + best_sel = sel; + } + + } // sel + + sels[i] = best_sel; + total_e += best_sel_e * best_sel_e; + + } // i + + if (total_e < best_err) + { + best_err = total_e; + best_individ = individ; + best_base = base; + memcpy(best_sels, sels, sizeof(best_sels)); + best_table = table; + } + + } // base + } // table + + //printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]); + + uint32_t encoded = best_table | (best_base << 3) | + (best_sels[0] << 8) | + (best_sels[1] << 10) | + (best_sels[2] << 12) | + (best_sels[3] << 14); + + printf("0x%X,", encoded); + + overall_error += best_err; + } // c + + printf("\n"); + printf("Overall error: %u\n", overall_error); + + } // individ + + exit(0); +#endif + +#if 0 + for (uint32_t individ = 0; individ < 2; individ++) + { + uint32_t overall_error = 0; + + for (uint32_t c = 0; c < 256; c++) + { + uint32_t best_err = UINT32_MAX; + uint32_t best_individ = 0; + uint32_t best_base = 0; + uint32_t best_sels[4] = { 0,0,0,0 }; + uint32_t best_table = 0; + + const uint32_t limit = individ ? 
16 : 32; + + for (uint32_t table = 0; table < 8; table++) + { + for (uint32_t base = 0; base < limit; base++) + { + uint32_t total_e = 0; + uint32_t sels[4] = { 0,0,0,0 }; + + const uint32_t N = 1; + for (uint32_t i = 0; i < basisu::minimum(N, (256 - c)); i++) + { + uint32_t best_sel_e = UINT32_MAX; + uint32_t best_sel = 0; + + for (uint32_t sel = 0; sel < 4; sel++) + { + int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2)); + val = clamp255(val + g_etc1_inten_tables[table][sel]); + + int e = iabs(val - clamp255(c + i)); + if (e < best_sel_e) + { + best_sel_e = e; + best_sel = sel; + } + + } // sel + + sels[i] = best_sel; + total_e += best_sel_e * best_sel_e; + + } // i + + if (total_e < best_err) + { + best_err = total_e; + best_individ = individ; + best_base = base; + memcpy(best_sels, sels, sizeof(best_sels)); + best_table = table; + } + + } // base + } // table + + //printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]); + + uint32_t encoded = best_table | (best_base << 3) | + (best_sels[0] << 8) | + (best_sels[1] << 10) | + (best_sels[2] << 12) | + (best_sels[3] << 14); + + printf("0x%X,", encoded); + + overall_error += best_err; + } // c + + printf("\n"); + printf("Overall error: %u\n", overall_error); + + } // individ + + exit(0); +#endif + + decoder_etc_block& dst_blk = *static_cast(pDst); + + if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + const uint32_t y = unpacked_src_blk.m_solid_color[channel]; + const uint32_t encoded_config = g_etc1_y_solid_block_configs[y]; + + const uint32_t base = encoded_config & 31; + const uint32_t sel = (encoded_config >> 5) & 3; + const uint32_t table = encoded_config >> 7; + + dst_blk.m_bytes[3] = (uint8_t)(2 | (table << 5) | (table << 2)); + + dst_blk.m_bytes[0] = (uint8_t)(base << 3); + dst_blk.m_bytes[1] = (uint8_t)(base << 3); + dst_blk.m_bytes[2] = (uint8_t)(base << 3); + + 
memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[sel][0], 4); + return true; + } + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + uint8_t block_y[4][4]; + for (uint32_t i = 0; i < 16; i++) + ((uint8_t*)block_y)[i] = ((color32*)block_pixels)[i][channel]; + + int upper_avg, lower_avg, left_avg, right_avg; + bool flip = pack_etc1_y_estimate_flipped(&block_y[0][0], upper_avg, lower_avg, left_avg, right_avg); + + // non-flipped: | | + // vs. + // flipped: -- + // -- + + uint32_t low[2] = { 255, 255 }, high[2] = { 0, 0 }; + + if (flip) + { + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t v = block_y[y][x]; + low[0] = basisu::minimum(low[0], v); + high[0] = basisu::maximum(high[0], v); + } + } + for (uint32_t y = 2; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t v = block_y[y][x]; + low[1] = basisu::minimum(low[1], v); + high[1] = basisu::maximum(high[1], v); + } + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const uint32_t v = block_y[y][x]; + low[0] = basisu::minimum(low[0], v); + high[0] = basisu::maximum(high[0], v); + } + } + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 2; x < 4; x++) + { + const uint32_t v = block_y[y][x]; + low[1] = basisu::minimum(low[1], v); + high[1] = basisu::maximum(high[1], v); + } + } + } + + const uint32_t range[2] = { high[0] - low[0], high[1] - low[1] }; + + dst_blk.m_bytes[3] = (uint8_t)((int)flip); + + if ((range[0] <= 3) && (range[1] <= 3)) + { + // This is primarily for better gradients. + dst_blk.m_bytes[0] = 0; + dst_blk.m_bytes[1] = 0; + dst_blk.m_bytes[2] = 0; + + uint16_t l_bitmask = 0, h_bitmask = 0; + + for (uint32_t subblock = 0; subblock < 2; subblock++) + { + const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? 
g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]); + + const uint32_t table = encoded & 7; + const uint32_t base = (encoded >> 3) & 31; + assert(base <= 15); + const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 }; + + dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5)); + + const uint32_t sv = base << (subblock ? 0 : 4); + dst_blk.m_bytes[0] |= (uint8_t)(sv); + dst_blk.m_bytes[1] |= (uint8_t)(sv); + dst_blk.m_bytes[2] |= (uint8_t)(sv); + + if (flip) + { + uint32_t ofs = subblock * 2; + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t t = block_y[y + subblock * 2][x]; + assert(t >= low[subblock] && t <= high[subblock]); + t -= low[subblock]; + assert(t <= 3); + + t = g_selector_index_to_etc1[sels[t]]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + + ofs = (int)ofs + 1 - 4 * 4; + } + } + else + { + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + uint32_t t = block_y[y][x + subblock * 2]; + assert(t >= low[subblock] && t <= high[subblock]); + t -= low[subblock]; + assert(t <= 3); + + t = g_selector_index_to_etc1[sels[t]]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } + } + } // subblock + + dst_blk.m_bytes[7] = (uint8_t)(l_bitmask); + dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8); + dst_blk.m_bytes[5] = (uint8_t)(h_bitmask); + dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8); + + return true; + } + + uint32_t y0 = ((flip ? upper_avg : left_avg) * 31 + 127) / 255; + uint32_t y1 = ((flip ? lower_avg : right_avg) * 31 + 127) / 255; + + bool diff = true; + + int dy = y1 - y0; + + if ((dy < cETC1ColorDeltaMin) || (dy > cETC1ColorDeltaMax)) + { + diff = false; + + y0 = ((flip ? upper_avg : left_avg) * 15 + 127) / 255; + y1 = ((flip ? 
lower_avg : right_avg) * 15 + 127) / 255; + + dst_blk.m_bytes[0] = (uint8_t)(y1 | (y0 << 4)); + dst_blk.m_bytes[1] = (uint8_t)(y1 | (y0 << 4)); + dst_blk.m_bytes[2] = (uint8_t)(y1 | (y0 << 4)); + } + else + { + dy = basisu::clamp(dy, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + + y1 = y0 + dy; + + if (dy < 0) dy += 8; + + dst_blk.m_bytes[0] = (uint8_t)((y0 << 3) | dy); + dst_blk.m_bytes[1] = (uint8_t)((y0 << 3) | dy); + dst_blk.m_bytes[2] = (uint8_t)((y0 << 3) | dy); + + dst_blk.m_bytes[3] |= 2; + } + + const uint32_t base_y[2] = { diff ? ((y0 << 3) | (y0 >> 2)) : ((y0 << 4) | y0), diff ? ((y1 << 3) | (y1 >> 2)) : ((y1 << 4) | y1) }; + + uint32_t enc_range[2]; + for (uint32_t subset = 0; subset < 2; subset++) + { + const int pos = basisu::iabs((int)high[subset] - (int)base_y[subset]); + const int neg = basisu::iabs((int)base_y[subset] - (int)low[subset]); + + enc_range[subset] = basisu::maximum(pos, neg); + } + + uint16_t l_bitmask = 0, h_bitmask = 0; + for (uint32_t subblock = 0; subblock < 2; subblock++) + { + if ((!diff) && (range[subblock] <= 3)) + { + const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]); + + const uint32_t table = encoded & 7; + const uint32_t base = (encoded >> 3) & 31; + assert(base <= 15); + const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 }; + + dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5)); + + const uint32_t mask = ~(0xF << (subblock ? 0 : 4)); + + dst_blk.m_bytes[0] &= mask; + dst_blk.m_bytes[1] &= mask; + dst_blk.m_bytes[2] &= mask; + + const uint32_t sv = base << (subblock ? 
0 : 4); + dst_blk.m_bytes[0] |= (uint8_t)(sv); + dst_blk.m_bytes[1] |= (uint8_t)(sv); + dst_blk.m_bytes[2] |= (uint8_t)(sv); + + if (flip) + { + uint32_t ofs = subblock * 2; + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t t = block_y[y + subblock * 2][x]; + assert(t >= low[subblock] && t <= high[subblock]); + t -= low[subblock]; + assert(t <= 3); + + t = g_selector_index_to_etc1[sels[t]]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + + ofs = (int)ofs + 1 - 4 * 4; + } + } + else + { + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + uint32_t t = block_y[y][x + subblock * 2]; + assert(t >= low[subblock] && t <= high[subblock]); + t -= low[subblock]; + assert(t <= 3); + + t = g_selector_index_to_etc1[sels[t]]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } + } + + continue; + } // if + + uint32_t best_err = UINT32_MAX; + uint8_t best_sels[8]; + uint32_t best_inten = 0; + + const int base = base_y[subblock]; + + const int low_limit = -base; + const int high_limit = 255 - base; + + assert(low_limit <= 0 && high_limit >= 0); + + uint32_t inten_table_mask = 0xFF; + const uint32_t er = enc_range[subblock]; + // Each one of these tables is expensive to evaluate, so let's only examine the ones we know may be useful. 
+ if (er <= 51) + { + inten_table_mask = 0xF; + + if (er > 22) + inten_table_mask &= ~(1 << 0); + + if ((er < 4) || (er > 39)) + inten_table_mask &= ~(1 << 1); + + if (er < 9) + inten_table_mask &= ~(1 << 2); + + if (er < 12) + inten_table_mask &= ~(1 << 3); + } + else + { + inten_table_mask &= ~((1 << 0) | (1 << 1)); + + if (er > 60) + inten_table_mask &= ~(1 << 2); + + if (er > 89) + inten_table_mask &= ~(1 << 3); + + if (er > 120) + inten_table_mask &= ~(1 << 4); + + if (er > 136) + inten_table_mask &= ~(1 << 5); + + if (er > 174) + inten_table_mask &= ~(1 << 6); + } + + for (uint32_t inten = 0; inten < 8; inten++) + { + if ((inten_table_mask & (1 << inten)) == 0) + continue; + + const int t0 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][0]); + const int t1 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][1]); + const int t2 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][2]); + const int t3 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][3]); + assert((t0 <= t1) && (t1 <= t2) && (t2 <= t3)); + + const int tv[4] = { t2, t3, t1, t0 }; + + const int thresh01 = t0 + t1; + const int thresh12 = t1 + t2; + const int thresh23 = t2 + t3; + + assert(thresh01 <= thresh12 && thresh12 <= thresh23); + + static const uint8_t s_table[4] = { 1, 0, 2, 3 }; + + uint32_t total_err = 0; + uint8_t sels[8]; + + if (flip) + { + if (((int)high[subblock] - base) * 2 < thresh01) + { + memset(sels, 3, 8); + + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const int delta = (int)block_y[y + subblock * 2][x] - base; + + const uint32_t c = 3; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + else if (((int)low[subblock] - base) * 2 >= thresh23) + { + memset(sels, 1, 8); + + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const int delta = (int)block_y[y + subblock * 2][x] - base; + + const uint32_t c = 1; + + uint32_t e = 
basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + else + { + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const int delta = (int)block_y[y + subblock * 2][x] - base; + const int delta2 = delta * 2; + + uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)]; + sels[y * 4 + x] = (uint8_t)c; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + } + else + { + if (((int)high[subblock] - base) * 2 < thresh01) + { + memset(sels, 3, 8); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const int delta = (int)block_y[y][x + subblock * 2] - base; + + const uint32_t c = 3; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + else if (((int)low[subblock] - base) * 2 >= thresh23) + { + memset(sels, 1, 8); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const int delta = (int)block_y[y][x + subblock * 2] - base; + + const uint32_t c = 1; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const int delta = (int)block_y[y][x + subblock * 2] - base; + const int delta2 = delta * 2; + + uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)]; + sels[y * 2 + x] = (uint8_t)c; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + } + + if (total_err < best_err) + { + best_err = total_err; + best_inten = inten; + memcpy(best_sels, sels, 8); + } + + } // inten + + //g_inten_hist[best_inten][enc_range[subblock]]++; + + dst_blk.m_bytes[3] |= (uint8_t)(best_inten << (subblock ? 
2 : 5)); + + if (flip) + { + uint32_t ofs = subblock * 2; + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t t = best_sels[y * 4 + x]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + + ofs = (int)ofs + 1 - 4 * 4; + } + } + else + { + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + uint32_t t = best_sels[y * 2 + x]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } + } + + } // subblock + + dst_blk.m_bytes[7] = (uint8_t)(l_bitmask); + dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8); + dst_blk.m_bytes[5] = (uint8_t)(h_bitmask); + dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8); + + return true; + } + + const uint32_t ETC2_EAC_MIN_VALUE_SELECTOR = 3, ETC2_EAC_MAX_VALUE_SELECTOR = 7; + + void transcode_uastc_to_etc2_eac_a8(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst) + { + eac_block& dst = *static_cast<eac_block*>(pDst); + const color32* pSrc_pixels = &block_pixels[0][0]; + + if ((!g_uastc_mode_has_alpha[unpacked_src_blk.m_mode]) || (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)) + { + const uint32_t a = (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) ?
unpacked_src_blk.m_solid_color[3] : 255; + + dst.m_base = a; + dst.m_table = 13; + dst.m_multiplier = 1; + + memcpy(dst.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); + + return; + } + + uint32_t min_a = 255, max_a = 0; + for (uint32_t i = 0; i < 16; i++) + { + min_a = basisu::minimum(min_a, pSrc_pixels[i].a); + max_a = basisu::maximum(max_a, pSrc_pixels[i].a); + } + + if (min_a == max_a) + { + dst.m_base = min_a; + dst.m_table = 13; + dst.m_multiplier = 1; + + memcpy(dst.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); + return; + } + + const uint32_t table = unpacked_src_blk.m_etc2_hints & 0xF; + const int multiplier = unpacked_src_blk.m_etc2_hints >> 4; + + assert(multiplier >= 1); + + dst.m_multiplier = multiplier; + dst.m_table = table; + + const float range = (float)(g_eac_modifier_table[dst.m_table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]); + const int center = (int)roundf(basisu::lerp((float)min_a, (float)max_a, (float)(0 - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)); + + dst.m_base = center; + + const int8_t* pTable = &g_eac_modifier_table[dst.m_table][0]; + + uint32_t vals[8]; + for (uint32_t j = 0; j < 8; j++) + vals[j] = clamp255(center + (pTable[j] * multiplier)); + + uint64_t sels = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t a = block_pixels[i & 3][i >> 2].a; + + const uint32_t err0 = (basisu::iabs(vals[0] - a) << 3) | 0; + const uint32_t err1 = (basisu::iabs(vals[1] - a) << 3) | 1; + const uint32_t err2 = (basisu::iabs(vals[2] - a) << 3) | 2; + const uint32_t err3 = (basisu::iabs(vals[3] - a) << 3) | 3; + const uint32_t err4 = (basisu::iabs(vals[4] - a) << 3) | 4; + const uint32_t err5 = (basisu::iabs(vals[5] - a) << 3) | 5; + const uint32_t err6 = (basisu::iabs(vals[6] - a) << 3) | 6; + const uint32_t err7 = (basisu::iabs(vals[7] - a) << 3) | 7; + + const uint32_t min_err = 
basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(err0, err1, err2), err3), err4), err5), err6), err7); + + const uint64_t best_index = min_err & 7; + sels |= (best_index << (45 - i * 3)); + } + + dst.set_selector_bits(sels); + } + + bool transcode_uastc_to_etc2_rgba(const uastc_block& src_blk, void* pDst) + { + eac_block& dst_etc2_eac_a8_blk = *static_cast<eac_block*>(pDst); + decoder_etc_block& dst_etc1_blk = static_cast<decoder_etc_block*>(pDst)[1]; + + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + color32 block_pixels[4][4]; + if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR) + { + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + } + + transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &dst_etc2_eac_a8_blk); + + transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, &dst_etc1_blk); + + return true; + } + + static const uint8_t s_uastc5_to_bc1[32] = { 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1 }; + static const uint8_t s_uastc4_to_bc1[16] = { 0, 0, 0, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 1, 1, 1 }; + static const uint8_t s_uastc3_to_bc1[8] = { 0, 0, 2, 2, 3, 3, 1, 1 }; + static const uint8_t s_uastc2_to_bc1[4] = { 0, 2, 3, 1 }; + static const uint8_t s_uastc1_to_bc1[2] = { 0, 1 }; + const uint8_t* s_uastc_to_bc1_weights[6] = { nullptr, s_uastc1_to_bc1, s_uastc2_to_bc1, s_uastc3_to_bc1, s_uastc4_to_bc1, s_uastc5_to_bc1 }; + + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride) + { + uint32_t min0_v, max0_v, min1_v, max1_v,min2_v, max2_v, min3_v, max3_v; + + { + min0_v = max0_v = pPixels[0 * stride]; + min1_v = max1_v = pPixels[1 * stride]; + min2_v = max2_v = pPixels[2 * stride]; + min3_v = max3_v = pPixels[3 * stride]; + } + + { + uint32_t v0 = pPixels[4 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v =
basisu::maximum(max0_v, v0); + uint32_t v1 = pPixels[5 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1); + uint32_t v2 = pPixels[6 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2); + uint32_t v3 = pPixels[7 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3); + } + + { + uint32_t v0 = pPixels[8 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = basisu::maximum(max0_v, v0); + uint32_t v1 = pPixels[9 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1); + uint32_t v2 = pPixels[10 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2); + uint32_t v3 = pPixels[11 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3); + } + + { + uint32_t v0 = pPixels[12 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = basisu::maximum(max0_v, v0); + uint32_t v1 = pPixels[13 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1); + uint32_t v2 = pPixels[14 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2); + uint32_t v3 = pPixels[15 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3); + } + + const uint32_t min_v = basisu::minimum(min0_v, min1_v, min2_v, min3_v); + const uint32_t max_v = basisu::maximum(max0_v, max1_v, max2_v, max3_v); + + uint8_t* pDst_bytes = static_cast<uint8_t*>(pDst); + pDst_bytes[0] = (uint8_t)max_v; + pDst_bytes[1] = (uint8_t)min_v; + + if (max_v == min_v) + { + memset(pDst_bytes + 2, 0, 6); + return; + } + + const uint32_t delta = max_v - min_v; + + // min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors.
+ const int t0 = delta * 13; + const int t1 = delta * 11; + const int t2 = delta * 9; + const int t3 = delta * 7; + const int t4 = delta * 5; + const int t5 = delta * 3; + const int t6 = delta * 1; + + // BC4 floors in its divisions, which we compensate for with the 4 bias. + // This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one). + const int bias = 4 - min_v * 14; + + static const uint32_t s_tran0[8] = { 1U , 7U , 6U , 5U , 4U , 3U , 2U , 0U }; + static const uint32_t s_tran1[8] = { 1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U }; + static const uint32_t s_tran2[8] = { 1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U }; + static const uint32_t s_tran3[8] = { 1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U }; + + uint64_t a0, a1, a2, a3; + { + const int v0 = pPixels[0 * stride] * 14 + bias; + const int v1 = pPixels[1 * stride] * 14 + bias; + const int v2 = pPixels[2 * stride] * 14 + bias; + const int v3 = pPixels[3 * stride] * 14 + bias; + a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]; + a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]; + a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]; + a3 = s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]; + } + + { + const int v0 = pPixels[4 * stride] * 14 + bias; + const int v1 = pPixels[5 * stride] * 14 + bias; + const int v2 = pPixels[6 * stride] * 14 + bias; + const int v3 = pPixels[7 * stride] * 14 + bias; + a0 |= (s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U); + a1 |= (s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U); + 
a2 |= (s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U); + a3 |= (s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U); + } + + { + const int v0 = pPixels[8 * stride] * 14 + bias; + const int v1 = pPixels[9 * stride] * 14 + bias; + const int v2 = pPixels[10 * stride] * 14 + bias; + const int v3 = pPixels[11 * stride] * 14 + bias; + a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U); + a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U); + a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U); + a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U); + } + + { + const int v0 = pPixels[12 * stride] * 14 + bias; + const int v1 = pPixels[13 * stride] * 14 + bias; + const int v2 = pPixels[14 * stride] * 14 + bias; + const int v3 = pPixels[15 * stride] * 14 + bias; + a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U); + a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U); + a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U); + a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U); + } + + const uint64_t f = a0 | a1 | a2 | a3; + + pDst_bytes[2] = (uint8_t)f; + pDst_bytes[3] = (uint8_t)(f >> 8U); + pDst_bytes[4] = (uint8_t)(f >> 16U); + pDst_bytes[5] = (uint8_t)(f >> 24U); + pDst_bytes[6] = (uint8_t)(f >> 32U); + pDst_bytes[7] = (uint8_t)(f >> 40U); + } + + static void bc1_find_sels(const color32 
*pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16]) + { + uint32_t block_r[4], block_g[4], block_b[4]; + + block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); + block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2); + block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3; + block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3; + + int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; + + int dots[4]; + for (uint32_t i = 0; i < 4; i++) + dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; + + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; + + ar *= 2; ag *= 2; ab *= 2; + + for (uint32_t i = 0; i < 16; i++) + { + const int d = pSrc_pixels[i].r * ar + pSrc_pixels[i].g * ag + pSrc_pixels[i].b * ab; + static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; + + // Rounding matters here! + // d <= t0: <=, not <, to the later LS step "sees" a wider range of selectors. It matters for quality. 
+ sels[i] = s_sels[(d <= t0) + (d < t1) + (d < t2)]; + } + } + + static inline void bc1_find_sels_2(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16]) + { + uint32_t block_r[4], block_g[4], block_b[4]; + + block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); + block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2); + block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3; + block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3; + + int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; + + int dots[4]; + for (uint32_t i = 0; i < 4; i++) + dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; + + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; + + ar *= 2; ag *= 2; ab *= 2; + + static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; + + for (uint32_t i = 0; i < 16; i += 4) + { + const int d0 = pSrc_pixels[i+0].r * ar + pSrc_pixels[i+0].g * ag + pSrc_pixels[i+0].b * ab; + const int d1 = pSrc_pixels[i+1].r * ar + pSrc_pixels[i+1].g * ag + pSrc_pixels[i+1].b * ab; + const int d2 = pSrc_pixels[i+2].r * ar + pSrc_pixels[i+2].g * ag + pSrc_pixels[i+2].b * ab; + const int d3 = pSrc_pixels[i+3].r * ar + pSrc_pixels[i+3].g * ag + pSrc_pixels[i+3].b * ab; + + sels[i+0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)]; + sels[i+1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)]; + sels[i+2] = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)]; + sels[i+3] = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)]; + } + } + + static bool compute_least_squares_endpoints_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh) + { + // Derived from 
bc7enc16's LS function. + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // I did this in matrix form first, expanded out all the ops, then optimized it a bit. + uint32_t uq00_r = 0, uq10_r = 0, ut_r = 0, uq00_g = 0, uq10_g = 0, ut_g = 0, uq00_b = 0, uq10_b = 0, ut_b = 0; + + // This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w)) + // where w is [0,1/3,2/3,1]. 9 is the perfect multiplier. + static const uint32_t s_weight_vals[4] = { 0x000009, 0x010204, 0x040201, 0x090000 }; + + uint32_t weight_accum = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2]; + const uint32_t sel = pSelectors[i]; + ut_r += r; + ut_g += g; + ut_b += b; + weight_accum += s_weight_vals[sel]; + uq00_r += sel * r; + uq00_g += sel * g; + uq00_b += sel * b; + } + + float q00_r = (float)uq00_r, q10_r = (float)uq10_r, t_r = (float)ut_r; + float q00_g = (float)uq00_g, q10_g = (float)uq10_g, t_g = (float)ut_g; + float q00_b = (float)uq00_b, q10_b = (float)uq10_b, t_b = (float)ut_b; + + q10_r = t_r * 3.0f - q00_r; + q10_g = t_g * 3.0f - q00_g; + q10_b = t_b * 3.0f - q00_b; + + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + return false; + + det = 3.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + pXl->c[0] = iz00 * q00_r + iz01 * q10_r; pXh->c[0] = iz10 * q00_r + iz11 * q10_r; + pXl->c[1] = iz00 * q00_g + iz01 * q10_g; pXh->c[1] = iz10 * q00_g + iz11 * q10_g; + pXl->c[2] = iz00 * q00_b + iz01 * q10_b; pXh->c[2] = iz10 * q00_b + iz11 * q10_b; + + // Check and fix channel singularities - might not be needed, but is in UASTC's encoder. 
+ for (uint32_t c = 0; c < 3; c++) + { + if ((pXl->c[c] < 0.0f) || (pXh->c[c] > 255.0f)) + { + uint32_t lo_v = UINT32_MAX, hi_v = 0; + for (uint32_t i = 0; i < 16; i++) + { + lo_v = basisu::minimumu(lo_v, pColors[i].c[c]); + hi_v = basisu::maximumu(hi_v, pColors[i].c[c]); + } + + if (lo_v == hi_v) + { + pXl->c[c] = (float)lo_v; + pXh->c[c] = (float)hi_v; + } + } + } + + return true; + } + + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) + { + dxt1_block* pDst_block = static_cast(pDst); + + uint32_t mask = 0xAA; + uint32_t max16 = (g_bc1_match5_equals_1[fr].m_hi << 11) | (g_bc1_match6_equals_1[fg].m_hi << 5) | g_bc1_match5_equals_1[fb].m_hi; + uint32_t min16 = (g_bc1_match5_equals_1[fr].m_lo << 11) | (g_bc1_match6_equals_1[fg].m_lo << 5) | g_bc1_match5_equals_1[fb].m_lo; + + if (min16 == max16) + { + // Always forbid 3 color blocks + // This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's. + mask = 0; + + // Make l > h + if (min16 > 0) + min16--; + else + { + // l = h = 0 + assert(min16 == max16 && max16 == 0); + + max16 = 1; + min16 = 0; + mask = 0x55; + } + + assert(max16 > min16); + } + + if (max16 < min16) + { + std::swap(max16, min16); + mask ^= 0x55; + } + + pDst_block->set_low_color(static_cast(max16)); + pDst_block->set_high_color(static_cast(min16)); + pDst_block->m_selectors[0] = static_cast(mask); + pDst_block->m_selectors[1] = static_cast(mask); + pDst_block->m_selectors[2] = static_cast(mask); + pDst_block->m_selectors[3] = static_cast(mask); + } + + static inline uint8_t to_5(uint32_t v) { v = v * 31 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } + static inline uint8_t to_6(uint32_t v) { v = v * 63 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } + + // Good references: squish library, stb_dxt. 
+ void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags) + { + const color32* pSrc_pixels = (const color32*)pPixels; + dxt1_block* pDst_block = static_cast(pDst); + + int avg_r = -1, avg_g = 0, avg_b = 0; + int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0; + uint8_t sels[16]; + + const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0; + if (use_sels) + { + // Caller is jamming in their own selectors for us to try. + const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24); + + static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 }; + + for (uint32_t i = 0; i < 16; i++) + sels[i] = s_sel_tran[(s >> (i * 2)) & 3]; + } + else + { + const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b; + + uint32_t j; + for (j = 1; j < 16; j++) + if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) + break; + + if (j == 16) + { + encode_bc1_solid_block(pDst, fr, fg, fb); + return; + } + + // Select 2 colors along the principle axis. (There must be a faster/simpler way.) 
+ int total_r = fr, total_g = fg, total_b = fb; + int max_r = fr, max_g = fg, max_b = fb; + int min_r = fr, min_g = fg, min_b = fb; + for (uint32_t i = 1; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b); + min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); + total_r += r; total_g += g; total_b += b; + } + + avg_r = (total_r + 8) >> 4; + avg_g = (total_g + 8) >> 4; + avg_b = (total_b + 8) >> 4; + + int icov[6] = { 0, 0, 0, 0, 0, 0 }; + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pSrc_pixels[i].r - avg_r; + int g = (int)pSrc_pixels[i].g - avg_g; + int b = (int)pSrc_pixels[i].b - avg_b; + icov[0] += r * r; + icov[1] += r * g; + icov[2] += r * b; + icov[3] += g * g; + icov[4] += g * b; + icov[5] += b * b; + } + + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = static_cast(icov[i])* (1.0f / 255.0f); + +#if 0 + // Seems silly to use full PCA to choose 2 colors. The diff in avg. PSNR between using PCA vs. not is small (~.025 difference). 
+ // TODO: Try 2 or 3 different normalized diagonal vectors, choose the one that results in the largest dot delta + int saxis_r = max_r - min_r; + int saxis_g = max_g - min_g; + int saxis_b = max_b - min_b; +#else + float xr = (float)(max_r - min_r); + float xg = (float)(max_g - min_g); + float xb = (float)(max_b - min_b); + //float xr = (float)(max_r - avg_r); // max-avg is nearly the same, and doesn't require computing min's + //float xg = (float)(max_g - avg_g); + //float xb = (float)(max_b - avg_b); + for (uint32_t power_iter = 0; power_iter < 4; power_iter++) + { + float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; + float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; + float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; + xr = r; xg = g; xb = b; + } + + float k = basisu::maximum(fabsf(xr), fabsf(xg), fabsf(xb)); + int saxis_r = 306, saxis_g = 601, saxis_b = 117; + if (k >= 2) + { + float m = 1024.0f / k; + saxis_r = (int)(xr * m); + saxis_g = (int)(xg * m); + saxis_b = (int)(xb * m); + } +#endif + + int low_dot = INT_MAX, high_dot = INT_MIN, low_c = 0, high_c = 0; + for (uint32_t i = 0; i < 16; i++) + { + int dot = pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b; + if (dot < low_dot) + { + low_dot = dot; + low_c = i; + } + if (dot > high_dot) + { + high_dot = dot; + high_c = i; + } + } + + lr = to_5(pSrc_pixels[low_c].r); + lg = to_6(pSrc_pixels[low_c].g); + lb = to_5(pSrc_pixels[low_c].b); + + hr = to_5(pSrc_pixels[high_c].r); + hg = to_6(pSrc_pixels[high_c].g); + hb = to_5(pSrc_pixels[high_c].b); + + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); + } // if (use_sels) + + const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1); + for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) + { + // This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors. 
+ vec3F xl, xh; + if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh)) + { + if (avg_r < 0) + { + int total_r = 0, total_g = 0, total_b = 0; + for (uint32_t i = 0; i < 16; i++) + { + total_r += pSrc_pixels[i].r; + total_g += pSrc_pixels[i].g; + total_b += pSrc_pixels[i].b; + } + + avg_r = (total_r + 8) >> 4; + avg_g = (total_g + 8) >> 4; + avg_b = (total_b + 8) >> 4; + } + + // All selectors equal - treat it as a solid block which should always be equal or better. + lr = g_bc1_match5_equals_1[avg_r].m_hi; + lg = g_bc1_match6_equals_1[avg_g].m_hi; + lb = g_bc1_match5_equals_1[avg_b].m_hi; + + hr = g_bc1_match5_equals_1[avg_r].m_lo; + hg = g_bc1_match6_equals_1[avg_g].m_lo; + hb = g_bc1_match5_equals_1[avg_b].m_lo; + + // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. + } + else + { + lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31); + lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); + lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); + + hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31); + hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); + hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); + } + + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); + } + + uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb); + uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb); + + // Always forbid 3 color blocks + if (lc16 == hc16) + { + uint8_t mask = 0; + + // Make l > h + if (hc16 > 0) + hc16--; + else + { + // lc16 = hc16 = 0 + assert(lc16 == hc16 && hc16 == 0); + + hc16 = 0; + lc16 = 1; + mask = 0x55; // select hc16 + } + + assert(lc16 > hc16); + pDst_block->set_low_color(static_cast(lc16)); + pDst_block->set_high_color(static_cast(hc16)); + + pDst_block->m_selectors[0] = mask; + pDst_block->m_selectors[1] = mask; + pDst_block->m_selectors[2] = mask; + 
pDst_block->m_selectors[3] = mask; + } + else + { + uint8_t invert_mask = 0; + if (lc16 < hc16) + { + std::swap(lc16, hc16); + invert_mask = 0x55; + } + + assert(lc16 > hc16); + pDst_block->set_low_color((uint16_t)lc16); + pDst_block->set_high_color((uint16_t)hc16); + + uint32_t packed_sels = 0; + static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 }; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2)); + + pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask; + pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask; + pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask; + pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; + } + } + + void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags) + { + const color32* pSrc_pixels = (const color32*)pPixels; + dxt1_block* pDst_block = static_cast(pDst); + + int avg_r = -1, avg_g = 0, avg_b = 0; + int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0; + uint8_t sels[16]; + + const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0; + if (use_sels) + { + // Caller is jamming in their own selectors for us to try. + const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24); + + static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 }; + + for (uint32_t i = 0; i < 16; i++) + sels[i] = s_sel_tran[(s >> (i * 2)) & 3]; + } + else + { + const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b; + + uint32_t j; + for (j = 1; j < 16; j++) + if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) + break; + + if (j == 16) + { + encode_bc1_solid_block(pDst, fr, fg, fb); + return; + } + + // Select 2 colors along the principle axis. (There must be a faster/simpler way.) 
+ int total_r = fr, total_g = fg, total_b = fb; + int max_r = fr, max_g = fg, max_b = fb; + int min_r = fr, min_g = fg, min_b = fb; + uint32_t grayscale_flag = (fr == fg) && (fr == fb); + for (uint32_t i = 1; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + grayscale_flag &= ((r == g) && (r == b)); + max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b); + min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); + total_r += r; total_g += g; total_b += b; + } + + if (grayscale_flag) + { + // Grayscale blocks are a common enough case to specialize. + if ((max_r - min_r) < 2) + { + lr = lb = hr = hb = to_5(fr); + lg = hg = to_6(fr); + } + else + { + lr = lb = to_5(min_r); + lg = to_6(min_r); + + hr = hb = to_5(max_r); + hg = to_6(max_r); + } + } + else + { + avg_r = (total_r + 8) >> 4; + avg_g = (total_g + 8) >> 4; + avg_b = (total_b + 8) >> 4; + + // Find the shortest vector from a AABB corner to the block's average color. + // This is to help avoid outliers. 
+ + uint32_t dist[3][2]; + dist[0][0] = basisu::square(min_r - avg_r) << 3; dist[0][1] = basisu::square(max_r - avg_r) << 3; + dist[1][0] = basisu::square(min_g - avg_g) << 3; dist[1][1] = basisu::square(max_g - avg_g) << 3; + dist[2][0] = basisu::square(min_b - avg_b) << 3; dist[2][1] = basisu::square(max_b - avg_b) << 3; + + uint32_t min_d0 = (dist[0][0] + dist[1][0] + dist[2][0]); + uint32_t d4 = (dist[0][0] + dist[1][0] + dist[2][1]) | 4; + min_d0 = basisu::minimum(min_d0, d4); + + uint32_t min_d1 = (dist[0][1] + dist[1][0] + dist[2][0]) | 1; + uint32_t d5 = (dist[0][1] + dist[1][0] + dist[2][1]) | 5; + min_d1 = basisu::minimum(min_d1, d5); + + uint32_t d2 = (dist[0][0] + dist[1][1] + dist[2][0]) | 2; + min_d0 = basisu::minimum(min_d0, d2); + + uint32_t d3 = (dist[0][1] + dist[1][1] + dist[2][0]) | 3; + min_d1 = basisu::minimum(min_d1, d3); + + uint32_t d6 = (dist[0][0] + dist[1][1] + dist[2][1]) | 6; + min_d0 = basisu::minimum(min_d0, d6); + + uint32_t d7 = (dist[0][1] + dist[1][1] + dist[2][1]) | 7; + min_d1 = basisu::minimum(min_d1, d7); + + uint32_t min_d = basisu::minimum(min_d0, min_d1); + uint32_t best_i = min_d & 7; + + int delta_r = (best_i & 1) ? (max_r - avg_r) : (avg_r - min_r); + int delta_g = (best_i & 2) ? (max_g - avg_g) : (avg_g - min_g); + int delta_b = (best_i & 4) ? (max_b - avg_b) : (avg_b - min_b); + + // Note: if delta_r/g/b==0, we actually want to choose a single color, so the block average color optimization kicks in. + uint32_t low_c = 0, high_c = 0; + if ((delta_r | delta_g | delta_b) != 0) + { + // Now we have a smaller AABB going from the block's average color to a cornerpoint of the larger AABB. + // Project all pixels colors along the 4 vectors going from a smaller AABB cornerpoint to the opposite cornerpoint, find largest projection. + // One of these vectors will be a decent approximation of the block's PCA. 
+ const int saxis0_r = delta_r, saxis0_g = delta_g, saxis0_b = delta_b; + + int low_dot0 = INT_MAX, high_dot0 = INT_MIN; + int low_dot1 = INT_MAX, high_dot1 = INT_MIN; + int low_dot2 = INT_MAX, high_dot2 = INT_MIN; + int low_dot3 = INT_MAX, high_dot3 = INT_MIN; + + //int low_c0, low_c1, low_c2, low_c3; + //int high_c0, high_c1, high_c2, high_c3; + + for (uint32_t i = 0; i < 16; i++) + { + const int dotx = pSrc_pixels[i].r * saxis0_r; + const int doty = pSrc_pixels[i].g * saxis0_g; + const int dotz = pSrc_pixels[i].b * saxis0_b; + + const int dot0 = ((dotz + dotx + doty) << 4) + i; + const int dot1 = ((dotz - dotx - doty) << 4) + i; + const int dot2 = ((dotz - dotx + doty) << 4) + i; + const int dot3 = ((dotz + dotx - doty) << 4) + i; + + if (dot0 < low_dot0) + { + low_dot0 = dot0; + //low_c0 = i; + } + if ((dot0 ^ 15) > high_dot0) + { + high_dot0 = dot0 ^ 15; + //high_c0 = i; + } + + if (dot1 < low_dot1) + { + low_dot1 = dot1; + //low_c1 = i; + } + if ((dot1 ^ 15) > high_dot1) + { + high_dot1 = dot1 ^ 15; + //high_c1 = i; + } + + if (dot2 < low_dot2) + { + low_dot2 = dot2; + //low_c2 = i; + } + if ((dot2 ^ 15) > high_dot2) + { + high_dot2 = dot2 ^ 15; + //high_c2 = i; + } + + if (dot3 < low_dot3) + { + low_dot3 = dot3; + //low_c3 = i; + } + if ((dot3 ^ 15) > high_dot3) + { + high_dot3 = dot3 ^ 15; + //high_c3 = i; + } + } + + low_c = low_dot0 & 15; + high_c = ~high_dot0 & 15; + uint32_t r = (high_dot0 & ~15) - (low_dot0 & ~15); + + uint32_t tr = (high_dot1 & ~15) - (low_dot1 & ~15); + if (tr > r) { + low_c = low_dot1 & 15; + high_c = ~high_dot1 & 15; + r = tr; + } + + tr = (high_dot2 & ~15) - (low_dot2 & ~15); + if (tr > r) { + low_c = low_dot2 & 15; + high_c = ~high_dot2 & 15; + r = tr; + } + + tr = (high_dot3 & ~15) - (low_dot3 & ~15); + if (tr > r) { + low_c = low_dot3 & 15; + high_c = ~high_dot3 & 15; + } + } + + lr = to_5(pSrc_pixels[low_c].r); + lg = to_6(pSrc_pixels[low_c].g); + lb = to_5(pSrc_pixels[low_c].b); + + hr = to_5(pSrc_pixels[high_c].r); + hg = 
to_6(pSrc_pixels[high_c].g); + hb = to_5(pSrc_pixels[high_c].b); + } + + bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); + } // if (use_sels) + + const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1); + for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) + { + int prev_lr = lr, prev_lg = lg, prev_lb = lb, prev_hr = hr, prev_hg = hg, prev_hb = hb; + + // This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors. + vec3F xl, xh; + if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh)) + { + if (avg_r < 0) + { + int total_r = 0, total_g = 0, total_b = 0; + for (uint32_t i = 0; i < 16; i++) + { + total_r += pSrc_pixels[i].r; + total_g += pSrc_pixels[i].g; + total_b += pSrc_pixels[i].b; + } + + avg_r = (total_r + 8) >> 4; + avg_g = (total_g + 8) >> 4; + avg_b = (total_b + 8) >> 4; + } + + // All selectors equal - treat it as a solid block which should always be equal or better. + lr = g_bc1_match5_equals_1[avg_r].m_hi; + lg = g_bc1_match6_equals_1[avg_g].m_hi; + lb = g_bc1_match5_equals_1[avg_b].m_hi; + + hr = g_bc1_match5_equals_1[avg_r].m_lo; + hg = g_bc1_match6_equals_1[avg_g].m_lo; + hb = g_bc1_match5_equals_1[avg_b].m_lo; + + // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. 
+ } + else + { + lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31); + lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); + lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); + + hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31); + hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); + hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); + } + + if ((prev_lr == lr) && (prev_lg == lg) && (prev_lb == lb) && (prev_hr == hr) && (prev_hg == hg) && (prev_hb == hb)) + break; + + bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); + } + + uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb); + uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb); + + // Always forbid 3 color blocks + if (lc16 == hc16) + { + uint8_t mask = 0; + + // Make l > h + if (hc16 > 0) + hc16--; + else + { + // lc16 = hc16 = 0 + assert(lc16 == hc16 && hc16 == 0); + + hc16 = 0; + lc16 = 1; + mask = 0x55; // select hc16 + } + + assert(lc16 > hc16); + pDst_block->set_low_color(static_cast(lc16)); + pDst_block->set_high_color(static_cast(hc16)); + + pDst_block->m_selectors[0] = mask; + pDst_block->m_selectors[1] = mask; + pDst_block->m_selectors[2] = mask; + pDst_block->m_selectors[3] = mask; + } + else + { + uint8_t invert_mask = 0; + if (lc16 < hc16) + { + std::swap(lc16, hc16); + invert_mask = 0x55; + } + + assert(lc16 > hc16); + pDst_block->set_low_color((uint16_t)lc16); + pDst_block->set_high_color((uint16_t)hc16); + + uint32_t packed_sels = 0; + static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 }; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2)); + + pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask; + pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask; + pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask; + pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ 
invert_mask; + } + } + + // Scale the UASTC first subset endpoints and first plane's weight indices directly to BC1's - fastest. + void transcode_uastc_to_bc1_hint0(const unpacked_uastc_block& unpacked_src_blk, void* pDst) + { + const uint32_t mode = unpacked_src_blk.m_mode; + const astc_block_desc& astc_blk = unpacked_src_blk.m_astc; + + dxt1_block& b = *static_cast(pDst); + + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode]; + + const uint32_t total_comps = g_uastc_mode_comps[mode]; + + if (total_comps == 2) + { + const uint32_t l = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant; + const uint32_t h = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant; + + b.set_low_color(dxt1_block::pack_color(color32(l, l, l, 255), true, 127)); + b.set_high_color(dxt1_block::pack_color(color32(h, h, h, 255), true, 127)); + } + else + { + b.set_low_color(dxt1_block::pack_color( + color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant, + g_astc_unquant[endpoint_range][astc_blk.m_endpoints[2]].m_unquant, + g_astc_unquant[endpoint_range][astc_blk.m_endpoints[4]].m_unquant, + 255), true, 127) + ); + + b.set_high_color(dxt1_block::pack_color( + color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant, + g_astc_unquant[endpoint_range][astc_blk.m_endpoints[3]].m_unquant, + g_astc_unquant[endpoint_range][astc_blk.m_endpoints[5]].m_unquant, + 255), true, 127) + ); + } + + if (b.get_low_color() == b.get_high_color()) + { + // Always forbid 3 color blocks + uint16_t lc16 = (uint16_t)b.get_low_color(); + uint16_t hc16 = (uint16_t)b.get_high_color(); + + uint8_t mask = 0; + + // Make l > h + if (hc16 > 0) + hc16--; + else + { + // lc16 = hc16 = 0 + assert(lc16 == hc16 && hc16 == 0); + + hc16 = 0; + lc16 = 1; + mask = 0x55; // select hc16 + } + + assert(lc16 > hc16); + b.set_low_color(static_cast(lc16)); + b.set_high_color(static_cast(hc16)); + + b.m_selectors[0] = mask; + b.m_selectors[1] = mask; + 
b.m_selectors[2] = mask; + b.m_selectors[3] = mask; + } + else + { + bool invert = false; + if (b.get_low_color() < b.get_high_color()) + { + std::swap(b.m_low_color[0], b.m_high_color[0]); + std::swap(b.m_low_color[1], b.m_high_color[1]); + invert = true; + } + + const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]]; + + const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1; + + uint32_t sels = 0; + for (int i = 15; i >= 0; --i) + { + uint32_t s = pTran[astc_blk.m_weights[i << plane_shift]]; + + if (invert) + s ^= 1; + + sels = (sels << 2) | s; + } + b.m_selectors[0] = sels & 0xFF; + b.m_selectors[1] = (sels >> 8) & 0xFF; + b.m_selectors[2] = (sels >> 16) & 0xFF; + b.m_selectors[3] = (sels >> 24) & 0xFF; + } + } + + // Scale the UASTC first plane's weight indices to BC1, use 1 or 2 least squares passes to compute endpoints - no PCA needed. + void transcode_uastc_to_bc1_hint1(const unpacked_uastc_block& unpacked_src_blk, const color32 block_pixels[4][4], void* pDst, bool high_quality) + { + const uint32_t mode = unpacked_src_blk.m_mode; + + const astc_block_desc& astc_blk = unpacked_src_blk.m_astc; + + dxt1_block& b = *static_cast(pDst); + + b.set_low_color(1); + b.set_high_color(0); + + const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]]; + + const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1; + + uint32_t sels = 0; + for (int i = 15; i >= 0; --i) + { + sels <<= 2; + sels |= pTran[astc_blk.m_weights[i << plane_shift]]; + } + + b.m_selectors[0] = sels & 0xFF; + b.m_selectors[1] = (sels >> 8) & 0xFF; + b.m_selectors[2] = (sels >> 16) & 0xFF; + b.m_selectors[3] = (sels >> 24) & 0xFF; + + encode_bc1(&b, (const uint8_t*)&block_pixels[0][0].c[0], (high_quality ? 
cEncodeBC1HighQuality : 0) | cEncodeBC1UseSelectors); + } + + bool transcode_uastc_to_bc1(const uastc_block& src_blk, void* pDst, bool high_quality) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + encode_bc1_solid_block(pDst, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b); + return true; + } + + if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0)) + transcode_uastc_to_bc1_hint0(unpacked_src_blk, pDst); + else + { + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + if (unpacked_src_blk.m_bc1_hint1) + transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pDst, high_quality); + else + encode_bc1(pDst, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0); + } + + return true; + } + + static void write_bc4_solid_block(uint8_t* pDst, uint32_t a) + { + pDst[0] = (uint8_t)a; + pDst[1] = (uint8_t)a; + memset(pDst + 2, 0, 6); + } + + bool transcode_uastc_to_bc3(const uastc_block& src_blk, void* pDst, bool high_quality) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + void* pBC4_block = pDst; + dxt1_block* pBC1_block = &static_cast(pDst)[1]; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + write_bc4_solid_block(static_cast(pBC4_block), unpacked_src_blk.m_solid_color.a); + encode_bc1_solid_block(pBC1_block, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b); + return true; + } + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + 
basist::encode_bc4(pBC4_block, &block_pixels[0][0].a, sizeof(color32)); + + if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0)) + transcode_uastc_to_bc1_hint0(unpacked_src_blk, pBC1_block); + else + { + if (unpacked_src_blk.m_bc1_hint1) + transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pBC1_block, high_quality); + else + encode_bc1(pBC1_block, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0); + } + + return true; + } + + bool transcode_uastc_to_bc4(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0) + { + BASISU_NOTE_UNUSED(high_quality); + + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + void* pBC4_block = pDst; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + write_bc4_solid_block(static_cast(pBC4_block), unpacked_src_blk.m_solid_color.c[chan0]); + return true; + } + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + basist::encode_bc4(pBC4_block, &block_pixels[0][0].c[chan0], sizeof(color32)); + + return true; + } + + bool transcode_uastc_to_bc5(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1) + { + BASISU_NOTE_UNUSED(high_quality); + + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + void* pBC4_block0 = pDst; + void* pBC4_block1 = (uint8_t*)pDst + 8; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + write_bc4_solid_block(static_cast(pBC4_block0), unpacked_src_blk.m_solid_color.c[chan0]); + write_bc4_solid_block(static_cast(pBC4_block1), unpacked_src_blk.m_solid_color.c[chan1]); + return true; + } + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], 
unpack_srgb)) + return false; + + basist::encode_bc4(pBC4_block0, &block_pixels[0][0].c[chan0], sizeof(color32)); + basist::encode_bc4(pBC4_block1, &block_pixels[0][0].c[chan1], sizeof(color32)); + + return true; + } + + static const uint8_t s_etc2_eac_bit_ofs[16] = { 45, 33, 21, 9, 42, 30, 18, 6, 39, 27, 15, 3, 36, 24, 12, 0 }; + + static void pack_eac_solid_block(eac_block& blk, uint32_t a) + { + blk.m_base = static_cast(a); + blk.m_table = 13; + blk.m_multiplier = 0; + + memcpy(blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); + + return; + } + + // Only checks 4 tables. + static void pack_eac(eac_block& blk, const uint8_t* pPixels, uint32_t stride) + { + uint32_t min_alpha = 255, max_alpha = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t a = pPixels[i * stride]; + if (a < min_alpha) min_alpha = a; + if (a > max_alpha) max_alpha = a; + } + + if (min_alpha == max_alpha) + { + pack_eac_solid_block(blk, min_alpha); + return; + } + + const uint32_t alpha_range = max_alpha - min_alpha; + + const uint32_t SINGLE_TABLE_THRESH = 5; + if (alpha_range <= SINGLE_TABLE_THRESH) + { + // If alpha_range <= 5 table 13 is lossless + int base = clamp255((int)max_alpha - 2); + + blk.m_base = base; + blk.m_multiplier = 1; + blk.m_table = 13; + + base -= 3; + + uint64_t packed_sels = 0; + for (uint32_t i = 0; i < 16; i++) + { + const int a = pPixels[i * stride]; + + static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 }; + + int sel = a - base; + assert(sel >= 0 && sel <= 5); + + packed_sels |= (static_cast(s_sels[sel]) << s_etc2_eac_bit_ofs[i]); + } + + blk.set_selector_bits(packed_sels); + + return; + } + + const uint32_t T0 = 2, T1 = 8, T2 = 11, T3 = 13; + static const uint8_t s_tables[4] = { T0, T1, T2, T3 }; + + int base[4], mul[4]; + uint32_t mul_or = 0; + for (uint32_t i = 0; i < 4; i++) + { + const uint32_t table = s_tables[i]; + + const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - 
g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]); + + base[i] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range))); + mul[i] = clampi((int)roundf(alpha_range / range), 1, 15); + mul_or |= mul[i]; + } + + uint32_t total_err[4] = { 0, 0, 0, 0 }; + uint8_t sels[4][16]; + + for (uint32_t i = 0; i < 16; i++) + { + const int a = pPixels[i * stride]; + + uint32_t l0 = UINT32_MAX, l1 = UINT32_MAX, l2 = UINT32_MAX, l3 = UINT32_MAX; + + if ((a < 7) || (a > (255 - 7))) + { + for (uint32_t s = 0; s < 8; s++) + { + const int v0 = clamp255(mul[0] * g_eac_modifier_table[T0][s] + base[0]); + const int v1 = clamp255(mul[1] * g_eac_modifier_table[T1][s] + base[1]); + const int v2 = clamp255(mul[2] * g_eac_modifier_table[T2][s] + base[2]); + const int v3 = clamp255(mul[3] * g_eac_modifier_table[T3][s] + base[3]); + + l0 = basisu::minimum(l0, (basisu::iabs(v0 - a) << 3) | s); + l1 = basisu::minimum(l1, (basisu::iabs(v1 - a) << 3) | s); + l2 = basisu::minimum(l2, (basisu::iabs(v2 - a) << 3) | s); + l3 = basisu::minimum(l3, (basisu::iabs(v3 - a) << 3) | s); + } + } + else if (mul_or == 1) + { + const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a; + + for (uint32_t s = 0; s < 8; s++) + { + const int v0 = g_eac_modifier_table[T0][s] + a0; + const int v1 = g_eac_modifier_table[T1][s] + a1; + const int v2 = g_eac_modifier_table[T2][s] + a2; + const int v3 = g_eac_modifier_table[T3][s] + a3; + + l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s); + l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s); + l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s); + l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s); + } + } + else + { + const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a; + + for (uint32_t s = 0; s < 8; s++) + { + const int v0 = mul[0] * g_eac_modifier_table[T0][s] + a0; + const int v1 = mul[1] * 
g_eac_modifier_table[T1][s] + a1; + const int v2 = mul[2] * g_eac_modifier_table[T2][s] + a2; + const int v3 = mul[3] * g_eac_modifier_table[T3][s] + a3; + + l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s); + l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s); + l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s); + l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s); + } + } + + sels[0][i] = l0 & 7; + sels[1][i] = l1 & 7; + sels[2][i] = l2 & 7; + sels[3][i] = l3 & 7; + + total_err[0] += basisu::square(l0 >> 3); + total_err[1] += basisu::square(l1 >> 3); + total_err[2] += basisu::square(l2 >> 3); + total_err[3] += basisu::square(l3 >> 3); + } + + uint32_t min_err = total_err[0], min_index = 0; + for (uint32_t i = 1; i < 4; i++) + { + if (total_err[i] < min_err) + { + min_err = total_err[i]; + min_index = i; + } + } + + blk.m_base = base[min_index]; + blk.m_multiplier = mul[min_index]; + blk.m_table = s_tables[min_index]; + + uint64_t packed_sels = 0; + const uint8_t* pSels = &sels[min_index][0]; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= (static_cast(pSels[i]) << s_etc2_eac_bit_ofs[i]); + + blk.set_selector_bits(packed_sels); + } + + // Checks all 16 tables. Around ~2 dB better vs. pack_eac(), ~1.2 dB less than near-optimal. 
+ static void pack_eac_high_quality(eac_block& blk, const uint8_t* pPixels, uint32_t stride) + { + uint32_t min_alpha = 255, max_alpha = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t a = pPixels[i * stride]; + if (a < min_alpha) min_alpha = a; + if (a > max_alpha) max_alpha = a; + } + + if (min_alpha == max_alpha) + { + pack_eac_solid_block(blk, min_alpha); + return; + } + + const uint32_t alpha_range = max_alpha - min_alpha; + + const uint32_t SINGLE_TABLE_THRESH = 5; + if (alpha_range <= SINGLE_TABLE_THRESH) + { + // If alpha_range <= 5 table 13 is lossless + int base = clamp255((int)max_alpha - 2); + + blk.m_base = base; + blk.m_multiplier = 1; + blk.m_table = 13; + + base -= 3; + + uint64_t packed_sels = 0; + for (uint32_t i = 0; i < 16; i++) + { + const int a = pPixels[i * stride]; + + static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 }; + + int sel = a - base; + assert(sel >= 0 && sel <= 5); + + packed_sels |= (static_cast(s_sels[sel]) << s_etc2_eac_bit_ofs[i]); + } + + blk.set_selector_bits(packed_sels); + + return; + } + + int base[16], mul[16]; + for (uint32_t table = 0; table < 16; table++) + { + const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]); + + base[table] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range))); + mul[table] = clampi((int)roundf(alpha_range / range), 1, 15); + } + + uint32_t total_err[16]; + memset(total_err, 0, sizeof(total_err)); + + uint8_t sels[16][16]; + + for (uint32_t table = 0; table < 16; table++) + { + const int8_t* pTable = &g_eac_modifier_table[table][0]; + const int m = mul[table], b = base[table]; + + uint32_t prev_l = 0, prev_a = UINT32_MAX; + + for (uint32_t i = 0; i < 16; i++) + { + const int a = pPixels[i * stride]; + + if ((uint32_t)a == prev_a) + { + sels[table][i] = prev_l & 7; + total_err[table] += 
basisu::square(prev_l >> 3); + } + else + { + uint32_t l = basisu::iabs(clamp255(m * pTable[0] + b) - a) << 3; + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[1] + b) - a) << 3) | 1); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[2] + b) - a) << 3) | 2); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[3] + b) - a) << 3) | 3); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[4] + b) - a) << 3) | 4); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[5] + b) - a) << 3) | 5); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[6] + b) - a) << 3) | 6); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[7] + b) - a) << 3) | 7); + + sels[table][i] = l & 7; + total_err[table] += basisu::square(l >> 3); + + prev_l = l; + prev_a = a; + } + } + } + + uint32_t min_err = total_err[0], min_index = 0; + for (uint32_t i = 1; i < 16; i++) + { + if (total_err[i] < min_err) + { + min_err = total_err[i]; + min_index = i; + } + } + + blk.m_base = base[min_index]; + blk.m_multiplier = mul[min_index]; + blk.m_table = min_index; + + uint64_t packed_sels = 0; + const uint8_t* pSels = &sels[min_index][0]; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= (static_cast(pSels[i]) << s_etc2_eac_bit_ofs[i]); + + blk.set_selector_bits(packed_sels); + } + + bool transcode_uastc_to_etc2_eac_r11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + pack_eac_solid_block(*static_cast(pDst), unpacked_src_blk.m_solid_color.c[chan0]); + return true; + } + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + if (chan0 == 3) + transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, pDst); + 
else + (high_quality ? pack_eac_high_quality : pack_eac)(*static_cast(pDst), &block_pixels[0][0].c[chan0], sizeof(color32)); + + return true; + } + + bool transcode_uastc_to_etc2_eac_rg11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + pack_eac_solid_block(static_cast(pDst)[0], unpacked_src_blk.m_solid_color.c[chan0]); + pack_eac_solid_block(static_cast(pDst)[1], unpacked_src_blk.m_solid_color.c[chan1]); + return true; + } + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + if (chan0 == 3) + transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast(pDst)[0]); + else + (high_quality ? pack_eac_high_quality : pack_eac)(static_cast(pDst)[0], &block_pixels[0][0].c[chan0], sizeof(color32)); + + if (chan1 == 3) + transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast(pDst)[1]); + else + (high_quality ? 
pack_eac_high_quality : pack_eac)(static_cast(pDst)[1], &block_pixels[0][0].c[chan1], sizeof(color32)); + return true; + } + + // PVRTC1 + static void fixup_pvrtc1_4_modulation_rgb( + const uastc_block* pSrc_blocks, + const uint32_t* pPVRTC_endpoints, + void* pDst_blocks, + uint32_t num_blocks_x, uint32_t num_blocks_y, bool from_alpha) + { + const uint32_t x_mask = num_blocks_x - 1; + const uint32_t y_mask = num_blocks_y - 1; + const uint32_t x_bits = basisu::total_bits(x_mask); + const uint32_t y_bits = basisu::total_bits(y_mask); + const uint32_t min_bits = basisu::minimum(x_bits, y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; + + uint32_t block_index = 0; + + // really 3x3 + int e0[4][4], e1[4][4]; + + for (int y = 0; y < static_cast(num_blocks_y); y++) + { + const uint32_t* pE_rows[3]; + + for (int ey = 0; ey < 3; ey++) + { + int by = y + ey - 1; + + const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; + + pE_rows[ey] = pE; + + for (int ex = 0; ex < 3; ex++) + { + int bx = 0 + ex - 1; + + const uint32_t e = pE[bx & x_mask]; + + e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; + e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; + } + } + + const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF]; + + for (int x = 0; x < static_cast(num_blocks_x); x++, block_index++) + { + const uastc_block& src_block = pSrc_blocks[block_index]; + + color32 block_pixels[4][4]; + unpack_uastc(src_block, &block_pixels[0][0], false); + if (from_alpha) + { + // Just set RGB to alpha to avoid adding complexity below. 
+ for (uint32_t i = 0; i < 16; i++) + { + const uint8_t a = ((color32*)block_pixels)[i].a; + ((color32*)block_pixels)[i].set(a, a, a, 255); + } + } + + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); + + uint32_t swizzled = x_swizzle | y_swizzle; + if (num_blocks_x != num_blocks_y) + { + swizzled &= swizzle_mask; + + if (num_blocks_x > num_blocks_y) + swizzled |= ((x >> min_bits) << (min_bits * 2)); + else + swizzled |= ((y >> min_bits) << (min_bits * 2)); + } + + pvrtc4_block* pDst_block = static_cast(pDst_blocks) + swizzled; + pDst_block->m_endpoints = pPVRTC_endpoints[block_index]; + + { + const uint32_t ex = 2; + int bx = x + ex - 1; + bx &= x_mask; + +#define DO_ROW(ey) \ + { \ + const uint32_t e = pE_rows[ey][bx]; \ + e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; \ + e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; \ + } + + DO_ROW(0); + DO_ROW(1); + DO_ROW(2); +#undef DO_ROW + } + + uint32_t mod = 0; + +#define DO_PIX(lx, ly, w0, w1, w2, w3) \ + { \ + int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \ + int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \ + int cl = (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b) * 16; \ + int dl = cb_l - ca_l; \ + int vl = cl - ca_l; \ + int p = vl * 16; \ + if (ca_l > cb_l) { p = -p; dl = -dl; } \ + uint32_t m = 0; \ + if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \ + if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \ + if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \ + mod |= m; \ + } + + { + const uint32_t ex = 0, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 0, 4, 4, 4, 4); + DO_PIX(1, 0, 2, 6, 2, 6); + DO_PIX(0, 1, 2, 2, 6, 6); + DO_PIX(1, 1, 1, 3, 3, 9); + } + + { + const uint32_t ex = 1, ey = 0; + const int a0 = 
e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 0, 8, 0, 8, 0); + DO_PIX(3, 0, 6, 2, 6, 2); + DO_PIX(2, 1, 4, 0, 12, 0); + DO_PIX(3, 1, 3, 1, 9, 3); + } + + { + const uint32_t ex = 0, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 2, 8, 8, 0, 0); + DO_PIX(1, 2, 4, 12, 0, 0); + DO_PIX(0, 3, 6, 6, 2, 2); + DO_PIX(1, 3, 3, 9, 1, 3); + } + + { + const uint32_t ex = 1, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 2, 16, 0, 0, 0); + DO_PIX(3, 2, 12, 4, 0, 0); + DO_PIX(2, 3, 12, 0, 4, 0); + DO_PIX(3, 3, 9, 3, 3, 1); + } +#undef DO_PIX + + pDst_block->m_modulation = mod; + + e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0]; + e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1]; + e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2]; + + e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0]; + e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1]; + e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2]; + + } // x + } // y + } + + static void fixup_pvrtc1_4_modulation_rgba( + const uastc_block* pSrc_blocks, + const uint32_t* pPVRTC_endpoints, + void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y) + { + const uint32_t x_mask = num_blocks_x - 1; + const uint32_t y_mask = num_blocks_y - 1; + const uint32_t x_bits = basisu::total_bits(x_mask); + const uint32_t y_bits = basisu::total_bits(y_mask); + const uint32_t min_bits = basisu::minimum(x_bits, y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; + + uint32_t block_index = 0; + + // really 3x3 + int e0[4][4], e1[4][4]; + + for (int y = 0; y < 
static_cast(num_blocks_y); y++) + { + const uint32_t* pE_rows[3]; + + for (int ey = 0; ey < 3; ey++) + { + int by = y + ey - 1; + + const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; + + pE_rows[ey] = pE; + + for (int ex = 0; ex < 3; ex++) + { + int bx = 0 + ex - 1; + + const uint32_t e = pE[bx & x_mask]; + + e0[ex][ey] = get_endpoint_l8(e, 0); + e1[ex][ey] = get_endpoint_l8(e, 1); + } + } + + const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF]; + + for (int x = 0; x < static_cast(num_blocks_x); x++, block_index++) + { + const uastc_block& src_block = pSrc_blocks[block_index]; + + color32 block_pixels[4][4]; + unpack_uastc(src_block, &block_pixels[0][0], false); + + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); + + uint32_t swizzled = x_swizzle | y_swizzle; + if (num_blocks_x != num_blocks_y) + { + swizzled &= swizzle_mask; + + if (num_blocks_x > num_blocks_y) + swizzled |= ((x >> min_bits) << (min_bits * 2)); + else + swizzled |= ((y >> min_bits) << (min_bits * 2)); + } + + pvrtc4_block* pDst_block = static_cast(pDst_blocks) + swizzled; + pDst_block->m_endpoints = pPVRTC_endpoints[block_index]; + + { + const uint32_t ex = 2; + int bx = x + ex - 1; + bx &= x_mask; + +#define DO_ROW(ey) \ + { \ + const uint32_t e = pE_rows[ey][bx]; \ + e0[ex][ey] = get_endpoint_l8(e, 0); \ + e1[ex][ey] = get_endpoint_l8(e, 1); \ + } + + DO_ROW(0); + DO_ROW(1); + DO_ROW(2); +#undef DO_ROW + } + + uint32_t mod = 0; + +#define DO_PIX(lx, ly, w0, w1, w2, w3) \ + { \ + int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \ + int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \ + int cl = 16 * (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b + block_pixels[ly][lx].a); \ + int dl = cb_l - ca_l; \ + int vl = cl - ca_l; \ + int p = vl * 16; \ + if (ca_l > cb_l) { p = -p; dl = -dl; } \ + uint32_t m = 0; \ + if (p > 3 * dl) m = (uint32_t)(1 << 
((ly) * 8 + (lx) * 2)); \ + if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \ + if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \ + mod |= m; \ + } + + { + const uint32_t ex = 0, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 0, 4, 4, 4, 4); + DO_PIX(1, 0, 2, 6, 2, 6); + DO_PIX(0, 1, 2, 2, 6, 6); + DO_PIX(1, 1, 1, 3, 3, 9); + } + + { + const uint32_t ex = 1, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 0, 8, 0, 8, 0); + DO_PIX(3, 0, 6, 2, 6, 2); + DO_PIX(2, 1, 4, 0, 12, 0); + DO_PIX(3, 1, 3, 1, 9, 3); + } + + { + const uint32_t ex = 0, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 2, 8, 8, 0, 0); + DO_PIX(1, 2, 4, 12, 0, 0); + DO_PIX(0, 3, 6, 6, 2, 2); + DO_PIX(1, 3, 3, 9, 1, 3); + } + + { + const uint32_t ex = 1, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 2, 16, 0, 0, 0); + DO_PIX(3, 2, 12, 4, 0, 0); + DO_PIX(2, 3, 12, 0, 4, 0); + DO_PIX(3, 3, 9, 3, 3, 1); + } +#undef DO_PIX + + pDst_block->m_modulation = mod; + + e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0]; + e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1]; + e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2]; + + e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0]; + e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1]; + e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2]; + + } // x + } // y + } + + bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* 
pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha) + { + BASISU_NOTE_UNUSED(high_quality); + + if ((!num_blocks_x) || (!num_blocks_y)) + return false; + + const uint32_t width = num_blocks_x * 4; + const uint32_t height = num_blocks_y * 4; + if (!basisu::is_pow2(width) || !basisu::is_pow2(height)) + return false; + + basisu::vector temp_endpoints(num_blocks_x * num_blocks_y); + + for (uint32_t y = 0; y < num_blocks_y; y++) + { + for (uint32_t x = 0; x < num_blocks_x; x++) + { + color32 block_pixels[16]; + if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) + return false; + + // Get block's RGB bounding box + color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); + + if (from_alpha) + { + uint32_t low_a = 255, high_a = 0; + for (uint32_t i = 0; i < 16; i++) + { + low_a = basisu::minimum(low_a, block_pixels[i].a); + high_a = basisu::maximum(high_a, block_pixels[i].a); + } + low_color.set(low_a, low_a, low_a, 255); + high_color.set(high_a, high_a, high_a, 255); + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + low_color = color32::comp_min(low_color, block_pixels[i]); + high_color = color32::comp_max(high_color, block_pixels[i]); + } + } + + // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates. 
+ pvrtc4_block temp; + temp.set_opaque_endpoint_floor(0, low_color); + temp.set_opaque_endpoint_ceil(1, high_color); + + temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints; + } + } + + fixup_pvrtc1_4_modulation_rgb(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y, from_alpha); + + return true; + } + + bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality) + { + BASISU_NOTE_UNUSED(high_quality); + + if ((!num_blocks_x) || (!num_blocks_y)) + return false; + + const uint32_t width = num_blocks_x * 4; + const uint32_t height = num_blocks_y * 4; + if (!basisu::is_pow2(width) || !basisu::is_pow2(height)) + return false; + + basisu::vector temp_endpoints(num_blocks_x * num_blocks_y); + + for (uint32_t y = 0; y < num_blocks_y; y++) + { + for (uint32_t x = 0; x < num_blocks_x; x++) + { + color32 block_pixels[16]; + if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) + return false; + + // Get block's RGBA bounding box + color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); + + for (uint32_t i = 0; i < 16; i++) + { + low_color = color32::comp_min(low_color, block_pixels[i]); + high_color = color32::comp_max(high_color, block_pixels[i]); + } + + // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates. 
+ pvrtc4_block temp; + temp.set_endpoint_floor(0, low_color); + temp.set_endpoint_ceil(1, high_color); + + temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints; + } + } + + fixup_pvrtc1_4_modulation_rgba(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y); + + return true; + } + + void uastc_init() + { + for (uint32_t range = 0; range < BC7ENC_TOTAL_ASTC_RANGES; range++) + { + if (!astc_is_valid_endpoint_range(range)) + continue; + + const uint32_t levels = astc_get_levels(range); + + uint32_t vals[256]; + for (uint32_t i = 0; i < levels; i++) + vals[i] = (unquant_astc_endpoint_val(i, range) << 8) | i; + + std::sort(vals, vals + levels); + + for (uint32_t i = 0; i < levels; i++) + { + const uint32_t order = vals[i] & 0xFF; + const uint32_t unq = vals[i] >> 8; + + g_astc_unquant[range][order].m_unquant = (uint8_t)unq; + g_astc_unquant[range][order].m_index = (uint8_t)i; + + } // i + } + + // TODO: Precompute? + // BC7 777.1 + for (int c = 0; c < 256; c++) + { + for (uint32_t lp = 0; lp < 2; lp++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + + for (uint32_t l = 0; l < 128; l++) + { + const uint32_t low = (l << 1) | lp; + + for (uint32_t h = 0; h < 128; h++) + { + const uint32_t high = (h << 1) | lp; + + const int k = (low * (64 - g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX]) + high * g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX] + 32) >> 6; + + const int err = (k - c) * (k - c); + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_bc7_mode_6_optimal_endpoints[c][lp] = best; + } // lp + + } // c + + // BC7 777 + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + + for (uint32_t l = 0; l < 128; l++) + { + const uint32_t low = (l << 1) | (l >> 6); + + for (uint32_t h = 0; h < 128; h++) + { + const uint32_t high = (h << 1) | (h >> 6); + + const int k = (low * (64 - 
g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX]) + high * g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX] + 32) >> 6; + + const int err = (k - c) * (k - c); + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_bc7_mode_5_optimal_endpoints[c] = best; + + } // c + } + +#endif // #if BASISD_SUPPORT_UASTC + +// ------------------------------------------------------------------------------------------------------ +// KTX2 +// ------------------------------------------------------------------------------------------------------ + +#if BASISD_SUPPORT_KTX2 + const uint8_t g_ktx2_file_identifier[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; + + ktx2_transcoder::ktx2_transcoder() : + m_etc1s_transcoder() + { + clear(); + } + + void ktx2_transcoder::clear() + { + m_pData = nullptr; + m_data_size = 0; + + memset((void *)&m_header, 0, sizeof(m_header)); + m_levels.clear(); + m_dfd.clear(); + m_key_values.clear(); + memset((void *)&m_etc1s_header, 0, sizeof(m_etc1s_header)); + m_etc1s_image_descs.clear(); + m_astc_6x6_intermediate_image_descs.clear(); + + m_format = basist::basis_tex_format::cETC1S; + + m_dfd_color_model = 0; + m_dfd_color_prims = KTX2_DF_PRIMARIES_UNSPECIFIED; + m_dfd_transfer_func = 0; + m_dfd_flags = 0; + m_dfd_samples = 0; + m_dfd_chan0 = KTX2_DF_CHANNEL_UASTC_RGB; + m_dfd_chan1 = KTX2_DF_CHANNEL_UASTC_RGB; + + m_etc1s_transcoder.clear(); + + m_def_transcoder_state.clear(); + + m_has_alpha = false; + m_is_video = false; + m_ldr_hdr_upconversion_nit_multiplier = 0.0f; + } + + bool ktx2_transcoder::init(const void* pData, uint32_t data_size) + { + clear(); + + if (!pData) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: pData is nullptr\n"); + assert(0); + return false; + } + + if (data_size <= sizeof(ktx2_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is impossibly too small to be a valid KTX2 file\n"); + return false; + } + + 
if (memcmp(pData, g_ktx2_file_identifier, sizeof(g_ktx2_file_identifier)) != 0) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file identifier is not present\n"); + return false; + } + + m_pData = static_cast(pData); + m_data_size = data_size; + + memcpy((void *)&m_header, pData, sizeof(m_header)); + + // Check for supported VK formats. We may also need to parse the DFD. + if ((m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) && + (m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK) && + (m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC LDR/HDR format\n"); + return false; + } + + // 3.3: "When format is VK_FORMAT_UNDEFINED, typeSize must equal 1." + if (m_header.m_type_size != 1) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid type_size\n"); + return false; + } + + // We only currently support 2D textures (plain, cubemapped, or texture array), which is by far the most common use case. + // The BasisU library does not support 1D or 3D textures at all. + if ((m_header.m_pixel_width < 1) || (m_header.m_pixel_height < 1) || (m_header.m_pixel_depth > 0)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Only 2D or cubemap textures are supported\n"); + return false; + } + + // Face count must be 1 or 6 + if ((m_header.m_face_count != 1) && (m_header.m_face_count != 6)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid face count, file is corrupted or invalid\n"); + return false; + } + + if (m_header.m_face_count > 1) + { + // 3.4: Make sure cubemaps are square. 
+ if (m_header.m_pixel_width != m_header.m_pixel_height) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Cubemap is not square\n"); + return false; + } + } + + // 3.7 levelCount: "levelCount=0 is allowed, except for block-compressed formats" + if (m_header.m_level_count < 1) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level count\n"); + return false; + } + + // Sanity check the level count. + if (m_header.m_level_count > KTX2_MAX_SUPPORTED_LEVEL_COUNT) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Too many levels or file is corrupted or invalid\n"); + return false; + } + + if (m_header.m_supercompression_scheme > KTX2_SS_ZSTANDARD) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid/unsupported supercompression or file is corrupted or invalid\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + { +#if 0 + if (m_header.m_sgd_byte_length <= sizeof(ktx2_etc1s_global_data_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data is too small\n"); + return false; + } +#endif + + if (m_header.m_sgd_byte_offset.get_uint64() < sizeof(ktx2_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset is too low\n"); + return false; + } + + if (m_header.m_sgd_byte_offset.get_uint64() + m_header.m_sgd_byte_length.get_uint64() > m_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset and/or length is too high\n"); + return false; + } + } + + if (!m_levels.try_resize(m_header.m_level_count)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n"); + return false; + } + + const uint32_t level_index_size_in_bytes = basisu::maximum(1U, (uint32_t)m_header.m_level_count) * sizeof(ktx2_level_index); + + if ((sizeof(ktx2_header) + level_index_size_in_bytes) > m_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is too small (can't read level index array)\n"); + return false; + } + + memcpy((void 
*)&m_levels[0], m_pData + sizeof(ktx2_header), level_index_size_in_bytes); + + // Sanity check the level offsets and byte sizes + for (uint32_t i = 0; i < m_levels.size(); i++) + { + if (m_levels[i].m_byte_offset.get_uint64() < sizeof(ktx2_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too low)\n"); + return false; + } + + if (!m_levels[i].m_byte_length.get_uint64()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level byte length\n"); + } + + if ((m_levels[i].m_byte_offset.get_uint64() + m_levels[i].m_byte_length.get_uint64()) > m_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset and/or length\n"); + return false; + } + + const uint64_t MAX_SANE_LEVEL_UNCOMP_SIZE = 2048ULL * 1024ULL * 1024ULL; + + if (m_levels[i].m_uncompressed_byte_length.get_uint64() >= MAX_SANE_LEVEL_UNCOMP_SIZE) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too large)\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + { + if (m_levels[i].m_uncompressed_byte_length.get_uint64()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (0)\n"); + return false; + } + } + else if (m_header.m_supercompression_scheme >= KTX2_SS_ZSTANDARD) + { + if (!m_levels[i].m_uncompressed_byte_length.get_uint64()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (1)\n"); + return false; + } + } + } + + const uint32_t DFD_MINIMUM_SIZE = 44, DFD_MAXIMUM_SIZE = 60; + if ((m_header.m_dfd_byte_length != DFD_MINIMUM_SIZE) && (m_header.m_dfd_byte_length != DFD_MAXIMUM_SIZE)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD size\n"); + return false; + } + + if (((m_header.m_dfd_byte_offset + m_header.m_dfd_byte_length) > m_data_size) || (m_header.m_dfd_byte_offset < sizeof(ktx2_header))) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD offset and/or length\n"); + return false; + } + + const uint8_t* pDFD = 
m_pData + m_header.m_dfd_byte_offset; + + if (!m_dfd.try_resize(m_header.m_dfd_byte_length)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n"); + return false; + } + + memcpy(m_dfd.data(), pDFD, m_header.m_dfd_byte_length); + + // This is all hard coded for only ETC1S and UASTC. + uint32_t dfd_total_size = basisu::read_le_dword(pDFD); + + // 3.10.3: Sanity check + if (dfd_total_size != m_header.m_dfd_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (1)\n"); + return false; + } + + // 3.10.3: More sanity checking + if (m_header.m_kvd_byte_length) + { + if (dfd_total_size != m_header.m_kvd_byte_offset - m_header.m_dfd_byte_offset) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (2)\n"); + return false; + } + } + + const uint32_t dfd_bits = basisu::read_le_dword(pDFD + 3 * sizeof(uint32_t)); + const uint32_t sample_channel0 = basisu::read_le_dword(pDFD + 7 * sizeof(uint32_t)); + + m_dfd_color_model = dfd_bits & 255; + m_dfd_color_prims = (ktx2_df_color_primaries)((dfd_bits >> 8) & 255); + m_dfd_transfer_func = (dfd_bits >> 16) & 255; + m_dfd_flags = (dfd_bits >> 24) & 255; + + // See 3.10.1.Restrictions + if ((m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_LINEAR) && (m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_SRGB)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD transfer function\n"); + return false; + } + + if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ETC1S) + { + if (m_header.m_vk_format != basist::KTX2_VK_FORMAT_UNDEFINED) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid header vkFormat\n"); + return false; + } + + m_format = basist::basis_tex_format::cETC1S; + + // 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD's sample count." + // If m_has_alpha is true it may be 2-channel RRRG or 4-channel RGBA, but we let the caller deal with that. + m_has_alpha = (m_header.m_dfd_byte_length == 60); + + m_dfd_samples = m_has_alpha ? 
2 : 1; + m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); + + if (m_has_alpha) + { + const uint32_t sample_channel1 = basisu::read_le_dword(pDFD + 11 * sizeof(uint32_t)); + m_dfd_chan1 = (ktx2_df_channel_id)((sample_channel1 >> 24) & 15); + } + } + else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_LDR_4X4) + { + if (m_header.m_vk_format != basist::KTX2_VK_FORMAT_UNDEFINED) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid header vkFormat\n"); + return false; + } + + m_format = basist::basis_tex_format::cUASTC4x4; + + m_dfd_samples = 1; + m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); + + // We're assuming "DATA" means RGBA so it has alpha. + m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG); + } + else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_HDR_4X4) + { + // UASTC HDR 4x4 is standard ASTC HDR 4x4 texture data. Check the header's vkFormat. + if (m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid header vkFormat\n"); + return false; + } + + m_format = basist::basis_tex_format::cUASTC_HDR_4x4; + + m_dfd_samples = 1; + m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); + + // We're assuming "DATA" means RGBA so it has alpha. + // [11/26/2024] - changed to always false for now + m_has_alpha = false;// (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG); + } + else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ASTC) + { + // The DFD indicates plain ASTC texture data. We only support ASTC HDR 6x6 - check the header's vkFormat. 
+ if (m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD color model is ASTC, but the header's vkFormat isn't KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK\n"); + return false; + } + + m_format = basist::basis_tex_format::cASTC_HDR_6x6; + + m_dfd_samples = 1; + m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); + + m_has_alpha = false; + } + else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE) + { + // Custom variable block size ASTC HDR 6x6 texture data. + if (m_header.m_vk_format != basist::KTX2_VK_FORMAT_UNDEFINED) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid header vkFormat\n"); + return false; + } + + m_format = basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE; + + m_dfd_samples = 1; + m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); + + m_has_alpha = false; + } + else + { + // Unsupported DFD color model. + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD color model\n"); + return false; + } + + if (!read_key_values()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: read_key_values() failed\n"); + return false; + } + + // Check for a KTXanimData key + for (uint32_t i = 0; i < m_key_values.size(); i++) + { + if (strcmp(reinterpret_cast<const char*>(m_key_values[i].m_key.data()), "KTXanimData") == 0) + { + m_is_video = true; + break; + } + } + + m_ldr_hdr_upconversion_nit_multiplier = 0.0f; + + for (uint32_t i = 0; i < m_key_values.size(); i++) + { + if (strcmp(reinterpret_cast<const char*>(m_key_values[i].m_key.data()), "LDRUpconversionMultiplier") == 0) + { + m_ldr_hdr_upconversion_nit_multiplier = (float)atof(reinterpret_cast<const char*>(m_key_values[i].m_value.data())); + + if (std::isnan(m_ldr_hdr_upconversion_nit_multiplier) || std::isinf(m_ldr_hdr_upconversion_nit_multiplier) || (m_ldr_hdr_upconversion_nit_multiplier < 0.0f)) + m_ldr_hdr_upconversion_nit_multiplier = 0; + + break; + } + } + + return true; + } + + uint32_t
ktx2_transcoder::get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const + { + const uint32_t etc1s_image_index = + (level_index * basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count) + + layer_index * m_header.m_face_count + + face_index; + + if (etc1s_image_index >= get_etc1s_image_descs().size()) + { + assert(0); + return 0; + } + + return get_etc1s_image_descs()[etc1s_image_index].m_image_flags; + } + + const basisu::uint8_vec* ktx2_transcoder::find_key(const std::string& key_name) const + { + for (uint32_t i = 0; i < m_key_values.size(); i++) + if (strcmp((const char *)m_key_values[i].m_key.data(), key_name.c_str()) == 0) + return &m_key_values[i].m_value; + + return nullptr; + } + + bool ktx2_transcoder::start_transcoding() + { + if (!m_pData) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: Must call init() first\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + { + if (m_format == basis_tex_format::cETC1S) + { + // Check if we've already decompressed the ETC1S global data. If so don't unpack it again. + if (!m_etc1s_transcoder.get_endpoints().empty()) + return true; + + if (!decompress_etc1s_global_data()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: decompress_etc1s_global_data() failed\n"); + return false; + } + + if (!m_is_video) + { + // See if there are any P-frames. If so it must be a video, even if there wasn't a KTXanimData key. + // Video cannot be a cubemap, and it must be a texture array. 
+ if ((m_header.m_face_count == 1) && (m_header.m_layer_count > 1)) + { + for (uint32_t i = 0; i < m_etc1s_image_descs.size(); i++) + { + if (m_etc1s_image_descs[i].m_image_flags & KTX2_IMAGE_IS_P_FRAME) + { + m_is_video = true; + break; + } + } + } + } + } + else if (m_format == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE) + { + if (m_astc_6x6_intermediate_image_descs.size()) + return true; + + if (!read_astc_6x6_hdr_intermediate_global_data()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: read_astc_6x6_hdr_intermediate_global_data() failed\n"); + return false; + } + } + else + { + BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: Invalid supercompression scheme and/or format\n"); + return false; + } + } + else if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) + { +#if !BASISD_SUPPORT_KTX2_ZSTD + BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: File uses zstd supercompression, but zstd support was not enabled at compilation time (BASISD_SUPPORT_KTX2_ZSTD == 0)\n"); + return false; +#endif + } + + return true; + } + + bool ktx2_transcoder::get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const + { + if (level_index >= m_levels.size()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: level_index >= m_levels.size()\n"); + return false; + } + + if (m_header.m_face_count > 1) + { + if (face_index >= 6) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index >= 6\n"); + return false; + } + } + else if (face_index != 0) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index != 0\n"); + return false; + } + + if (layer_index >= basisu::maximum(m_header.m_layer_count, 1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: layer_index >= maximum(m_header.m_layer_count, 1)\n"); + return false; + } + + const uint32_t level_width = basisu::maximum(m_header.m_pixel_width >> level_index, 1); + const 
uint32_t level_height = basisu::maximum(m_header.m_pixel_height >> level_index, 1); + + const uint32_t block_width = get_block_width(); + const uint32_t block_height = get_block_height(); + + const uint32_t num_blocks_x = (level_width + block_width - 1) / block_width; + const uint32_t num_blocks_y = (level_height + block_height - 1) / block_height; + + level_info.m_face_index = face_index; + level_info.m_layer_index = layer_index; + level_info.m_level_index = level_index; + level_info.m_orig_width = level_width; + level_info.m_orig_height = level_height; + level_info.m_width = num_blocks_x * block_width; + level_info.m_height = num_blocks_y * block_height; + level_info.m_block_width = block_width; + level_info.m_block_height = block_height; + level_info.m_num_blocks_x = num_blocks_x; + level_info.m_num_blocks_y = num_blocks_y; + level_info.m_total_blocks = num_blocks_x * num_blocks_y; + level_info.m_alpha_flag = m_has_alpha; + level_info.m_iframe_flag = false; + + if (m_etc1s_image_descs.size()) + { + const uint32_t etc1s_image_index = + (level_index * basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count) + + layer_index * m_header.m_face_count + + face_index; + + level_info.m_iframe_flag = (m_etc1s_image_descs[etc1s_image_index].m_image_flags & KTX2_IMAGE_IS_P_FRAME) == 0; + } + + return true; + } + + bool ktx2_transcoder::transcode_image_level( + uint32_t level_index, uint32_t layer_index, uint32_t face_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + basist::transcoder_texture_format fmt, + uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, int channel0, int channel1, + ktx2_transcoder_state* pState) + { + if (!m_pData) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Must call init() first\n"); + return false; + } + + if (!pState) + pState = &m_def_transcoder_state; + + if (level_index >= m_levels.size()) + { + 
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: level_index >= m_levels.size()\n"); + return false; + } + + if (m_header.m_face_count > 1) + { + if (face_index >= 6) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index >= 6\n"); + return false; + } + } + else if (face_index != 0) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index != 0\n"); + return false; + } + + if (layer_index >= basisu::maximum(m_header.m_layer_count, 1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: layer_index >= maximum(m_header.m_layer_count, 1)\n"); + return false; + } + + const uint8_t* pComp_level_data = m_pData + m_levels[level_index].m_byte_offset.get_uint64(); + uint64_t comp_level_data_size = m_levels[level_index].m_byte_length.get_uint64(); + + const uint8_t* pUncomp_level_data = pComp_level_data; + uint64_t uncomp_level_data_size = comp_level_data_size; + + if (uncomp_level_data_size > UINT32_MAX) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_level_data_size > UINT32_MAX\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) + { + // Check if we've already decompressed this level's supercompressed data. + if ((int)level_index != pState->m_uncomp_data_level_index) + { + // Uncompress the entire level's supercompressed data. 
+ if (!decompress_level_data(level_index, pState->m_level_uncomp_data)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: decompress_level_data() failed\n"); + return false; + } + pState->m_uncomp_data_level_index = level_index; + } + + pUncomp_level_data = pState->m_level_uncomp_data.data(); + uncomp_level_data_size = pState->m_level_uncomp_data.size(); + } + + const uint32_t level_width = basisu::maximum(m_header.m_pixel_width >> level_index, 1); + const uint32_t level_height = basisu::maximum(m_header.m_pixel_height >> level_index, 1); + const uint32_t num_blocks4_x = (level_width + 3) >> 2; + const uint32_t num_blocks4_y = (level_height + 3) >> 2; + + if (m_format == basist::basis_tex_format::cETC1S) + { + // Ensure start_transcoding() was called. + if (m_etc1s_transcoder.get_endpoints().empty()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: must call start_transcoding() first\n"); + return false; + } + + const uint32_t etc1s_image_index = + (level_index * basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count) + + layer_index * m_header.m_face_count + + face_index; + + // Sanity check + if (etc1s_image_index >= m_etc1s_image_descs.size()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: etc1s_image_index >= m_etc1s_image_descs.size()\n"); + assert(0); + return false; + } + + const ktx2_etc1s_image_desc& image_desc = m_etc1s_image_descs[etc1s_image_index]; + + if (!m_etc1s_transcoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, m_pData, m_data_size, + num_blocks4_x, num_blocks4_y, level_width, level_height, + level_index, + m_levels[level_index].m_byte_offset.get_uint64() + image_desc.m_rgb_slice_byte_offset, image_desc.m_rgb_slice_byte_length, + image_desc.m_alpha_slice_byte_length ? 
(m_levels[level_index].m_byte_offset.get_uint64() + image_desc.m_alpha_slice_byte_offset) : 0, image_desc.m_alpha_slice_byte_length, + decode_flags, m_has_alpha, + m_is_video, output_row_pitch_in_blocks_or_pixels, &pState->m_transcoder_state, output_rows_in_pixels)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ETC1S transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + return false; + } + } + else if (m_format == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE) + { + if (!m_astc_6x6_intermediate_image_descs.size()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: must call start_transcoding() first\n"); + return false; + } + + const uint32_t num_blocks6_x = (level_width + 5) / 6; + const uint32_t num_blocks6_y = (level_height + 5) / 6; + + const uint32_t image_index = + (level_index * basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count) + + layer_index * m_header.m_face_count + + face_index; + + // Sanity check + if (image_index >= m_astc_6x6_intermediate_image_descs.size()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Invalid image_index\n"); + assert(0); + return false; + } + + const ktx2_astc_hdr_6x6_intermediate_image_desc& image_desc = m_astc_6x6_intermediate_image_descs[image_index]; + + if (!m_astc_hdr_6x6_intermediate_transcoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + m_pData, m_data_size, num_blocks6_x, num_blocks6_y, level_width, level_height, level_index, + m_levels[level_index].m_byte_offset.get_uint64() + image_desc.m_rgb_slice_byte_offset, image_desc.m_rgb_slice_byte_length, + decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ASTC 6x6 HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + return false; + } + } + else if (m_format 
== basist::basis_tex_format::cASTC_HDR_6x6) + { + const uint32_t num_blocks6_x = (level_width + 5) / 6; + const uint32_t num_blocks6_y = (level_height + 5) / 6; + + // Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices. + assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length.get_uint64()); + const uint32_t total_2D_image_size = num_blocks6_x * num_blocks6_y * sizeof(astc_helpers::astc_block); + + const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size; + + // Sanity checks + if (uncomp_ofs >= uncomp_level_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_ofs >= total_2D_image_size\n"); + return false; + } + + if ((uncomp_level_data_size - uncomp_ofs) < total_2D_image_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n"); + return false; + } + + if (!m_astc_hdr_6x6_transcoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks6_x, num_blocks6_y, level_width, level_height, level_index, + 0, (uint32_t)total_2D_image_size, + decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ASTC 6x6 HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + return false; + } + } + else if ((m_format == basist::basis_tex_format::cUASTC4x4) || + (m_format == basist::basis_tex_format::cUASTC_HDR_4x4)) + { + // Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices. 
+ assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length.get_uint64()); + const uint32_t total_2D_image_size = num_blocks4_x * num_blocks4_y * KTX2_UASTC_BLOCK_SIZE; + + const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size; + + // Sanity checks + if (uncomp_ofs >= uncomp_level_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_ofs >= total_2D_image_size\n"); + return false; + } + + if ((uncomp_level_data_size - uncomp_ofs) < total_2D_image_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n"); + return false; + } + + if (m_format == basist::basis_tex_format::cUASTC_HDR_4x4) + { + if (!m_uastc_hdr_transcoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks4_x, num_blocks4_y, level_width, level_height, level_index, + 0, (uint32_t)total_2D_image_size, + decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + return false; + } + } + else + { + if (!m_uastc_transcoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks4_x, num_blocks4_y, level_width, level_height, level_index, + 0, (uint32_t)total_2D_image_size, + decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + return false; + } + 
} + } + else + { + // Shouldn't get here. + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Internal error\n"); + assert(0); + return false; + } + + return true; + } + + bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data) + { + const uint8_t* pComp_data = m_levels[level_index].m_byte_offset.get_uint64() + m_pData; + const uint64_t comp_size = m_levels[level_index].m_byte_length.get_uint64(); + + const uint64_t uncomp_size = m_levels[level_index].m_uncompressed_byte_length.get_uint64(); + + if (((size_t)comp_size) != comp_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Compressed data too large\n"); + return false; + } + if (((size_t)uncomp_size) != uncomp_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Uncompressed data too large\n"); + return false; + } + + if (!uncomp_data.try_resize((size_t)uncomp_size)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Out of memory\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) + { +#if BASISD_SUPPORT_KTX2_ZSTD + size_t actualUncompSize = ZSTD_decompress(uncomp_data.data(), (size_t)uncomp_size, pComp_data, (size_t)comp_size); + if (ZSTD_isError(actualUncompSize)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression failed, file is invalid or corrupted\n"); + return false; + } + if (actualUncompSize != uncomp_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression returned too few bytes, file is invalid or corrupted\n"); + return false; + } +#else + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: File uses Zstd supercompression, but Zstd support was not enabled at compile time (BASISD_SUPPORT_KTX2_ZSTD is 0)\n"); + return false; +#endif + } + + return true; + } + + bool ktx2_transcoder::read_astc_6x6_hdr_intermediate_global_data() + { + const uint32_t image_count = 
basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count * m_header.m_level_count; + assert(image_count); + + const uint8_t* pSrc = m_pData + m_header.m_sgd_byte_offset.get_uint64(); + + if (m_header.m_sgd_byte_length.get_uint64() != image_count * sizeof(ktx2_astc_hdr_6x6_intermediate_image_desc)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_astc_6x6_hdr_intermediate_global_data: Invalid global data length\n"); + return false; + } + + m_astc_6x6_intermediate_image_descs.resize(image_count); + + memcpy((void *)m_astc_6x6_intermediate_image_descs.data(), pSrc, sizeof(ktx2_astc_hdr_6x6_intermediate_image_desc) * image_count); + + // Sanity check the image descs + for (uint32_t i = 0; i < image_count; i++) + { + // transcode_image() will validate the slice offsets/lengths before transcoding. + + if (!m_astc_6x6_intermediate_image_descs[i].m_rgb_slice_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_astc_6x6_hdr_intermediate_global_data: image descs sanity check failed (1)\n"); + return false; + } + } + + return true; + } + + bool ktx2_transcoder::decompress_etc1s_global_data() + { + // Note: we don't actually support 3D textures in here yet + //uint32_t layer_pixel_depth = basisu::maximum(m_header.m_pixel_depth, 1); + //for (uint32_t i = 1; i < m_header.m_level_count; i++) + // layer_pixel_depth += basisu::maximum(m_header.m_pixel_depth >> i, 1); + + const uint32_t image_count = basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count * m_header.m_level_count; + assert(image_count); + + const uint8_t* pSrc = m_pData + m_header.m_sgd_byte_offset.get_uint64(); + + memcpy((void *)&m_etc1s_header, pSrc, sizeof(ktx2_etc1s_global_data_header)); + pSrc += sizeof(ktx2_etc1s_global_data_header); + + if ((!m_etc1s_header.m_endpoints_byte_length) || (!m_etc1s_header.m_selectors_byte_length) || (!m_etc1s_header.m_tables_byte_length)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Invalid ETC1S global 
data\n"); + return false; + } + + if ((!m_etc1s_header.m_endpoint_count) || (!m_etc1s_header.m_selector_count)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: endpoint and/or selector count is 0, file is invalid or corrupted\n"); + return false; + } + + // Sanity check the ETC1S header. + if ((sizeof(ktx2_etc1s_global_data_header) + + sizeof(ktx2_etc1s_image_desc) * image_count + + m_etc1s_header.m_endpoints_byte_length + + m_etc1s_header.m_selectors_byte_length + + m_etc1s_header.m_tables_byte_length + + m_etc1s_header.m_extended_byte_length) > m_header.m_sgd_byte_length.get_uint64()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: SGD byte length is too small, file is invalid or corrupted\n"); + return false; + } + + if (!m_etc1s_image_descs.try_resize(image_count)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Out of memory\n"); + return false; + } + + memcpy((void *)m_etc1s_image_descs.data(), pSrc, sizeof(ktx2_etc1s_image_desc) * image_count); + pSrc += sizeof(ktx2_etc1s_image_desc) * image_count; + + // Sanity check the ETC1S image descs + for (uint32_t i = 0; i < image_count; i++) + { + // m_etc1s_transcoder.transcode_image() will validate the slice offsets/lengths before transcoding. 
+ + if (!m_etc1s_image_descs[i].m_rgb_slice_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (1)\n"); + return false; + } + + if (m_has_alpha) + { + if (!m_etc1s_image_descs[i].m_alpha_slice_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (2)\n"); + return false; + } + } + } + + const uint8_t* pEndpoint_data = pSrc; + const uint8_t* pSelector_data = pSrc + m_etc1s_header.m_endpoints_byte_length; + const uint8_t* pTables_data = pSrc + m_etc1s_header.m_endpoints_byte_length + m_etc1s_header.m_selectors_byte_length; + + if (!m_etc1s_transcoder.decode_tables(pTables_data, m_etc1s_header.m_tables_byte_length)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_tables() failed, file is invalid or corrupted\n"); + return false; + } + + if (!m_etc1s_transcoder.decode_palettes( + m_etc1s_header.m_endpoint_count, pEndpoint_data, m_etc1s_header.m_endpoints_byte_length, + m_etc1s_header.m_selector_count, pSelector_data, m_etc1s_header.m_selectors_byte_length)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_palettes() failed, file is likely corrupted\n"); + return false; + } + + return true; + } + + bool ktx2_transcoder::read_key_values() + { + if (!m_header.m_kvd_byte_length) + { + if (m_header.m_kvd_byte_offset) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset (it should be zero when the length is zero)\n"); + return false; + } + + return true; + } + + if (m_header.m_kvd_byte_offset < sizeof(ktx2_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset\n"); + return false; + } + + if ((m_header.m_kvd_byte_offset + m_header.m_kvd_byte_length) > m_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset and/or length\n"); + return false; + } + + const uint8_t* 
pSrc = m_pData + m_header.m_kvd_byte_offset; + uint32_t src_left = m_header.m_kvd_byte_length; + + if (!m_key_values.try_reserve(8)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + while (src_left > sizeof(uint32_t)) + { + uint32_t l = basisu::read_le_dword(pSrc); + + pSrc += sizeof(uint32_t); + src_left -= sizeof(uint32_t); + + if (l < 2) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (0)\n"); + return false; + } + + if (src_left < l) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (1)\n"); + return false; + } + + if (!m_key_values.try_resize(m_key_values.size() + 1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + basisu::uint8_vec& key_data = m_key_values.back().m_key; + basisu::uint8_vec& value_data = m_key_values.back().m_value; + + do + { + if (!l) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (2)\n"); + return false; + } + + if (!key_data.try_push_back(*pSrc++)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + src_left--; + l--; + + } while (key_data.back()); + + // Ensure key and value are definitely 0 terminated + if (!key_data.try_push_back('\0')) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + if (!value_data.try_resize(l)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + if (l) + { + memcpy(value_data.data(), pSrc, l); + pSrc += l; + src_left -= l; + } + + // Ensure key and value are definitely 0 terminated + if (!value_data.try_push_back('\0')) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + uint32_t ofs = (uint32_t)(pSrc - m_pData) & 3; + uint32_t alignment_bytes = (4 - ofs) & 3; + + if (src_left 
< alignment_bytes) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (3)\n"); + return false; + } + + pSrc += alignment_bytes; + src_left -= alignment_bytes; + } + + return true; + } + +#endif // BASISD_SUPPORT_KTX2 + + bool basisu_transcoder_supports_ktx2() + { +#if BASISD_SUPPORT_KTX2 + return true; +#else + return false; +#endif + } + + bool basisu_transcoder_supports_ktx2_zstd() + { +#if BASISD_SUPPORT_KTX2_ZSTD + return true; +#else + return false; +#endif + } + + //------------------------------- + +#if BASISD_SUPPORT_UASTC_HDR + // This float->half conversion matches how "F32TO16" works on Intel GPU's. + basist::half_float float_to_half(float val) + { + union { float f; int32_t i; uint32_t u; } fi = { val }; + const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1; + int s = flt_s, e = 0, m = 0; + + // inf/NaN + if (flt_e == 0xff) + { + e = 31; + if (flt_m != 0) // NaN + m = 1; + } + // not zero or denormal + else if (flt_e != 0) + { + int new_exp = flt_e - 127; + if (new_exp > 15) + e = 31; + else if (new_exp < -14) + m = lrintf((1 << 24) * fabsf(fi.f)); + else + { + e = new_exp + 15; + m = lrintf(flt_m * (1.0f / ((float)(1 << 13)))); + } + } + + assert((0 <= m) && (m <= 1024)); + if (m == 1024) + { + e++; + m = 0; + } + + assert((s >= 0) && (s <= 1)); + assert((e >= 0) && (e <= 31)); + assert((m >= 0) && (m <= 1023)); + + basist::half_float result = (basist::half_float)((s << 15) | (e << 10) | m); + return result; + } + + //------------------------------------------------------------------------------------------------ + // HDR support + // + // Originally from bc6h_enc.cpp + // BC6H decoder fuzzed vs. 
DirectXTex's for unsigned/signed + + const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4] = // base bits, r, g, b + { + // 2 subsets + { 10, 5, 5, 5, }, // 0, mode 1 in MS/D3D docs + { 7, 6, 6, 6, }, // 1 + { 11, 5, 4, 4, }, // 2 + { 11, 4, 5, 4, }, // 3 + { 11, 4, 4, 5, }, // 4 + { 9, 5, 5, 5, }, // 5 + { 8, 6, 5, 5, }, // 6 + { 8, 5, 6, 5, }, // 7 + { 8, 5, 5, 6, }, // 8 + { 6, 6, 6, 6, }, // 9, endpoints not delta encoded, mode 10 in MS/D3D docs + // 1 subset + { 10, 10, 10, 10, }, // 10, endpoints not delta encoded, mode 11 in MS/D3D docs + { 11, 9, 9, 9, }, // 11 + { 12, 8, 8, 8, }, // 12 + { 16, 4, 4, 4, } // 13, also useful for solid blocks + }; + + const int8_t g_bc6h_mode_lookup[32] = { 0, 1, 2, 10, 0, 1, 3, 11, 0, 1, 4, 12, 0, 1, 5, 13, 0, 1, 6, -1, 0, 1, 7, -1, 0, 1, 8, -1, 0, 1, 9, -1 }; + + const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX] = + { + // comp_index, subset*2+lh_index, last_bit, first_bit + //------------------------ mode 0: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (10.555, 10.555, 10.555), delta + { { 1, 2, 4, -1 }, { 2, 2, 4, -1 }, { 2, 3, 4, -1 }, { 0, 0, 9, 0 }, { 1, 0, 9, 0 }, { 2, 0, 9, 0 }, { 0, 1, 4, 0 }, + { 1, 3, 4, -1 }, { 1, 2, 3, 0 }, { 1, 1, 4, 0 }, { 2, 3, 0, -1 }, { 1, 3, 3, 0 }, { 2, 1, 4, 0 }, { 2, 3, 1, -1 }, + { 2, 2, 3, 0 }, { 0, 2, 4, 0 }, { 2, 3, 2, -1 }, { 0, 3, 4, 0 }, { 2, 3, 3, -1 }, { 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 1: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (7.666, 7.666, 7.666), delta + { { 1, 2, 5, -1 },{ 1, 3, 4, -1 },{ 1, 3, 5, -1 },{ 0, 0, 6, 0 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 }, + { 1, 0, 6, 0 },{ 2, 2, 5, -1 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 6, 0 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 }, + { 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 }, + { 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 2: 
2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.555, 11.444, 11.444), delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 4, 0 },{ 0, 0, 10, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },{ 1, 0, 10, -1 }, + { 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 }, + { 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 3: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.555, 11.444), delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 }, + { 1, 0, 10, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 0, -1 }, + { 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 1, 2, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 4: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.444, 11.555), delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 2, 2, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 }, + { 1, 0, 10, -1 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 0, 10, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 1, -1 }, + { 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 2, 3, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 5: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (9.555, 9.555, 9.555), delta + { { 0, 0, 8, 0 },{ 2, 2, 4, -1 },{ 1, 0, 8, 0 },{ 1, 2, 4, -1 },{ 2, 0, 8, 0 },{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 }, + { 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 }, + { 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 6: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.666, 8.555, 8.555), delta + { { 0, 0, 7, 0 },{ 1, 3, 4, -1 },{ 2, 2, 
4, -1 },{ 1, 0, 7, 0 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 3, -1 }, + { 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 }, + { 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 7: 2 subsets, Weight bits: 46 bits, Endpoints bits: 72 bits (8.555, 8.666, 8.555), delta + { { 0, 0, 7, 0 },{ 2, 3, 0, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 1, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 1, 3, 5, -1 }, + { 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 }, + { 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 8: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.555, 8.555, 8.666), delta + { { 0, 0, 7, 0 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 5, -1 }, + { 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 }, + { 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 9: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (6.6.6.6, 6.6.6.6, 6.6.6.6), NO delta + { { 0, 0, 5, 0 },{ 1, 3, 4, -1 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 5, 0 },{ 1, 2, 5, -1 },{ 2, 2, 5, -1 }, + { 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 5, 0 },{ 1, 3, 5, -1 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },{ 2, 3, 4, -1 },{ 0, 1, 5, 0 }, + { 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 10: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (10.10, 10.10, 10.10), NO delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 9, 0 },{ 
1, 1, 9, 0 },{ 2, 1, 9, 0 }, {-1, 0, 0, 0} }, + //------------------------ mode 11: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (11.9, 11.9, 11.9), delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 8, 0 },{ 0, 0, 10, -1 },{ 1, 1, 8, 0 },{ 1, 0, 10, -1 },{ 2, 1, 8, 0 },{ 2, 0, 10, -1 }, {-1, 0, 0, 0} }, + //------------------------ mode 12: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (12.8, 12.8, 12.8), delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 7, 0 },{ 0, 0, 10, 11 },{ 1, 1, 7, 0 },{ 1, 0, 10, 11 },{ 2, 1, 7, 0 },{ 2, 0, 10, 11 }, {-1, 0, 0, 0} }, + //------------------------ mode 13: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (16.4, 16.4, 16.4), delta + { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, 15 },{ 1, 1, 3, 0 },{ 1, 0, 10, 15 },{ 2, 1, 3, 0 },{ 2, 0, 10, 15 }, {-1, 0, 0, 0} } + }; + + // The same as the first 32 2-subset patterns in BC7. + // Bit 7 is a flag indicating that the weight uses 1 less bit than usual. 
+ const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4] = // [pat][y][x] + { + { {0x80, 0, 1, 1}, { 0, 0, 1, 1 }, { 0, 0, 1, 1 }, { 0, 0, 1, 0x81 }}, { {0x80, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0x81} }, + { {0x80, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 0x81} }, { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} }, + { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, + { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} }, + { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, + { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 0x81} }, + { {0x80, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, + { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 0x81} }, + { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 1, 0}, {1, 1, 1, 0x81} }, { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} }, + { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 1, 0x81, 1}, {0, 0, 1, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} }, + { {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 1, 0, 0}, {1, 1, 1, 0} }, + { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} }, { {0x80, 1, 1, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 0, 0, 0x81} }, + { {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} }, + { {0x80, 1, 0x81, 0}, {0, 1, 1, 0}, {0, 1, 1, 0}, {0, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {0, 1, 1, 0}, {0, 1, 1, 0}, {1, 1, 0, 0} }, + { {0x80, 0, 0, 
1}, {0, 1, 1, 1}, {0x81, 1, 1, 0}, {1, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {0x81, 1, 1, 1}, {0, 0, 0, 0} }, + { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {1, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {1, 0, 0, 1}, {1, 0, 0, 1}, {1, 1, 0, 0} } + }; + + const uint8_t g_bc6h_weight3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; + const uint8_t g_bc6h_weight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + + static inline void write_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h) + { + assert((num_bits) && (num_bits < 64) && (bit_pos < 128)); + assert(val < (1ULL << num_bits)); + + if (bit_pos < 64) + { + l |= (val << bit_pos); + + if ((bit_pos + num_bits) > 64) + h |= (val >> (64 - bit_pos)); + } + else + { + h |= (val << (bit_pos - 64)); + } + + bit_pos += num_bits; + assert(bit_pos <= 128); + } + + static inline void write_rev_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h) + { + assert((num_bits) && (num_bits < 64) && (bit_pos < 128)); + assert(val < (1ULL << num_bits)); + + for (uint32_t i = 0; i < num_bits; i++) + write_bits((val >> (num_bits - 1u - i)) & 1, 1, bit_pos, l, h); + } + + void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk) + { + const uint8_t s_mode_bits[NUM_BC6H_MODES] = { 0b00, 0b01, 0b00010, 0b00110, 0b01010, 0b01110, 0b10010, 0b10110, 0b11010, 0b11110, 0b00011, 0b00111, 0b01011, 0b01111 }; + + const uint32_t mode = log_blk.m_mode; + assert(mode < NUM_BC6H_MODES); + + uint64_t l = s_mode_bits[mode], h = 0; + uint32_t bit_pos = (mode >= 2) ? 5 : 2; + + const uint32_t num_subsets = (mode >= BC6H_FIRST_1SUBSET_MODE_INDEX) ? 
1 : 2; + + assert(((num_subsets == 2) && (log_blk.m_partition_pattern < TOTAL_BC6H_PARTITION_PATTERNS)) || + ((num_subsets == 1) && (!log_blk.m_partition_pattern))); + + // Sanity checks + for (uint32_t c = 0; c < 3; c++) + { + assert(log_blk.m_endpoints[c][0] < (1u << g_bc6h_mode_sig_bits[mode][0])); // 1st subset l, base bits + assert(log_blk.m_endpoints[c][1] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 1st subset h, these are deltas except for modes 9,10 + assert(log_blk.m_endpoints[c][2] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset l + assert(log_blk.m_endpoints[c][3] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset h + } + + const bc6h_bit_layout* pLayout = &g_bc6h_bit_layouts[mode][0]; + + while (pLayout->m_comp != -1) + { + uint32_t v = (pLayout->m_comp == 3) ? log_blk.m_partition_pattern : log_blk.m_endpoints[pLayout->m_comp][pLayout->m_index]; + + if (pLayout->m_first_bit == -1) + { + write_bits((v >> pLayout->m_last_bit) & 1, 1, bit_pos, l, h); + } + else + { + const uint32_t total_bits = basisu::iabs(pLayout->m_last_bit - pLayout->m_first_bit) + 1; + + v >>= basisu::minimum(pLayout->m_first_bit, pLayout->m_last_bit); + v &= ((1 << total_bits) - 1); + + if (pLayout->m_first_bit > pLayout->m_last_bit) + write_rev_bits(v, total_bits, bit_pos, l, h); + else + write_bits(v, total_bits, bit_pos, l, h); + } + + pLayout++; + } + + const uint32_t num_mode_sel_bits = (num_subsets == 1) ? 
4 : 3; + const uint8_t* pPat = &g_bc6h_2subset_patterns[log_blk.m_partition_pattern][0][0]; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t sel = log_blk.m_weights[i]; + + uint32_t num_bits = num_mode_sel_bits; + if (num_subsets == 2) + { + const uint32_t subset_index = pPat[i]; + num_bits -= (subset_index >> 7); + } + else if (!i) + { + num_bits--; + } + + assert(sel < (1u << num_bits)); + + write_bits(sel, num_bits, bit_pos, l, h); + } + + assert(bit_pos == 128); + + basisu::write_le_dword(&dst_blk.m_bytes[0], (uint32_t)l); + basisu::write_le_dword(&dst_blk.m_bytes[4], (uint32_t)(l >> 32u)); + basisu::write_le_dword(&dst_blk.m_bytes[8], (uint32_t)h); + basisu::write_le_dword(&dst_blk.m_bytes[12], (uint32_t)(h >> 32u)); + } + +#if 0 + static inline uint32_t bc6h_blog_dequantize_to_blog16(uint32_t comp, uint32_t bits_per_comp) + { + int unq; + + if (bits_per_comp >= 15) + unq = comp; + else if (comp == 0) + unq = 0; + else if (comp == ((1u << bits_per_comp) - 1u)) + unq = 0xFFFFu; + else + unq = ((comp << 16u) + 0x8000u) >> bits_per_comp; + + return unq; + } +#endif + + // 6,7,8,9,10,11,12 + const uint32_t BC6H_BLOG_TAB_MIN = 6; + const uint32_t BC6H_BLOG_TAB_MAX = 12; + //const uint32_t BC6H_BLOG_TAB_NUM = BC6H_BLOG_TAB_MAX - BC6H_BLOG_TAB_MIN + 1; + + // Handles 16, or 6-12 bits. Others assert. + static inline uint32_t half_to_blog_tab(half_float h, uint32_t num_bits) + { + assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT); + assert((num_bits == 16) || ((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX))); + + return bc6h_half_to_blog(h, num_bits); +#if 0 + BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MIN); + BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MAX); + + if (num_bits == 16) + { + return bc6h_half_to_blog(h, 16); + } + else + { + assert((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX)); + + // Note: This used to be done using a table lookup, but it required ~224KB of tables. 
This isn't quite as accurate, but the error is very slight (+-1 half values as ints). + return bc6h_half_to_blog(h, num_bits); + } +#endif + } + + bool g_bc6h_enc_initialized; + + void bc6h_enc_init() + { + if (g_bc6h_enc_initialized) + return; + + g_bc6h_enc_initialized = true; + } + + // mode 10, 4-bit weights + void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights) + { + assert(g_bc6h_enc_initialized); + + for (uint32_t i = 0; i < 16; i++) + { + assert(pWeights[i] <= 15); + } + + bc6h_logical_block log_blk; + log_blk.clear(); + + // Convert half endpoints to blog10 (mode 10 doesn't use delta encoding) + for (uint32_t c = 0; c < 3; c++) + { + log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 10); + log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 10); + } + + memcpy(log_blk.m_weights, pWeights, 16); + + if (log_blk.m_weights[0] & 8) + { + for (uint32_t i = 0; i < 16; i++) + log_blk.m_weights[i] = 15 - log_blk.m_weights[i]; + + for (uint32_t c = 0; c < 3; c++) + { + std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]); + } + } + + log_blk.m_mode = BC6H_FIRST_1SUBSET_MODE_INDEX; + pack_bc6h_block(*pPacked_block, log_blk); + } + + // Tries modes 11-13 (delta endpoint) encoding, falling back to mode 10 only when necessary, 4-bit weights + void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights) + { + assert(g_bc6h_enc_initialized); + + for (uint32_t i = 0; i < 16; i++) + { + assert(pWeights[i] <= 15); + } + + bc6h_logical_block log_blk; + log_blk.clear(); + + for (uint32_t mode = BC6H_LAST_MODE_INDEX; mode > BC6H_FIRST_1SUBSET_MODE_INDEX; mode--) + { + const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0], num_delta_bits = g_bc6h_mode_sig_bits[mode][1]; + const int base_bitmask = (1 << num_base_bits) - 1; + const int delta_bitmask = (1 << num_delta_bits) - 1; + BASISU_NOTE_UNUSED(base_bitmask); 
+ + assert(num_delta_bits < num_base_bits); + assert((num_delta_bits == g_bc6h_mode_sig_bits[mode][2]) && (num_delta_bits == g_bc6h_mode_sig_bits[mode][3])); + + uint32_t blog_endpoints[3][2]; + + // Convert half endpoints to blog 16, 12, or 11 + for (uint32_t c = 0; c < 3; c++) + { + blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits); + assert((int)blog_endpoints[c][0] <= base_bitmask); + + blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits); + assert((int)blog_endpoints[c][1] <= base_bitmask); + } + + // Copy weights + memcpy(log_blk.m_weights, pWeights, 16); + + // Ensure first weight MSB is 0 + if (log_blk.m_weights[0] & 8) + { + // Invert weights + for (uint32_t i = 0; i < 16; i++) + log_blk.m_weights[i] = 15 - log_blk.m_weights[i]; + + // Swap blog quantized endpoints + for (uint32_t c = 0; c < 3; c++) + { + std::swap(blog_endpoints[c][0], blog_endpoints[c][1]); + } + } + + const int max_delta = (1 << (num_delta_bits - 1)) - 1; + const int min_delta = -(max_delta + 1); + assert((max_delta - min_delta) == delta_bitmask); + + bool failed_flag = false; + for (uint32_t c = 0; c < 3; c++) + { + log_blk.m_endpoints[c][0] = blog_endpoints[c][0]; + + int delta = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0]; + if ((delta < min_delta) || (delta > max_delta)) + { + failed_flag = true; + break; + } + + log_blk.m_endpoints[c][1] = delta & delta_bitmask; + } + + if (failed_flag) + continue; + + log_blk.m_mode = mode; + pack_bc6h_block(*pPacked_block, log_blk); + + return; + } + + // Worst case fall back to mode 10, which can handle any endpoints + bc6h_enc_block_mode10(pPacked_block, pEndpoints, pWeights); + } + + // Mode 9 (direct endpoint encoding), 3-bit weights, but only 1 subset + void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights) + { + assert(g_bc6h_enc_initialized); + + for (uint32_t i = 0; i < 16; i++) + { + assert(pWeights[i] <= 
7); + } + + bc6h_logical_block log_blk; + log_blk.clear(); + + // Convert half endpoints to blog6 (mode 9 doesn't use delta encoding) + for (uint32_t c = 0; c < 3; c++) + { + log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 6); + log_blk.m_endpoints[c][2] = log_blk.m_endpoints[c][0]; + + log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 6); + log_blk.m_endpoints[c][3] = log_blk.m_endpoints[c][1]; + } + + memcpy(log_blk.m_weights, pWeights, 16); + + const uint32_t pat_index = 0; + const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0]; + + if (log_blk.m_weights[0] & 4) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 0) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + if (log_blk.m_weights[15] & 4) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 1) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + log_blk.m_mode = 9; + log_blk.m_partition_pattern = pat_index; + pack_bc6h_block(*pPacked_block, log_blk); + } + + // Tries modes 0-8, falls back to mode 9 + void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights) + { + assert(g_bc6h_enc_initialized); + + for (uint32_t i = 0; i < 16; i++) + { + assert(pWeights[i] <= 7); + } + + bc6h_logical_block log_blk; + log_blk.clear(); + + for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++) + { + static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least + const uint32_t mode = s_mode_order[mode_iter]; + + const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0]; + const int base_bitmask = (1 << num_base_bits) - 1; + BASISU_NOTE_UNUSED(base_bitmask); + + const uint32_t num_delta_bits[3] = { 
g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] }; + const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 }; + + uint32_t blog_endpoints[3][4]; + + // Convert half endpoints to blog 7-11 + for (uint32_t c = 0; c < 3; c++) + { + blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits); + blog_endpoints[c][2] = blog_endpoints[c][0]; + assert((int)blog_endpoints[c][0] <= base_bitmask); + + blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits); + blog_endpoints[c][3] = blog_endpoints[c][1]; + assert((int)blog_endpoints[c][1] <= base_bitmask); + } + + const uint32_t pat_index = 0; + const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0]; + + memcpy(log_blk.m_weights, pWeights, 16); + + if (log_blk.m_weights[0] & 4) + { + // Swap part 0's endpoints/weights + for (uint32_t c = 0; c < 3; c++) + std::swap(blog_endpoints[c][0], blog_endpoints[c][1]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 0) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + if (log_blk.m_weights[15] & 4) + { + // Swap part 1's endpoints/weights + for (uint32_t c = 0; c < 3; c++) + std::swap(blog_endpoints[c][2], blog_endpoints[c][3]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 1) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + bool failed_flag = false; + + for (uint32_t c = 0; c < 3; c++) + { + const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1; + + const int min_delta = -(max_delta + 1); + assert((max_delta - min_delta) == delta_bitmasks[c]); + + log_blk.m_endpoints[c][0] = blog_endpoints[c][0]; + + int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0]; + int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0]; + int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0]; + + if ((delta0 < min_delta) || (delta0 > max_delta) || + (delta1 < min_delta) || 
(delta1 > max_delta) || + (delta2 < min_delta) || (delta2 > max_delta)) + { + failed_flag = true; + break; + } + + log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c]; + log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c]; + log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c]; + } + + if (failed_flag) + continue; + + log_blk.m_mode = mode; + log_blk.m_partition_pattern = pat_index; + pack_bc6h_block(*pPacked_block, log_blk); + + return; + + } // mode_iter + + bc6h_enc_block_1subset_mode9_3bit_weights(pPacked_block, pEndpoints, pWeights); + } + + // pEndpoints[subset][comp][lh_index] + void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights) + { + assert(g_bc6h_enc_initialized); + assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2); + + for (uint32_t i = 0; i < 16; i++) + { + assert(pWeights[i] <= 7); + } + + bc6h_logical_block log_blk; + log_blk.clear(); + + // Convert half endpoints to blog6 (mode 9 doesn't use delta encoding) + for (uint32_t s = 0; s < 2; s++) + { + for (uint32_t c = 0; c < 3; c++) + { + log_blk.m_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], 6); + log_blk.m_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], 6); + } + } + + memcpy(log_blk.m_weights, pWeights, 16); + + //const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc; + const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7; + + const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert; + if (invert_flag) + { + for (uint32_t c = 0; c < 3; c++) + { + std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][2]); + std::swap(log_blk.m_endpoints[c][1], log_blk.m_endpoints[c][3]); + } + } + + const uint32_t pat_index = bc7_pattern; + assert(pat_index < 32); + const uint8_t* pPat = 
&g_bc6h_2subset_patterns[pat_index][0][0]; + + bool swap_flags[2] = { false, false }; + for (uint32_t i = 0; i < 16; i++) + { + if ((pPat[i] & 0x80) == 0) + continue; + + if (log_blk.m_weights[i] & 4) + { + const uint32_t p = pPat[i] & 1; + swap_flags[p] = true; + } + } + + if (swap_flags[0]) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 0) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + if (swap_flags[1]) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 1) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + log_blk.m_mode = 9; + log_blk.m_partition_pattern = pat_index; + pack_bc6h_block(*pPacked_block, log_blk); + } + + void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights) + { + assert(g_bc6h_enc_initialized); + + for (uint32_t i = 0; i < 16; i++) + { + assert(pWeights[i] <= 7); + } + + bc6h_logical_block log_blk; + log_blk.clear(); + + for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++) + { + static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least + const uint32_t mode = s_mode_order[mode_iter]; + + const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0]; + const int base_bitmask = (1 << num_base_bits) - 1; + BASISU_NOTE_UNUSED(base_bitmask); + + const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] }; + const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 }; + + uint32_t blog_endpoints[3][4]; + + // Convert half endpoints to blog 7-11 + for (uint32_t s = 0; s < 2; s++) + { + for (uint32_t c = 0; c 
< 3; c++) + { + blog_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], num_base_bits); + blog_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], num_base_bits); + } + } + + memcpy(log_blk.m_weights, pWeights, 16); + + //const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc; + const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7; + + const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert; + if (invert_flag) + { + for (uint32_t c = 0; c < 3; c++) + { + std::swap(blog_endpoints[c][0], blog_endpoints[c][2]); + std::swap(blog_endpoints[c][1], blog_endpoints[c][3]); + } + } + + const uint32_t pat_index = bc7_pattern; + assert(pat_index < 32); + const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0]; + + bool swap_flags[2] = { false, false }; + for (uint32_t i = 0; i < 16; i++) + { + if ((pPat[i] & 0x80) == 0) + continue; + + if (log_blk.m_weights[i] & 4) + { + const uint32_t p = pPat[i] & 1; + swap_flags[p] = true; + } + } + + if (swap_flags[0]) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(blog_endpoints[c][0], blog_endpoints[c][1]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 0) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + if (swap_flags[1]) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(blog_endpoints[c][2], blog_endpoints[c][3]); + + for (uint32_t i = 0; i < 16; i++) + if ((pPat[i] & 0x7F) == 1) + log_blk.m_weights[i] = 7 - log_blk.m_weights[i]; + } + + // Try packing the endpoints + bool failed_flag = false; + + for (uint32_t c = 0; c < 3; c++) + { + const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1; + + const int min_delta = -(max_delta + 1); + assert((max_delta - min_delta) == delta_bitmasks[c]); + + log_blk.m_endpoints[c][0] = blog_endpoints[c][0]; + + int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0]; + int delta1 = (int)blog_endpoints[c][2] - 
(int)blog_endpoints[c][0]; + int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0]; + + if ((delta0 < min_delta) || (delta0 > max_delta) || + (delta1 < min_delta) || (delta1 > max_delta) || + (delta2 < min_delta) || (delta2 > max_delta)) + { + failed_flag = true; + break; + } + + log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c]; + log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c]; + log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c]; + } + + if (failed_flag) + continue; + + log_blk.m_mode = mode; + log_blk.m_partition_pattern = pat_index; + pack_bc6h_block(*pPacked_block, log_blk); + + //half_float blk[16 * 3]; + //unpack_bc6h(pPacked_block, blk, false); + + return; + } + + bc6h_enc_block_2subset_mode9_3bit_weights(pPacked_block, common_part_index, pEndpoints, pWeights); + } + + bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]) + { + assert(g_bc6h_enc_initialized); + + if ((pColor[0] | pColor[1] | pColor[2]) & 0x8000) + return false; + + // ASTC block unpacker won't allow Inf/NaN's to come through. 
+ //if (is_half_inf_or_nan(pColor[0]) || is_half_inf_or_nan(pColor[1]) || is_half_inf_or_nan(pColor[2])) + // return false; + + uint8_t weights[16]; + memset(weights, 0, sizeof(weights)); + + half_float endpoints[3][2]; + endpoints[0][0] = pColor[0]; + endpoints[0][1] = pColor[0]; + + endpoints[1][0] = pColor[1]; + endpoints[1][1] = pColor[1]; + + endpoints[2][0] = pColor[2]; + endpoints[2][1] = pColor[2]; + + bc6h_enc_block_1subset_4bit_weights(pPacked_block, endpoints, weights); + + return true; + } + + //-------------------------------------------------------------------------------------------------------------------------- + // basisu_astc_hdr_core.cpp + + static bool g_astc_hdr_core_initialized; + static int8_t g_astc_partition_id_to_common_bc7_pat_index[1024]; + + //-------------------------------------------------------------------------------------------------------------------------- + + void astc_hdr_core_init() + { + if (g_astc_hdr_core_initialized) + return; + + memset(g_astc_partition_id_to_common_bc7_pat_index, 0xFF, sizeof(g_astc_partition_id_to_common_bc7_pat_index)); + + for (uint32_t part_index = 0; part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; ++part_index) + { + const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc; + //const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7; + + assert(astc_pattern < 1024); + g_astc_partition_id_to_common_bc7_pat_index[astc_pattern] = (int8_t)part_index; + } + + g_astc_hdr_core_initialized = true; + } + + //-------------------------------------------------------------------------------------------------------------------------- + + static inline int astc_hdr_sign_extend(int src, int num_src_bits) + { + assert(basisu::in_range(num_src_bits, 2, 31)); + + const bool negative = (src & (1 << (num_src_bits - 1))) != 0; + if (negative) + return src | ~((1 << num_src_bits) - 1); + else + return src & ((1 << num_src_bits) - 1); + } + + static 
inline void astc_hdr_pack_bit( + int& dst, int dst_bit, + int src_val, int src_bit = 0) + { + assert(dst_bit >= 0 && dst_bit <= 31); + int bit = basisu::get_bit(src_val, src_bit); + dst |= (bit << dst_bit); + } + + //-------------------------------------------------------------------------------------------------------------------------- + + void decode_mode7_to_qlog12_ise20( + const uint8_t* pEndpoints, + int e[2][3], + int* pScale) + { + assert(g_astc_hdr_core_initialized); + + for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++) + { + assert(pEndpoints[i] <= 255); + } + + const int v0 = pEndpoints[0], v1 = pEndpoints[1], v2 = pEndpoints[2], v3 = pEndpoints[3]; + + // Extract mode bits and unpack to major component and mode. + const int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4); + + int majcomp, mode; + if ((modeval & 0xC) != 0xC) + { + majcomp = modeval >> 2; + mode = modeval & 3; + } + else if (modeval != 0xF) + { + majcomp = modeval & 3; + mode = 4; + } + else + { + majcomp = 0; + mode = 5; + } + + // Extract low-order bits of r, g, b, and s. + int red = v0 & 0x3f; + int green = v1 & 0x1f; + int blue = v2 & 0x1f; + int scale = v3 & 0x1f; + + // Extract high-order bits, which may be assigned depending on mode + int x0 = (v1 >> 6) & 1; + int x1 = (v1 >> 5) & 1; + int x2 = (v2 >> 6) & 1; + int x3 = (v2 >> 5) & 1; + int x4 = (v3 >> 7) & 1; + int x5 = (v3 >> 6) & 1; + int x6 = (v3 >> 5) & 1; + + // Now move the high-order xs into the right place. 
+ const int ohm = 1 << mode; + if (ohm & 0x30) green |= x0 << 6; + if (ohm & 0x3A) green |= x1 << 5; + if (ohm & 0x30) blue |= x2 << 6; + if (ohm & 0x3A) blue |= x3 << 5; + if (ohm & 0x3D) scale |= x6 << 5; + if (ohm & 0x2D) scale |= x5 << 6; + if (ohm & 0x04) scale |= x4 << 7; + if (ohm & 0x3B) red |= x4 << 6; + if (ohm & 0x04) red |= x3 << 6; + if (ohm & 0x10) red |= x5 << 7; + if (ohm & 0x0F) red |= x2 << 7; + if (ohm & 0x05) red |= x1 << 8; + if (ohm & 0x0A) red |= x0 << 8; + if (ohm & 0x05) red |= x0 << 9; + if (ohm & 0x02) red |= x6 << 9; + if (ohm & 0x01) red |= x3 << 10; + if (ohm & 0x02) red |= x5 << 10; + + // Shift the bits to the top of the 12-bit result. + static const int s_shamts[6] = { 1,1,2,3,4,5 }; + + const int shamt = s_shamts[mode]; + red <<= shamt; + green <<= shamt; + blue <<= shamt; + scale <<= shamt; + + // Minor components are stored as differences + if (mode != 5) + { + green = red - green; + blue = red - blue; + } + + // Swizzle major component into place + if (majcomp == 1) + std::swap(red, green); + + if (majcomp == 2) + std::swap(red, blue); + + // Clamp output values, set alpha to 1.0 + e[1][0] = basisu::clamp(red, 0, 0xFFF); + e[1][1] = basisu::clamp(green, 0, 0xFFF); + e[1][2] = basisu::clamp(blue, 0, 0xFFF); + + e[0][0] = basisu::clamp(red - scale, 0, 0xFFF); + e[0][1] = basisu::clamp(green - scale, 0, 0xFFF); + e[0][2] = basisu::clamp(blue - scale, 0, 0xFFF); + + if (pScale) + *pScale = scale; + } + + //-------------------------------------------------------------------------------------------------------------------------- + + bool decode_mode7_to_qlog12( + const uint8_t* pEndpoints, + int e[2][3], + int* pScale, + uint32_t ise_endpoint_range) + { + assert(g_astc_hdr_core_initialized); + + if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS) + { + decode_mode7_to_qlog12_ise20(pEndpoints, e, pScale); + } + else + { + uint8_t dequantized_endpoints[NUM_MODE7_ENDPOINTS]; + + for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++) + 
dequantized_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]]; + + decode_mode7_to_qlog12_ise20(dequantized_endpoints, e, pScale); + } + + for (uint32_t i = 0; i < 2; i++) + { + if (e[i][0] > (int)MAX_QLOG12) + return false; + + if (e[i][1] > (int)MAX_QLOG12) + return false; + + if (e[i][2] > (int)MAX_QLOG12) + return false; + } + + return true; + } + + //-------------------------------------------------------------------------------------------------------------------------- + + void decode_mode11_to_qlog12_ise20( + const uint8_t* pEndpoints, + int e[2][3]) + { +#ifdef _DEBUG + for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++) + { + assert(pEndpoints[i] <= 255); + } +#endif + + const uint32_t maj_comp = basisu::get_bit(pEndpoints[4], 7) | (basisu::get_bit(pEndpoints[5], 7) << 1); + + if (maj_comp == 3) + { + // Direct, qlog8 and qlog7 + e[0][0] = pEndpoints[0] << 4; + e[1][0] = pEndpoints[1] << 4; + + e[0][1] = pEndpoints[2] << 4; + e[1][1] = pEndpoints[3] << 4; + + e[0][2] = (pEndpoints[4] & 127) << 5; + e[1][2] = (pEndpoints[5] & 127) << 5; + } + else + { + int v0 = pEndpoints[0]; + int v1 = pEndpoints[1]; + int v2 = pEndpoints[2]; + int v3 = pEndpoints[3]; + int v4 = pEndpoints[4]; + int v5 = pEndpoints[5]; + + int mode = 0; + astc_hdr_pack_bit(mode, 0, v1, 7); + astc_hdr_pack_bit(mode, 1, v2, 7); + astc_hdr_pack_bit(mode, 2, v3, 7); + + int va = v0; + astc_hdr_pack_bit(va, 8, v1, 6); + + int vb0 = v2 & 63; + int vb1 = v3 & 63; + int vc = v1 & 63; + + int vd0 = v4 & 0x7F; // this takes more bits than is sometimes needed + int vd1 = v5 & 0x7F; // this takes more bits than is sometimes needed + static const int8_t dbitstab[8] = { 7,6,7,6,5,6,5,6 }; + vd0 = astc_hdr_sign_extend(vd0, dbitstab[mode]); + vd1 = astc_hdr_sign_extend(vd1, dbitstab[mode]); + + int x0 = basisu::get_bit(v2, 6); + int x1 = basisu::get_bit(v3, 6); + int x2 = basisu::get_bit(v4, 6); + int x3 = basisu::get_bit(v5, 6); + int x4 = 
basisu::get_bit(v4, 5); + int x5 = basisu::get_bit(v5, 5); + + const uint32_t ohm = 1U << mode; + if (ohm & 0xA4) va |= (x0 << 9); + if (ohm & 0x08) va |= (x2 << 9); + if (ohm & 0x50) va |= (x4 << 9); + if (ohm & 0x50) va |= (x5 << 10); + if (ohm & 0xA0) va |= (x1 << 10); + if (ohm & 0xC0) va |= (x2 << 11); + if (ohm & 0x04) vc |= (x1 << 6); + if (ohm & 0xE8) vc |= (x3 << 6); + if (ohm & 0x20) vc |= (x2 << 7); + if (ohm & 0x5B) vb0 |= (x0 << 6); + if (ohm & 0x5B) vb1 |= (x1 << 6); + if (ohm & 0x12) vb0 |= (x2 << 7); + if (ohm & 0x12) vb1 |= (x3 << 7); + + const int shamt = (mode >> 1) ^ 3; + + va = (uint32_t)va << shamt; + vb0 = (uint32_t)vb0 << shamt; + vb1 = (uint32_t)vb1 << shamt; + vc = (uint32_t)vc << shamt; + vd0 = (uint32_t)vd0 << shamt; + vd1 = (uint32_t)vd1 << shamt; + + // qlog12 + e[1][0] = basisu::clamp(va, 0, 0xFFF); + e[1][1] = basisu::clamp(va - vb0, 0, 0xFFF); + e[1][2] = basisu::clamp(va - vb1, 0, 0xFFF); + + e[0][0] = basisu::clamp(va - vc, 0, 0xFFF); + e[0][1] = basisu::clamp(va - vb0 - vc - vd0, 0, 0xFFF); + e[0][2] = basisu::clamp(va - vb1 - vc - vd1, 0, 0xFFF); + + if (maj_comp) + { + std::swap(e[0][0], e[0][maj_comp]); + std::swap(e[1][0], e[1][maj_comp]); + } + } + } + + //-------------------------------------------------------------------------------------------------------------------------- + + bool decode_mode11_to_qlog12( + const uint8_t* pEndpoints, + int e[2][3], + uint32_t ise_endpoint_range) + { + assert(g_astc_hdr_core_initialized); + assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + + if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS) + { + decode_mode11_to_qlog12_ise20(pEndpoints, e); + } + else + { + uint8_t dequantized_endpoints[NUM_MODE11_ENDPOINTS]; + + for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++) + dequantized_endpoints[i] = 
astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]]; + + decode_mode11_to_qlog12_ise20(dequantized_endpoints, e); + } + + for (uint32_t i = 0; i < 2; i++) + { + if (e[i][0] > (int)MAX_QLOG12) + return false; + + if (e[i][1] > (int)MAX_QLOG12) + return false; + + if (e[i][2] > (int)MAX_QLOG12) + return false; + } + + return true; + } + + //-------------------------------------------------------------------------------------------------------------------------- + + bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk) + { + assert(g_astc_hdr_core_initialized); + assert((best_blk.m_weight_ise_range >= 1) && (best_blk.m_weight_ise_range <= 8)); + + if (best_blk.m_weight_ise_range == 5) + { + // Use 3-bit BC6H weights which are a perfect match for 3-bit ASTC weights, but encode 1-subset as 2 equal subsets + bc6h_enc_block_1subset_3bit_weights(&transcoded_bc6h_blk, h_e, best_blk.m_weights); + } + else + { + uint8_t bc6h_weights[16]; + + if (best_blk.m_weight_ise_range == 1) + { + // weight ISE 1: 3 levels + static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 8, 15 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc1_to_bc6h_3[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 2) + { + // weight ISE 2: 4 levels + static const uint8_t s_astc2_to_bc6h_4[4] = { 0, 5, 10, 15 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc2_to_bc6h_4[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 3) + { + // weight ISE 3: 5 levels + static const uint8_t s_astc3_to_bc6h_4[5] = { 0, 4, 7, 11, 15 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc3_to_bc6h_4[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 4) + { + // weight ISE 4: 6 levels + static const uint8_t s_astc4_to_bc6h_4[6] = { 0, 15, 3, 12, 6, 9 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = 
s_astc4_to_bc6h_4[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 6) + { + // weight ISE 6: 10 levels + static const uint8_t s_astc6_to_bc6h_4[10] = { 0, 15, 2, 13, 3, 12, 5, 10, 6, 9 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc6_to_bc6h_4[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 7) + { + // weight ISE 7: 12 levels + static const uint8_t s_astc7_to_bc6h_4[12] = { 0, 15, 4, 11, 1, 14, 5, 10, 2, 13, 6, 9 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc7_to_bc6h_4[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 8) + { + // 16 levels + memcpy(bc6h_weights, best_blk.m_weights, 16); + } + else + { + assert(0); + return false; + } + + bc6h_enc_block_1subset_4bit_weights(&transcoded_bc6h_blk, h_e, bc6h_weights); + } + + return true; + } + + //-------------------------------------------------------------------------------------------------------------------------- + + bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk) + { + assert(g_astc_hdr_core_initialized); + assert(best_blk.m_num_partitions == 2); + assert(common_part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); + + half_float bc6h_endpoints[2][3][2]; // [subset][comp][lh_index] + + // UASTC HDR checks + // Both CEM's must be equal in 2-subset UASTC HDR. 
+ if (best_blk.m_color_endpoint_modes[0] != best_blk.m_color_endpoint_modes[1]) + return false; + if ((best_blk.m_color_endpoint_modes[0] != 7) && (best_blk.m_color_endpoint_modes[0] != 11)) + return false; + + if (best_blk.m_color_endpoint_modes[0] == 7) + { + if (!(((best_blk.m_weight_ise_range == 1) && (best_blk.m_endpoint_ise_range == 20)) || + ((best_blk.m_weight_ise_range == 2) && (best_blk.m_endpoint_ise_range == 20)) || + ((best_blk.m_weight_ise_range == 3) && (best_blk.m_endpoint_ise_range == 19)) || + ((best_blk.m_weight_ise_range == 4) && (best_blk.m_endpoint_ise_range == 17)) || + ((best_blk.m_weight_ise_range == 5) && (best_blk.m_endpoint_ise_range == 15)))) + { + return false; + } + } + else + { + if (!(((best_blk.m_weight_ise_range == 1) && (best_blk.m_endpoint_ise_range == 14)) || + ((best_blk.m_weight_ise_range == 2) && (best_blk.m_endpoint_ise_range == 12)))) + { + return false; + } + } + + for (uint32_t s = 0; s < 2; s++) + { + int e[2][3]; + if (best_blk.m_color_endpoint_modes[0] == 7) + { + bool success = decode_mode7_to_qlog12(best_blk.m_endpoints + s * NUM_MODE7_ENDPOINTS, e, nullptr, best_blk.m_endpoint_ise_range); + if (!success) + return false; + } + else + { + bool success = decode_mode11_to_qlog12(best_blk.m_endpoints + s * NUM_MODE11_ENDPOINTS, e, best_blk.m_endpoint_ise_range); + if (!success) + return false; + } + + for (uint32_t c = 0; c < 3; c++) + { + bc6h_endpoints[s][c][0] = qlog_to_half(e[0][c], 12); + if (is_half_inf_or_nan(bc6h_endpoints[s][c][0])) + return false; + + bc6h_endpoints[s][c][1] = qlog_to_half(e[1][c], 12); + if (is_half_inf_or_nan(bc6h_endpoints[s][c][1])) + return false; + } + } + + uint8_t bc6h_weights[16]; + if (best_blk.m_weight_ise_range == 1) + { + static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 4, 7 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc1_to_bc6h_3[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 2) + { + static const uint8_t s_astc2_to_bc6h_3[4] = { 0, 2, 
5, 7 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc2_to_bc6h_3[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 3) + { + static const uint8_t s_astc3_to_bc6h_3[5] = { 0, 2, 4, 5, 7 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc3_to_bc6h_3[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 4) + { + static const uint8_t s_astc4_to_bc6h_3[6] = { 0, 7, 1, 6, 3, 4 }; + + for (uint32_t i = 0; i < 16; i++) + bc6h_weights[i] = s_astc4_to_bc6h_3[best_blk.m_weights[i]]; + } + else if (best_blk.m_weight_ise_range == 5) + { + memcpy(bc6h_weights, best_blk.m_weights, 16); + } + else + { + assert(0); + return false; + } + + bc6h_enc_block_2subset_3bit_weights(&transcoded_bc6h_blk, common_part_index, bc6h_endpoints, bc6h_weights); + + return true; + } + + //-------------------------------------------------------------------------------------------------------------------------- + // Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails. + bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk) + { + assert(g_astc_hdr_core_initialized); + if (!g_astc_hdr_core_initialized) + { + assert(0); + return false; + } + + astc_helpers::log_astc_block log_blk; + + if (!astc_helpers::unpack_block(&src_blk, log_blk, 4, 4)) + { + // Failed unpacking ASTC data + return false; + } + + return astc_hdr_transcode_to_bc6h(log_blk, dst_blk); + } + + //-------------------------------------------------------------------------------------------------------------------------- + // Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails. + bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk) + { + assert(g_astc_hdr_core_initialized); + if (!g_astc_hdr_core_initialized) + { + assert(0); + return false; + } + + if (log_blk.m_solid_color_flag_ldr) + { + // Don't support LDR solid colors. 
+ return false; + } + + if (log_blk.m_solid_color_flag_hdr) + { + // Solid color HDR block + return bc6h_enc_block_solid_color(&dst_blk, log_blk.m_solid_color); + } + + // Only support 4x4 grid sizes + if ((log_blk.m_grid_width != 4) || (log_blk.m_grid_height != 4)) + return false; + + // Don't support dual plane encoding + if (log_blk.m_dual_plane) + return false; + + if (log_blk.m_num_partitions == 1) + { + // Handle 1 partition (or subset) + + // UASTC HDR checks + if ((log_blk.m_weight_ise_range < 1) || (log_blk.m_weight_ise_range > 8)) + return false; + + int e[2][3]; + bool success; + + if (log_blk.m_color_endpoint_modes[0] == 7) + { + if (log_blk.m_endpoint_ise_range != 20) + return false; + + success = decode_mode7_to_qlog12(log_blk.m_endpoints, e, nullptr, log_blk.m_endpoint_ise_range); + } + else if (log_blk.m_color_endpoint_modes[0] == 11) + { + // UASTC HDR checks + if (log_blk.m_weight_ise_range <= 7) + { + if (log_blk.m_endpoint_ise_range != 20) + return false; + } + else if (log_blk.m_endpoint_ise_range != 19) + { + return false; + } + + success = decode_mode11_to_qlog12(log_blk.m_endpoints, e, log_blk.m_endpoint_ise_range); + } + else + { + return false; + } + + if (!success) + return false; + + // Transform endpoints to half float + half_float h_e[3][2] = + { + { qlog_to_half(e[0][0], 12), qlog_to_half(e[1][0], 12) }, + { qlog_to_half(e[0][1], 12), qlog_to_half(e[1][1], 12) }, + { qlog_to_half(e[0][2], 12), qlog_to_half(e[1][2], 12) } + }; + + // Sanity check for NaN/Inf + for (uint32_t i = 0; i < 2; i++) + if (is_half_inf_or_nan(h_e[0][i]) || is_half_inf_or_nan(h_e[1][i]) || is_half_inf_or_nan(h_e[2][i])) + return false; + + // Transcode to bc6h + if (!transcode_bc6h_1subset(h_e, log_blk, dst_blk)) + return false; + } + else if (log_blk.m_num_partitions == 2) + { + // Handle 2 partition (or subset) + int common_bc7_pat_index = g_astc_partition_id_to_common_bc7_pat_index[log_blk.m_partition_id]; + if (common_bc7_pat_index < 0) + return false; + + 
assert(common_bc7_pat_index < (int)basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); + + if (!transcode_bc6h_2subsets(common_bc7_pat_index, log_blk, dst_blk)) + return false; + } + else + { + // Only supports 1 or 2 partitions (or subsets) + return false; + } + + return true; + } + + // ASTC 6x6 support + namespace astc_6x6_hdr + { + const block_mode_desc g_block_mode_descs[TOTAL_BLOCK_MODE_DECS] = + { + // ------ mode 11 + { false, 11, 1, 6, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 11, 1, 6, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + + { false, 11, 1, 6, 5, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 11, 1, 5, 6, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + + { false, 11, 1, 6, 4, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 11, 1, 4, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + + { false, 11, 1, 6, 3, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 11, 1, 3, 6, 
astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + + { false, 11, 1, 5, 5, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 11, 1, 4, 4, astc_helpers::BISE_192_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_192_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + + { false, 11, 1, 3, 3, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + + // ------ mode 7 + { false, 7, 1, 6, 6, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + + { false, 7, 1, 6, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 7, 1, 6, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + + { false, 7, 1, 5, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_6_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_6_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 7, 1, 6, 5, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_6_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_6_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + + { false, 7, 1, 3, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_20_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_20_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 
7, 1, 6, 3, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_20_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_20_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
+
+			// ------ mode 11, 2 subset
+			{ false, 11, 2, 6, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
+
+			// 6x3/3x6
+			{ false, 11, 2, 6, 3, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
+			{ false, 11, 2, 3, 6, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
+
+			// 3x6/6x3
+			{ false, 11, 2, 3, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
+			{ false, 11, 2, 6, 3, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
+
+			// 4x6/6x4
+			{ false, 11, 2, 4, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },
+			{ false, 11, 2, 6, 4, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },
+
+			// ------ mode 7, 2 subset
+
+			// 6x5/5x6
+			{ false, 7, 2, 5, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
+			{ false, 7, 2, 6, 5, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
+
+			// 6x4/4x6 mode 7
+			{ false, 7, 2, 4, 6, astc_helpers::BISE_80_LEVELS,
astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 7, 2, 6, 4, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + + // 6x6 + { false, 7, 2, 6, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + + // 6x6 + { false, 7, 2, 6, 6, astc_helpers::BISE_192_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_192_LEVELS, astc_helpers::BISE_2_LEVELS, 0, 0 }, + + // 5x5 + { false, 7, 2, 5, 5, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, 0, 0 }, + + // 6x3/3x6 mode 7 + { false, 7, 2, 3, 6, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_8_LEVELS, 0, 0 }, + { false, 7, 2, 6, 3, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_8_LEVELS, 0, 0 }, + + // 6x3/3x6 mode 7 + { false, 7, 2, 3, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_6_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_6_LEVELS, 0, 0 }, + { false, 7, 2, 6, 3, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_6_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_6_LEVELS, 0, 0 }, + + // ------ dual plane + + // 3x6 + { true, 11, 1, 3, 6, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + { true, 11, 1, 3, 6, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 1 }, + { true, 11, 1, 3, 6, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, 
astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 2 }, + + // 6x3 + { true, 11, 1, 6, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + { true, 11, 1, 6, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 1 }, + { true, 11, 1, 6, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 2 }, + + // 3x3 + { true, 11, 1, 3, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + { true, 11, 1, 3, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 1 }, + { true, 11, 1, 3, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 2 }, + + // 4x4 + { true, 11, 1, 4, 4, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + { true, 11, 1, 4, 4, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL2, 1 }, + { true, 11, 1, 4, 4, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL2, 2 }, + + // 5x5 + { true, 11, 1, 5, 5, astc_helpers::BISE_256_LEVELS, 
astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + { true, 11, 1, 5, 5, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 1 }, + { true, 11, 1, 5, 5, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 2 }, + + // ------ 2x2 modes for RDO + // note 2x2 modes will be upsampled to 4x4 during transcoding (the min # of weight bits is 7 in ASTC) + { true, 11, 1, 2, 2, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + { true, 11, 1, 2, 2, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 1 }, + { true, 11, 1, 2, 2, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 2 }, + { false, 11, 1, 2, 2, astc_helpers::BISE_128_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 }, + + // ------ 3 subsets + + // 6x6 + { false, 7, 3, 6, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + + // 5x5 + { false, 7, 3, 5, 5, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + + // 4x4 + { false, 7, 3, 4, 4, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_64_LEVELS, 
astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 7, 3, 4, 4, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 7, 3, 4, 4, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_5_LEVELS, 0, 0 }, + + // 3x3 + { false, 7, 3, 3, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, 0, 0 }, + + // 6x4 + { false, 7, 3, 6, 4, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 7, 3, 4, 6, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + + // 6x4 + { false, 7, 3, 6, 4, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 }, + { false, 7, 3, 4, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 }, + + // 6x5 + { false, 7, 3, 6, 5, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 7, 3, 5, 6, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + + // 6x3 + { false, 7, 3, 6, 3, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 }, + { false, 7, 3, 3, 6, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 }, + + // 6x3 + { false, 7, 3, 6, 3, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_32_LEVELS, 
astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + { false, 7, 3, 3, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, + + // 6x3 + { false, 7, 3, 6, 3, astc_helpers::BISE_24_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_24_LEVELS, astc_helpers::BISE_5_LEVELS, 0, 0 }, + { false, 7, 3, 3, 6, astc_helpers::BISE_24_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_24_LEVELS, astc_helpers::BISE_5_LEVELS, 0, 0 }, + + // 5x4 + { false, 7, 3, 5, 4, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 }, + { false, 7, 3, 4, 5, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 }, + }; + + const reuse_xy_delta g_reuse_xy_deltas[NUM_REUSE_XY_DELTAS] = + { + { -1, 0 }, { -2, 0 }, { -3, 0 }, { -4, 0 }, + { 3, -1 }, { 2, -1 }, { 1, -1 }, { 0, -1 }, { -1, -1 }, { -2, -1 }, { -3, -1 }, { -4, -1 }, + { 3, -2 }, { 2, -2 }, { 1, -2 }, { 0, -2 }, { -1, -2 }, { -2, -2 }, { -3, -2 }, { -4, -2 }, + { 3, -3 }, { 2, -3 }, { 1, -3 }, { 0, -3 }, { -1, -3 }, { -2, -3 }, { -3, -3 }, { -4, -3 }, + { 3, -4 }, { 2, -4 }, { 1, -4 }, { 0, -4 } + }; + + //-------------------------------------------------------------------------------------------------------------------------- + + void requantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_ise_vals, uint32_t to_ise_range) + { + if (from_ise_range == to_ise_range) + { + if (pDst_ise_vals != pSrc_ise_vals) + memcpy(pDst_ise_vals, pSrc_ise_vals, n); + return; + } + + const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(from_ise_range).m_ISE_to_val; + const auto& quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(to_ise_range).m_val_to_ise; + + for (uint32_t i = 0; i < n; i++) + pDst_ise_vals[i] = 
quant_tab[dequant_tab[pSrc_ise_vals[i]]]; + } + + //-------------------------------------------------------------------------------------------------------------------------- + + inline int get_bit( + int src_val, int src_bit) + { + assert(src_bit >= 0 && src_bit <= 31); + int bit = (src_val >> src_bit) & 1; + return bit; + } + + inline void pack_bit( + int& dst, int dst_bit, + int src_val, int src_bit = 0) + { + assert(dst_bit >= 0 && dst_bit <= 31); + int bit = get_bit(src_val, src_bit); + dst |= (bit << dst_bit); + } + + // Valid for weight ISE ranges 12-192 levels. Preserves upper 2 or 3 bits post-quantization. + static uint8_t g_quantize_tables_preserve2[astc_helpers::TOTAL_ISE_RANGES - 1][256]; + static uint8_t g_quantize_tables_preserve3[astc_helpers::TOTAL_ISE_RANGES - 1][256]; + + const uint32_t g_part2_unique_index_to_seed[NUM_UNIQUE_PARTITIONS2] = + { + 86, 959, 936, 476, 1007, 672, 447, 423, 488, 422, 273, 65, 267, 786, 585, 195, 108, 731, 878, 812, 264, 125, 868, 581, 258, 390, 549, 872, 661, 352, 645, 543, 988, + 906, 903, 616, 482, 529, 3, 286, 272, 303, 151, 504, 498, 260, 79, 66, 608, 769, 305, 610, 1014, 967, 835, 789, 7, 951, 691, 15, 763, 976, 438, 314, 601, 673, 177, + 252, 615, 436, 220, 899, 623, 433, 674, 278, 797, 107, 847, 114, 470, 760, 821, 490, 329, 945, 387, 471, 225, 172, 83, 418, 966, 439, 316, 247, 43, 343, 625, 798, + 1, 61, 73, 307, 136, 474, 42, 664, 1013, 249, 389, 227, 374, 121, 48, 538, 226, 309, 554, 802, 834, 335, 495, 10, 955, 461, 293, 508, 153, 101, 63, 139, 31, 687, + 132, 174, 324, 545, 289, 39, 178, 594, 963, 854, 222, 323, 998, 964, 598, 475, 720, 1019, 983, 91, 703, 614, 394, 612, 281, 207, 930, 758, 586, 128, 517, 426, 306, + 168, 713, 36, 458, 876, 368, 780, 5, 9, 214, 109, 553, 726, 175, 103, 753, 684, 44, 665, 53, 500, 367, 611, 119, 732, 639, 326, 203, 156, 686, 910, 255, 62, 392, 591, + 112, 88, 213, 19, 1022, 478, 90, 486, 799, 702, 730, 414, 99, 1008, 142, 886, 373, 216, 69, 393, 299, 648, 415, 822, 912, 110, 
567, 550, 693, 2, 138, 59, 271, 562, 295, + 714, 719, 199, 893, 831, 1006, 662, 235, 262, 78, 51, 902, 298, 190, 169, 583, 347, 890, 958, 909, 49, 987, 696, 633, 480, 50, 764, 826, 1023, 1016, 437, 891, 774, 257, + 724, 791, 526, 593, 690, 638, 858, 895, 794, 995, 130, 87, 877, 819, 318, 649, 376, 211, 284, 937, 370, 688, 229, 994, 115, 842, 60, 521, 95, 694, 804, 146, 754, 487, 55, + 17, 770, 450, 223, 4, 137, 911, 236, 683, 523, 47, 181, 24, 270, 602, 736, 11, 355, 148, 351, 762, 1009, 16, 210, 619, 805, 874, 807, 887, 403, 999, 810, 27, 402, 551, 135, + 778, 33, 409, 993, 71, 363, 159, 183, 77, 596, 670, 380, 968, 811, 404, 348, 539, 158, 578, 196, 621, 68, 530, 193, 100, 167, 919, 353, 366, 327, 643, 948, 518, 756, 801, 558, + 28, 705, 116, 94, 898, 453, 622, 647, 231, 445, 652, 230, 191, 277, 292, 254, 198, 766, 386, 232, 29, 70, 942, 740, 291, 607, 411, 496, 839, 8, 675, 319, 742, 21, 547, 627, 716, + 663, 23, 914, 631, 595, 499, 685, 950, 510, 54, 587, 432, 45, 646, 25, 122, 947, 171, 862, 441, 808, 722, 14, 74, 658, 129, 266, 1001, 534, 395, 527, 250, 206, 237, 67, 897, 634, + 572, 569, 533, 37, 341, 89, 463, 419, 75, 134, 283, 943, 519, 362, 144, 681, 407, 954, 131, 455, 934, 46, 513, 339, 194, 361, 606, 852, 546, 655, 1015, 147, 506, 240, 56, 836, 76, + 98, 600, 430, 388, 980, 695, 817, 279, 58, 215, 149, 170, 531, 870, 18, 727, 154, 26, 938, 929, 302, 697, 452, 218, 700, 524, 828, 751, 869, 217, 440, 354 + }; + + const uint32_t g_part3_unique_index_to_seed[NUM_UNIQUE_PARTITIONS3] = + { + 0, 8, 11, 14, 15, 17, 18, 19, 26, 31, 34, 35, 36, 38, 44, 47, 48, 49, 51, 56, + 59, 61, 70, 74, 76, 82, 88, 90, 96, 100, 103, 104, 108, 110, 111, 117, 122, 123, + 126, 127, 132, 133, 135, 139, 147, 150, 151, 152, 156, 157, 163, 166, 168, 171, + 175, 176, 179, 181, 182, 183, 186, 189, 192, 199, 203, 205, 207, 210, 214, 216, + 222, 247, 249, 250, 252, 254, 260, 261, 262, 263, 266, 272, 273, 275, 276, 288, + 291, 292, 293, 294, 297, 302, 309, 310, 313, 314, 318, 327, 328, 
331, 335, 337, + 346, 356, 357, 358, 363, 365, 368, 378, 381, 384, 386, 390, 391, 392, 396, 397, + 398, 399, 401, 410, 411, 419, 427, 430, 431, 437, 439, 440, 451, 455, 457, 458, + 459, 460, 462, 468, 470, 471, 472, 474, 475, 477, 479, 482, 483, 488, 493, 495, + 496, 502, 503, 504, 507, 510, 511, 512, 515, 516, 518, 519, 522, 523, 525, 526, + 527, 538, 543, 544, 546, 547, 549, 550, 552, 553, 554, 562, 570, 578, 579, 581, + 582, 588, 589, 590, 593, 595, 600, 606, 611, 613, 618, 623, 625, 632, 637, 638, + 645, 646, 650, 651, 658, 659, 662, 666, 667, 669, 670, 678, 679, 685, 686, 687, + 688, 691, 694, 696, 698, 699, 700, 701, 703, 704, 707, 713, 714, 715, 717, 719, + 722, 724, 727, 730, 731, 734, 738, 739, 743, 747, 748, 750, 751, 753, 758, 760, + 764, 766, 769, 775, 776, 783, 784, 785, 787, 791, 793, 798, 799, 802, 804, 805, + 806, 807, 808, 809, 810, 813, 822, 823, 825, 831, 835, 837, 838, 839, 840, 842, + 845, 846, 848, 853, 854, 858, 859, 860, 866, 874, 882, 884, 887, 888, 892, 894, + 898, 902, 907, 914, 915, 918, 919, 922, 923, 925, 927, 931, 932, 937, 938, 940, + 943, 944, 945, 953, 955, 958, 959, 963, 966, 971, 974, 979, 990, 991, 998, 999, + 1007, 1010, 1011, 1012, 1015, 1020, 1023 + }; + + static void init_quantize_tables() + { + for (uint32_t ise_range = astc_helpers::BISE_192_LEVELS; ise_range >= astc_helpers::BISE_12_LEVELS; ise_range--) + { + const uint32_t num_levels = astc_helpers::get_ise_levels(ise_range); + const auto& ise_to_val_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_ISE_to_val; + + for (uint32_t desired_val = 0; desired_val < 256; desired_val++) + { + { + uint32_t best_err = UINT32_MAX; + int best_ise_val = -1; + + for (uint32_t ise_val = 0; ise_val < num_levels; ise_val++) + { + const uint32_t quant_val = ise_to_val_tab[ise_val]; + + if ((quant_val & 0b11000000) != (desired_val & 0b11000000)) + continue; + + uint32_t err = basisu::squarei((int)quant_val - (int)desired_val); + if (err < best_err) + { + best_err = err; + 
best_ise_val = ise_val; + } + + } // ise_val + + assert(best_ise_val != -1); + + g_quantize_tables_preserve2[ise_range][desired_val] = (uint8_t)best_ise_val; + } + + { + uint32_t best_err = UINT32_MAX; + int best_ise_val = -1; + + for (uint32_t ise_val = 0; ise_val < num_levels; ise_val++) + { + const uint32_t quant_val = ise_to_val_tab[ise_val]; + + if ((quant_val & 0b11100000) != (desired_val & 0b11100000)) + continue; + + uint32_t err = basisu::squarei((int)quant_val - (int)desired_val); + if (err < best_err) + { + best_err = err; + best_ise_val = ise_val; + } + + } // ise_val + + assert(best_ise_val != -1); + + g_quantize_tables_preserve3[ise_range][desired_val] = (uint8_t)best_ise_val; + } + + } // desired_val + +#if 0 + for (uint32_t i = 0; i < 256; i++) + { + if (g_quantize_tables_preserve2[ise_range][i] != astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_val_to_ise[i]) + { + fmt_printf("P2, Range: {}, {} vs. {}\n", ise_range, g_quantize_tables_preserve2[ise_range][i], astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_val_to_ise[i]); + } + + if (g_quantize_tables_preserve3[ise_range][i] != astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_val_to_ise[i]) + { + fmt_printf("P3, Range: {}, {} vs. 
{}\n", ise_range, g_quantize_tables_preserve3[ise_range][i], astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_val_to_ise[i]); + } + } +#endif + + } // ise_range + } + + void requantize_ise_endpoints(uint32_t cem, uint32_t src_ise_endpoint_range, const uint8_t* pSrc_endpoints, uint32_t dst_ise_endpoint_range, uint8_t* pDst_endpoints) + { + assert(pSrc_endpoints != pDst_endpoints); + assert((src_ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (src_ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert((dst_ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (dst_ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + + // must be >=12 ISE levels for g_quantize_tables_preserve2 etc. + assert(dst_ise_endpoint_range >= astc_helpers::BISE_12_LEVELS); + + const uint32_t n = (cem == 11) ? basist::NUM_MODE11_ENDPOINTS : basist::NUM_MODE7_ENDPOINTS; + + if (src_ise_endpoint_range == dst_ise_endpoint_range) + { + memcpy(pDst_endpoints, pSrc_endpoints, n); + return; + } + + uint8_t temp_endpoints[basist::NUM_MODE11_ENDPOINTS]; + if (src_ise_endpoint_range != astc_helpers::BISE_256_LEVELS) + { + assert(n <= basist::NUM_MODE11_ENDPOINTS); + + const auto& endpoint_dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(src_ise_endpoint_range).m_ISE_to_val; + + for (uint32_t i = 0; i < n; i++) + temp_endpoints[i] = endpoint_dequant_tab[pSrc_endpoints[i]]; + + pSrc_endpoints = temp_endpoints; + } + + if (dst_ise_endpoint_range == astc_helpers::BISE_256_LEVELS) + { + memcpy(pDst_endpoints, pSrc_endpoints, n); + return; + } + + const auto& quant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(dst_ise_endpoint_range).m_val_to_ise; + + const auto& dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(dst_ise_endpoint_range).m_ISE_to_val; + BASISU_NOTE_UNUSED(dequant_tab); + +#if 1 + // A smarter value quantization that preserves the key upper bits. 
(If these bits get corrupted, the entire meaning of the encoding can get lost.) + if (cem == 11) + { + assert(n == 6); + + int maj_comp = 0; + pack_bit(maj_comp, 0, pSrc_endpoints[4], 7); + pack_bit(maj_comp, 1, pSrc_endpoints[5], 7); + + if (maj_comp == 3) + { + // Direct + pDst_endpoints[0] = quant_tab[pSrc_endpoints[0]]; + pDst_endpoints[1] = quant_tab[pSrc_endpoints[1]]; + pDst_endpoints[2] = quant_tab[pSrc_endpoints[2]]; + pDst_endpoints[3] = quant_tab[pSrc_endpoints[3]]; + // No need for preserve1 tables, we can use the regular quantization tables because they preserve the MSB. + pDst_endpoints[4] = quant_tab[pSrc_endpoints[4]]; + pDst_endpoints[5] = quant_tab[pSrc_endpoints[5]]; + + assert((dequant_tab[pDst_endpoints[4]] & 128) == (pSrc_endpoints[4] & 128)); + assert((dequant_tab[pDst_endpoints[5]] & 128) == (pSrc_endpoints[5] & 128)); + } + else + { + pDst_endpoints[0] = quant_tab[pSrc_endpoints[0]]; + pDst_endpoints[1] = g_quantize_tables_preserve2[dst_ise_endpoint_range][pSrc_endpoints[1]]; + pDst_endpoints[2] = g_quantize_tables_preserve2[dst_ise_endpoint_range][pSrc_endpoints[2]]; + pDst_endpoints[3] = g_quantize_tables_preserve2[dst_ise_endpoint_range][pSrc_endpoints[3]]; + pDst_endpoints[4] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[4]]; + pDst_endpoints[5] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[5]]; + + assert((dequant_tab[pDst_endpoints[1]] & 0b11000000) == (pSrc_endpoints[1] & 0b11000000)); + assert((dequant_tab[pDst_endpoints[2]] & 0b11000000) == (pSrc_endpoints[2] & 0b11000000)); + assert((dequant_tab[pDst_endpoints[3]] & 0b11000000) == (pSrc_endpoints[3] & 0b11000000)); + assert((dequant_tab[pDst_endpoints[4]] & 0b11100000) == (pSrc_endpoints[4] & 0b11100000)); + assert((dequant_tab[pDst_endpoints[5]] & 0b11100000) == (pSrc_endpoints[5] & 0b11100000)); + } + } + else if (cem == 7) + { + assert(n == 4); + + pDst_endpoints[0] = 
g_quantize_tables_preserve2[dst_ise_endpoint_range][pSrc_endpoints[0]]; + pDst_endpoints[1] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[1]]; + pDst_endpoints[2] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[2]]; + pDst_endpoints[3] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[3]]; + + assert((dequant_tab[pDst_endpoints[0]] & 0b11000000) == (pSrc_endpoints[0] & 0b11000000)); + assert((dequant_tab[pDst_endpoints[1]] & 0b11100000) == (pSrc_endpoints[1] & 0b11100000)); + assert((dequant_tab[pDst_endpoints[2]] & 0b11100000) == (pSrc_endpoints[2] & 0b11100000)); + assert((dequant_tab[pDst_endpoints[3]] & 0b11100000) == (pSrc_endpoints[3] & 0b11100000)); + } + else + { + assert(0); + } +#else + for (uint32_t i = 0; i < n; i++) + { + uint32_t v = pSrc_endpoints[i]; + assert(v <= 255); + + pDst_endpoints[i] = quant_tab[v]; + } +#endif + } + + void copy_weight_grid(bool dual_plane, uint32_t grid_x, uint32_t grid_y, const uint8_t* transcode_weights, astc_helpers::log_astc_block& decomp_blk) + { + assert(decomp_blk.m_weight_ise_range >= astc_helpers::BISE_2_LEVELS); + assert(decomp_blk.m_weight_ise_range <= astc_helpers::BISE_32_LEVELS); + + // Special case for 2x2 which isn't typically valid ASTC (too few weight bits without dual plane). Upsample to 4x4. 
+ if ((!dual_plane) && (grid_x == 2) && (grid_y == 2)) + { + decomp_blk.m_grid_width = 4; + decomp_blk.m_grid_height = 4; + + //const uint32_t total_weight_levels = astc_helpers::bise_levels(decomp_blk.m_weight_ise_range); + const auto& dequant_weight = astc_helpers::g_dequant_tables.get_weight_tab(decomp_blk.m_weight_ise_range).m_ISE_to_val; + const auto& quant_weight = astc_helpers::g_dequant_tables.get_weight_tab(decomp_blk.m_weight_ise_range).m_val_to_ise; + + astc_helpers::weighted_sample weights[16]; + + compute_upsample_weights(4, 4, 2, 2, weights); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const astc_helpers::weighted_sample& sample = weights[x + y * 4]; + + uint32_t total_weight = 8; + + for (uint32_t yo = 0; yo < 2; yo++) + { + for (uint32_t xo = 0; xo < 2; xo++) + { + if (!sample.m_weights[yo][xo]) + continue; + + total_weight += dequant_weight[transcode_weights[basisu::in_bounds((x + xo) + (y + yo) * grid_x, 0, grid_x * grid_y)]] * sample.m_weights[yo][xo]; + } // x + } // y + + total_weight >>= 4; + + assert(total_weight <= 64); + + decomp_blk.m_weights[x + y * 4] = quant_weight[total_weight]; + } + } + } + else + { + const uint32_t num_planes = dual_plane ? 
2 : 1; + + decomp_blk.m_grid_width = (uint8_t)grid_x; + decomp_blk.m_grid_height = (uint8_t)grid_y; + memcpy(decomp_blk.m_weights, transcode_weights, grid_x * grid_y * num_planes); + } + } + + // cur_y is the current destination row + // prev_y is the row we want to access + static inline int calc_row_index(int cur_y, int prev_y, int cur_row_index) + { + assert((cur_y >= 0) && (prev_y >= 0)); + assert((cur_row_index >= 0) && (cur_row_index < REUSE_MAX_BUFFER_ROWS)); + + int delta_y = prev_y - cur_y; + assert((delta_y > -REUSE_MAX_BUFFER_ROWS) && (delta_y <= 0)); + + cur_row_index += delta_y; + if (cur_row_index < 0) + cur_row_index += REUSE_MAX_BUFFER_ROWS; + + assert((cur_row_index >= 0) && (cur_row_index < REUSE_MAX_BUFFER_ROWS)); + + return cur_row_index; + } + + bool decode_values(basist::bitwise_decoder& decoder, uint32_t total_values, uint32_t ise_range, uint8_t* pValues) + { + assert(ise_range <= astc_helpers::BISE_256_LEVELS); + + const uint32_t ep_bits = astc_helpers::g_ise_range_table[ise_range][0]; + const uint32_t ep_trits = astc_helpers::g_ise_range_table[ise_range][1]; + const uint32_t ep_quints = astc_helpers::g_ise_range_table[ise_range][2]; + + uint32_t total_tqs = 0; + uint32_t bundle_size = 0, mul = 0; + if (ep_trits) + { + total_tqs = (total_values + 4) / 5; + bundle_size = 5; + mul = 3; + } + else if (ep_quints) + { + total_tqs = (total_values + 2) / 3; + bundle_size = 3; + mul = 5; + } + + const uint32_t MAX_TQ_VALUES = 32; + assert(total_tqs <= MAX_TQ_VALUES); + uint32_t tq_values[MAX_TQ_VALUES]; + + for (uint32_t i = 0; i < total_tqs; i++) + { + uint32_t num_bits = ep_trits ? 
8 : 7; + + if (i == (total_tqs - 1)) + { + uint32_t num_remaining = total_values - (total_tqs - 1) * bundle_size; + if (ep_trits) + { + switch (num_remaining) + { + case 1: num_bits = 2; break; + case 2: num_bits = 4; break; + case 3: num_bits = 5; break; + case 4: num_bits = 7; break; + default: break; + } + } + else if (ep_quints) + { + switch (num_remaining) + { + case 1: num_bits = 3; break; + case 2: num_bits = 5; break; + default: break; + } + } + } + + tq_values[i] = (uint32_t)decoder.get_bits(num_bits); + } // i + + uint32_t accum = 0; + uint32_t accum_remaining = 0; + uint32_t next_tq_index = 0; + + for (uint32_t i = 0; i < total_values; i++) + { + uint32_t value = (uint32_t)decoder.get_bits(ep_bits); + + if (total_tqs) + { + if (!accum_remaining) + { + assert(next_tq_index < total_tqs); + accum = tq_values[next_tq_index++]; + accum_remaining = bundle_size; + } + + uint32_t v = accum % mul; + accum /= mul; + accum_remaining--; + + value |= (v << ep_bits); + } + + pValues[i] = (uint8_t)value; + } + + return true; + } + + static inline uint32_t get_num_endpoint_vals(uint32_t cem) + { + assert((cem == 7) || (cem == 11)); + return (cem == 11) ? 
basist::NUM_MODE11_ENDPOINTS : basist::NUM_MODE7_ENDPOINTS; + } + + const uint32_t g_bc6h_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + +#if 0 + static BASISU_FORCE_INLINE int pos_lrintf(float x) + { + assert(x >= 0.0f); + return (int)(x + .5f); + } + + static BASISU_FORCE_INLINE basist::half_float fast_float_to_half_non_neg_no_nan_inf(float val) + { + union { float f; int32_t i; uint32_t u; } fi = { val }; + const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF; + int e = 0, m = 0; + + assert(((fi.i >> 31) == 0) && (flt_e != 0xFF)); + + // not zero or denormal + if (flt_e != 0) + { + int new_exp = flt_e - 127; + if (new_exp > 15) + e = 31; + else if (new_exp < -14) + m = pos_lrintf((1 << 24) * fabsf(fi.f)); + else + { + e = new_exp + 15; + m = pos_lrintf(flt_m * (1.0f / ((float)(1 << 13)))); + } + } + + assert((0 <= m) && (m <= 1024)); + if (m == 1024) + { + e++; + m = 0; + } + + assert((e >= 0) && (e <= 31)); + assert((m >= 0) && (m <= 1023)); + + basist::half_float result = (basist::half_float)((e << 10) | m); + return result; + } +#endif + + union fu32 + { + uint32_t u; + float f; + }; + + static BASISU_FORCE_INLINE basist::half_float fast_float_to_half_no_clamp_neg_nan_or_inf(float f) + { + assert(!isnan(f) && !isinf(f)); + assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT)); + + // Sutract 112 from the exponent, to change the bias from 127 to 15. + static const fu32 g_f_to_h{ 0x7800000 }; + + fu32 fu; + + fu.f = f * g_f_to_h.f; + + uint32_t h = (basist::half_float)((fu.u >> (23 - 10)) & 0x7FFF); + + // round to even + uint32_t mant = fu.u & 8191; // examine lowest 13 bits + h += (mant > 4096); + + if (h > basist::MAX_HALF_FLOAT_AS_INT_BITS) + h = basist::MAX_HALF_FLOAT_AS_INT_BITS; + + return (basist::half_float)h; + } + + static BASISU_FORCE_INLINE float ftoh(float f) + { + //float res = (float)fast_float_to_half_non_neg_no_nan_inf(fabsf(f)) * ((f < 0.0f) ? 
-1.0f : 1.0f); + float res = (float)fast_float_to_half_no_clamp_neg_nan_or_inf(fabsf(f)) * ((f < 0.0f) ? -1.0f : 1.0f); + return res; + } + + // Supports positive and denormals only. No NaN or Inf. + static BASISU_FORCE_INLINE float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h) + { + assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h)); + + // add 112 to the exponent (112+half float's exp bias of 15=float32's bias of 127) + static const fu32 K = { 0x77800000 }; + + fu32 o; + o.u = h << 13; + o.f *= K.f; + + return o.f; + } + + static BASISU_FORCE_INLINE float inv_sqrt(float v) + { + union + { + float flt; + uint32_t ui; + } un; + + un.flt = v; + un.ui = 0x5F1FFFF9UL - (un.ui >> 1); + + return 0.703952253f * un.flt * (2.38924456f - v * (un.flt * un.flt)); + } + + static const int FAST_BC6H_STD_DEV_THRESH = 256; + static const int FAST_BC6H_COMPLEX_STD_DEV_THRESH = 512; + static const int FAST_BC6H_VERY_COMPLEX_STD_DEV_THRESH = 2048; + + static void assign_weights_simple_4( + const basist::half_float* pPixels, + uint8_t* pWeights, + int min_r, int min_g, int min_b, + int max_r, int max_g, int max_b, int64_t block_max_var) + { + BASISU_NOTE_UNUSED(block_max_var); + + float fmin_r = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)min_r); + float fmin_g = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)min_g); + float fmin_b = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)min_b); + + float fmax_r = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)max_r); + float fmax_g = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)max_g); + float fmax_b = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)max_b); + + float fdir_r = fmax_r - fmin_r; + float fdir_g = fmax_g - fmin_g; + float fdir_b = fmax_b - fmin_b; + + float l = inv_sqrt(fdir_r * fdir_r + fdir_g * fdir_g + fdir_b * fdir_b); + if (l != 0.0f) + { + fdir_r *= l; + fdir_g *= l; + fdir_b *= l; + } + + float lr = ftoh(fmin_r * fdir_r + 
fmin_g * fdir_g + fmin_b * fdir_b); + float hr = ftoh(fmax_r * fdir_r + fmax_g * fdir_g + fmax_b * fdir_b); + + float frr = (hr == lr) ? 0.0f : (14.93333f / (float)(hr - lr)); + + lr = (-lr * frr) + 0.53333f; + for (uint32_t i = 0; i < 16; i++) + { + const float r = fast_half_to_float_pos_not_inf_or_nan(pPixels[i * 3 + 0]); + const float g = fast_half_to_float_pos_not_inf_or_nan(pPixels[i * 3 + 1]); + const float b = fast_half_to_float_pos_not_inf_or_nan(pPixels[i * 3 + 2]); + const float w = ftoh(r * fdir_r + g * fdir_g + b * fdir_b); + + pWeights[i] = (uint8_t)basisu::clamp((int)(w * frr + lr), 0, 15); + } + } + + static double assign_weights_4( + const vec3F* pFloat_pixels, const float* pPixel_scales, + uint8_t* pWeights, + int min_r, int min_g, int min_b, + int max_r, int max_g, int max_b, int64_t block_max_var, bool try_2subsets_flag, + const fast_bc6h_params& params) + { + float cr[16], cg[16], cb[16]; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t w = g_bc6h_weights4[i]; + + cr[i] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_r * (64 - w) + max_r * w + 32) >> 6)); + cg[i] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_g * (64 - w) + max_g * w + 32) >> 6)); + cb[i] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_b * (64 - w) + max_b * w + 32) >> 6)); + } + + double total_err = 0.0f; + + if (params.m_brute_force_weight4_assignment) + { + for (uint32_t i = 0; i < 16; i++) + { + const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2]; + + float best_err = basisu::squaref(cr[0] - qr) + basisu::squaref(cg[0] - qg) + basisu::squaref(cb[0] - qb); + uint32_t best_idx = 0; + + for (uint32_t j = 1; j < 16; j++) + { + float rd = cr[j] - qr, gd = cg[j] - qg, bd = cb[j] - qb; + float e = rd * rd + gd * gd + bd * bd; + + if (e < best_err) + { + best_err = e; + best_idx = j; + } + } + + pWeights[i] = (uint8_t)best_idx; + + total_err += best_err * pPixel_scales[i]; + } 
+ } + else + { + const float dir_r = cr[15] - cr[0], dir_g = cg[15] - cg[0], dir_b = cb[15] - cb[0]; + + float dots[16]; + for (uint32_t i = 0; i < 16; i++) + dots[i] = cr[i] * dir_r + cg[i] * dir_g + cb[i] * dir_b; + + float mid_dots[15]; + bool monotonically_increasing = true; + for (uint32_t i = 0; i < 15; i++) + { + mid_dots[i] = (dots[i] + dots[i + 1]) * .5f; + + if (dots[i] > dots[i + 1]) + monotonically_increasing = false; + } + + const bool check_more_colors = block_max_var > (FAST_BC6H_VERY_COMPLEX_STD_DEV_THRESH * FAST_BC6H_VERY_COMPLEX_STD_DEV_THRESH * 16); // watch prec + + if (!monotonically_increasing) + { + // Seems very rare, not worth optimizing the other cases + for (uint32_t i = 0; i < 16; i++) + { + const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2]; + + float d = qr * dir_r + qg * dir_g + qb * dir_b; + + float best_e = fabsf(d - dots[0]); + int best_idx = 0; + + for (int j = 1; j < 16; j++) + { + float e = fabsf(d - dots[j]); + if (e < best_e) + { + best_e = e; + best_idx = j; + } + } + + assert((best_idx >= 0) && (best_idx <= 15)); + + pWeights[i] = (uint8_t)best_idx; + + float err = basisu::squaref(qr - cr[best_idx]) + basisu::squaref(qg - cg[best_idx]) + basisu::squaref(qb - cb[best_idx]); + total_err += err * pPixel_scales[i]; + } + } + else if ((!try_2subsets_flag) || (!check_more_colors)) + { + for (uint32_t i = 0; i < 16; i++) + { + const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2]; + + uint32_t best_idx = 0; + + float d = qr * dir_r + qg * dir_g + qb * dir_b; + + int low = 0; + + int mid = low + 7; + if (d >= mid_dots[mid]) low = mid + 1; + mid = low + 3; + if (d >= mid_dots[mid]) low = mid + 1; + mid = low + 1; + if (d >= mid_dots[mid]) low = mid + 1; + mid = low; + if (d >= mid_dots[mid]) low = mid + 1; + + best_idx = low; + assert((best_idx >= 0) && (best_idx <= 15)); + + pWeights[i] = (uint8_t)best_idx; + + // Giesen's MRSSE (Mean Relative Sum 
of Squared Errors). + // Our ASTC HDR encoder uses slightly slower approx. MSLE, and it's too late/risky to eval the difference vs. MRSSE on the larger ASTC HDR blocks. + float err = basisu::squaref(qr - cr[best_idx]) + basisu::squaref(qg - cg[best_idx]) + basisu::squaref(qb - cb[best_idx]); + total_err += err * pPixel_scales[i]; + } + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2]; + + uint32_t best_idx = 0; + + float d = qr * dir_r + qg * dir_g + qb * dir_b; + + int low = 0; + + int mid = low + 7; + if (d >= mid_dots[mid]) low = mid + 1; + mid = low + 3; + if (d >= mid_dots[mid]) low = mid + 1; + mid = low + 1; + if (d >= mid_dots[mid]) low = mid + 1; + mid = low; + if (d >= mid_dots[mid]) low = mid + 1; + + best_idx = low; + assert((best_idx >= 0) && (best_idx <= 15)); + + float err = basisu::squaref(qr - cr[best_idx]) + basisu::squaref(qg - cg[best_idx]) + basisu::squaref(qb - cb[best_idx]); + + { + int alt_idx = best_idx + 1; + if (alt_idx > 15) + alt_idx = 13; + + float alt_err = basisu::squaref(qr - cr[alt_idx]) + basisu::squaref(qg - cg[alt_idx]) + basisu::squaref(qb - cb[alt_idx]); + if (alt_err < err) + { + err = alt_err; + best_idx = alt_idx; + } + } + + { + int alt_idx2 = best_idx - 1; + if (alt_idx2 < 0) + alt_idx2 = 2; + float alt_err2 = basisu::squaref(qr - cr[alt_idx2]) + basisu::squaref(qg - cg[alt_idx2]) + basisu::squaref(qb - cb[alt_idx2]); + if (alt_err2 < err) + { + err = alt_err2; + best_idx = alt_idx2; + } + } + + pWeights[i] = (uint8_t)best_idx; + + total_err += err * pPixel_scales[i]; + } + } + } + + return total_err; + } + + static void assign_weights3(uint8_t trial_weights[16], + uint32_t best_pat_bits, + uint32_t subset_min_r[2], uint32_t subset_min_g[2], uint32_t subset_min_b[2], + uint32_t subset_max_r[2], uint32_t subset_max_g[2], uint32_t subset_max_b[2], + const vec3F* pFloat_pixels) + { + float subset_cr[2][8], subset_cg[2][8], 
subset_cb[2][8]; + + for (uint32_t subset = 0; subset < 2; subset++) + { + const uint32_t min_r = subset_min_r[subset], min_g = subset_min_g[subset], min_b = subset_min_b[subset]; + const uint32_t max_r = subset_max_r[subset], max_g = subset_max_g[subset], max_b = subset_max_b[subset]; + + for (uint32_t j = 0; j < 8; j++) + { + const uint32_t w = g_bc7_weights3[j]; + + subset_cr[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_r * (64 - w) + max_r * w + 32) >> 6)); + subset_cg[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_g * (64 - w) + max_g * w + 32) >> 6)); + subset_cb[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_b * (64 - w) + max_b * w + 32) >> 6)); + } // j + + } // subset + + // TODO: Plane optimization? + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset = (best_pat_bits >> i) & 1; + const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2]; + + float best_error = basisu::squaref(subset_cr[subset][0] - qr) + basisu::squaref(subset_cg[subset][0] - qg) + basisu::squaref(subset_cb[subset][0] - qb); + uint32_t best_idx = 0; + + for (uint32_t j = 1; j < 8; j++) + { + float e = basisu::squaref(subset_cr[subset][j] - qr) + basisu::squaref(subset_cg[subset][j] - qg) + basisu::squaref(subset_cb[subset][j] - qb); + if (e < best_error) + { + best_error = e; + best_idx = j; + } + } + + trial_weights[i] = (uint8_t)best_idx; + + } // i + } + + static double assign_weights_error_3(uint8_t trial_weights[16], + uint32_t best_pat_bits, + uint32_t subset_min_r[2], uint32_t subset_min_g[2], uint32_t subset_min_b[2], + uint32_t subset_max_r[2], uint32_t subset_max_g[2], uint32_t subset_max_b[2], + const vec3F* pFloat_pixels, const float* pPixel_scales) + { + float subset_cr[2][8], subset_cg[2][8], subset_cb[2][8]; + + for (uint32_t subset = 0; subset < 2; subset++) + { + const uint32_t min_r = subset_min_r[subset], min_g = 
subset_min_g[subset], min_b = subset_min_b[subset]; + const uint32_t max_r = subset_max_r[subset], max_g = subset_max_g[subset], max_b = subset_max_b[subset]; + + for (uint32_t j = 0; j < 8; j++) + { + const uint32_t w = g_bc7_weights3[j]; + + subset_cr[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_r * (64 - w) + max_r * w + 32) >> 6)); + subset_cg[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_g * (64 - w) + max_g * w + 32) >> 6)); + subset_cb[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_b * (64 - w) + max_b * w + 32) >> 6)); + } // j + + } // subset + + double trial_error = 0.0f; + + // TODO: Plane optimization? + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset = (best_pat_bits >> i) & 1; + const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2]; + + float best_error = basisu::squaref(subset_cr[subset][0] - qr) + basisu::squaref(subset_cg[subset][0] - qg) + basisu::squaref(subset_cb[subset][0] - qb); + uint32_t best_idx = 0; + + for (uint32_t j = 1; j < 8; j++) + { + float e = basisu::squaref(subset_cr[subset][j] - qr) + basisu::squaref(subset_cg[subset][j] - qg) + basisu::squaref(subset_cb[subset][j] - qb); + if (e < best_error) + { + best_error = e; + best_idx = j; + } + } + + trial_weights[i] = (uint8_t)best_idx; + + trial_error += best_error * pPixel_scales[i]; + + } // i + + return trial_error; + } + + static basist::vec4F g_bc6h_ls_weights_3[8]; + static basist::vec4F g_bc6h_ls_weights_4[16]; + + const uint32_t BC6H_NUM_PATS = 32; + static uint32_t g_bc6h_pats2[BC6H_NUM_PATS]; + + static void fast_encode_bc6h_init() + { + for (uint32_t i = 0; i < 8; i++) + { + const float w = (float)g_bc7_weights3[i] * (1.0f / 64.0f); + g_bc6h_ls_weights_3[i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w); + } + + for (uint32_t i = 0; i < 16; i++) + { + const float w = (float)g_bc6h_weights4[i] * (1.0f / 64.0f); + 
g_bc6h_ls_weights_4[i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w); + } + + for (uint32_t pat_index = 0; pat_index < BC6H_NUM_PATS; pat_index++) + { + uint32_t pat_bits = 0; + + for (uint32_t j = 0; j < 16; j++) + pat_bits |= (g_bc7_partition2[pat_index * 16 + j] << j); + + g_bc6h_pats2[pat_index] = pat_bits; + } + } + + static int bc6h_dequantize(int val, int bits) + { + assert(val < (1 << bits)); + + int result; + if (bits >= 15) + result = val; + else if (!val) + result = 0; + else if (val == ((1 << bits) - 1)) + result = 0xFFFF; + else + result = ((val << 16) + 0x8000) >> bits; + return result; + } + + static inline basist::half_float bc6h_convert_to_half(int val) + { + assert(val < 65536); + + // scale by 31/64 + return (basist::half_float)((val * 31) >> 6); + } + + static void bc6h_quant_dequant_endpoints(uint32_t& min_r, uint32_t& min_g, uint32_t& min_b, uint32_t& max_r, uint32_t& max_g, uint32_t& max_b, int bits) // bits=10 + { + min_r = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)min_r, bits), bits)); + min_g = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)min_g, bits), bits)); + min_b = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)min_b, bits), bits)); + + max_r = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)max_r, bits), bits)); + max_g = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)max_g, bits), bits)); + max_b = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)max_b, bits), bits)); + } + + static void bc6h_quant_endpoints( + uint32_t min_hr, uint32_t min_hg, uint32_t min_hb, uint32_t max_hr, uint32_t max_hg, uint32_t max_hb, + uint32_t& min_r, uint32_t& min_g, uint32_t& min_b, uint32_t& max_r, uint32_t& max_g, uint32_t& max_b, + int bits) + { + min_r = basist::bc6h_half_to_blog((basist::half_float)min_hr, bits); + min_g = 
basist::bc6h_half_to_blog((basist::half_float)min_hg, bits); + min_b = basist::bc6h_half_to_blog((basist::half_float)min_hb, bits); + + max_r = basist::bc6h_half_to_blog((basist::half_float)max_hr, bits); + max_g = basist::bc6h_half_to_blog((basist::half_float)max_hg, bits); + max_b = basist::bc6h_half_to_blog((basist::half_float)max_hb, bits); + } + + static void bc6h_dequant_endpoints( + uint32_t min_br, uint32_t min_bg, uint32_t min_bb, uint32_t max_br, uint32_t max_bg, uint32_t max_bb, + uint32_t& min_hr, uint32_t& min_hg, uint32_t& min_hb, uint32_t& max_hr, uint32_t& max_hg, uint32_t& max_hb, + int bits) + { + min_hr = bc6h_convert_to_half(bc6h_dequantize(min_br, bits)); + min_hg = bc6h_convert_to_half(bc6h_dequantize(min_bg, bits)); + min_hb = bc6h_convert_to_half(bc6h_dequantize(min_bb, bits)); + + max_hr = bc6h_convert_to_half(bc6h_dequantize(max_br, bits)); + max_hg = bc6h_convert_to_half(bc6h_dequantize(max_bg, bits)); + max_hb = bc6h_convert_to_half(bc6h_dequantize(max_bb, bits)); + } + + static BASISU_FORCE_INLINE int popcount32(uint32_t x) + { +#if defined(__EMSCRIPTEN__) || defined(__clang__) || defined(__GNUC__) + return __builtin_popcount(x); +#elif defined(_MSC_VER) + return __popcnt(x); +#else + int count = 0; + while (x) + { + x &= (x - 1); + ++count; + } + return count; +#endif + } + + static BASISU_FORCE_INLINE int fast_roundf_int(float x) + { + return (x >= 0.0f) ? 
(int)(x + 0.5f) : (int)(x - 0.5f); + } + + static void fast_encode_bc6h_2subsets_pattern( + uint32_t best_pat_index, uint32_t best_pat_bits, + const basist::half_float* pPixels, const vec3F* pFloat_pixels, const float* pPixel_scales, + double& cur_error, basist::bc6h_logical_block& log_blk, + int64_t block_max_var, + int mean_r, int mean_g, int mean_b, + const fast_bc6h_params& params) + { + BASISU_NOTE_UNUSED(block_max_var); + + uint32_t subset_means[2][3] = { { 0 } }; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = (best_pat_bits >> i) & 1; + const uint32_t r = pPixels[i * 3 + 0], g = pPixels[i * 3 + 1], b = pPixels[i * 3 + 2]; + + subset_means[subset_index][0] += r; + subset_means[subset_index][1] += g; + subset_means[subset_index][2] += b; + } + + for (uint32_t s = 0; s < 2; s++) + for (uint32_t c = 0; c < 3; c++) + subset_means[s][c] = (subset_means[s][c] + 8) / 16; + + int64_t subset_icov[2][6] = { { 0 } }; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = (best_pat_bits >> i) & 1; + const int r = (int)pPixels[i * 3 + 0] - mean_r, g = (int)pPixels[i * 3 + 1] - mean_g, b = (int)pPixels[i * 3 + 2] - mean_b; + + subset_icov[subset_index][0] += r * r; + subset_icov[subset_index][1] += r * g; + subset_icov[subset_index][2] += r * b; + subset_icov[subset_index][3] += g * g; + subset_icov[subset_index][4] += g * b; + subset_icov[subset_index][5] += b * b; + } + + vec3F subset_axis[2]; + + for (uint32_t subset_index = 0; subset_index < 2; subset_index++) + { + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = (float)subset_icov[subset_index][i]; + + const float sc = 1.0f / (basisu::maximum(cov[0], cov[3], cov[5]) + basisu::REALLY_SMALL_FLOAT_VAL); + const float wx = sc * cov[0], wy = sc * cov[3], wz = sc * cov[5]; + + const float alt_xr = cov[0] * wx + cov[1] * wy + cov[2] * wz; + const float alt_xg = cov[1] * wx + cov[3] * wy + cov[4] * wz; + const float alt_xb = cov[2] * wx + cov[4] * wy + cov[5] * wz; + + 
float l = basisu::squaref(alt_xr) + basisu::squaref(alt_xg) + basisu::squaref(alt_xb); + + float axis_r = 0.57735027f, axis_g = 0.57735027f, axis_b = 0.57735027f; + if (fabs(l) >= basisu::SMALL_FLOAT_VAL) + { + const float inv_l = inv_sqrt(l); + axis_r = alt_xr * inv_l; + axis_g = alt_xg * inv_l; + axis_b = alt_xb * inv_l; + } + + subset_axis[subset_index].set(axis_r, axis_g, axis_b); + } // s + + float subset_min_dot[2] = { basisu::BIG_FLOAT_VAL, basisu::BIG_FLOAT_VAL }; + float subset_max_dot[2] = { -basisu::BIG_FLOAT_VAL, -basisu::BIG_FLOAT_VAL }; + int subset_min_idx[2] = { 0 }, subset_max_idx[2] = { 0 }; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = (best_pat_bits >> i) & 1; + const float r = (float)pPixels[i * 3 + 0], g = (float)pPixels[i * 3 + 1], b = (float)pPixels[i * 3 + 2]; + const float dot = r * subset_axis[subset_index].c[0] + g * subset_axis[subset_index].c[1] + b * subset_axis[subset_index].c[2]; + + if (dot < subset_min_dot[subset_index]) + { + subset_min_dot[subset_index] = dot; + subset_min_idx[subset_index] = i; + } + + if (dot > subset_max_dot[subset_index]) + { + subset_max_dot[subset_index] = dot; + subset_max_idx[subset_index] = i; + } + } // i + + uint32_t subset_min_r[2], subset_min_g[2], subset_min_b[2]; + uint32_t subset_max_r[2], subset_max_g[2], subset_max_b[2]; + + for (uint32_t subset_index = 0; subset_index < 2; subset_index++) + { + const uint32_t min_index = subset_min_idx[subset_index] * 3, max_index = subset_max_idx[subset_index] * 3; + + subset_min_r[subset_index] = pPixels[min_index + 0]; + subset_min_g[subset_index] = pPixels[min_index + 1]; + subset_min_b[subset_index] = pPixels[min_index + 2]; + + subset_max_r[subset_index] = pPixels[max_index + 0]; + subset_max_g[subset_index] = pPixels[max_index + 1]; + subset_max_b[subset_index] = pPixels[max_index + 2]; + + } // subset_index + + // least squares with unquantized endpoints + const bool use_ls = true; + if (use_ls) + { + uint8_t 
trial_weights[16]; + assign_weights3(trial_weights, best_pat_bits, subset_min_r, subset_min_g, subset_min_b, subset_max_r, subset_max_g, subset_max_b, pFloat_pixels); + + float z00[2] = { 0.0f }, z01[2] = { 0.0f }, z10[2] = { 0.0f }, z11[2] = { 0.0f }; + float q00_r[2] = { 0.0f }, q10_r[2] = { 0.0f }, t_r[2] = { 0.0f }; + float q00_g[2] = { 0.0f }, q10_g[2] = { 0.0f }, t_g[2] = { 0.0f }; + float q00_b[2] = { 0.0f }, q10_b[2] = { 0.0f }, t_b[2] = { 0.0f }; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset = (best_pat_bits >> i) & 1; + + float r = (float)pPixels[i * 3 + 0]; + float g = (float)pPixels[i * 3 + 1]; + float b = (float)pPixels[i * 3 + 2]; + + const uint32_t sel = trial_weights[i]; + + z00[subset] += g_bc6h_ls_weights_3[sel][0]; + z10[subset] += g_bc6h_ls_weights_3[sel][1]; + z11[subset] += g_bc6h_ls_weights_3[sel][2]; + + float w = g_bc6h_ls_weights_3[sel][3]; + + q00_r[subset] += w * r; + t_r[subset] += r; + + q00_g[subset] += w * g; + t_g[subset] += g; + + q00_b[subset] += w * b; + t_b[subset] += b; + } + + for (uint32_t subset = 0; subset < 2; subset++) + { + q10_r[subset] = t_r[subset] - q00_r[subset]; + q10_g[subset] = t_g[subset] - q00_g[subset]; + q10_b[subset] = t_b[subset] - q00_b[subset]; + + z01[subset] = z10[subset]; + + float det = z00[subset] * z11[subset] - z01[subset] * z10[subset]; + if (fabs(det) >= basisu::SMALL_FLOAT_VAL) + { + det = 1.0f / det; + + float iz00 = z11[subset] * det; + float iz01 = -z01[subset] * det; + float iz10 = -z10[subset] * det; + float iz11 = z00[subset] * det; + + subset_max_r[subset] = basisu::clamp(fast_roundf_int(iz00 * q00_r[subset] + iz01 * q10_r[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT); + subset_min_r[subset] = basisu::clamp(fast_roundf_int(iz10 * q00_r[subset] + iz11 * q10_r[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT); + + subset_max_g[subset] = basisu::clamp(fast_roundf_int(iz00 * q00_g[subset] + iz01 * q10_g[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT); 
+ subset_min_g[subset] = basisu::clamp(fast_roundf_int(iz10 * q00_g[subset] + iz11 * q10_g[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT); + + subset_max_b[subset] = basisu::clamp(fast_roundf_int(iz00 * q00_b[subset] + iz01 * q10_b[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT); + subset_min_b[subset] = basisu::clamp(fast_roundf_int(iz10 * q00_b[subset] + iz11 * q10_b[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT); + } + } // subset + } + + const int BC6H_2SUBSET_ABS_ENDPOINT_MODE = 9; + + int bc6h_mode_index = BC6H_2SUBSET_ABS_ENDPOINT_MODE, num_endpoint_bits = 6; + uint32_t abs_blog_endpoints[3][4]; + + if (params.m_num_diff_endpoint_modes_to_try) + { + // ordered from largest base bits to least + static const int s_bc6h_mode_order2[2] = { 5, 1 }; + static const int s_bc6h_mode_order4[4] = { 0, 5, 7, 1 }; + static const int s_bc6h_mode_order9[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; + + uint32_t num_endpoint_modes = 2; + const int* pBC6H_mode_order = s_bc6h_mode_order2; + + if (params.m_num_diff_endpoint_modes_to_try >= 9) + { + num_endpoint_modes = 9; + pBC6H_mode_order = s_bc6h_mode_order9; + } + else if (params.m_num_diff_endpoint_modes_to_try >= 4) + { + num_endpoint_modes = 4; + pBC6H_mode_order = s_bc6h_mode_order4; + } + + // Find the BC6H mode that will conservatively encode our trial endpoints. The mode chosen will handle any endpoint swaps. 
+ for (uint32_t bc6h_mode_iter = 0; bc6h_mode_iter < num_endpoint_modes; bc6h_mode_iter++) + { + const uint32_t mode = pBC6H_mode_order[bc6h_mode_iter]; + + const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0]; + const int base_bitmask = (1 << num_base_bits) - 1; + BASISU_NOTE_UNUSED(base_bitmask); + + const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] }; + const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 }; + + for (uint32_t subset_index = 0; subset_index < 2; subset_index++) + { + bc6h_quant_endpoints( + subset_min_r[subset_index], subset_min_g[subset_index], subset_min_b[subset_index], subset_max_r[subset_index], subset_max_g[subset_index], subset_max_b[subset_index], + abs_blog_endpoints[0][subset_index * 2 + 0], abs_blog_endpoints[1][subset_index * 2 + 0], abs_blog_endpoints[2][subset_index * 2 + 0], + abs_blog_endpoints[0][subset_index * 2 + 1], abs_blog_endpoints[1][subset_index * 2 + 1], abs_blog_endpoints[2][subset_index * 2 + 1], + num_base_bits); + } + + uint32_t c; + for (c = 0; c < 3; c++) + { + // a very conservative check because we don't have the weight indices yet, so we don't know how to swap end point values + // purposely enforcing a symmetric limit here so we can invert any endpoints later if needed + const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1; + const int min_delta = -max_delta; + + int delta0 = (int)abs_blog_endpoints[c][1] - (int)abs_blog_endpoints[c][0]; + if ((delta0 < min_delta) || (delta0 > max_delta)) + break; + + int delta1 = (int)abs_blog_endpoints[c][2] - (int)abs_blog_endpoints[c][0]; + if ((delta1 < min_delta) || (delta1 > max_delta)) + break; + + int delta2 = (int)abs_blog_endpoints[c][3] - (int)abs_blog_endpoints[c][0]; + if ((delta2 < min_delta) || (delta2 > max_delta)) + break; + + // in case the endpoints are swapped + int delta3 = 
(int)abs_blog_endpoints[c][2] - (int)abs_blog_endpoints[c][1]; + if ((delta3 < min_delta) || (delta3 > max_delta)) + break; + + int delta4 = (int)abs_blog_endpoints[c][3] - (int)abs_blog_endpoints[c][1]; + if ((delta4 < min_delta) || (delta4 > max_delta)) + break; + } + + if (c == 3) + { + bc6h_mode_index = mode; + num_endpoint_bits = num_base_bits; + break; + } + } + } + + if (bc6h_mode_index == BC6H_2SUBSET_ABS_ENDPOINT_MODE) + { + for (uint32_t subset_index = 0; subset_index < 2; subset_index++) + { + bc6h_quant_endpoints( + subset_min_r[subset_index], subset_min_g[subset_index], subset_min_b[subset_index], subset_max_r[subset_index], subset_max_g[subset_index], subset_max_b[subset_index], + abs_blog_endpoints[0][subset_index * 2 + 0], abs_blog_endpoints[1][subset_index * 2 + 0], abs_blog_endpoints[2][subset_index * 2 + 0], + abs_blog_endpoints[0][subset_index * 2 + 1], abs_blog_endpoints[1][subset_index * 2 + 1], abs_blog_endpoints[2][subset_index * 2 + 1], + num_endpoint_bits); + } + } + + for (uint32_t subset_index = 0; subset_index < 2; subset_index++) + { + bc6h_dequant_endpoints( + abs_blog_endpoints[0][subset_index * 2 + 0], abs_blog_endpoints[1][subset_index * 2 + 0], abs_blog_endpoints[2][subset_index * 2 + 0], + abs_blog_endpoints[0][subset_index * 2 + 1], abs_blog_endpoints[1][subset_index * 2 + 1], abs_blog_endpoints[2][subset_index * 2 + 1], + subset_min_r[subset_index], subset_min_g[subset_index], subset_min_b[subset_index], + subset_max_r[subset_index], subset_max_g[subset_index], subset_max_b[subset_index], num_endpoint_bits); + } + + uint8_t trial_weights[16]; + double trial_error = assign_weights_error_3(trial_weights, best_pat_bits, subset_min_r, subset_min_g, subset_min_b, subset_max_r, subset_max_g, subset_max_b, pFloat_pixels, pPixel_scales); + + if (trial_error < cur_error) + { + basist::bc6h_logical_block trial_log_blk; + + trial_log_blk.m_mode = bc6h_mode_index; + trial_log_blk.m_partition_pattern = best_pat_index; + + 
memcpy(trial_log_blk.m_endpoints, abs_blog_endpoints, sizeof(trial_log_blk.m_endpoints)); + memcpy(trial_log_blk.m_weights, trial_weights, 16); + + if (trial_log_blk.m_weights[0] & 4) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(trial_log_blk.m_endpoints[c][0], trial_log_blk.m_endpoints[c][1]); + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = (best_pat_bits >> i) & 1; + if (subset_index == 0) + trial_log_blk.m_weights[i] = 7 - trial_log_blk.m_weights[i]; + } + } + + const uint32_t subset2_anchor_index = g_bc7_table_anchor_index_second_subset[best_pat_index]; + if (trial_log_blk.m_weights[subset2_anchor_index] & 4) + { + for (uint32_t c = 0; c < 3; c++) + std::swap(trial_log_blk.m_endpoints[c][2], trial_log_blk.m_endpoints[c][3]); + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = (best_pat_bits >> i) & 1; + if (subset_index == 1) + trial_log_blk.m_weights[i] = 7 - trial_log_blk.m_weights[i]; + } + } + + if (bc6h_mode_index != BC6H_2SUBSET_ABS_ENDPOINT_MODE) + { + const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[bc6h_mode_index][1], g_bc6h_mode_sig_bits[bc6h_mode_index][2], g_bc6h_mode_sig_bits[bc6h_mode_index][3] }; + const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 }; + + for (uint32_t c = 0; c < 3; c++) + { + const int delta0 = (int)trial_log_blk.m_endpoints[c][1] - (int)trial_log_blk.m_endpoints[c][0]; + const int delta1 = (int)trial_log_blk.m_endpoints[c][2] - (int)trial_log_blk.m_endpoints[c][0]; + const int delta2 = (int)trial_log_blk.m_endpoints[c][3] - (int)trial_log_blk.m_endpoints[c][0]; + +#ifdef _DEBUG + // sanity check the final endpoints + const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1; + const int min_delta = -(max_delta + 1); + assert((max_delta - min_delta) == delta_bitmasks[c]); + + if ((delta0 < min_delta) || (delta0 > max_delta) || (delta1 < min_delta) || (delta1 > max_delta) || (delta2 < min_delta) || 
(delta2 > max_delta)) + { + assert(0); + break; + } +#endif + + trial_log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c]; + trial_log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c]; + trial_log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c]; + + } // c + } + + cur_error = trial_error; + log_blk = trial_log_blk; + } + } + + static void fast_encode_bc6h_2subsets( + const basist::half_float* pPixels, const vec3F* pFloat_pixels, const float* pPixel_scales, + double& cur_error, basist::bc6h_logical_block& log_blk, + int64_t block_max_var, + int mean_r, int mean_g, int mean_b, float block_axis_r, float block_axis_g, float block_axis_b, + const fast_bc6h_params& params) + { + assert((params.m_max_2subset_pats_to_try > 0) && (params.m_max_2subset_pats_to_try <= BC6H_NUM_PATS)); + + if (params.m_max_2subset_pats_to_try == BC6H_NUM_PATS) + { + for (uint32_t i = 0; i < BC6H_NUM_PATS; i++) + { + const uint32_t best_pat_index = i; + const uint32_t best_pat_bits = g_bc6h_pats2[best_pat_index]; + + fast_encode_bc6h_2subsets_pattern( + best_pat_index, best_pat_bits, + pPixels, pFloat_pixels, pPixel_scales, + cur_error, log_blk, + block_max_var, + mean_r, mean_g, mean_b, params); + } + return; + } + + uint32_t desired_pat_bits = 0; + for (uint32_t i = 0; i < 16; i++) + { + float f = (float)(pPixels[i * 3 + 0] - mean_r) * block_axis_r + + (float)(pPixels[i * 3 + 1] - mean_g) * block_axis_g + + (float)(pPixels[i * 3 + 2] - mean_b) * block_axis_b; + + desired_pat_bits |= (((f >= 0.0f) ? 
1 : 0) << i); + } // i + + if (params.m_max_2subset_pats_to_try == 1) + { + uint32_t best_diff = UINT32_MAX; + for (uint32_t p = 0; p < BC6H_NUM_PATS; p++) + { + const uint32_t bc6h_pat_bits = g_bc6h_pats2[p]; + + int diff = popcount32(bc6h_pat_bits ^ desired_pat_bits); + int diff_inv = 16 - diff; + + uint32_t min_diff = (basisu::minimum(diff, diff_inv) << 8) | p; + if (min_diff < best_diff) + best_diff = min_diff; + } // p + + const uint32_t best_pat_index = best_diff & 0xFF; + const uint32_t best_pat_bits = g_bc6h_pats2[best_pat_index]; + + fast_encode_bc6h_2subsets_pattern( + best_pat_index, best_pat_bits, + pPixels, pFloat_pixels, pPixel_scales, + cur_error, log_blk, + block_max_var, + mean_r, mean_g, mean_b, params); + } + else + { + assert(params.m_max_2subset_pats_to_try <= BC6H_NUM_PATS); + uint32_t pat_diffs[BC6H_NUM_PATS]; + + for (uint32_t p = 0; p < BC6H_NUM_PATS; p++) + { + const uint32_t bc6h_pat_bits = g_bc6h_pats2[p]; + + int diff = popcount32(bc6h_pat_bits ^ desired_pat_bits); + int diff_inv = 16 - diff; + + pat_diffs[p] = (basisu::minimum(diff, diff_inv) << 8) | p; + } // p + + std::sort(pat_diffs, pat_diffs + BC6H_NUM_PATS); + + for (uint32_t pat_iter = 0; pat_iter < params.m_max_2subset_pats_to_try; pat_iter++) + { + const uint32_t best_pat_index = pat_diffs[pat_iter] & 0xFF; + const uint32_t best_pat_bits = g_bc6h_pats2[best_pat_index]; + + fast_encode_bc6h_2subsets_pattern( + best_pat_index, best_pat_bits, + pPixels, pFloat_pixels, pPixel_scales, + cur_error, log_blk, + block_max_var, + mean_r, mean_g, mean_b, params); + } + } + } + + void fast_encode_bc6h(const basist::half_float* pPixels, basist::bc6h_block* pBlock, const fast_bc6h_params ¶ms) + { + basist::bc6h_logical_block log_blk; + log_blk.clear(); + + log_blk.m_mode = basist::BC6H_FIRST_1SUBSET_MODE_INDEX; + + uint32_t omin_r = UINT32_MAX, omin_g = UINT32_MAX, omin_b = UINT32_MAX; + uint32_t omax_r = 0, omax_g = 0, omax_b = 0; + uint32_t total_r = 0, total_g = 0, total_b = 0; + + for 
(uint32_t i = 0; i < 16; i++) + { + uint32_t r = pPixels[i * 3 + 0]; + uint32_t g = pPixels[i * 3 + 1]; + uint32_t b = pPixels[i * 3 + 2]; + + total_r += r; + total_g += g; + total_b += b; + + omin_r = basisu::minimum(omin_r, r); + omin_g = basisu::minimum(omin_g, g); + omin_b = basisu::minimum(omin_b, b); + + omax_r = basisu::maximum(omax_r, r); + omax_g = basisu::maximum(omax_g, g); + omax_b = basisu::maximum(omax_b, b); + } + + if ((omin_r == omax_r) && (omin_g == omax_g) && (omin_b == omax_b)) + { + // Solid block + log_blk.m_endpoints[0][0] = basist::bc6h_half_to_blog16((basist::half_float)omin_r); + log_blk.m_endpoints[0][1] = 0; + + log_blk.m_endpoints[1][0] = basist::bc6h_half_to_blog16((basist::half_float)omin_g); + log_blk.m_endpoints[1][1] = 0; + + log_blk.m_endpoints[2][0] = basist::bc6h_half_to_blog16((basist::half_float)omin_b); + log_blk.m_endpoints[2][1] = 0; + + log_blk.m_mode = 13; + pack_bc6h_block(*pBlock, log_blk); + + return; + } + + uint32_t min_r, min_g, min_b, max_r, max_g, max_b; + + int mean_r = (total_r + 8) / 16; + int mean_g = (total_g + 8) / 16; + int mean_b = (total_b + 8) / 16; + + int64_t icov[6] = { 0, 0, 0, 0, 0, 0 }; + + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pPixels[i * 3 + 0] - mean_r; + int g = (int)pPixels[i * 3 + 1] - mean_g; + int b = (int)pPixels[i * 3 + 2] - mean_b; + + icov[0] += r * r; + icov[1] += r * g; + icov[2] += r * b; + icov[3] += g * g; + icov[4] += g * b; + icov[5] += b * b; + } + + int64_t block_max_var = basisu::maximum(icov[0], icov[3], icov[5]); // not divided by 16, i.e. 
scaled by 16 + + if (block_max_var < (FAST_BC6H_STD_DEV_THRESH * FAST_BC6H_STD_DEV_THRESH * 16)) + { + // Simple block + min_r = (omax_r - omin_r) / 32 + omin_r; + min_g = (omax_g - omin_g) / 32 + omin_g; + min_b = (omax_b - omin_b) / 32 + omin_b; + + max_r = ((omax_r - omin_r) * 31) / 32 + omin_r; + max_g = ((omax_g - omin_g) * 31) / 32 + omin_g; + max_b = ((omax_b - omin_b) * 31) / 32 + omin_b; + + assert((max_r < MAX_HALF_FLOAT_AS_INT_BITS) && (max_g < MAX_HALF_FLOAT_AS_INT_BITS) && (max_b < MAX_HALF_FLOAT_AS_INT_BITS)); + + bc6h_quant_dequant_endpoints(min_r, min_g, min_b, max_r, max_g, max_b, 10); + + assign_weights_simple_4(pPixels, log_blk.m_weights, min_r, min_g, min_b, max_r, max_g, max_b, block_max_var); + + log_blk.m_endpoints[0][0] = basist::bc6h_half_to_blog((basist::half_float)min_r, 10); + log_blk.m_endpoints[0][1] = basist::bc6h_half_to_blog((basist::half_float)max_r, 10); + + log_blk.m_endpoints[1][0] = basist::bc6h_half_to_blog((basist::half_float)min_g, 10); + log_blk.m_endpoints[1][1] = basist::bc6h_half_to_blog((basist::half_float)max_g, 10); + + log_blk.m_endpoints[2][0] = basist::bc6h_half_to_blog((basist::half_float)min_b, 10); + log_blk.m_endpoints[2][1] = basist::bc6h_half_to_blog((basist::half_float)max_b, 10); + + if (log_blk.m_weights[0] & 8) + { + for (uint32_t i = 0; i < 16; i++) + log_blk.m_weights[i] = 15 - log_blk.m_weights[i]; + + for (uint32_t c = 0; c < 3; c++) + { + std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]); + } + } + + pack_bc6h_block(*pBlock, log_blk); + + return; + } + + // block_max_var cannot be 0 here, also trace cannot be 0 + + // Complex block (edges/strong gradients) + bool try_2subsets = false; + double cur_err = 0.0f; + vec3F float_pixels[16]; + float pixel_scales[16]; + + // covar rows are: + // 0, 1, 2 + // 1, 3, 4 + // 2, 4, 5 + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = (float)icov[i]; + + const float sc = 1.0f / (float)block_max_var; + const float wx = sc * cov[0], wy = sc 
* cov[3], wz = sc * cov[5]; + + const float alt_xr = cov[0] * wx + cov[1] * wy + cov[2] * wz; + const float alt_xg = cov[1] * wx + cov[3] * wy + cov[4] * wz; + const float alt_xb = cov[2] * wx + cov[4] * wy + cov[5] * wz; + + float l = basisu::squaref(alt_xr) + basisu::squaref(alt_xg) + basisu::squaref(alt_xb); + + float axis_r = 0.57735027f, axis_g = 0.57735027f, axis_b = 0.57735027f; + if (fabs(l) >= basisu::SMALL_FLOAT_VAL) + { + const float inv_l = inv_sqrt(l); + axis_r = alt_xr * inv_l; + axis_g = alt_xg * inv_l; + axis_b = alt_xb * inv_l; + } + + const float tr = axis_r * cov[0] + axis_g * cov[1] + axis_b * cov[2]; + const float tg = axis_r * cov[1] + axis_g * cov[3] + axis_b * cov[4]; + const float tb = axis_r * cov[2] + axis_g * cov[4] + axis_b * cov[5]; + const float principle_axis_var = tr * axis_r + tg * axis_g + tb * axis_b; + + const float inv_principle_axis_var = 1.0f / (principle_axis_var + basisu::REALLY_SMALL_FLOAT_VAL); + axis_r = tr * inv_principle_axis_var; + axis_g = tg * inv_principle_axis_var; + axis_b = tb * inv_principle_axis_var; + + float total_var = cov[0] + cov[3] + cov[5]; + + // If the principle axis variance vs. the block's total variance accounts for less than this threshold, it's a "very complex" block that may benefit from 2 subsets. 
+ const float COMPLEX_BLOCK_PRINCIPLE_AXIS_FRACT_THRESH = .995f; + try_2subsets = principle_axis_var < (total_var * COMPLEX_BLOCK_PRINCIPLE_AXIS_FRACT_THRESH); + + uint32_t min_idx = 0, max_idx = 0; + float min_dot = basisu::BIG_FLOAT_VAL, max_dot = -basisu::BIG_FLOAT_VAL; + + for (uint32_t i = 0; i < 16; i++) + { + float r = (float)pPixels[i * 3 + 0]; + float g = (float)pPixels[i * 3 + 1]; + float b = (float)pPixels[i * 3 + 2]; + + float_pixels[i].c[0] = fast_half_to_float_pos_not_inf_or_nan((half_float)r); + float_pixels[i].c[1] = fast_half_to_float_pos_not_inf_or_nan((half_float)g); + float_pixels[i].c[2] = fast_half_to_float_pos_not_inf_or_nan((half_float)b); + + pixel_scales[i] = 1.0f / (basisu::squaref(float_pixels[i].c[0]) + basisu::squaref(float_pixels[i].c[1]) + basisu::squaref(float_pixels[i].c[2]) + (float)MIN_HALF_FLOAT); + + float dot = r * axis_r + g * axis_g + b * axis_b; + + if (dot < min_dot) + { + min_dot = dot; + min_idx = i; + } + + if (dot > max_dot) + { + max_dot = dot; + max_idx = i; + } + } + + min_r = pPixels[min_idx * 3 + 0]; + min_g = pPixels[min_idx * 3 + 1]; + min_b = pPixels[min_idx * 3 + 2]; + + max_r = pPixels[max_idx * 3 + 0]; + max_g = pPixels[max_idx * 3 + 1]; + max_b = pPixels[max_idx * 3 + 2]; + + assert((max_r < MAX_HALF_FLOAT_AS_INT_BITS) && (max_g < MAX_HALF_FLOAT_AS_INT_BITS) && (max_b < MAX_HALF_FLOAT_AS_INT_BITS)); + + bc6h_quant_dequant_endpoints(min_r, min_g, min_b, max_r, max_g, max_b, 10); + + cur_err = assign_weights_4(float_pixels, pixel_scales, log_blk.m_weights, min_r, min_g, min_b, max_r, max_g, max_b, block_max_var, try_2subsets, params); + + const uint32_t MAX_LS_PASSES = params.m_hq_ls ? 
2 : 1; + for (uint32_t pass = 0; pass < MAX_LS_PASSES; pass++) + { + float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; + float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; + float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; + float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f; + + for (uint32_t i = 0; i < 16; i++) + { + float r = (float)pPixels[i * 3 + 0]; + float g = (float)pPixels[i * 3 + 1]; + float b = (float)pPixels[i * 3 + 2]; + + const uint32_t sel = log_blk.m_weights[i]; + + z00 += g_bc6h_ls_weights_4[sel][0]; + z10 += g_bc6h_ls_weights_4[sel][1]; + z11 += g_bc6h_ls_weights_4[sel][2]; + + float w = g_bc6h_ls_weights_4[sel][3]; + + q00_r += w * r; + t_r += r; + + q00_g += w * g; + t_g += g; + + q00_b += w * b; + t_b += b; + } + + q10_r = t_r - q00_r; + q10_g = t_g - q00_g; + q10_b = t_b - q00_b; + + z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < basisu::SMALL_FLOAT_VAL) + break; + + det = 1.0f / det; + + float iz00 = z11 * det; + float iz01 = -z01 * det; + float iz10 = -z10 * det; + float iz11 = z00 * det; + + uint32_t trial_max_r = (int)basisu::clamp(std::round(iz00 * q00_r + iz01 * q10_r), 0, (float)basist::MAX_BC6H_HALF_FLOAT_AS_UINT); + uint32_t trial_min_r = (int)basisu::clamp(std::round(iz10 * q00_r + iz11 * q10_r), 0, (float)basist::MAX_BC6H_HALF_FLOAT_AS_UINT); + + uint32_t trial_max_g = (int)basisu::clamp(std::round(iz00 * q00_g + iz01 * q10_g), 0, (float)basist::MAX_BC6H_HALF_FLOAT_AS_UINT); + uint32_t trial_min_g = (int)basisu::clamp(std::round(iz10 * q00_g + iz11 * q10_g), 0, (float)basist::MAX_BC6H_HALF_FLOAT_AS_UINT); + + uint32_t trial_max_b = (int)basisu::clamp(std::round(iz00 * q00_b + iz01 * q10_b), 0, (float)basist::MAX_BC6H_HALF_FLOAT_AS_UINT); + uint32_t trial_min_b = (int)basisu::clamp(std::round(iz10 * q00_b + iz11 * q10_b), 0, (float)basist::MAX_BC6H_HALF_FLOAT_AS_UINT); + + bc6h_quant_dequant_endpoints(trial_min_r, trial_min_g, trial_min_b, trial_max_r, trial_max_g, trial_max_b, 10); + + uint8_t trial_weights[16]; + double 
trial_err = assign_weights_4(float_pixels, pixel_scales, trial_weights, trial_min_r, trial_min_g, trial_min_b, trial_max_r, trial_max_g, trial_max_b, block_max_var, try_2subsets, params); + + if (trial_err < cur_err) + { + cur_err = trial_err; + + min_r = trial_min_r; + max_r = trial_max_r; + + min_g = trial_min_g; + max_g = trial_max_g; + + min_b = trial_min_b; + max_b = trial_max_b; + + memcpy(log_blk.m_weights, trial_weights, 16); + } + else + { + break; + } + + } // pass + +#if 0 + //if (full_flag) + if ((try_2subsets) && (block_max_var > (FAST_BC6H_COMPLEX_STD_DEV_THRESH * FAST_BC6H_COMPLEX_STD_DEV_THRESH * 16))) + { + min_r = 0; + max_r = 0; + min_g = 0; + max_g = 0; + min_b = 0; + max_b = 0; + } +#endif + + log_blk.m_endpoints[0][0] = basist::bc6h_half_to_blog((basist::half_float)min_r, 10); + log_blk.m_endpoints[0][1] = basist::bc6h_half_to_blog((basist::half_float)max_r, 10); + + log_blk.m_endpoints[1][0] = basist::bc6h_half_to_blog((basist::half_float)min_g, 10); + log_blk.m_endpoints[1][1] = basist::bc6h_half_to_blog((basist::half_float)max_g, 10); + + log_blk.m_endpoints[2][0] = basist::bc6h_half_to_blog((basist::half_float)min_b, 10); + log_blk.m_endpoints[2][1] = basist::bc6h_half_to_blog((basist::half_float)max_b, 10); + + if (log_blk.m_weights[0] & 8) + { + for (uint32_t i = 0; i < 16; i++) + log_blk.m_weights[i] = 15 - log_blk.m_weights[i]; + + for (uint32_t c = 0; c < 3; c++) + { + std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]); + } + } + + if ((params.m_max_2subset_pats_to_try > 0) && ((try_2subsets) && (block_max_var > (FAST_BC6H_COMPLEX_STD_DEV_THRESH * FAST_BC6H_COMPLEX_STD_DEV_THRESH * 16)))) + { + fast_encode_bc6h_2subsets(pPixels, float_pixels, pixel_scales, cur_err, log_blk, block_max_var, mean_r, mean_g, mean_b, axis_r, axis_g, axis_b, params); + } + + pack_bc6h_block(*pBlock, log_blk); + } + + bool decode_6x6_hdr(const uint8_t *pComp_data, uint32_t comp_data_size, basisu::vector2D& decoded_blocks, uint32_t& width, 
uint32_t& height) + { + const uint32_t BLOCK_W = 6, BLOCK_H = 6; + + //interval_timer tm; + //tm.start(); + + width = 0; + height = 0; + + if (comp_data_size <= (2 * 3 + 1)) + return false; + + basist::bitwise_decoder decoder; + if (!decoder.init(pComp_data, comp_data_size)) + return false; + + if (decoder.get_bits(16) != 0xABCD) + return false; + + width = decoder.get_bits(16); + height = decoder.get_bits(16); + + if (!width || !height || (width > MAX_ASTC_HDR_6X6_DIM) || (height > MAX_ASTC_HDR_6X6_DIM)) + return false; + + const uint32_t num_blocks_x = (width + BLOCK_W - 1) / BLOCK_W; + const uint32_t num_blocks_y = (height + BLOCK_H - 1) / BLOCK_H; + + const uint32_t total_blocks = num_blocks_x * num_blocks_y; + + decoded_blocks.resize(num_blocks_x, num_blocks_y); + //memset(decoded_blocks.get_ptr(), 0, decoded_blocks.size_in_bytes()); + + // These are the decoded log blocks, NOT the output log blocks. + basisu::vector2D decoded_log_blocks(num_blocks_x, REUSE_MAX_BUFFER_ROWS); + memset(decoded_log_blocks.get_ptr(), 0, decoded_log_blocks.size_in_bytes()); + + uint32_t cur_bx = 0, cur_by = 0; + int cur_row_index = 0; + + uint32_t step_counter = 0; + BASISU_NOTE_UNUSED(step_counter); + + while (cur_by < num_blocks_y) + { + step_counter++; + + //if ((cur_bx == 9) && (cur_by == 13)) + // printf("!"); + +#if SYNC_MARKERS + uint32_t mk = decoder.get_bits(16); + if (mk != 0xDEAD) + { + printf("!"); + assert(0); + return false; + } +#endif + if (decoder.get_bits_remaining() < 1) + return false; + + encoding_type et = encoding_type::cBlock; + + uint32_t b0 = decoder.get_bits(1); + if (!b0) + { + uint32_t b1 = decoder.get_bits(1); + if (b1) + et = encoding_type::cReuse; + else + { + uint32_t b2 = decoder.get_bits(1); + if (b2) + et = encoding_type::cSolid; + else + et = encoding_type::cRun; + } + } + + switch (et) + { + case encoding_type::cRun: + { + if (!cur_bx && !cur_by) + return false; + + const uint32_t run_len = decoder.decode_vlc(5) + 1; + + uint32_t 
num_blocks_remaining = total_blocks - (cur_bx + cur_by * num_blocks_x); + if (run_len > num_blocks_remaining) + return false; + + uint32_t prev_bx = cur_bx, prev_by = cur_by; + + if (cur_bx) + prev_bx--; + else + { + prev_bx = num_blocks_x - 1; + prev_by--; + } + + const astc_helpers::log_astc_block& prev_log_blk = decoded_log_blocks(prev_bx, calc_row_index(cur_by, prev_by, cur_row_index)); + const astc_helpers::astc_block& prev_phys_blk = decoded_blocks(prev_bx, prev_by); + + assert((prev_log_blk.m_user_mode == 255) || (prev_log_blk.m_user_mode < TOTAL_BLOCK_MODE_DECS)); + + for (uint32_t i = 0; i < run_len; i++) + { + decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index)) = prev_log_blk; + decoded_blocks(cur_bx, cur_by) = prev_phys_blk; + + cur_bx++; + if (cur_bx == num_blocks_x) + { + cur_bx = 0; + cur_by++; + cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS; + } + } + + break; + } + case encoding_type::cSolid: + { + const basist::half_float rh = (basist::half_float)decoder.get_bits(15); + const basist::half_float gh = (basist::half_float)decoder.get_bits(15); + const basist::half_float bh = (basist::half_float)decoder.get_bits(15); + + astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index)); + + log_blk.clear(); + log_blk.m_user_mode = 255; + log_blk.m_solid_color_flag_hdr = true; + log_blk.m_solid_color[0] = rh; + log_blk.m_solid_color[1] = gh; + log_blk.m_solid_color[2] = bh; + log_blk.m_solid_color[3] = basist::float_to_half(1.0f); + + bool status = astc_helpers::pack_astc_block(decoded_blocks(cur_bx, cur_by), log_blk); + if (!status) + return false; + + cur_bx++; + if (cur_bx == num_blocks_x) + { + cur_bx = 0; + cur_by++; + cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS; + } + + break; + } + case encoding_type::cReuse: + { + if (!cur_bx && !cur_by) + return false; + + const uint32_t reuse_delta_index = decoder.get_bits(REUSE_XY_DELTA_BITS); + + const int 
reuse_delta_x = g_reuse_xy_deltas[reuse_delta_index].m_x; + const int reuse_delta_y = g_reuse_xy_deltas[reuse_delta_index].m_y; + + const int prev_bx = cur_bx + reuse_delta_x, prev_by = cur_by + reuse_delta_y; + if ((prev_bx < 0) || (prev_bx >= (int)num_blocks_x)) + return false; + if (prev_by < 0) + return false; + + const astc_helpers::log_astc_block& prev_log_blk = decoded_log_blocks(prev_bx, calc_row_index(cur_by, prev_by, cur_row_index)); + + if (prev_log_blk.m_solid_color_flag_hdr) + return false; + assert(prev_log_blk.m_user_mode < TOTAL_BLOCK_MODE_DECS); + + astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index)); + astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by); + + log_blk = prev_log_blk; + + const uint32_t total_grid_weights = log_blk.m_grid_width * log_blk.m_grid_height * (log_blk.m_dual_plane ? 2 : 1); + + bool status = decode_values(decoder, total_grid_weights, log_blk.m_weight_ise_range, log_blk.m_weights); + if (!status) + return false; + +#if 0 + const astc_helpers::astc_block& prev_phys_blk = decoded_blocks(prev_bx, prev_by); + + astc_helpers::log_astc_block decomp_blk; + status = astc_helpers::unpack_block(&prev_phys_blk, decomp_blk, BLOCK_W, BLOCK_H); + if (!status) + return false; + + uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H * 2]; + requantize_astc_weights(total_grid_weights, log_blk.m_weights, log_blk.m_weight_ise_range, transcode_weights, decomp_blk.m_weight_ise_range); + + copy_weight_grid(log_blk.m_dual_plane, log_blk.m_grid_width, log_blk.m_grid_height, transcode_weights, decomp_blk); +#else + assert(log_blk.m_user_mode < TOTAL_BLOCK_MODE_DECS); + const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)log_blk.m_user_mode]; + const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem); + + assert(bmd.m_grid_x == log_blk.m_grid_width && bmd.m_grid_y == log_blk.m_grid_height); + assert(bmd.m_dp == log_blk.m_dual_plane); + assert(bmd.m_cem == 
log_blk.m_color_endpoint_modes[0]); + assert(bmd.m_num_partitions == log_blk.m_num_partitions); + assert(bmd.m_dp_channel == log_blk.m_color_component_selector); + + // important: bmd.m_weight_ise_range/m_endpoint_ise_range may not match the logical block's due to deltas. + + astc_helpers::log_astc_block decomp_blk; + decomp_blk.clear(); + decomp_blk.m_dual_plane = bmd.m_dp; + decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel; + decomp_blk.m_partition_id = log_blk.m_partition_id; + + decomp_blk.m_num_partitions = (uint8_t)bmd.m_num_partitions; + + for (uint32_t p = 0; p < bmd.m_num_partitions; p++) + decomp_blk.m_color_endpoint_modes[p] = (uint8_t)bmd.m_cem; + + decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range; + decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range; + + for (uint32_t p = 0; p < bmd.m_num_partitions; p++) + requantize_ise_endpoints(bmd.m_cem, log_blk.m_endpoint_ise_range, log_blk.m_endpoints + num_endpoint_values * p, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints + num_endpoint_values * p); + + uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2]; + requantize_astc_weights(total_grid_weights, log_blk.m_weights, log_blk.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range); + + copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk); +#endif + status = astc_helpers::pack_astc_block(phys_blk, decomp_blk); + if (!status) + return false; + + cur_bx++; + if (cur_bx == num_blocks_x) + { + cur_bx = 0; + cur_by++; + cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS; + } + + break; + } + case encoding_type::cBlock: + { + const block_mode bm = (block_mode)decoder.decode_truncated_binary((uint32_t)block_mode::cBMTotalModes); + const endpoint_mode em = (endpoint_mode)decoder.decode_truncated_binary((uint32_t)endpoint_mode::cTotal); + + switch (em) + { + case endpoint_mode::cUseLeft: + case endpoint_mode::cUseUpper: + { + int 
neighbor_bx = cur_bx, neighbor_by = cur_by; + + if (em == endpoint_mode::cUseLeft) + neighbor_bx--; + else + neighbor_by--; + + if ((neighbor_bx < 0) || (neighbor_by < 0)) + return false; + + const astc_helpers::log_astc_block& neighbor_blk = decoded_log_blocks(neighbor_bx, calc_row_index(cur_by, neighbor_by, cur_row_index)); + if (!neighbor_blk.m_color_endpoint_modes[0]) + return false; + + const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)bm]; + const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem); + + if (bmd.m_cem != neighbor_blk.m_color_endpoint_modes[0]) + return false; + + astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index)); + astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by); + + log_blk.clear(); + assert((uint32_t)bm <= UINT8_MAX); + log_blk.m_user_mode = (uint8_t)bm; + log_blk.m_num_partitions = 1; + log_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem; + // Important: Notice how we're copying the neighbor's endpoint ISE range. Not using the mode's endpoint ISE range here. + // This is to avoid introducing more quantization error. + log_blk.m_endpoint_ise_range = neighbor_blk.m_endpoint_ise_range; + log_blk.m_weight_ise_range = (uint8_t)bmd.m_weight_ise_range; + log_blk.m_grid_width = (uint8_t)bmd.m_grid_x; + log_blk.m_grid_height = (uint8_t)bmd.m_grid_y; + log_blk.m_dual_plane = (uint8_t)bmd.m_dp; + log_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel; + + memcpy(log_blk.m_endpoints, neighbor_blk.m_endpoints, num_endpoint_values); + + const uint32_t total_grid_weights = bmd.m_grid_x * bmd.m_grid_y * (bmd.m_dp ? 
2 : 1); + + bool status = decode_values(decoder, total_grid_weights, bmd.m_weight_ise_range, log_blk.m_weights); + if (!status) + return false; + + astc_helpers::log_astc_block decomp_blk; + decomp_blk.clear(); + + decomp_blk.m_num_partitions = 1; + decomp_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem; + decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range; + decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range; + decomp_blk.m_dual_plane = (uint8_t)bmd.m_dp; + decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel; + + requantize_ise_endpoints(bmd.m_cem, log_blk.m_endpoint_ise_range, log_blk.m_endpoints, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints); + + uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2]; + requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range); + + copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk); + + status = astc_helpers::pack_astc_block(phys_blk, decomp_blk); + if (!status) + return false; + + cur_bx++; + if (cur_bx == num_blocks_x) + { + cur_bx = 0; + cur_by++; + cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS; + } + + break; + } + case endpoint_mode::cUseLeftDelta: + case endpoint_mode::cUseUpperDelta: + { + int neighbor_bx = cur_bx, neighbor_by = cur_by; + + if (em == endpoint_mode::cUseLeftDelta) + neighbor_bx--; + else + neighbor_by--; + + if ((neighbor_bx < 0) || (neighbor_by < 0)) + return false; + + const astc_helpers::log_astc_block& neighbor_blk = decoded_log_blocks(neighbor_bx, calc_row_index(cur_by, neighbor_by, cur_row_index)); + if (!neighbor_blk.m_color_endpoint_modes[0]) + return false; + + const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)bm]; + const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem); + + if (bmd.m_cem != neighbor_blk.m_color_endpoint_modes[0]) + return false; + + 
astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index)); + astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by); + + log_blk.clear(); + assert((uint32_t)bm <= UINT8_MAX); + log_blk.m_user_mode = (uint8_t)bm; + log_blk.m_num_partitions = 1; + log_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem; + log_blk.m_dual_plane = bmd.m_dp; + log_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel; + + log_blk.m_endpoint_ise_range = (uint8_t)bmd.m_endpoint_ise_range; + requantize_ise_endpoints(bmd.m_cem, neighbor_blk.m_endpoint_ise_range, neighbor_blk.m_endpoints, bmd.m_endpoint_ise_range, log_blk.m_endpoints); + + const int total_endpoint_delta_vals = 1 << NUM_ENDPOINT_DELTA_BITS; + const int low_delta_limit = -(total_endpoint_delta_vals / 2); // high_delta_limit = (total_endpoint_delta_vals / 2) - 1; + + const auto& ise_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_ISE_to_rank; + const auto& rank_to_ise = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_rank_to_ISE; + const int total_endpoint_levels = astc_helpers::get_ise_levels(log_blk.m_endpoint_ise_range); + + for (uint32_t i = 0; i < num_endpoint_values; i++) + { + int cur_val = ise_to_rank[log_blk.m_endpoints[i]]; + + int delta = (int)decoder.get_bits(NUM_ENDPOINT_DELTA_BITS) + low_delta_limit; + + cur_val += delta; + if ((cur_val < 0) || (cur_val >= total_endpoint_levels)) + return false; + + log_blk.m_endpoints[i] = rank_to_ise[cur_val]; + } + + log_blk.m_weight_ise_range = (uint8_t)bmd.m_weight_ise_range; + log_blk.m_grid_width = (uint8_t)bmd.m_grid_x; + log_blk.m_grid_height = (uint8_t)bmd.m_grid_y; + + const uint32_t total_grid_weights = bmd.m_grid_x * bmd.m_grid_y * (bmd.m_dp ? 
2 : 1); + + bool status = decode_values(decoder, total_grid_weights, bmd.m_weight_ise_range, log_blk.m_weights); + if (!status) + return false; + + astc_helpers::log_astc_block decomp_blk; + decomp_blk.clear(); + + decomp_blk.m_num_partitions = 1; + decomp_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem; + decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range; + decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range; + decomp_blk.m_dual_plane = (uint8_t)bmd.m_dp; + decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel; + + requantize_ise_endpoints(bmd.m_cem, log_blk.m_endpoint_ise_range, log_blk.m_endpoints, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints); + + uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2]; + requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range); + + copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk); + + status = astc_helpers::pack_astc_block(phys_blk, decomp_blk); + if (!status) + return false; + + cur_bx++; + if (cur_bx == num_blocks_x) + { + cur_bx = 0; + cur_by++; + cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS; + } + + break; + } + case endpoint_mode::cRaw: + { + const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)bm]; + + const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem); + + astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index)); + astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by); + + log_blk.clear(); + + assert((uint32_t)bm <= UINT8_MAX); + log_blk.m_user_mode = (uint8_t)bm; + + log_blk.m_num_partitions = (uint8_t)bmd.m_num_partitions; + + for (uint32_t p = 0; p < bmd.m_num_partitions; p++) + log_blk.m_color_endpoint_modes[p] = (uint8_t)bmd.m_cem; + + log_blk.m_endpoint_ise_range = (uint8_t)bmd.m_endpoint_ise_range; + 
log_blk.m_weight_ise_range = (uint8_t)bmd.m_weight_ise_range; + + log_blk.m_grid_width = (uint8_t)bmd.m_grid_x; + log_blk.m_grid_height = (uint8_t)bmd.m_grid_y; + log_blk.m_dual_plane = (uint8_t)bmd.m_dp; + log_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel; + + if (bmd.m_num_partitions == 2) + { + const uint32_t unique_partition_index = decoder.decode_truncated_binary(NUM_UNIQUE_PARTITIONS2); + log_blk.m_partition_id = (uint16_t)g_part2_unique_index_to_seed[unique_partition_index]; + } + else if (bmd.m_num_partitions == 3) + { + const uint32_t unique_partition_index = decoder.decode_truncated_binary(NUM_UNIQUE_PARTITIONS3); + log_blk.m_partition_id = (uint16_t)g_part3_unique_index_to_seed[unique_partition_index]; + } + + bool status = decode_values(decoder, num_endpoint_values * bmd.m_num_partitions, bmd.m_endpoint_ise_range, log_blk.m_endpoints); + if (!status) + return false; + + const uint32_t total_grid_weights = bmd.m_grid_x * bmd.m_grid_y * (bmd.m_dp ? 2 : 1); + + status = decode_values(decoder, total_grid_weights, bmd.m_weight_ise_range, log_blk.m_weights); + if (!status) + return false; + + astc_helpers::log_astc_block decomp_blk; + decomp_blk.clear(); + decomp_blk.m_dual_plane = bmd.m_dp; + decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel; + decomp_blk.m_partition_id = log_blk.m_partition_id; + + decomp_blk.m_num_partitions = (uint8_t)bmd.m_num_partitions; + + for (uint32_t p = 0; p < bmd.m_num_partitions; p++) + decomp_blk.m_color_endpoint_modes[p] = (uint8_t)bmd.m_cem; + + decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range; + decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range; + + for (uint32_t p = 0; p < bmd.m_num_partitions; p++) + requantize_ise_endpoints(bmd.m_cem, bmd.m_endpoint_ise_range, log_blk.m_endpoints + num_endpoint_values * p, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints + num_endpoint_values * p); + + uint8_t transcode_weights[BLOCK_W * BLOCK_H * 
2]; + requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range); + + copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk); + + status = astc_helpers::pack_astc_block(phys_blk, decomp_blk); + if (!status) + return false; + + cur_bx++; + if (cur_bx == num_blocks_x) + { + cur_bx = 0; + cur_by++; + cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS; + } + + break; + } + default: + { + assert(0); + return false; + } + } + + break; + } + default: + { + assert(0); + return false; + } + } + } + + if (decoder.get_bits(16) != 0xA742) + { + //fmt_error_printf("End marker not found!\n"); + return false; + } + + //fmt_printf("Total decode_file() time: {} secs\n", tm.get_elapsed_secs()); + + return true; + } + + } // namespace astc_6x6_hdr + +#endif // BASISD_SUPPORT_UASTC_HDR + +} // namespace basist diff --git a/thirdparty/basisu/transcoder/basisu_transcoder.h b/thirdparty/basisu/transcoder/basisu_transcoder.h new file mode 100644 index 000000000..92d1526c5 --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_transcoder.h @@ -0,0 +1,1192 @@ +// basisu_transcoder.h +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +// By default KTX2 support is enabled to simplify compilation. This implies the need for the Zstandard library (which we distribute as a single source file in the "zstd" directory) by default. +// Set BASISD_SUPPORT_KTX2 to 0 to completely disable KTX2 support as well as Zstd/miniz usage which is only required for UASTC supercompression in KTX2 files. +// Also see BASISD_SUPPORT_KTX2_ZSTD in basisu_transcoder.cpp, which individually disables Zstd usage. +#ifndef BASISD_SUPPORT_KTX2 + #define BASISD_SUPPORT_KTX2 1 +#endif + +// Set BASISD_SUPPORT_KTX2_ZSTD to 0 to disable Zstd usage and KTX2 UASTC Zstd supercompression support +#ifndef BASISD_SUPPORT_KTX2_ZSTD + #define BASISD_SUPPORT_KTX2_ZSTD 1 +#endif + +// Set BASISU_FORCE_DEVEL_MESSAGES to 1 to enable debug printf()'s whenever an error occurs, for easier debugging during development. +#ifndef BASISU_FORCE_DEVEL_MESSAGES + // TODO - disable before checking in + #define BASISU_FORCE_DEVEL_MESSAGES 0 +#endif + +#include "basisu_transcoder_internal.h" +#include "basisu_transcoder_uastc.h" +#include "basisu_file_headers.h" + +namespace basist +{ + // High-level composite texture formats supported by the transcoder. + // Each of these texture formats directly correspond to OpenGL/D3D/Vulkan etc. texture formats. + // Notes: + // - If you specify a texture format that supports alpha, but the .basis file doesn't have alpha, the transcoder will automatically output a + // fully opaque (255) alpha channel. + // - The PVRTC1 texture formats only support power of 2 dimension .basis files, but this may be relaxed in a future version. + // - The PVRTC1 transcoders are real-time encoders, so don't expect the highest quality. We may add a slower encoder with improved quality. + // - These enums must be kept in sync with Javascript code that calls the transcoder. 
+ enum class transcoder_texture_format + { + // Compressed formats + + // ETC1-2 + cTFETC1_RGB = 0, // Opaque only, returns RGB or alpha data if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified + cTFETC2_RGBA = 1, // Opaque+alpha, ETC2_EAC_A8 block followed by a ETC1 block, alpha channel will be opaque for opaque .basis files + + // BC1-5, BC7 (desktop, some mobile devices) + cTFBC1_RGB = 2, // Opaque only, no punchthrough alpha support yet, transcodes alpha slice if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified + cTFBC3_RGBA = 3, // Opaque+alpha, BC4 followed by a BC1 block, alpha channel will be opaque for opaque .basis files + cTFBC4_R = 4, // Red only, alpha slice is transcoded to output if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified + cTFBC5_RG = 5, // XY: Two BC4 blocks, X=R and Y=Alpha, .basis file should have alpha data (if not Y will be all 255's) + cTFBC7_RGBA = 6, // RGB or RGBA, mode 5 for ETC1S, modes (1,2,3,5,6,7) for UASTC + + // PVRTC1 4bpp (mobile, PowerVR devices) + cTFPVRTC1_4_RGB = 8, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified, nearly lowest quality of any texture format. + cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format. + + // ASTC (mobile, Intel devices, hopefully all desktop GPU's one day) + cTFASTC_4x4_RGBA = 10, // LDR. Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. + // LDR: Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions. + + // ATC (mobile, Adreno devices, this is a niche format) + cTFATC_RGB = 11, // Opaque, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. ATI ATC (GL_ATC_RGB_AMD) + cTFATC_RGBA = 12, // Opaque+alpha, alpha channel will be opaque for opaque .basis files. 
ATI ATC (GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD) + + // FXT1 (desktop, Intel devices, this is a super obscure format) + cTFFXT1_RGB = 17, // Opaque only, uses exclusively CC_MIXED blocks. Notable for having a 8x4 block size. GL_3DFX_texture_compression_FXT1 is supported on Intel integrated GPU's (such as HD 630). + // Punch-through alpha is relatively easy to support, but full alpha is harder. This format is only here for completeness so opaque-only is fine for now. + // See the BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING macro in basisu_transcoder_internal.h. + + cTFPVRTC2_4_RGB = 18, // Opaque-only, almost BC1 quality, much faster to transcode and supports arbitrary texture dimensions (unlike PVRTC1 RGB). + cTFPVRTC2_4_RGBA = 19, // Opaque+alpha, slower to encode than cTFPVRTC2_4_RGB. Premultiplied alpha is highly recommended, otherwise the color channel can leak into the alpha channel on transparent blocks. + + cTFETC2_EAC_R11 = 20, // R only (ETC2 EAC R11 unsigned) + cTFETC2_EAC_RG11 = 21, // RG only (ETC2 EAC RG11 unsigned), R=opaque.r, G=alpha - for tangent space normal maps + + cTFBC6H = 22, // HDR, RGB only, unsigned + cTFASTC_HDR_4x4_RGBA = 23, // HDR, RGBA (currently UASTC HDR 4x4 encoders are only RGB), unsigned + + // Uncompressed (raw pixel) formats + // Note these uncompressed formats (RGBA32, 565, and 4444) can only be transcoded to from LDR input files (ETC1S or UASTC LDR). + cTFRGBA32 = 13, // 32bpp RGBA image stored in raster (not block) order in memory, R is first byte, A is last byte. + cTFRGB565 = 14, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 11 + cTFBGR565 = 15, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 0 + cTFRGBA4444 = 16, // 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0 + + // Note these uncompressed formats (HALF and 9E5) can only be transcoded to from HDR input files (UASTC HDR 4x4 or ASTC HDR 6x6). 
+ cTFRGB_HALF = 24, // 48bpp RGB half (16-bits/component, 3 components) + cTFRGBA_HALF = 25, // 64bpp RGBA half (16-bits/component, 4 components) (A will always currently 1.0, UASTC_HDR doesn't support alpha) + cTFRGB_9E5 = 26, // 32bpp RGB 9E5 (shared exponent, positive only, see GL_EXT_texture_shared_exponent) + + cTFASTC_HDR_6x6_RGBA = 27, // HDR, RGBA (currently our ASTC HDR 6x6 encodes are only RGB), unsigned + + cTFTotalTextureFormats = 28, + + // ----- The following are old/legacy enums for compatibility with code compiled against previous versions + cTFETC1 = cTFETC1_RGB, + cTFETC2 = cTFETC2_RGBA, + cTFBC1 = cTFBC1_RGB, + cTFBC3 = cTFBC3_RGBA, + cTFBC4 = cTFBC4_R, + cTFBC5 = cTFBC5_RG, + + // Previously, the caller had some control over which BC7 mode the transcoder output. We've simplified this due to UASTC, which supports numerous modes. + cTFBC7_M6_RGB = cTFBC7_RGBA, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. Highest quality of all the non-ETC1 formats. + cTFBC7_M5_RGBA = cTFBC7_RGBA, // Opaque+alpha, alpha channel will be opaque for opaque .basis files + cTFBC7_M6_OPAQUE_ONLY = cTFBC7_RGBA, + cTFBC7_M5 = cTFBC7_RGBA, + cTFBC7_ALT = 7, + + cTFASTC_4x4 = cTFASTC_4x4_RGBA, + + cTFATC_RGBA_INTERPOLATED_ALPHA = cTFATC_RGBA, + }; + + // For compressed texture formats, this returns the # of bytes per block. For uncompressed, it returns the # of bytes per pixel. + // NOTE: Previously, this function was called basis_get_bytes_per_block(), and it always returned 16*bytes_per_pixel for uncompressed formats which was confusing. + uint32_t basis_get_bytes_per_block_or_pixel(transcoder_texture_format fmt); + + // Returns format's name in ASCII + const char* basis_get_format_name(transcoder_texture_format fmt); + + // Returns block format name in ASCII + const char* basis_get_block_format_name(block_format fmt); + + // Returns true if the format supports an alpha channel. 
+ bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt); + + // Returns true if the format is HDR. + bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt); + + // Returns true if the format is LDR. + inline bool basis_transcoder_format_is_ldr(transcoder_texture_format fmt) { return !basis_transcoder_format_is_hdr(fmt); } + + // Returns the basisu::texture_format corresponding to the specified transcoder_texture_format. + basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt); + + // Returns the texture type's name in ASCII. + const char* basis_get_texture_type_name(basis_texture_type tex_type); + + // Returns true if the transcoder texture type is an uncompressed (raw pixel) format. + bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type); + + // Returns the # of bytes per pixel for uncompressed formats, or 0 for block texture formats. + uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt); + + // Returns the block width for the specified texture format, which is currently either 4 or 8 for FXT1. + uint32_t basis_get_block_width(transcoder_texture_format tex_type); + + // Returns the block height for the specified texture format, which is currently always 4. + uint32_t basis_get_block_height(transcoder_texture_format tex_type); + + // Returns true if the specified format was enabled at compile time, and is supported for the specific basis/ktx2 texture format (ETC1S, UASTC, or UASTC HDR). + bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt = basis_tex_format::cETC1S); + + // Returns the block width/height for the specified basis texture file format. 
+ uint32_t basis_tex_format_get_block_width(basis_tex_format fmt); + uint32_t basis_tex_format_get_block_height(basis_tex_format fmt); + + bool basis_tex_format_is_hdr(basis_tex_format fmt); + inline bool basis_tex_format_is_ldr(basis_tex_format fmt) { return !basis_tex_format_is_hdr(fmt); } + + // Validates that the output buffer is large enough to hold the entire transcoded texture. + // For uncompressed texture formats, most input parameters are in pixels, not blocks. Blocks are 4x4 pixels. + bool basis_validate_output_buffer_size(transcoder_texture_format target_format, + uint32_t output_blocks_buf_size_in_blocks_or_pixels, + uint32_t orig_width, uint32_t orig_height, + uint32_t output_row_pitch_in_blocks_or_pixels, + uint32_t output_rows_in_pixels); + + // Computes the size in bytes of a transcoded image or texture, taking into account the format's block width/height and any minimum size PVRTC1 requirements required by OpenGL. + // Note the returned value is not necessarily the # of bytes a transcoder could write to the output buffer due to these minimum PVRTC1 requirements. + // (These PVRTC1 requirements are not ours, but OpenGL's.) + uint32_t basis_compute_transcoded_image_size_in_bytes(transcoder_texture_format target_format, uint32_t orig_width, uint32_t orig_height); + + class basisu_transcoder; + + // This struct holds all state used during transcoding. For video, it needs to persist between image transcodes (it holds the previous frame). + // For threading you can use one state per thread. 
+ struct basisu_transcoder_state + { + struct block_preds + { + uint16_t m_endpoint_index; + uint8_t m_pred_bits; + }; + + basisu::vector m_block_endpoint_preds[2]; + + enum { cMaxPrevFrameLevels = 16 }; + basisu::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] + + void clear() + { + for (uint32_t i = 0; i < 2; i++) + { + m_block_endpoint_preds[i].clear(); + + for (uint32_t j = 0; j < cMaxPrevFrameLevels; j++) + m_prev_frame_indices[i][j].clear(); + } + } + }; + + // Low-level helper classes that do the actual transcoding. + + // ETC1S + class basisu_lowlevel_etc1s_transcoder + { + friend class basisu_transcoder; + + public: + basisu_lowlevel_etc1s_transcoder(); + + void set_global_codebooks(const basisu_lowlevel_etc1s_transcoder* pGlobal_codebook) { m_pGlobal_codebook = pGlobal_codebook; } + const basisu_lowlevel_etc1s_transcoder* get_global_codebooks() const { return m_pGlobal_codebook; } + + bool decode_palettes( + uint32_t num_endpoints, const uint8_t* pEndpoints_data, uint32_t endpoints_data_size, + uint32_t num_selectors, const uint8_t* pSelectors_data, uint32_t selectors_data_size); + + bool decode_tables(const uint8_t* pTable_data, uint32_t table_data_size); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0, uint32_t decode_flags = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t 
output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, + header.m_tex_type == cBASISTexTypeVideoFrames, (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0, slice_desc.m_level_index, + slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, pState, + astc_transcode_alpha, + pAlpha_blocks, + output_rows_in_pixels, decode_flags); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t rgb_offset, uint32_t rgb_length, uint64_t alpha_offset, uint32_t alpha_length, + uint32_t decode_flags = 0, + bool basis_file_has_alpha_slices = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0); + + void clear() + { + m_local_endpoints.clear(); + m_local_selectors.clear(); + m_endpoint_pred_model.clear(); + m_delta_endpoint_model.clear(); + m_selector_model.clear(); + m_selector_history_buf_rle_model.clear(); + m_selector_history_buf_size = 0; + } + + // Low-level methods + typedef basisu::vector endpoint_vec; + const endpoint_vec& get_endpoints() const { return m_local_endpoints; } + + typedef basisu::vector selector_vec; + const 
selector_vec& get_selectors() const { return m_local_selectors; } + + private: + const basisu_lowlevel_etc1s_transcoder* m_pGlobal_codebook; + + endpoint_vec m_local_endpoints; + selector_vec m_local_selectors; + + huffman_decoding_table m_endpoint_pred_model, m_delta_endpoint_model, m_selector_model, m_selector_history_buf_rle_model; + + uint32_t m_selector_history_buf_size; + + basisu_transcoder_state m_def_state; + }; + + enum basisu_decode_flags + { + // PVRTC1: decode non-pow2 ETC1S texture level to the next larger power of 2 (not implemented yet, but we're going to support it). Ignored if the slice's dimensions are already a power of 2. + cDecodeFlagsPVRTCDecodeToNextPow2 = 2, + + // When decoding to an opaque texture format, if the basis file has alpha, decode the alpha slice instead of the color slice to the output texture format. + // This is primarily to allow decoding of textures with alpha to multiple ETC1 textures (one for color, another for alpha). + cDecodeFlagsTranscodeAlphaDataToOpaqueFormats = 4, + + // Forbid usage of BC1 3 color blocks (we don't support BC1 punchthrough alpha yet). + // This flag is used internally when decoding to BC3. + cDecodeFlagsBC1ForbidThreeColorBlocks = 8, + + // The output buffer contains alpha endpoint/selector indices. + // Used internally when decoding formats like ASTC that require both color and alpha data to be available when transcoding to the output format. 
+ cDecodeFlagsOutputHasAlphaIndices = 16, + + cDecodeFlagsHighQuality = 32, + + cDecodeFlagsNoETC1SChromaFiltering = 64 + }; + + // UASTC LDR 4x4 + class basisu_lowlevel_uastc_ldr_4x4_transcoder + { + friend class basisu_transcoder; + + public: + basisu_lowlevel_uastc_ldr_4x4_transcoder(); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, + output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, + pState, output_rows_in_pixels, channel0, channel1, decode_flags); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t 
num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags = 0, + bool has_alpha = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0, + int channel0 = -1, int channel1 = -1); + }; + + // UASTC HDR 4x4 + class basisu_lowlevel_uastc_hdr_4x4_transcoder + { + friend class basisu_transcoder; + + public: + basisu_lowlevel_uastc_hdr_4x4_transcoder(); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, + output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, + pState, output_rows_in_pixels, channel0, channel1, decode_flags); + } + + // Container 
independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags = 0, + bool has_alpha = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0, + int channel0 = -1, int channel1 = -1); + }; + + // ASTC HDR 6x6 + class basisu_lowlevel_astc_hdr_6x6_transcoder + { + friend class basisu_transcoder; + + public: + basisu_lowlevel_astc_hdr_6x6_transcoder(); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, + output_block_or_pixel_stride_in_bytes, 
bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, + pState, output_rows_in_pixels, channel0, channel1, decode_flags); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags = 0, + bool has_alpha = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0, + int channel0 = -1, int channel1 = -1); + }; + + // ASTC HDR 6x6 intermediate + class basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder + { + friend class basisu_transcoder; + + public: + basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder(); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels 
= 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, + output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, + pState, output_rows_in_pixels, channel0, channel1, decode_flags); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags = 0, + bool has_alpha = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0, + int channel0 = -1, int channel1 = -1); + }; + + struct basisu_slice_info + { + uint32_t m_orig_width; + uint32_t m_orig_height; + + uint32_t m_width; + uint32_t m_height; + + uint32_t m_num_blocks_x; + uint32_t m_num_blocks_y; + uint32_t m_total_blocks; + + uint32_t m_block_width; + uint32_t m_block_height; + + uint32_t m_compressed_size; + + uint32_t m_slice_index; // the slice index in the .basis file + uint32_t m_image_index; // the source image index originally provided to the encoder + uint32_t m_level_index; // the mipmap level within this image + + uint32_t m_unpacked_slice_crc16; + + bool m_alpha_flag; // true if the slice has alpha data + bool m_iframe_flag; // true if the slice is an I-Frame + }; + + typedef basisu::vector basisu_slice_info_vec; + + struct basisu_image_info + { + uint32_t m_image_index; 
+ uint32_t m_total_levels; + + uint32_t m_orig_width; + uint32_t m_orig_height; + + uint32_t m_width; + uint32_t m_height; + + uint32_t m_block_width; + uint32_t m_block_height; + + uint32_t m_num_blocks_x; + uint32_t m_num_blocks_y; + uint32_t m_total_blocks; + + uint32_t m_first_slice_index; + + bool m_alpha_flag; // true if the image has alpha data + bool m_iframe_flag; // true if the image is an I-Frame + }; + + struct basisu_image_level_info + { + uint32_t m_image_index; + uint32_t m_level_index; + + uint32_t m_orig_width; + uint32_t m_orig_height; + + uint32_t m_width; + uint32_t m_height; + + uint32_t m_block_width; + uint32_t m_block_height; + + uint32_t m_num_blocks_x; + uint32_t m_num_blocks_y; + uint32_t m_total_blocks; + + uint32_t m_first_slice_index; + + uint32_t m_rgb_file_ofs; + uint32_t m_rgb_file_len; + uint32_t m_alpha_file_ofs; + uint32_t m_alpha_file_len; + + bool m_alpha_flag; // true if the image has alpha data + bool m_iframe_flag; // true if the image is an I-Frame + }; + + struct basisu_file_info + { + uint32_t m_version; + uint32_t m_total_header_size; + + uint32_t m_total_selectors; + // will be 0 for UASTC or if the file uses global codebooks + uint32_t m_selector_codebook_ofs; + uint32_t m_selector_codebook_size; + + uint32_t m_total_endpoints; + // will be 0 for UASTC or if the file uses global codebooks + uint32_t m_endpoint_codebook_ofs; + uint32_t m_endpoint_codebook_size; + + uint32_t m_tables_ofs; + uint32_t m_tables_size; + + uint32_t m_slices_size; + + basis_texture_type m_tex_type; + uint32_t m_us_per_frame; + + // Low-level slice information (1 slice per image for color-only basis files, 2 for alpha basis files) + basisu_slice_info_vec m_slice_info; + + uint32_t m_total_images; // total # of images + basisu::vector m_image_mipmap_levels; // the # of mipmap levels for each image + + uint32_t m_userdata0; + uint32_t m_userdata1; + + basis_tex_format m_tex_format; // ETC1S, UASTC, etc. 
+ + uint32_t m_block_width; + uint32_t m_block_height; + + bool m_y_flipped; // true if the image was Y flipped + bool m_etc1s; // true if the file is ETC1S + bool m_has_alpha_slices; // true if the texture has alpha slices (for ETC1S: even slices RGB, odd slices alpha) + }; + + // High-level transcoder class which accepts .basis file data and allows the caller to query information about the file and transcode image levels to various texture formats. + // If you're just starting out this is the class you care about. + class basisu_transcoder + { + basisu_transcoder(basisu_transcoder&); + basisu_transcoder& operator= (const basisu_transcoder&); + + public: + basisu_transcoder(); + + // Validates the .basis file. This computes a crc16 over the entire file, so it's slow. + bool validate_file_checksums(const void* pData, uint32_t data_size, bool full_validation) const; + + // Quick header validation - no crc16 checks. + bool validate_header(const void* pData, uint32_t data_size) const; + + basis_texture_type get_texture_type(const void* pData, uint32_t data_size) const; + bool get_userdata(const void* pData, uint32_t data_size, uint32_t& userdata0, uint32_t& userdata1) const; + + // Returns the total number of images in the basis file (always 1 or more). + // Note that the number of mipmap levels for each image may differ, and that images may have different resolutions. + uint32_t get_total_images(const void* pData, uint32_t data_size) const; + + basis_tex_format get_basis_tex_format(const void* pData, uint32_t data_size) const; + + // Returns the number of mipmap levels in an image. + uint32_t get_total_image_levels(const void* pData, uint32_t data_size, uint32_t image_index) const; + + // Returns basic information about an image. Note that orig_width/orig_height may not be a multiple of 4. 
+ bool get_image_level_desc(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t& orig_width, uint32_t& orig_height, uint32_t& total_blocks) const; + + // Returns information about the specified image. + bool get_image_info(const void* pData, uint32_t data_size, basisu_image_info& image_info, uint32_t image_index) const; + + // Returns information about the specified image's mipmap level. + bool get_image_level_info(const void* pData, uint32_t data_size, basisu_image_level_info& level_info, uint32_t image_index, uint32_t level_index) const; + + // Get a description of the basis file and low-level information about each slice. + bool get_file_info(const void* pData, uint32_t data_size, basisu_file_info& file_info) const; + + // start_transcoding() must be called before calling transcode_slice() or transcode_image_level(). + // For ETC1S files, this call decompresses the selector/endpoint codebooks, so ideally you would only call this once per .basis file (not each image/mipmap level). + bool start_transcoding(const void* pData, uint32_t data_size); + + bool stop_transcoding(); + + // Returns true if start_transcoding() has been called. + bool get_ready_to_transcode() const { return m_ready_to_transcode; } + + // transcode_image_level() decodes a single mipmap level from the .basis file to any of the supported output texture formats. + // It'll first find the slice(s) to transcode, then call transcode_slice() one or two times to decode both the color and alpha texture data (or RG texture data from two slices for BC5). + // If the .basis file doesn't have alpha slices, the output alpha blocks will be set to fully opaque (all 255's). + // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. 
+ // output_blocks_buf_size_in_blocks_or_pixels should be at least the image level's total_blocks (num_blocks_x * num_blocks_y), or the total number of output pixels if fmt==cTFRGBA32 etc. + // output_row_pitch_in_blocks_or_pixels: Number of blocks or pixels per row. If 0, the transcoder uses the slice's num_blocks_x or orig_width (NOT num_blocks_x * 4). Ignored for PVRTC1 (due to texture swizzling). + // output_rows_in_pixels: Ignored unless fmt is uncompressed (cRGBA32, etc.). The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). + // Notes: + // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. + // - This method assumes the output texture buffer is readable. In some cases to handle alpha, the transcoder will write temporary data to the output texture in + // a first pass, which will be read in a second pass. + bool transcode_image_level( + const void* pData, uint32_t data_size, + uint32_t image_index, uint32_t level_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + transcoder_texture_format fmt, + uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0) const; + + // Finds the basis slice corresponding to the specified image/level/alpha params, or -1 if the slice can't be found. + int find_slice(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const; + + // transcode_slice() decodes a single slice from the .basis file. It's a low-level API - most likely you want to use transcode_image_level(). + // This is a low-level API, and will be needed to be called multiple times to decode some texture formats (like BC3, BC5, or ETC2). 
+ // output_blocks_buf_size_in_blocks_or_pixels is just used for verification to make sure the output buffer is large enough. + // output_blocks_buf_size_in_blocks_or_pixels should be at least the image level's total_blocks (num_blocks_x * num_blocks_y), or the total number of output pixels if fmt==cTFRGBA32. + // output_block_stride_in_bytes: Number of bytes between each output block. + // output_row_pitch_in_blocks_or_pixels: Number of blocks or pixels per row. If 0, the transcoder uses the slice's num_blocks_x or orig_width (NOT num_blocks_x * 4). Ignored for PVRTC1 (due to texture swizzling). + // output_rows_in_pixels: Ignored unless fmt is cRGBA32. The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). + // Notes: + // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. + bool transcode_slice(const void* pData, uint32_t data_size, uint32_t slice_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + block_format fmt, uint32_t output_block_stride_in_bytes, uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state* pState = nullptr, void* pAlpha_blocks = nullptr, + uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1) const; + + static void write_opaque_alpha_blocks( + uint32_t num_blocks_x, uint32_t num_blocks_y, + void* pOutput_blocks, block_format fmt, + uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels); + + void set_global_codebooks(const basisu_lowlevel_etc1s_transcoder* pGlobal_codebook) { m_lowlevel_etc1s_decoder.set_global_codebooks(pGlobal_codebook); } + const basisu_lowlevel_etc1s_transcoder* get_global_codebooks() const { return m_lowlevel_etc1s_decoder.get_global_codebooks(); } + + const basisu_lowlevel_etc1s_transcoder& get_lowlevel_etc1s_decoder() const { return 
m_lowlevel_etc1s_decoder; } + basisu_lowlevel_etc1s_transcoder& get_lowlevel_etc1s_decoder() { return m_lowlevel_etc1s_decoder; } + + const basisu_lowlevel_uastc_ldr_4x4_transcoder& get_lowlevel_uastc_decoder() const { return m_lowlevel_uastc_decoder; } + basisu_lowlevel_uastc_ldr_4x4_transcoder& get_lowlevel_uastc_decoder() { return m_lowlevel_uastc_decoder; } + + private: + mutable basisu_lowlevel_etc1s_transcoder m_lowlevel_etc1s_decoder; + mutable basisu_lowlevel_uastc_ldr_4x4_transcoder m_lowlevel_uastc_decoder; + mutable basisu_lowlevel_uastc_hdr_4x4_transcoder m_lowlevel_uastc_4x4_hdr_decoder; + mutable basisu_lowlevel_astc_hdr_6x6_transcoder m_lowlevel_astc_6x6_hdr_decoder; + mutable basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder m_lowlevel_astc_6x6_hdr_intermediate_decoder; + + bool m_ready_to_transcode; + + int find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const; + + bool validate_header_quick(const void* pData, uint32_t data_size) const; + }; + + // basisu_transcoder_init() MUST be called before a .basis file can be transcoded. + void basisu_transcoder_init(); + + enum debug_flags_t + { + cDebugFlagVisCRs = 1, + cDebugFlagVisBC1Sels = 2, + cDebugFlagVisBC1Endpoints = 4 + }; + uint32_t get_debug_flags(); + void set_debug_flags(uint32_t f); + + // ------------------------------------------------------------------------------------------------------ + // Optional .KTX2 file format support + // KTX2 reading optionally requires miniz or Zstd decompressors for supercompressed UASTC files. 
+ // ------------------------------------------------------------------------------------------------------ +#if BASISD_SUPPORT_KTX2 +#pragma pack(push) +#pragma pack(1) + struct ktx2_header + { + uint8_t m_identifier[12]; + basisu::packed_uint<4> m_vk_format; + basisu::packed_uint<4> m_type_size; + basisu::packed_uint<4> m_pixel_width; + basisu::packed_uint<4> m_pixel_height; + basisu::packed_uint<4> m_pixel_depth; + basisu::packed_uint<4> m_layer_count; + basisu::packed_uint<4> m_face_count; + basisu::packed_uint<4> m_level_count; + basisu::packed_uint<4> m_supercompression_scheme; + basisu::packed_uint<4> m_dfd_byte_offset; + basisu::packed_uint<4> m_dfd_byte_length; + basisu::packed_uint<4> m_kvd_byte_offset; + basisu::packed_uint<4> m_kvd_byte_length; + basisu::packed_uint<8> m_sgd_byte_offset; + basisu::packed_uint<8> m_sgd_byte_length; + }; + + struct ktx2_level_index + { + basisu::packed_uint<8> m_byte_offset; + basisu::packed_uint<8> m_byte_length; + basisu::packed_uint<8> m_uncompressed_byte_length; + }; + + struct ktx2_etc1s_global_data_header + { + basisu::packed_uint<2> m_endpoint_count; + basisu::packed_uint<2> m_selector_count; + basisu::packed_uint<4> m_endpoints_byte_length; + basisu::packed_uint<4> m_selectors_byte_length; + basisu::packed_uint<4> m_tables_byte_length; + basisu::packed_uint<4> m_extended_byte_length; + }; + + struct ktx2_etc1s_image_desc + { + basisu::packed_uint<4> m_image_flags; + basisu::packed_uint<4> m_rgb_slice_byte_offset; + basisu::packed_uint<4> m_rgb_slice_byte_length; + basisu::packed_uint<4> m_alpha_slice_byte_offset; + basisu::packed_uint<4> m_alpha_slice_byte_length; + }; + + struct ktx2_astc_hdr_6x6_intermediate_image_desc + { + basisu::packed_uint<4> m_rgb_slice_byte_offset; + basisu::packed_uint<4> m_rgb_slice_byte_length; + }; + + struct ktx2_animdata + { + basisu::packed_uint<4> m_duration; + basisu::packed_uint<4> m_timescale; + basisu::packed_uint<4> m_loopcount; + }; +#pragma pack(pop) + + const uint32_t 
KTX2_VK_FORMAT_UNDEFINED = 0; + + // These are standard Vulkan texture VkFormat ID's, see https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkFormat.html + const uint32_t KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK = 1000066000; + const uint32_t KTX2_FORMAT_ASTC_5x4_SFLOAT_BLOCK = 1000066001; + const uint32_t KTX2_FORMAT_ASTC_5x5_SFLOAT_BLOCK = 1000066002; + const uint32_t KTX2_FORMAT_ASTC_6x5_SFLOAT_BLOCK = 1000066003; + const uint32_t KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK = 1000066004; + const uint32_t KTX2_FORMAT_ASTC_8x5_SFLOAT_BLOCK = 1000066005; + const uint32_t KTX2_FORMAT_ASTC_8x6_SFLOAT_BLOCK = 1000066006; + + const uint32_t KTX2_KDF_DF_MODEL_ASTC = 162; // 0xA2 + const uint32_t KTX2_KDF_DF_MODEL_ETC1S = 163; // 0xA3 + const uint32_t KTX2_KDF_DF_MODEL_UASTC_LDR_4X4 = 166; // 0xA6 + const uint32_t KTX2_KDF_DF_MODEL_UASTC_HDR_4X4 = 167; // 0xA7 + const uint32_t KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE = 168; // 0xA8, TODO - coordinate with Khronos on this + + const uint32_t KTX2_IMAGE_IS_P_FRAME = 2; + const uint32_t KTX2_UASTC_BLOCK_SIZE = 16; // also the block size for UASTC_HDR + const uint32_t KTX2_MAX_SUPPORTED_LEVEL_COUNT = 16; // this is an implementation specific constraint and can be increased + + // The KTX2 transfer functions supported by KTX2 + const uint32_t KTX2_KHR_DF_TRANSFER_LINEAR = 1; + const uint32_t KTX2_KHR_DF_TRANSFER_SRGB = 2; + + enum ktx2_supercompression + { + KTX2_SS_NONE = 0, + KTX2_SS_BASISLZ = 1, + KTX2_SS_ZSTANDARD = 2, + KTX2_SS_BASIS + }; + + extern const uint8_t g_ktx2_file_identifier[12]; + + enum ktx2_df_channel_id + { + KTX2_DF_CHANNEL_ETC1S_RGB = 0U, + KTX2_DF_CHANNEL_ETC1S_RRR = 3U, + KTX2_DF_CHANNEL_ETC1S_GGG = 4U, + KTX2_DF_CHANNEL_ETC1S_AAA = 15U, + + KTX2_DF_CHANNEL_UASTC_DATA = 0U, + KTX2_DF_CHANNEL_UASTC_RGB = 0U, + KTX2_DF_CHANNEL_UASTC_RGBA = 3U, + KTX2_DF_CHANNEL_UASTC_RRR = 4U, + KTX2_DF_CHANNEL_UASTC_RRRG = 5U, + KTX2_DF_CHANNEL_UASTC_RG = 6U, + }; + + inline const char* 
ktx2_get_etc1s_df_channel_id_str(ktx2_df_channel_id id) + { + switch (id) + { + case KTX2_DF_CHANNEL_ETC1S_RGB: return "RGB"; + case KTX2_DF_CHANNEL_ETC1S_RRR: return "RRR"; + case KTX2_DF_CHANNEL_ETC1S_GGG: return "GGG"; + case KTX2_DF_CHANNEL_ETC1S_AAA: return "AAA"; + default: break; + } + return "?"; + } + + inline const char* ktx2_get_uastc_df_channel_id_str(ktx2_df_channel_id id) + { + switch (id) + { + case KTX2_DF_CHANNEL_UASTC_RGB: return "RGB"; + case KTX2_DF_CHANNEL_UASTC_RGBA: return "RGBA"; + case KTX2_DF_CHANNEL_UASTC_RRR: return "RRR"; + case KTX2_DF_CHANNEL_UASTC_RRRG: return "RRRG"; + case KTX2_DF_CHANNEL_UASTC_RG: return "RG"; + default: break; + } + return "?"; + } + + enum ktx2_df_color_primaries + { + KTX2_DF_PRIMARIES_UNSPECIFIED = 0, + KTX2_DF_PRIMARIES_BT709 = 1, + KTX2_DF_PRIMARIES_SRGB = 1, + KTX2_DF_PRIMARIES_BT601_EBU = 2, + KTX2_DF_PRIMARIES_BT601_SMPTE = 3, + KTX2_DF_PRIMARIES_BT2020 = 4, + KTX2_DF_PRIMARIES_CIEXYZ = 5, + KTX2_DF_PRIMARIES_ACES = 6, + KTX2_DF_PRIMARIES_ACESCC = 7, + KTX2_DF_PRIMARIES_NTSC1953 = 8, + KTX2_DF_PRIMARIES_PAL525 = 9, + KTX2_DF_PRIMARIES_DISPLAYP3 = 10, + KTX2_DF_PRIMARIES_ADOBERGB = 11 + }; + + inline const char* ktx2_get_df_color_primaries_str(ktx2_df_color_primaries p) + { + switch (p) + { + case KTX2_DF_PRIMARIES_UNSPECIFIED: return "UNSPECIFIED"; + case KTX2_DF_PRIMARIES_BT709: return "BT709"; + case KTX2_DF_PRIMARIES_BT601_EBU: return "EBU"; + case KTX2_DF_PRIMARIES_BT601_SMPTE: return "SMPTE"; + case KTX2_DF_PRIMARIES_BT2020: return "BT2020"; + case KTX2_DF_PRIMARIES_CIEXYZ: return "CIEXYZ"; + case KTX2_DF_PRIMARIES_ACES: return "ACES"; + case KTX2_DF_PRIMARIES_ACESCC: return "ACESCC"; + case KTX2_DF_PRIMARIES_NTSC1953: return "NTSC1953"; + case KTX2_DF_PRIMARIES_PAL525: return "PAL525"; + case KTX2_DF_PRIMARIES_DISPLAYP3: return "DISPLAYP3"; + case KTX2_DF_PRIMARIES_ADOBERGB: return "ADOBERGB"; + default: break; + } + return "?"; + } + + // Information about a single 2D texture "image" in a KTX2 
file. + struct ktx2_image_level_info + { + // The mipmap level index (0=largest), texture array layer index, and cubemap face index of the image. + uint32_t m_level_index; + uint32_t m_layer_index; + uint32_t m_face_index; + + // The image's actual (or the original source image's) width/height in pixels, which may not be divisible by 4 pixels. + uint32_t m_orig_width; + uint32_t m_orig_height; + + // The image's physical width/height, which will always be divisible by 4 pixels. + uint32_t m_width; + uint32_t m_height; + + // The texture's dimensions in 4x4 or 6x6 texel blocks. + uint32_t m_num_blocks_x; + uint32_t m_num_blocks_y; + + // The format's block width/height (currently either 4 or 6). + uint32_t m_block_width; + uint32_t m_block_height; + + // The total number of blocks + uint32_t m_total_blocks; + + // true if the image has alpha data + bool m_alpha_flag; + + // true if the image is an I-Frame. Currently, for ETC1S textures, the first frame will always be an I-Frame, and subsequent frames will always be P-Frames. + bool m_iframe_flag; + }; + + // Thread-specific ETC1S/supercompressed UASTC transcoder state. (If you're not doing multithreading transcoding you can ignore this.) + struct ktx2_transcoder_state + { + basist::basisu_transcoder_state m_transcoder_state; + basisu::uint8_vec m_level_uncomp_data; + int m_uncomp_data_level_index; + + void clear() + { + m_transcoder_state.clear(); + m_level_uncomp_data.clear(); + m_uncomp_data_level_index = -1; + } + }; + + // This class is quite similar to basisu_transcoder. It treats KTX2 files as a simple container for ETC1S/UASTC texture data. + // It does not support 1D or 3D textures. + // It only supports 2D and cubemap textures, with or without mipmaps, texture arrays of 2D/cubemap textures, and texture video files. + // It only supports raw non-supercompressed UASTC, ETC1S, UASTC+Zstd, or UASTC+zlib compressed files. + // DFD (Data Format Descriptor) parsing is purposely as simple as possible. 
+ // If you need to know how to interpret the texture channels you'll need to parse the DFD yourself after calling get_dfd(). + class ktx2_transcoder + { + public: + ktx2_transcoder(); + + // Frees all allocations, resets object. + void clear(); + + // init() parses the KTX2 header, level index array, DFD, and key values, but nothing else. + // Importantly, it does not parse or decompress the ETC1S global supercompressed data, so some things (like which frames are I/P-Frames) won't be available until start_transcoding() is called. + // This method holds a pointer to the file data until clear() is called. + bool init(const void* pData, uint32_t data_size); + + // Returns the data/size passed to init(). + const uint8_t* get_data() const { return m_pData; } + uint32_t get_data_size() const { return m_data_size; } + + // Returns the KTX2 header. Valid after init(). + const ktx2_header& get_header() const { return m_header; } + + // Returns the KTX2 level index array. There will be one entry for each mipmap level. Valid after init(). + const basisu::vector& get_level_index() const { return m_levels; } + + // Returns the texture's width in texels. Always non-zero, might not be divisible by 4. Valid after init(). + uint32_t get_width() const { return m_header.m_pixel_width; } + + // Returns the texture's height in texels. Always non-zero, might not be divisible by 4. Valid after init(). + uint32_t get_height() const { return m_header.m_pixel_height; } + + // Returns the texture's number of mipmap levels. Always returns 1 or higher. Valid after init(). + uint32_t get_levels() const { return m_header.m_level_count; } + + // Returns the number of faces. Returns 1 for 2D textures or 6 for cubemaps. Valid after init(). + uint32_t get_faces() const { return m_header.m_face_count; } + + // Returns 0 or the number of layers in the texture array or texture video. Valid after init(). 
+ uint32_t get_layers() const { return m_header.m_layer_count; } + + // Returns cETC1S, cUASTC4x4, cUASTC_HDR_4x4, cASTC_HDR_6x6, cASTC_HDR_6x6_INTERMEDIATE. Valid after init(). + basist::basis_tex_format get_basis_tex_format() const { return m_format; } + + // ETC1S LDR 4x4 + bool is_etc1s() const { return get_basis_tex_format() == basist::basis_tex_format::cETC1S; } + + // UASTC LDR 4x4 (only) + bool is_uastc() const { return get_basis_tex_format() == basist::basis_tex_format::cUASTC4x4; } + + // Is ASTC HDR 4x4 or 6x6 + bool is_hdr() const + { + return basis_tex_format_is_hdr(get_basis_tex_format()); + } + + bool is_ldr() const + { + return !is_hdr(); + } + + bool is_hdr_4x4() const + { + return (get_basis_tex_format() == basist::basis_tex_format::cUASTC_HDR_4x4); + } + + bool is_hdr_6x6() const + { + return (get_basis_tex_format() == basist::basis_tex_format::cASTC_HDR_6x6) || (get_basis_tex_format() == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE); + } + + uint32_t get_block_width() const { return basis_tex_format_get_block_width(get_basis_tex_format()); } + uint32_t get_block_height() const { return basis_tex_format_get_block_height(get_basis_tex_format()); } + + // Returns true if the ETC1S file has two planes (typically RGBA, or RRRG), or true if the UASTC file has alpha data. Valid after init(). + uint32_t get_has_alpha() const { return m_has_alpha; } + + // Returns the entire Data Format Descriptor (DFD) from the KTX2 file. Valid after init(). + // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#_the_khronos_data_format_descriptor_overview + const basisu::uint8_vec& get_dfd() const { return m_dfd; } + + // Some basic DFD accessors. Valid after init(). + uint32_t get_dfd_color_model() const { return m_dfd_color_model; } + + // Returns the DFD color primary. + // We do not validate the color primaries, so the returned value may not be in the ktx2_df_color_primaries enum. 
+ ktx2_df_color_primaries get_dfd_color_primaries() const { return m_dfd_color_prims; } + + // Returns KTX2_KHR_DF_TRANSFER_LINEAR or KTX2_KHR_DF_TRANSFER_SRGB. + uint32_t get_dfd_transfer_func() const { return m_dfd_transfer_func; } + + uint32_t get_dfd_flags() const { return m_dfd_flags; } + + // Returns 1 (ETC1S/UASTC) or 2 (ETC1S with an internal alpha channel). + uint32_t get_dfd_total_samples() const { return m_dfd_samples; } + + // Returns the channel mapping for each DFD "sample". UASTC always has 1 sample, ETC1S can have one or two. + // Note the returned value SHOULD be one of the ktx2_df_channel_id enums, but we don't validate that. + // It's up to the caller to decide what to do if the value isn't in the enum. + ktx2_df_channel_id get_dfd_channel_id0() const { return m_dfd_chan0; } + ktx2_df_channel_id get_dfd_channel_id1() const { return m_dfd_chan1; } + + // Key value field data. + struct key_value + { + // The key field is UTF8 and always zero terminated. + // In memory we always append a zero terminator to the key. + basisu::uint8_vec m_key; + + // The value may be empty. In the KTX2 file it consists of raw bytes which may or may not be zero terminated. + // In memory we always append a zero terminator to the value. + basisu::uint8_vec m_value; + + bool operator< (const key_value& rhs) const { return strcmp((const char*)m_key.data(), (const char *)rhs.m_key.data()) < 0; } + }; + typedef basisu::vector key_value_vec; + + // Returns the array of key-value entries. This may be empty. Valid after init(). + // The order of key values fields in this array exactly matches the order they were stored in the file. The keys are supposed to be sorted by their Unicode code points. 
+ const key_value_vec& get_key_values() const { return m_key_values; } + + const basisu::uint8_vec *find_key(const std::string& key_name) const; + + // Low-level ETC1S specific accessors + + // Returns the ETC1S global supercompression data header, which is only valid after start_transcoding() is called. + const ktx2_etc1s_global_data_header& get_etc1s_header() const { return m_etc1s_header; } + + // Returns the array of ETC1S image descriptors, which is only valid after start_transcoding() is called. + const basisu::vector& get_etc1s_image_descs() const { return m_etc1s_image_descs; } + + const basisu::vector& get_astc_hdr_6x6_intermediate_image_descs() const { return m_astc_6x6_intermediate_image_descs; } + + // Must have called start_transcoding() first + uint32_t get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const; + + // is_video() is only valid after start_transcoding() is called. + // For ETC1S data, if this returns true you must currently transcode the file from first to last frame, in order, without skipping any frames. + bool is_video() const { return m_is_video; } + + // Defaults to 0, only non-zero if the key existed in the source KTX2 file. + float get_ldr_hdr_upconversion_nit_multiplier() const { return m_ldr_hdr_upconversion_nit_multiplier; } + + // start_transcoding() MUST be called before calling transcode_image(). + // This method decompresses the ETC1S global endpoint/selector codebooks, which is not free, so try to avoid calling it excessively. + bool start_transcoding(); + + // get_image_level_info() can be called after init(), but the m_iframe_flag's won't be valid until start_transcoding() is called. + // You can call this method before calling transcode_image_level() to retrieve basic information about the mipmap level's dimensions, etc. 
+ bool get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const; + + // transcode_image_level() transcodes a single 2D texture or cubemap face from the KTX2 file. + // Internally it uses the same low-level transcode API's as basisu_transcoder::transcode_image_level(). + // If the file is UASTC and is supercompressed with Zstandard, and the file is a texture array or cubemap, it's highly recommended that each mipmap level is + // completely transcoded before switching to another level. Every time the mipmap level is changed all supercompressed level data must be decompressed using Zstandard as a single unit. + // Currently ETC1S videos must always be transcoded from first to last frame (or KTX2 "layer"), in order, with no skipping of frames. + // By default this method is not thread safe unless you specify a pointer to a user allocated thread-specific transcoder_state struct. + bool transcode_image_level( + uint32_t level_index, uint32_t layer_index, uint32_t face_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + basist::transcoder_texture_format fmt, + uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, + ktx2_transcoder_state *pState = nullptr); + + private: + const uint8_t* m_pData; + uint32_t m_data_size; + + ktx2_header m_header; + basisu::vector m_levels; + basisu::uint8_vec m_dfd; + key_value_vec m_key_values; + + ktx2_etc1s_global_data_header m_etc1s_header; + basisu::vector m_etc1s_image_descs; + basisu::vector m_astc_6x6_intermediate_image_descs; + + basist::basis_tex_format m_format; + + uint32_t m_dfd_color_model; + ktx2_df_color_primaries m_dfd_color_prims; + uint32_t m_dfd_transfer_func; + uint32_t m_dfd_flags; + uint32_t m_dfd_samples; + ktx2_df_channel_id m_dfd_chan0, m_dfd_chan1; + + basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder; + 
basist::basisu_lowlevel_uastc_ldr_4x4_transcoder m_uastc_transcoder; + basist::basisu_lowlevel_uastc_hdr_4x4_transcoder m_uastc_hdr_transcoder; + basist::basisu_lowlevel_astc_hdr_6x6_transcoder m_astc_hdr_6x6_transcoder; + basist::basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder m_astc_hdr_6x6_intermediate_transcoder; + + ktx2_transcoder_state m_def_transcoder_state; + + bool m_has_alpha; + bool m_is_video; + float m_ldr_hdr_upconversion_nit_multiplier; + + bool decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data); + bool read_astc_6x6_hdr_intermediate_global_data(); + bool decompress_etc1s_global_data(); + bool read_key_values(); + }; + + // Replaces if the key already exists + inline void ktx2_add_key_value(ktx2_transcoder::key_value_vec& key_values, const std::string& key, const std::string& val) + { + assert(key.size()); + + basist::ktx2_transcoder::key_value* p = nullptr; + + // Try to find an existing key + for (size_t i = 0; i < key_values.size(); i++) + { + if (strcmp((const char*)key_values[i].m_key.data(), key.c_str()) == 0) + { + p = &key_values[i]; + break; + } + } + + if (!p) + p = key_values.enlarge(1); + + p->m_key.resize(0); + p->m_value.resize(0); + + p->m_key.resize(key.size() + 1); + memcpy(p->m_key.data(), key.c_str(), key.size()); + + p->m_value.resize(val.size() + 1); + if (val.size()) + memcpy(p->m_value.data(), val.c_str(), val.size()); + } + +#endif // BASISD_SUPPORT_KTX2 + + // Returns true if the transcoder was compiled with KTX2 support. + bool basisu_transcoder_supports_ktx2(); + + // Returns true if the transcoder was compiled with Zstandard support. 
+ bool basisu_transcoder_supports_ktx2_zstd(); + +} // namespace basisu + diff --git a/thirdparty/basisu/transcoder/basisu_transcoder_internal.h b/thirdparty/basisu/transcoder/basisu_transcoder_internal.h new file mode 100644 index 000000000..8bf4abad6 --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_transcoder_internal.h @@ -0,0 +1,1056 @@ +// basisu_transcoder_internal.h - Universal texture format transcoder library. +// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// +// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#ifdef _MSC_VER +#pragma warning (disable: 4127) // conditional expression is constant +#endif + +// v1.50: Added UASTC HDR 4x4 support +// v1.60: Added RDO ASTC HDR 6x6 and intermediate support +#define BASISD_LIB_VERSION 160 +#define BASISD_VERSION_STRING "01.60" + +#ifdef _DEBUG +#define BASISD_BUILD_DEBUG +#else +#define BASISD_BUILD_RELEASE +#endif + +#include "basisu.h" + +#define BASISD_znew (z = 36969 * (z & 65535) + (z >> 16)) + +namespace basisu +{ + extern bool g_debug_printf; +} + +namespace basist +{ + // Low-level formats directly supported by the transcoder (other supported texture formats are combinations of these low-level block formats). + // You probably don't care about these enum's unless you are going pretty low-level and calling the transcoder to decode individual slices. 
+ enum class block_format + { + cETC1, // ETC1S RGB + cETC2_RGBA, // full ETC2 EAC RGBA8 block + cBC1, // DXT1 RGB + cBC3, // BC4 block followed by a four color BC1 block + cBC4, // DXT5A (alpha block only) + cBC5, // two BC4 blocks + cPVRTC1_4_RGB, // opaque-only PVRTC1 4bpp + cPVRTC1_4_RGBA, // PVRTC1 4bpp RGBA + cBC7, // Full BC7 block, any mode + cBC7_M5_COLOR, // RGB BC7 mode 5 color (writes an opaque mode 5 block) + cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.) + cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format) + cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC + // data. If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking. 
+ + cATC_RGB, + cATC_RGBA_INTERPOLATED_ALPHA, + cFXT1_RGB, // Opaque-only, has oddball 8x4 pixel block size + + cPVRTC2_4_RGB, + cPVRTC2_4_RGBA, + + cETC2_EAC_R11, + cETC2_EAC_RG11, + + cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits) + + cRGB32, // Writes RGB components to 32bpp output pixels + cRGBA32, // Writes RGB255 components to 32bpp output pixels + cA32, // Writes alpha component to 32bpp output pixels + + cRGB565, + cBGR565, + + cRGBA4444_COLOR, + cRGBA4444_ALPHA, + cRGBA4444_COLOR_OPAQUE, + cRGBA4444, + cRGBA_HALF, + cRGB_HALF, + cRGB_9E5, + + cUASTC_4x4, // LDR, universal + cUASTC_HDR_4x4, // HDR, transcodes only to 4x4 HDR ASTC, BC6H, or uncompressed + cBC6H, + cASTC_HDR_4x4, + cASTC_HDR_6x6, + + cTotalBlockFormats + }; + + inline uint32_t get_block_width(block_format fmt) + { + switch (fmt) + { + case block_format::cFXT1_RGB: + return 8; + case block_format::cASTC_HDR_6x6: + return 6; + default: + break; + } + return 4; + } + + inline uint32_t get_block_height(block_format fmt) + { + switch (fmt) + { + case block_format::cASTC_HDR_6x6: + return 6; + default: + break; + } + return 4; + } + + const int COLOR5_PAL0_PREV_HI = 9, COLOR5_PAL0_DELTA_LO = -9, COLOR5_PAL0_DELTA_HI = 31; + const int COLOR5_PAL1_PREV_HI = 21, COLOR5_PAL1_DELTA_LO = -21, COLOR5_PAL1_DELTA_HI = 21; + const int COLOR5_PAL2_PREV_HI = 31, COLOR5_PAL2_DELTA_LO = -31, COLOR5_PAL2_DELTA_HI = 9; + const int COLOR5_PAL_MIN_DELTA_B_RUNLEN = 3, COLOR5_PAL_DELTA_5_RUNLEN_VLC_BITS = 3; + + const uint32_t ENDPOINT_PRED_TOTAL_SYMBOLS = (4 * 4 * 4 * 4) + 1; + const uint32_t ENDPOINT_PRED_REPEAT_LAST_SYMBOL = ENDPOINT_PRED_TOTAL_SYMBOLS - 1; + const uint32_t ENDPOINT_PRED_MIN_REPEAT_COUNT = 3; + const uint32_t ENDPOINT_PRED_COUNT_VLC_BITS = 4; + + const uint32_t NUM_ENDPOINT_PREDS = 3;// BASISU_ARRAY_SIZE(g_endpoint_preds); + const uint32_t CR_ENDPOINT_PRED_INDEX = NUM_ENDPOINT_PREDS - 1; + const uint32_t 
NO_ENDPOINT_PRED_INDEX = 3;//NUM_ENDPOINT_PREDS; + const uint32_t MAX_SELECTOR_HISTORY_BUF_SIZE = 64; + const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3; + const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6; + const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); + + uint16_t crc16(const void *r, size_t size, uint16_t crc); + + class huffman_decoding_table + { + friend class bitwise_decoder; + + public: + huffman_decoding_table() + { + } + + void clear() + { + basisu::clear_vector(m_code_sizes); + basisu::clear_vector(m_lookup); + basisu::clear_vector(m_tree); + } + + bool init(uint32_t total_syms, const uint8_t *pCode_sizes, uint32_t fast_lookup_bits = basisu::cHuffmanFastLookupBits) + { + if (!total_syms) + { + clear(); + return true; + } + + m_code_sizes.resize(total_syms); + memcpy(&m_code_sizes[0], pCode_sizes, total_syms); + + const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; + + m_lookup.resize(0); + m_lookup.resize(huffman_fast_lookup_size); + + m_tree.resize(0); + m_tree.resize(total_syms * 2); + + uint32_t syms_using_codesize[basisu::cHuffmanMaxSupportedInternalCodeSize + 1]; + basisu::clear_obj(syms_using_codesize); + for (uint32_t i = 0; i < total_syms; i++) + { + if (pCode_sizes[i] > basisu::cHuffmanMaxSupportedInternalCodeSize) + return false; + syms_using_codesize[pCode_sizes[i]]++; + } + + uint32_t next_code[basisu::cHuffmanMaxSupportedInternalCodeSize + 1]; + next_code[0] = next_code[1] = 0; + + uint32_t used_syms = 0, total = 0; + for (uint32_t i = 1; i < basisu::cHuffmanMaxSupportedInternalCodeSize; i++) + { + used_syms += syms_using_codesize[i]; + next_code[i + 1] = (total = ((total + syms_using_codesize[i]) << 1)); + } + + if (((1U << basisu::cHuffmanMaxSupportedInternalCodeSize) != total) && (used_syms != 1U)) + return false; + + for (int tree_next = -1, sym_index = 0; sym_index < (int)total_syms; ++sym_index) + { + uint32_t rev_code = 0, l, cur_code, code_size = 
pCode_sizes[sym_index]; + if (!code_size) + continue; + + cur_code = next_code[code_size]++; + + for (l = code_size; l > 0; l--, cur_code >>= 1) + rev_code = (rev_code << 1) | (cur_code & 1); + + if (code_size <= fast_lookup_bits) + { + uint32_t k = (code_size << 16) | sym_index; + while (rev_code < huffman_fast_lookup_size) + { + if (m_lookup[rev_code] != 0) + { + // Supplied codesizes can't create a valid prefix code. + return false; + } + + m_lookup[rev_code] = k; + rev_code += (1 << code_size); + } + continue; + } + + int tree_cur; + if (0 == (tree_cur = m_lookup[rev_code & (huffman_fast_lookup_size - 1)])) + { + const uint32_t idx = rev_code & (huffman_fast_lookup_size - 1); + if (m_lookup[idx] != 0) + { + // Supplied codesizes can't create a valid prefix code. + return false; + } + + m_lookup[idx] = tree_next; + tree_cur = tree_next; + tree_next -= 2; + } + + if (tree_cur >= 0) + { + // Supplied codesizes can't create a valid prefix code. + return false; + } + + rev_code >>= (fast_lookup_bits - 1); + + for (int j = code_size; j > ((int)fast_lookup_bits + 1); j--) + { + tree_cur -= ((rev_code >>= 1) & 1); + + const int idx = -tree_cur - 1; + if (idx < 0) + return false; + else if (idx >= (int)m_tree.size()) + m_tree.resize(idx + 1); + + if (!m_tree[idx]) + { + m_tree[idx] = (int16_t)tree_next; + tree_cur = tree_next; + tree_next -= 2; + } + else + { + tree_cur = m_tree[idx]; + if (tree_cur >= 0) + { + // Supplied codesizes can't create a valid prefix code. + return false; + } + } + } + + tree_cur -= ((rev_code >>= 1) & 1); + + const int idx = -tree_cur - 1; + if (idx < 0) + return false; + else if (idx >= (int)m_tree.size()) + m_tree.resize(idx + 1); + + if (m_tree[idx] != 0) + { + // Supplied codesizes can't create a valid prefix code. 
+ return false; + } + + m_tree[idx] = (int16_t)sym_index; + } + + return true; + } + + const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; } + const basisu::int_vec &get_lookup() const { return m_lookup; } + const basisu::int16_vec &get_tree() const { return m_tree; } + + bool is_valid() const { return m_code_sizes.size() > 0; } + + private: + basisu::uint8_vec m_code_sizes; + basisu::int_vec m_lookup; + basisu::int16_vec m_tree; + }; + + class bitwise_decoder + { + public: + bitwise_decoder() : + m_buf_size(0), + m_pBuf(nullptr), + m_pBuf_start(nullptr), + m_pBuf_end(nullptr), + m_bit_buf(0), + m_bit_buf_size(0) + { + } + + void clear() + { + m_buf_size = 0; + m_pBuf = nullptr; + m_pBuf_start = nullptr; + m_pBuf_end = nullptr; + m_bit_buf = 0; + m_bit_buf_size = 0; + } + + bool init(const uint8_t *pBuf, uint32_t buf_size) + { + if ((!pBuf) && (buf_size)) + return false; + + m_buf_size = buf_size; + m_pBuf = pBuf; + m_pBuf_start = pBuf; + m_pBuf_end = pBuf + buf_size; + m_bit_buf = 0; + m_bit_buf_size = 0; + return true; + } + + void stop() + { + } + + inline uint32_t peek_bits(uint32_t num_bits) + { + if (!num_bits) + return 0; + + assert(num_bits <= 25); + + while (m_bit_buf_size < num_bits) + { + uint32_t c = 0; + if (m_pBuf < m_pBuf_end) + c = *m_pBuf++; + + m_bit_buf |= (c << m_bit_buf_size); + m_bit_buf_size += 8; + assert(m_bit_buf_size <= 32); + } + + return m_bit_buf & ((1 << num_bits) - 1); + } + + void remove_bits(uint32_t num_bits) + { + assert(m_bit_buf_size >= num_bits); + + m_bit_buf >>= num_bits; + m_bit_buf_size -= num_bits; + } + + uint32_t get_bits(uint32_t num_bits) + { + if (num_bits > 25) + { + assert(num_bits <= 32); + + const uint32_t bits0 = peek_bits(25); + m_bit_buf >>= 25; + m_bit_buf_size -= 25; + num_bits -= 25; + + const uint32_t bits = peek_bits(num_bits); + m_bit_buf >>= num_bits; + m_bit_buf_size -= num_bits; + + return bits0 | (bits << 25); + } + + const uint32_t bits = peek_bits(num_bits); + + m_bit_buf >>= 
num_bits; + m_bit_buf_size -= num_bits; + + return bits; + } + + uint32_t decode_truncated_binary(uint32_t n) + { + assert(n >= 2); + + const uint32_t k = basisu::floor_log2i(n); + const uint32_t u = (1 << (k + 1)) - n; + + uint32_t result = get_bits(k); + + if (result >= u) + result = ((result << 1) | get_bits(1)) - u; + + return result; + } + + uint32_t decode_rice(uint32_t m) + { + assert(m); + + uint32_t q = 0; + for (;;) + { + uint32_t k = peek_bits(16); + + uint32_t l = 0; + while (k & 1) + { + l++; + k >>= 1; + } + + q += l; + + remove_bits(l); + + if (l < 16) + break; + } + + return (q << m) + (get_bits(m + 1) >> 1); + } + + inline uint32_t decode_vlc(uint32_t chunk_bits) + { + assert(chunk_bits); + + const uint32_t chunk_size = 1 << chunk_bits; + const uint32_t chunk_mask = chunk_size - 1; + + uint32_t v = 0; + uint32_t ofs = 0; + + for ( ; ; ) + { + uint32_t s = get_bits(chunk_bits + 1); + v |= ((s & chunk_mask) << ofs); + ofs += chunk_bits; + + if ((s & chunk_size) == 0) + break; + + if (ofs >= 32) + { + assert(0); + break; + } + } + + return v; + } + + inline uint32_t decode_huffman(const huffman_decoding_table &ct, int fast_lookup_bits = basisu::cHuffmanFastLookupBits) + { + assert(ct.m_code_sizes.size()); + + const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; + + while (m_bit_buf_size < 16) + { + uint32_t c = 0; + if (m_pBuf < m_pBuf_end) + c = *m_pBuf++; + + m_bit_buf |= (c << m_bit_buf_size); + m_bit_buf_size += 8; + assert(m_bit_buf_size <= 32); + } + + int code_len; + + int sym; + if ((sym = ct.m_lookup[m_bit_buf & (huffman_fast_lookup_size - 1)]) >= 0) + { + code_len = sym >> 16; + sym &= 0xFFFF; + } + else + { + code_len = fast_lookup_bits; + do + { + sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1 + } while (sym < 0); + } + + m_bit_buf >>= code_len; + m_bit_buf_size -= code_len; + + return sym; + } + + bool read_huffman_table(huffman_decoding_table &ct) + { + ct.clear(); + + const uint32_t 
total_used_syms = get_bits(basisu::cHuffmanMaxSymsLog2); + + if (!total_used_syms) + return true; + if (total_used_syms > basisu::cHuffmanMaxSyms) + return false; + + uint8_t code_length_code_sizes[basisu::cHuffmanTotalCodelengthCodes]; + basisu::clear_obj(code_length_code_sizes); + + const uint32_t num_codelength_codes = get_bits(5); + if ((num_codelength_codes < 1) || (num_codelength_codes > basisu::cHuffmanTotalCodelengthCodes)) + return false; + + for (uint32_t i = 0; i < num_codelength_codes; i++) + code_length_code_sizes[basisu::g_huffman_sorted_codelength_codes[i]] = static_cast(get_bits(3)); + + huffman_decoding_table code_length_table; + if (!code_length_table.init(basisu::cHuffmanTotalCodelengthCodes, code_length_code_sizes)) + return false; + + if (!code_length_table.is_valid()) + return false; + + basisu::uint8_vec code_sizes(total_used_syms); + + uint32_t cur = 0; + while (cur < total_used_syms) + { + int c = decode_huffman(code_length_table); + + if (c <= 16) + code_sizes[cur++] = static_cast(c); + else if (c == basisu::cHuffmanSmallZeroRunCode) + cur += get_bits(basisu::cHuffmanSmallZeroRunExtraBits) + basisu::cHuffmanSmallZeroRunSizeMin; + else if (c == basisu::cHuffmanBigZeroRunCode) + cur += get_bits(basisu::cHuffmanBigZeroRunExtraBits) + basisu::cHuffmanBigZeroRunSizeMin; + else + { + if (!cur) + return false; + + uint32_t l; + if (c == basisu::cHuffmanSmallRepeatCode) + l = get_bits(basisu::cHuffmanSmallRepeatExtraBits) + basisu::cHuffmanSmallRepeatSizeMin; + else + l = get_bits(basisu::cHuffmanBigRepeatExtraBits) + basisu::cHuffmanBigRepeatSizeMin; + + const uint8_t prev = code_sizes[cur - 1]; + if (prev == 0) + return false; + do + { + if (cur >= total_used_syms) + return false; + code_sizes[cur++] = prev; + } while (--l > 0); + } + } + + if (cur != total_used_syms) + return false; + + return ct.init(total_used_syms, &code_sizes[0]); + } + + size_t get_bits_remaining() const + { + size_t total_bytes_remaining = m_pBuf_end - m_pBuf; + return 
total_bytes_remaining * 8 + m_bit_buf_size; + } + + private: + uint32_t m_buf_size; + const uint8_t *m_pBuf; + const uint8_t *m_pBuf_start; + const uint8_t *m_pBuf_end; + + uint32_t m_bit_buf; + uint32_t m_bit_buf_size; + }; + + inline uint32_t basisd_rand(uint32_t seed) + { + if (!seed) + seed++; + uint32_t z = seed; + BASISD_znew; + return z; + } + + // Returns random number in [0,limit). Max limit is 0xFFFF. + inline uint32_t basisd_urand(uint32_t& seed, uint32_t limit) + { + seed = basisd_rand(seed); + return (((seed ^ (seed >> 16)) & 0xFFFF) * limit) >> 16; + } + + class approx_move_to_front + { + public: + approx_move_to_front(uint32_t n) + { + init(n); + } + + void init(uint32_t n) + { + m_values.resize(n); + m_rover = n / 2; + } + + const basisu::int_vec& get_values() const { return m_values; } + basisu::int_vec& get_values() { return m_values; } + + uint32_t size() const { return (uint32_t)m_values.size(); } + + const int& operator[] (uint32_t index) const { return m_values[index]; } + int operator[] (uint32_t index) { return m_values[index]; } + + void add(int new_value) + { + m_values[m_rover++] = new_value; + if (m_rover == m_values.size()) + m_rover = (uint32_t)m_values.size() / 2; + } + + void use(uint32_t index) + { + if (index) + { + //std::swap(m_values[index / 2], m_values[index]); + int x = m_values[index / 2]; + int y = m_values[index]; + m_values[index / 2] = y; + m_values[index] = x; + } + } + + // returns -1 if not found + int find(int value) const + { + for (uint32_t i = 0; i < m_values.size(); i++) + if (m_values[i] == value) + return i; + return -1; + } + + void reset() + { + const uint32_t n = (uint32_t)m_values.size(); + + m_values.clear(); + + init(n); + } + + private: + basisu::int_vec m_values; + uint32_t m_rover; + }; + + struct decoder_etc_block; + + inline uint8_t clamp255(int32_t i) + { + return (uint8_t)((i & 0xFFFFFF00U) ? 
(~(i >> 31)) : i); + } + + enum eNoClamp + { + cNoClamp = 0 + }; + + struct color32 + { + union + { + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + + uint8_t c[4]; + + uint32_t m; + }; + + color32() { } + + color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); } + + void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); c[3] = static_cast(va); } + + void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); } + void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + + void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); } + + uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; } + uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; } + + bool operator== (const color32&rhs) const { return m == rhs.m; } + + static color32 comp_min(const color32& a, const color32& b) { return color32(cNoClamp, basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); } + static color32 comp_max(const color32& a, const color32& b) { return color32(cNoClamp, basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); } + }; + + struct endpoint + { + color32 m_color5; + uint8_t m_inten5; + bool operator== (const endpoint& rhs) const + { + return (m_color5.r == rhs.m_color5.r) && (m_color5.g == rhs.m_color5.g) && (m_color5.b == rhs.m_color5.b) && (m_inten5 == rhs.m_inten5); + } + bool operator!= (const endpoint& rhs) const { return !(*this == rhs); } + }; + + struct selector + { 
+ // Plain selectors (2-bits per value) + uint8_t m_selectors[4]; + + // ETC1 selectors + uint8_t m_bytes[4]; + + uint8_t m_lo_selector, m_hi_selector; + uint8_t m_num_unique_selectors; + bool operator== (const selector& rhs) const + { + return (m_selectors[0] == rhs.m_selectors[0]) && + (m_selectors[1] == rhs.m_selectors[1]) && + (m_selectors[2] == rhs.m_selectors[2]) && + (m_selectors[3] == rhs.m_selectors[3]); + } + bool operator!= (const selector& rhs) const + { + return !(*this == rhs); + } + + void init_flags() + { + uint32_t hist[4] = { 0, 0, 0, 0 }; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = get_selector(x, y); + hist[s]++; + } + } + + m_lo_selector = 3; + m_hi_selector = 0; + m_num_unique_selectors = 0; + + for (uint32_t i = 0; i < 4; i++) + { + if (hist[i]) + { + m_num_unique_selectors++; + if (i < m_lo_selector) m_lo_selector = static_cast(i); + if (i > m_hi_selector) m_hi_selector = static_cast(i); + } + } + } + + // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. 
+ inline uint32_t get_selector(uint32_t x, uint32_t y) const + { + assert((x < 4) && (y < 4)); + return (m_selectors[y] >> (x * 2)) & 3; + } + + void set_selector(uint32_t x, uint32_t y, uint32_t val) + { + static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 }; + + assert((x | y | val) < 4); + + m_selectors[y] &= ~(3 << (x * 2)); + m_selectors[y] |= (val << (x * 2)); + + const uint32_t etc1_bit_index = x * 4 + y; + + uint8_t *p = &m_bytes[3 - (etc1_bit_index >> 3)]; + + const uint32_t byte_bit_ofs = etc1_bit_index & 7; + const uint32_t mask = 1 << byte_bit_ofs; + + const uint32_t etc1_val = s_selector_index_to_etc1[val]; + + const uint32_t lsb = etc1_val & 1; + const uint32_t msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + }; + + bool basis_block_format_is_uncompressed(block_format tex_type); + + //------------------------------------ + + typedef uint16_t half_float; + + const double MIN_DENORM_HALF_FLOAT = 0.000000059604645; // smallest positive subnormal number + const double MIN_HALF_FLOAT = 0.00006103515625; // smallest positive normal number + const double MAX_HALF_FLOAT = 65504.0; // largest normal number + const uint32_t MAX_HALF_FLOAT_AS_INT_BITS = 0x7BFF; // the half float rep for 65504.0 + + inline uint32_t get_bits(uint32_t val, int low, int high) + { + const int num_bits = (high - low) + 1; + assert((num_bits >= 1) && (num_bits <= 32)); + + val >>= low; + if (num_bits != 32) + val &= ((1u << num_bits) - 1); + + return val; + } + + inline bool is_half_inf_or_nan(half_float v) + { + return get_bits(v, 10, 14) == 31; + } + + inline bool is_half_denorm(half_float v) + { + int e = (v >> 10) & 31; + return !e; + } + + inline int get_half_exp(half_float v) + { + int e = ((v >> 10) & 31); + return e ? 
(e - 15) : -14; + } + + inline int get_half_mantissa(half_float v) + { + if (is_half_denorm(v)) + return v & 0x3FF; + return (v & 0x3FF) | 0x400; + } + + inline float get_half_mantissaf(half_float v) + { + return ((float)get_half_mantissa(v)) / 1024.0f; + } + + inline int get_half_sign(half_float v) + { + return v ? ((v & 0x8000) ? -1 : 1) : 0; + } + + inline bool half_is_signed(half_float v) + { + return (v & 0x8000) != 0; + } + +#if 0 + int hexp = get_half_exp(Cf); + float hman = get_half_mantissaf(Cf); + int hsign = get_half_sign(Cf); + float k = powf(2.0f, hexp) * hman * hsign; + if (is_half_inf_or_nan(Cf)) + k = std::numeric_limits::quiet_NaN(); +#endif + + half_float float_to_half(float val); + + inline float half_to_float(half_float hval) + { + union { float f; uint32_t u; } x = { 0 }; + + uint32_t s = ((uint32_t)hval >> 15) & 1; + uint32_t e = ((uint32_t)hval >> 10) & 0x1F; + uint32_t m = (uint32_t)hval & 0x3FF; + + if (!e) + { + if (!m) + { + // +- 0 + x.u = s << 31; + return x.f; + } + else + { + // denormalized + while (!(m & 0x00000400)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x00000400; + } + } + else if (e == 31) + { + if (m == 0) + { + // +/- INF + x.u = (s << 31) | 0x7f800000; + return x.f; + } + else + { + // +/- NaN + x.u = (s << 31) | 0x7f800000 | (m << 13); + return x.f; + } + } + + e = e + (127 - 15); + m = m << 13; + + assert(s <= 1); + assert(m <= 0x7FFFFF); + assert(e <= 255); + + x.u = m | (e << 23) | (s << 31); + return x.f; + } + + // Originally from bc6h_enc.h + + void bc6h_enc_init(); + + const uint32_t MAX_BLOG16_VAL = 0xFFFF; + + // BC6H internals + const uint32_t NUM_BC6H_MODES = 14; + const uint32_t BC6H_LAST_MODE_INDEX = 13; + const uint32_t BC6H_FIRST_1SUBSET_MODE_INDEX = 10; // in the MS docs, this is "mode 11" (where the first mode is 1), 60 bits for endpoints (10.10, 10.10, 10.10), 63 bits for weights + const uint32_t TOTAL_BC6H_PARTITION_PATTERNS = 32; + + extern const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4]; // 
base, r, g, b + + struct bc6h_bit_layout + { + int8_t m_comp; // R=0,G=1,B=2,D=3 (D=partition index) + int8_t m_index; // 0-3, 0-1 Low/High subset 1, 2-3 Low/High subset 2, -1=partition index (d) + int8_t m_last_bit; + int8_t m_first_bit; // may be -1 if a single bit, may be >m_last_bit if reversed + }; + + const uint32_t MAX_BC6H_LAYOUT_INDEX = 25; + extern const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX]; + + extern const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4]; // [y][x] + + extern const uint8_t g_bc6h_weight3[8]; + extern const uint8_t g_bc6h_weight4[16]; + + extern const int8_t g_bc6h_mode_lookup[32]; + + // Converts b16 to half float + inline half_float bc6h_blog16_to_half(uint32_t comp) + { + assert(comp <= 0xFFFF); + + // scale the magnitude by 31/64 + comp = (comp * 31u) >> 6u; + return (half_float)comp; + } + + const uint32_t MAX_BC6H_HALF_FLOAT_AS_UINT = 0x7BFF; + + // Inverts bc6h_blog16_to_half(). + // Returns the nearest blog16 given a half value. + inline uint32_t bc6h_half_to_blog16(half_float h) + { + assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT); + return (h * 64 + 30) / 31; + } + + // Suboptimal, but very close. 
+ inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits) + { + assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT); + return (h * 64 + 30) / (31 * (1 << (16 - num_bits))); + } + + struct bc6h_block + { + uint8_t m_bytes[16]; + }; + + void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights); + void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights); + void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights); + void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights); + void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index] + void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index] + bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]); + + struct bc6h_logical_block + { + uint32_t m_mode; + uint32_t m_partition_pattern; // must be 0 if 1 subset + uint32_t m_endpoints[3][4]; // [comp][subset*2+lh_index] - must be already properly packed + uint8_t m_weights[16]; // weights must be of the proper size, taking into account skipped MSB's which must be 0 + + void clear() + { + basisu::clear_obj(*this); + } + }; + + void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk); + + namespace bc7_mode_5_encoder + { + void encode_bc7_mode_5_block(void* pDst_block, color32* pPixels, bool hq_mode); + } + +} // namespace basist + + + diff --git a/thirdparty/basisu/transcoder/basisu_transcoder_tables_astc.inc b/thirdparty/basisu/transcoder/basisu_transcoder_tables_astc.inc new file 
mode 100644 index 000000000..cd634c0df --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_transcoder_tables_astc.inc @@ -0,0 +1,481 @@ +{0,2,18},{0,32,2},{0,16,10},{0,16,10},{0,32,35},{0,16,27},{0,16,11},{0,16,27},{0,16,36},{0,16,28},{0,2,18},{0,32,2},{0,16,10},{0,16,10},{16,0,35},{0,16,27},{0,16,11},{0,16,27},{32,0,35},{0,16,27},{0,16,1},{0,16,1},{0,16,1},{0,16,1},{0,16,2},{0,16,2},{0,16,2},{0,0,4},{0,0,4},{0,0,4},{0,16,1}, +{0,16,1},{0,16,1},{0,16,1},{0,16,2},{0,16,2},{0,16,2},{0,0,4},{16,0,2},{0,0,4},{0,2,18},{0,32,2},{0,16,10},{0,16,10},{0,2,18},{2,0,18},{0,16,10},{0,16,18},{2,0,18},{0,16,18},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{16,4,44},{16,18,27},{16,2,30}, +{16,2,30},{0,4,52},{0,18,20},{0,2,1},{0,32,27},{0,2,77},{0,32,36},{16,4,19},{16,18,2},{16,2,5},{16,2,5},{32,32,51},{0,18,20},{0,2,1},{0,32,27},{32,32,51},{0,32,27},{16,18,26},{16,18,26},{16,18,26},{16,32,27},{0,34,8},{0,2,1},{0,2,1},{0,32,2},{0,32,19},{0,32,11},{16,18,1},{16,18,1},{16,18,1},{16,32,2},{16,2,8}, +{0,2,1},{0,2,1},{0,32,2},{34,0,8},{0,32,2},{16,4,18},{16,18,1},{16,2,4},{0,2,0},{16,4,18},{4,16,18},{0,2,0},{0,32,26},{4,16,18},{0,32,26},{16,0,26},{16,0,26},{16,0,26},{16,0,26},{0,18,0},{0,18,0},{0,18,0},{0,32,1},{0,16,10},{0,16,10},{32,36,36},{32,4,20},{32,34,31},{32,18,28},{16,6,60},{32,34,31},{32,18,19}, +{16,18,28},{0,34,52},{0,18,22},{2,4,22},{2,34,6},{2,18,14},{2,18,14},{18,2,51},{32,34,22},{32,18,10},{0,18,18},{38,0,51},{0,18,18},{32,4,19},{32,4,19},{32,4,19},{32,18,19},{32,34,17},{32,18,10},{32,18,10},{32,2,13},{16,18,14},{16,2,8},{2,18,5},{2,18,5},{2,18,5},{2,18,5},{2,2,8},{32,18,1},{32,18,1},{32,2,4},{2,2,8}, +{32,2,4},{4,0,18},{32,4,2},{2,18,10},{0,34,9},{4,0,18},{8,0,18},{0,34,9},{0,18,18},{8,0,18},{0,18,18},{32,0,18},{32,0,18},{32,0,18},{32,0,18},{32,18,9},{32,18,9},{32,18,9},{32,2,9},{16,2,4},{16,2,4},{18,6,44},{18,20,26},{18,4,30},{18,4,30},{2,6,52},{2,20,21},{2,4,1},{2,34,27},{16,20,52},{32,34,27},{18,6,19}, 
+{18,20,1},{18,4,5},{18,4,5},{20,16,51},{2,20,21},{2,4,1},{2,34,27},{16,20,51},{2,34,27},{18,20,26},{18,20,26},{18,20,26},{18,34,27},{2,36,8},{2,4,1},{2,4,1},{2,34,2},{32,4,11},{32,34,2},{18,20,1},{18,20,1},{18,20,1},{18,34,2},{18,4,8},{2,4,1},{2,4,1},{2,34,2},{36,2,8},{2,34,2},{36,0,18},{18,20,0},{18,4,4}, +{2,4,0},{36,0,18},{6,18,18},{2,4,0},{0,34,26},{6,18,18},{0,34,26},{18,0,26},{18,0,26},{18,0,26},{18,0,26},{2,4,1},{2,4,1},{2,4,1},{2,34,1},{32,34,1},{32,34,1},{34,8,40},{34,6,21},{34,36,33},{34,36,28},{34,22,55},{34,36,25},{34,36,13},{34,20,31},{32,6,55},{18,20,20},{4,6,21},{4,36,5},{4,36,5},{4,20,11},{22,0,51}, +{34,36,21},{34,36,9},{2,20,19},{32,6,51},{2,20,19},{34,6,21},{34,6,21},{34,6,21},{34,20,21},{34,6,14},{34,20,6},{34,20,6},{34,4,14},{18,20,11},{18,20,11},{4,36,1},{4,36,1},{4,36,1},{4,20,2},{2,24,8},{34,20,2},{34,20,2},{4,4,10},{28,0,8},{4,4,10},{22,16,18},{34,6,1},{4,36,4},{18,36,4},{22,16,18},{44,16,18},{18,36,4}, +{0,20,18},{44,16,18},{0,20,18},{34,0,20},{34,0,20},{34,0,20},{34,0,20},{34,20,5},{34,20,5},{34,20,5},{34,4,5},{18,20,2},{18,20,2},{20,8,46},{20,22,27},{20,6,31},{20,6,28},{4,24,52},{4,22,19},{4,6,7},{4,6,36},{18,22,56},{34,6,31},{20,8,21},{20,22,2},{20,6,6},{20,6,3},{36,6,51},{4,22,18},{4,6,6},{18,6,26},{31,0,51}, +{18,6,26},{20,22,26},{20,22,26},{20,22,26},{20,6,27},{4,38,9},{4,6,6},{4,6,6},{4,36,1},{34,6,9},{4,36,4},{20,22,1},{20,22,1},{20,22,1},{20,6,2},{22,2,8},{20,36,4},{20,36,4},{4,36,0},{42,2,8},{4,36,0},{22,4,18},{20,22,1},{36,6,2},{4,6,2},{22,4,18},{15,0,18},{4,6,2},{0,6,26},{15,0,18},{0,6,26},{20,0,26}, +{20,0,26},{20,0,26},{20,0,26},{4,22,1},{4,22,1},{4,22,1},{4,36,1},{4,36,4},{4,36,4},{36,40,38},{36,8,22},{36,38,33},{36,38,28},{36,24,55},{36,38,25},{36,38,13},{20,22,28},{34,8,55},{4,22,23},{6,8,21},{6,38,5},{6,38,5},{6,22,11},{24,2,51},{36,38,21},{20,38,9},{4,22,19},{13,0,51},{4,22,19},{36,8,21},{36,8,21},{36,8,21}, 
+{36,22,21},{36,8,14},{36,22,6},{36,22,6},{36,6,14},{4,38,10},{4,22,14},{6,38,1},{6,38,1},{6,38,1},{6,22,2},{8,2,8},{36,22,2},{36,22,2},{6,6,10},{30,2,8},{6,6,10},{10,0,18},{36,8,2},{6,38,4},{4,38,5},{10,0,18},{4,8,18},{4,38,5},{0,22,18},{4,8,18},{0,22,18},{36,0,20},{36,0,20},{36,0,20},{36,0,20},{36,22,5}, +{36,22,5},{36,22,5},{36,6,5},{4,22,5},{4,22,5},{22,10,46},{22,24,26},{22,8,30},{22,8,30},{6,26,52},{6,24,21},{6,8,5},{6,38,37},{20,24,53},{6,38,40},{22,10,21},{22,24,1},{22,8,5},{22,8,5},{16,11,51},{6,24,20},{6,8,4},{6,38,36},{25,0,51},{6,38,36},{22,24,26},{22,24,26},{22,24,26},{22,8,30},{6,40,9},{6,8,5},{6,8,5}, +{6,38,1},{36,8,11},{6,38,4},{22,24,1},{22,24,1},{22,24,1},{22,8,5},{24,4,8},{6,8,4},{6,8,4},{6,38,0},{11,0,8},{6,38,0},{42,0,18},{22,24,0},{22,8,4},{6,8,0},{42,0,18},{9,0,18},{6,8,0},{0,8,36},{9,0,18},{0,8,36},{22,0,26},{22,0,26},{22,0,26},{22,0,26},{6,24,1},{6,24,1},{6,24,1},{6,38,1},{6,38,4}, +{6,38,4},{38,12,44},{38,10,30},{38,40,46},{38,40,34},{38,26,53},{38,40,21},{38,40,9},{38,24,25},{6,10,57},{22,24,22},{8,10,21},{8,40,5},{8,40,5},{8,24,17},{28,0,51},{38,40,20},{38,40,8},{6,24,21},{37,0,51},{6,24,21},{38,26,26},{38,26,26},{38,26,26},{38,24,30},{38,10,9},{38,40,5},{38,40,5},{38,24,9},{22,24,11},{22,24,6},{8,40,1}, +{8,40,1},{8,40,1},{8,24,1},{38,10,8},{8,24,4},{8,24,4},{6,24,5},{23,0,8},{6,24,5},{16,7,18},{38,10,4},{8,40,4},{6,40,5},{16,7,18},{21,0,18},{6,40,5},{0,24,20},{21,0,18},{0,24,20},{38,0,26},{38,0,26},{38,0,26},{38,0,26},{38,40,1},{38,40,1},{38,40,1},{38,8,2},{22,24,2},{22,24,2},{24,12,38},{24,26,22},{24,10,30}, +{24,10,22},{8,28,52},{8,26,19},{8,10,6},{8,40,37},{38,26,55},{38,10,30},{24,12,22},{24,26,6},{24,10,14},{24,10,6},{42,6,51},{8,26,18},{24,10,3},{22,10,26},{19,0,51},{22,10,26},{24,26,21},{24,26,21},{24,26,21},{24,10,21},{8,42,9},{8,10,5},{8,10,5},{8,40,1},{38,10,8},{8,40,4},{24,26,5},{24,26,5},{24,26,5},{24,10,5},{28,2,8}, 
+{24,10,2},{24,10,2},{8,40,0},{42,8,8},{8,40,0},{26,8,18},{24,26,2},{40,10,2},{8,10,1},{26,8,18},{15,6,18},{8,10,1},{0,10,26},{15,6,18},{0,10,26},{24,0,20},{24,0,20},{24,0,20},{24,0,20},{8,26,1},{8,26,1},{8,26,1},{8,40,1},{38,10,4},{38,10,4},{40,14,40},{40,12,21},{40,42,33},{40,42,33},{40,28,55},{40,42,23},{40,42,18}, +{40,26,33},{8,12,57},{24,26,22},{10,12,21},{10,42,5},{10,42,5},{10,26,17},{30,2,51},{40,42,19},{24,42,14},{8,26,21},{35,2,51},{8,26,21},{40,12,21},{40,12,21},{40,12,21},{40,26,21},{40,12,14},{40,26,6},{40,26,6},{40,10,14},{8,42,10},{24,26,6},{10,42,1},{10,42,1},{10,42,1},{10,26,1},{16,3,8},{40,26,2},{40,26,2},{8,26,5},{17,0,8}, +{8,26,5},{16,1,18},{40,12,1},{10,42,4},{8,42,5},{16,1,18},{19,2,18},{8,42,5},{0,26,20},{19,2,18},{0,26,20},{40,0,20},{40,0,20},{40,0,20},{40,0,20},{40,26,5},{40,26,5},{40,26,5},{40,10,5},{24,26,2},{24,26,2},{26,14,38},{26,28,22},{26,12,30},{26,12,22},{10,30,52},{10,28,21},{10,12,6},{10,42,37},{24,28,53},{40,12,31},{26,14,22}, +{26,28,6},{26,12,14},{26,12,6},{47,2,51},{10,28,20},{26,12,3},{24,12,26},{17,2,51},{24,12,26},{26,28,21},{26,28,21},{26,28,21},{26,12,21},{10,44,9},{10,12,5},{10,12,5},{10,42,1},{40,12,9},{40,42,2},{26,28,5},{26,28,5},{26,28,5},{26,12,5},{30,4,8},{26,12,2},{26,12,2},{10,42,0},{15,8,8},{10,42,0},{15,0,18},{26,28,2},{42,12,5}, +{10,12,1},{15,0,18},{5,4,18},{10,12,1},{0,12,26},{5,4,18},{0,12,26},{26,0,20},{26,0,20},{26,0,20},{26,0,20},{10,28,1},{10,28,1},{10,28,1},{10,42,1},{40,42,2},{40,42,2},{42,47,46},{42,14,31},{12,44,37},{42,44,31},{42,46,52},{42,44,21},{42,44,6},{42,28,25},{10,14,51},{26,28,20},{12,30,18},{12,14,2},{12,44,1},{12,44,10},{29,0,51}, +{42,44,20},{42,44,5},{26,28,20},{37,6,51},{26,28,20},{42,30,26},{42,30,26},{42,30,26},{42,28,30},{42,14,9},{42,44,5},{42,44,5},{42,28,9},{10,44,9},{26,28,4},{12,44,0},{12,44,0},{12,44,0},{12,28,0},{45,0,8},{12,28,4},{12,28,4},{26,28,4},{27,8,8},{26,28,4},{45,2,18},{12,14,2},{12,44,1},{26,44,1},{45,2,18},{25,8,18},{26,44,1}, 
+{0,28,20},{25,8,18},{0,28,20},{42,0,26},{42,0,26},{42,0,26},{42,0,26},{42,44,1},{42,44,1},{42,44,1},{42,12,5},{26,28,0},{26,28,0},{28,47,38},{28,30,22},{28,14,33},{28,14,25},{12,31,55},{12,30,23},{12,14,18},{12,14,33},{42,30,55},{42,14,21},{28,47,22},{28,30,6},{44,14,14},{28,14,9},{46,10,51},{12,30,19},{28,14,6},{26,14,21},{23,8,51}, +{26,14,21},{28,30,21},{28,30,21},{28,30,21},{28,14,21},{12,46,14},{12,14,14},{12,14,14},{12,44,6},{42,14,10},{12,44,6},{28,30,5},{28,30,5},{28,30,5},{28,14,5},{29,2,8},{28,14,2},{28,14,2},{12,44,2},{35,6,8},{12,44,2},{8,7,18},{28,30,2},{44,14,5},{12,14,5},{8,7,18},{33,6,18},{12,14,5},{0,14,20},{33,6,18},{0,14,20},{28,0,20}, +{28,0,20},{28,0,20},{28,0,20},{12,30,5},{12,30,5},{12,30,5},{12,44,5},{42,14,1},{42,14,1},{44,15,46},{44,47,27},{14,46,37},{44,46,31},{44,31,53},{44,46,21},{44,46,6},{44,30,25},{42,47,55},{28,30,22},{14,31,20},{14,47,8},{14,46,1},{14,46,10},{38,3,51},{44,46,20},{44,46,5},{12,30,21},{35,8,51},{12,30,21},{44,31,26},{44,31,26},{44,31,26}, +{44,30,30},{44,47,11},{44,46,5},{44,46,5},{44,30,9},{12,46,9},{28,30,6},{14,46,0},{14,46,0},{14,46,0},{14,30,0},{43,2,8},{14,30,4},{14,30,4},{12,30,5},{21,8,8},{12,30,5},{41,0,18},{44,47,1},{14,46,1},{28,46,1},{41,0,18},{19,8,18},{28,46,1},{0,30,20},{19,8,18},{0,30,20},{44,0,26},{44,0,26},{44,0,26},{44,0,26},{44,46,1}, +{44,46,1},{44,46,1},{44,14,5},{28,30,2},{28,30,2},{30,45,38},{30,31,21},{30,47,24},{30,47,24},{14,29,55},{14,31,22},{14,47,8},{14,46,55},{28,31,53},{44,47,41},{30,45,22},{30,31,5},{30,47,8},{30,47,8},{41,2,51},{14,31,18},{14,47,4},{12,47,37},{17,8,51},{12,47,37},{30,31,20},{30,31,20},{30,31,20},{30,47,24},{14,15,14},{14,47,8},{14,47,8}, +{14,46,6},{44,47,9},{14,46,6},{30,31,4},{30,31,4},{30,31,4},{30,47,8},{38,1,8},{14,47,4},{14,47,4},{14,46,2},{43,28,8},{14,46,2},{9,0,18},{30,31,1},{46,47,1},{14,47,0},{9,0,18},{0,9,18},{14,47,0},{0,47,36},{0,9,18},{0,47,36},{30,0,20},{30,0,20},{30,0,20},{30,0,20},{14,31,4},{14,31,4},{14,31,4},{14,46,5},{14,46,5}, 
+{14,46,5},{46,43,54},{46,45,41},{47,15,55},{46,15,44},{46,13,51},{46,15,20},{46,15,8},{46,31,24},{14,45,56},{30,31,21},{47,29,21},{47,15,6},{47,15,6},{47,31,17},{23,0,51},{46,15,20},{46,15,8},{30,31,20},{18,9,51},{30,31,20},{46,13,37},{46,13,37},{46,13,37},{46,15,40},{46,45,8},{46,15,4},{46,15,4},{46,31,8},{14,15,14},{30,31,5},{47,15,2}, +{47,15,2},{47,15,2},{47,31,1},{39,0,8},{46,15,4},{46,15,4},{30,31,4},{9,28,8},{30,31,4},{39,2,18},{47,15,5},{47,15,5},{30,15,4},{39,2,18},{34,9,18},{30,15,4},{0,31,20},{34,9,18},{0,31,20},{46,0,36},{46,0,36},{46,0,36},{46,0,36},{46,15,0},{46,15,0},{46,15,0},{46,47,1},{30,31,1},{30,31,1},{31,43,38},{31,29,22},{31,45,25}, +{31,45,22},{47,43,53},{47,29,19},{47,45,6},{47,15,37},{46,29,52},{46,45,27},{31,43,22},{31,29,6},{31,45,9},{31,45,6},{9,6,51},{47,29,18},{47,45,5},{30,45,26},{32,7,51},{30,45,26},{31,13,21},{31,13,21},{31,13,21},{31,45,21},{47,13,9},{47,45,5},{47,45,5},{47,15,1},{46,45,10},{46,15,8},{31,13,5},{31,13,5},{31,13,5},{31,45,5},{23,2,8}, +{31,15,4},{31,15,4},{47,15,0},{20,9,8},{47,15,0},{47,11,18},{31,29,2},{15,45,5},{47,45,1},{47,11,18},{22,9,18},{47,45,1},{0,45,26},{22,9,18},{0,45,26},{31,0,20},{31,0,20},{31,0,20},{31,0,20},{47,29,1},{47,29,1},{47,29,1},{47,15,1},{46,45,1},{46,45,1},{15,11,38},{15,43,21},{15,13,33},{15,13,33},{15,27,55},{15,13,23},{15,13,18}, +{15,29,33},{47,13,59},{31,29,22},{45,27,21},{45,13,6},{45,13,6},{45,29,17},{21,2,51},{15,13,19},{15,13,14},{31,29,21},{24,9,51},{31,29,21},{15,27,21},{15,27,21},{15,27,21},{15,29,21},{15,13,14},{15,29,6},{15,29,6},{15,45,14},{47,13,10},{31,29,6},{45,13,2},{45,13,2},{45,13,2},{45,29,1},{37,2,8},{15,29,2},{15,29,2},{31,29,5},{34,7,8}, +{31,29,5},{35,0,18},{15,43,1},{45,13,5},{31,13,5},{35,0,18},{36,7,18},{31,13,5},{0,29,20},{36,7,18},{0,29,20},{15,0,20},{15,0,20},{15,0,20},{15,0,20},{15,13,5},{15,13,5},{15,13,5},{15,45,5},{31,29,2},{31,29,2},{29,41,38},{29,27,20},{29,43,25},{29,43,25},{45,41,53},{45,27,21},{45,43,6},{45,13,37},{31,27,53},{15,43,31},{29,41,22}, 
+{29,27,4},{29,43,9},{29,43,9},{35,2,51},{45,27,20},{45,43,5},{31,43,26},{34,5,51},{31,43,26},{29,27,20},{29,27,20},{29,27,20},{29,13,24},{45,11,9},{45,43,5},{45,43,5},{45,13,1},{15,43,9},{15,13,2},{29,27,4},{29,27,4},{29,27,4},{29,13,8},{44,1,8},{29,13,4},{29,13,4},{45,13,0},{26,9,8},{45,13,0},{3,0,18},{29,27,0},{13,43,5}, +{45,43,1},{3,0,18},{0,3,18},{45,43,1},{0,43,26},{0,3,18},{0,43,26},{29,0,20},{29,0,20},{29,0,20},{29,0,20},{45,27,1},{45,27,1},{45,27,1},{45,13,1},{15,13,2},{15,13,2},{13,9,46},{13,41,31},{43,11,37},{13,11,31},{13,25,53},{13,11,21},{13,11,6},{13,27,30},{29,11,56},{29,27,22},{43,25,18},{43,41,2},{43,11,1},{43,27,16},{17,0,51}, +{13,11,20},{13,11,5},{29,27,21},{18,3,51},{29,27,21},{13,25,26},{13,25,26},{13,25,26},{13,27,27},{13,41,9},{13,27,3},{13,27,3},{13,27,14},{45,11,9},{29,27,6},{43,11,0},{43,11,0},{43,11,0},{43,27,0},{31,5,8},{13,27,2},{13,27,2},{29,27,5},{14,9,8},{29,27,5},{31,3,18},{43,41,2},{43,11,1},{29,11,1},{31,3,18},{34,3,18},{29,11,1}, +{0,27,20},{34,3,18},{0,27,20},{13,0,26},{13,0,26},{13,0,26},{13,0,26},{13,11,1},{13,11,1},{13,11,1},{13,43,5},{29,27,2},{29,27,2},{27,39,38},{27,25,22},{27,41,33},{27,41,25},{43,23,55},{43,25,23},{43,41,18},{43,41,33},{13,25,55},{13,41,21},{27,39,22},{27,25,6},{11,41,14},{27,41,9},{27,9,51},{43,25,19},{27,41,6},{13,41,21},{14,7,51}, +{13,41,21},{27,9,21},{27,9,21},{27,9,21},{27,41,21},{43,9,14},{43,25,14},{43,25,14},{43,11,5},{13,41,10},{43,11,5},{27,9,5},{27,9,5},{27,9,5},{27,41,5},{17,2,8},{27,41,2},{27,41,2},{43,11,1},{16,1,8},{43,11,1},{43,7,18},{27,25,2},{11,41,5},{27,41,5},{43,7,18},{18,1,18},{27,41,5},{0,41,20},{18,1,18},{0,41,20},{27,0,20}, +{27,0,20},{27,0,20},{27,0,20},{43,9,5},{43,9,5},{43,9,5},{43,11,4},{13,41,1},{13,41,1},{11,7,46},{11,39,30},{41,9,37},{11,9,31},{11,23,53},{11,9,21},{11,9,6},{11,25,30},{13,39,55},{27,25,22},{41,23,18},{41,9,4},{41,9,1},{41,25,16},{17,6,51},{11,9,20},{11,9,5},{27,25,21},{20,1,51},{27,25,21},{11,23,26},{11,23,26},{11,23,26}, 
+{11,25,27},{11,39,9},{11,25,3},{11,25,3},{11,25,14},{43,9,9},{27,25,6},{41,9,0},{41,9,0},{41,9,0},{41,25,0},{29,3,8},{11,25,2},{11,25,2},{27,25,5},{43,9,8},{27,25,5},{29,1,18},{41,9,4},{41,9,1},{27,9,1},{29,1,18},{36,1,18},{27,9,1},{0,25,20},{36,1,18},{0,25,20},{11,0,26},{11,0,26},{11,0,26},{11,0,26},{11,9,1}, +{11,9,1},{11,9,1},{11,41,2},{27,25,2},{27,25,2},{25,37,38},{25,23,22},{25,39,25},{25,39,25},{41,21,55},{41,23,23},{41,39,9},{41,39,46},{27,23,53},{11,39,30},{25,37,22},{25,23,6},{25,39,9},{25,39,9},{27,3,51},{41,23,19},{41,39,5},{27,39,26},{39,9,51},{27,39,26},{25,7,21},{25,7,21},{25,7,21},{25,9,24},{41,7,14},{41,39,8},{41,39,8}, +{41,9,5},{11,39,8},{41,9,5},{25,7,5},{25,7,5},{25,7,5},{25,9,8},{39,11,8},{25,9,4},{25,9,4},{41,9,1},{22,1,8},{41,9,1},{41,5,18},{25,23,2},{9,39,2},{41,39,1},{41,5,18},{23,9,18},{41,39,1},{0,39,26},{23,9,18},{0,39,26},{25,0,20},{25,0,20},{25,0,20},{25,0,20},{41,7,5},{41,7,5},{41,7,5},{41,9,4},{41,9,4}, +{41,9,4},{39,21,54},{39,7,40},{39,7,37},{9,7,41},{9,5,51},{9,7,20},{9,7,5},{9,23,30},{41,37,51},{25,23,26},{39,21,18},{39,7,4},{39,7,1},{39,7,17},{11,1,51},{9,7,20},{9,7,5},{25,23,26},{5,9,51},{25,23,26},{39,7,36},{39,7,36},{39,7,36},{39,23,36},{9,21,10},{9,7,4},{9,7,4},{9,23,5},{41,7,12},{25,23,1},{39,7,0}, +{39,7,0},{39,7,0},{39,23,0},{25,5,8},{9,7,4},{9,7,4},{25,23,1},{10,1,8},{25,23,1},{25,3,18},{39,7,4},{39,7,1},{25,7,1},{25,3,18},{12,1,18},{25,7,1},{0,23,26},{12,1,18},{0,23,26},{9,0,36},{9,0,36},{9,0,36},{9,0,36},{9,7,0},{9,7,0},{9,7,0},{9,23,4},{25,23,0},{25,23,0},{23,19,38},{23,5,23},{23,21,28}, +{23,37,23},{39,19,55},{39,21,25},{39,37,13},{39,37,33},{9,21,53},{9,37,22},{7,21,27},{23,5,14},{7,37,14},{23,37,14},{23,5,51},{39,21,21},{23,37,6},{9,37,21},{14,1,51},{9,37,21},{23,5,19},{23,5,19},{23,5,19},{23,37,19},{39,5,14},{39,21,9},{39,21,9},{39,7,5},{9,37,11},{39,7,5},{23,5,10},{23,5,10},{23,5,10},{7,7,10},{9,3,8}, 
+{23,37,2},{23,37,2},{39,7,1},{31,3,8},{39,7,1},{37,7,18},{23,5,5},{7,37,5},{23,37,5},{37,7,18},{30,1,18},{23,37,5},{0,37,20},{30,1,18},{0,37,20},{23,0,18},{23,0,18},{23,0,18},{23,0,18},{39,5,5},{39,5,5},{39,5,5},{39,7,4},{9,37,2},{9,37,2},{7,33,44},{7,35,31},{7,5,36},{7,5,31},{7,19,53},{7,5,22},{7,5,7}, +{7,21,31},{39,35,57},{23,21,27},{37,19,20},{37,5,4},{37,5,1},{37,5,17},{23,17,51},{7,5,21},{7,5,6},{23,21,26},{45,17,51},{23,21,26},{7,19,26},{7,19,26},{7,19,26},{7,5,30},{7,35,9},{7,5,6},{7,5,6},{7,21,6},{39,5,9},{23,21,2},{37,5,0},{37,5,0},{37,5,0},{37,21,0},{23,3,8},{37,21,4},{37,21,4},{23,21,1},{43,3,8}, +{23,21,1},{23,1,18},{37,5,4},{37,5,1},{23,5,1},{23,1,18},{33,7,18},{23,5,1},{0,21,26},{33,7,18},{0,21,26},{7,0,26},{7,0,26},{7,0,26},{7,0,26},{7,5,2},{7,5,2},{7,5,2},{7,37,2},{23,21,1},{23,21,1},{21,17,38},{21,19,20},{21,35,31},{21,35,23},{37,17,55},{37,19,22},{37,35,13},{37,35,33},{23,19,56},{7,35,21},{5,19,27}, +{21,19,11},{5,35,14},{21,35,14},{21,3,51},{37,19,18},{21,35,6},{7,35,21},{43,1,51},{7,35,21},{21,3,19},{21,3,19},{21,3,19},{21,35,19},{37,3,14},{37,35,9},{37,35,9},{37,5,5},{7,35,10},{37,5,5},{21,3,10},{21,3,10},{21,3,10},{5,5,10},{3,25,8},{21,35,2},{21,35,2},{37,5,1},{29,1,8},{37,5,1},{35,5,18},{21,19,2},{5,35,5}, +{21,35,5},{35,5,18},{27,1,18},{21,35,5},{0,35,20},{27,1,18},{0,35,20},{21,0,18},{21,0,18},{21,0,18},{21,0,18},{37,19,4},{37,19,4},{37,19,4},{37,5,4},{7,35,1},{7,35,1},{35,17,44},{35,33,27},{35,3,27},{35,3,35},{5,1,51},{5,3,26},{5,3,1},{5,19,30},{37,33,51},{21,19,26},{35,17,19},{35,33,2},{35,3,2},{35,3,10},{5,1,51}, +{5,3,26},{5,3,1},{21,19,26},{9,1,51},{21,19,26},{35,3,27},{35,3,27},{35,3,27},{35,19,27},{5,17,10},{5,3,1},{5,3,1},{5,19,5},{37,3,12},{21,19,1},{35,3,2},{35,3,2},{35,3,2},{35,19,2},{19,5,8},{5,3,1},{5,3,1},{21,19,1},{37,3,8},{21,19,1},{19,3,18},{35,33,1},{35,3,1},{5,3,1},{19,3,18},{39,1,18},{5,3,1}, 
+{0,19,26},{39,1,18},{0,19,26},{35,0,26},{35,0,26},{35,0,26},{35,0,26},{5,3,0},{5,3,0},{5,3,0},{5,19,4},{21,19,0},{21,19,0},{19,1,54},{19,1,22},{19,17,28},{19,33,27},{19,1,61},{35,17,30},{19,33,19},{35,33,31},{5,17,52},{5,33,20},{3,1,24},{3,17,8},{3,33,13},{3,33,17},{17,5,51},{35,17,21},{19,33,10},{5,33,19},{5,17,51}, +{5,33,19},{19,1,18},{19,1,18},{19,1,18},{19,33,18},{19,17,19},{19,33,10},{19,33,10},{19,3,14},{5,33,11},{35,3,6},{3,33,4},{3,33,4},{3,33,4},{3,3,8},{3,3,8},{19,33,1},{19,33,1},{19,3,5},{3,3,8},{19,3,5},{33,33,18},{19,1,4},{3,33,9},{19,33,9},{33,33,18},{33,33,18},{19,33,9},{0,33,18},{33,33,18},{0,33,18},{19,0,18}, +{19,0,18},{19,0,18},{19,0,18},{35,1,9},{35,1,9},{35,1,9},{19,3,10},{35,3,2},{35,3,2},{33,1,76},{33,1,36},{33,1,27},{33,1,35},{33,1,84},{3,1,26},{3,1,1},{3,17,30},{19,1,56},{19,17,27},{17,1,43},{33,1,11},{33,1,2},{33,1,10},{1,3,51},{3,1,26},{3,1,1},{19,17,26},{3,1,51},{19,17,26},{33,1,27},{33,1,27},{33,1,27}, +{33,17,26},{3,1,16},{3,1,1},{3,1,1},{3,17,5},{35,1,12},{19,17,2},{33,1,2},{33,1,2},{33,1,2},{33,17,1},{17,3,8},{3,1,1},{3,1,1},{19,17,1},{35,1,8},{19,17,1},{17,1,18},{17,1,10},{33,1,1},{19,1,0},{17,1,18},{33,1,18},{19,1,0},{0,17,26},{33,1,18},{0,17,26},{33,0,26},{33,0,26},{33,0,26},{33,0,26},{3,1,0}, +{3,1,0},{3,1,0},{3,17,4},{19,17,1},{19,17,1},{17,1,36},{17,1,28},{17,1,27},{17,1,19},{17,1,28},{17,1,12},{17,1,11},{17,1,10},{33,1,20},{33,1,2},{1,1,4},{1,1,4},{1,1,4},{1,1,4},{1,17,3},{17,1,3},{17,1,2},{17,1,1},{17,1,3},{17,1,1},{17,1,27},{17,1,27},{17,1,27},{17,1,19},{17,1,19},{17,1,11},{17,1,11}, +{17,1,10},{33,1,11},{33,1,2},{1,1,4},{1,1,4},{1,1,4},{1,1,4},{1,17,2},{17,1,2},{17,1,2},{17,1,1},{17,1,2},{17,1,1},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{17,0,18},{17,0,18},{17,0,18},{17,0,18},{17,1,10},{17,1,10},{17,1,10},{17,1,10},{33,1,2}, 
+{33,1,2},{0,4,74},{0,18,10},{0,2,1},{0,2,26},{0,34,154},{0,2,99},{0,32,49},{0,32,121},{0,32,162},{0,32,130},{0,4,74},{0,18,10},{0,2,1},{0,2,26},{32,0,153},{0,2,99},{0,32,49},{0,32,121},{0,32,153},{0,32,121},{0,2,0},{0,2,0},{0,2,0},{0,16,4},{0,32,13},{0,16,5},{0,16,5},{0,16,9},{0,16,14},{0,16,10},{0,2,0}, +{0,2,0},{0,2,0},{0,16,4},{16,0,13},{0,16,5},{0,16,5},{0,16,9},{32,0,13},{0,16,9},{32,32,72},{0,18,10},{0,2,1},{0,2,26},{32,32,72},{32,32,72},{0,2,26},{0,32,72},{32,32,72},{0,32,72},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,38,81},{0,20,10},{16,34,34}, +{0,18,26},{0,36,243},{0,34,99},{0,18,26},{0,2,139},{0,18,279},{0,2,164},{16,6,76},{16,4,8},{16,34,9},{16,18,24},{18,0,243},{0,34,99},{0,18,26},{0,2,139},{36,0,243},{0,2,139},{0,20,10},{0,20,10},{0,20,10},{0,18,10},{0,34,50},{0,18,10},{0,18,10},{0,32,20},{0,32,61},{0,32,29},{16,4,4},{16,4,4},{16,4,4},{16,2,5},{16,2,50}, +{0,18,10},{0,18,10},{0,32,20},{34,0,50},{0,32,20},{18,2,72},{0,20,1},{16,34,5},{0,18,17},{18,2,72},{38,0,72},{0,18,17},{0,18,80},{38,0,72},{0,18,80},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,32,0},{0,32,0},{0,32,0},{0,16,0},{0,16,1},{0,16,1},{16,24,135},{16,6,66},{16,20,122},{16,4,66},{0,8,244},{0,20,81},{0,4,2}, +{0,34,121},{0,4,344},{0,34,185},{32,8,74},{32,36,1},{32,4,17},{32,4,17},{16,38,243},{0,20,81},{0,4,2},{0,34,121},{38,16,243},{0,34,121},{16,22,61},{16,22,61},{16,22,61},{16,4,65},{0,6,52},{0,4,1},{0,4,1},{0,18,9},{0,18,94},{0,18,45},{32,36,0},{32,36,0},{32,36,0},{32,34,0},{2,2,50},{0,4,1},{0,4,1},{0,18,9},{2,2,50}, +{0,18,9},{20,16,72},{32,36,1},{2,4,1},{0,4,1},{20,16,72},{16,20,72},{0,4,1},{0,34,72},{16,20,72},{0,34,72},{16,0,61},{16,0,61},{16,0,61},{16,0,61},{0,4,1},{0,4,1},{0,4,1},{0,2,1},{0,32,25},{0,32,25},{32,26,152},{2,22,91},{2,36,119},{32,20,89},{16,10,247},{16,6,78},{16,36,13},{16,4,110},{0,36,293},{0,4,103},{18,8,76}, 
+{18,6,8},{18,36,9},{2,20,19},{20,2,243},{16,6,74},{32,20,9},{0,4,94},{42,0,243},{0,4,94},{32,8,81},{32,8,81},{32,8,81},{32,20,80},{16,8,54},{16,36,9},{16,36,9},{16,34,8},{0,4,77},{0,34,6},{18,6,4},{18,6,4},{18,6,4},{18,4,5},{18,4,50},{32,20,0},{32,20,0},{16,34,4},{36,2,50},{16,34,4},{18,8,72},{2,22,1},{18,36,5}, +{0,36,4},{18,8,72},{44,0,72},{0,36,4},{0,4,90},{44,0,72},{0,4,90},{32,0,80},{32,0,80},{32,0,80},{32,0,80},{16,6,4},{16,6,4},{16,6,4},{16,34,4},{0,34,2},{0,34,2},{18,42,146},{18,8,83},{18,22,139},{18,6,79},{2,26,243},{2,38,78},{2,6,9},{2,36,110},{0,38,252},{0,36,79},{34,10,73},{34,8,6},{4,6,21},{34,6,14},{38,0,243}, +{2,38,78},{18,6,6},{0,36,75},{30,0,243},{0,36,75},{18,24,75},{18,24,75},{18,24,75},{18,6,75},{2,24,50},{2,6,5},{2,6,5},{2,20,4},{0,6,53},{16,20,2},{34,8,2},{34,8,2},{34,8,2},{34,36,2},{2,24,50},{18,6,2},{18,6,2},{16,20,1},{28,0,50},{16,20,1},{36,6,72},{34,8,5},{4,6,5},{2,6,5},{36,6,72},{31,0,72},{2,6,5}, +{0,36,74},{31,0,72},{0,36,74},{18,0,74},{18,0,74},{18,0,74},{18,0,74},{2,22,0},{2,22,0},{2,22,0},{2,4,4},{16,20,2},{16,20,2},{4,12,154},{4,24,85},{4,38,106},{34,38,95},{18,12,253},{18,8,90},{34,38,15},{18,6,122},{0,24,244},{16,22,93},{20,10,76},{20,8,9},{20,38,6},{20,22,24},{24,0,243},{18,8,81},{34,38,6},{0,22,80},{32,8,243}, +{0,22,80},{4,40,80},{4,40,80},{4,40,80},{4,22,80},{34,24,59},{34,22,10},{34,22,10},{18,36,19},{32,22,53},{32,6,5},{20,8,5},{20,8,5},{20,8,5},{20,6,8},{22,2,50},{34,22,1},{34,22,1},{32,6,4},{42,2,50},{32,6,4},{24,2,72},{20,8,5},{20,38,2},{18,38,1},{24,2,72},{13,0,72},{18,38,1},{0,22,80},{13,0,72},{0,22,80},{4,0,80}, +{4,0,80},{4,0,80},{4,0,80},{34,22,9},{34,22,9},{34,22,9},{18,36,10},{32,6,1},{32,6,1},{20,28,146},{20,10,77},{20,8,122},{20,8,77},{4,28,243},{4,40,78},{4,8,5},{4,38,110},{32,40,244},{2,38,79},{36,12,73},{36,40,2},{6,8,18},{36,8,18},{38,6,243},{34,40,73},{4,8,5},{2,38,75},{27,0,243},{2,38,75},{20,26,73},{20,26,73},{20,26,73}, 
+{20,8,76},{4,26,50},{4,8,4},{4,8,4},{4,22,4},{2,8,50},{18,22,2},{36,40,1},{36,40,1},{36,40,1},{36,38,2},{8,2,50},{4,8,4},{4,8,4},{18,22,1},{30,2,50},{18,22,1},{16,11,72},{36,40,1},{6,8,2},{4,8,1},{16,11,72},{25,0,72},{4,8,1},{0,38,74},{25,0,72},{0,38,74},{20,0,72},{20,0,72},{20,0,72},{20,0,72},{4,24,0}, +{4,24,0},{4,24,0},{4,6,4},{18,22,2},{18,22,2},{6,14,154},{6,26,85},{6,40,109},{6,24,97},{20,14,248},{20,10,78},{20,40,13},{20,8,110},{2,26,244},{34,8,91},{22,12,76},{22,10,9},{22,40,9},{22,24,29},{26,2,243},{20,10,74},{20,40,9},{2,24,80},{38,8,243},{2,24,80},{6,42,80},{6,42,80},{6,42,80},{6,24,81},{20,12,54},{20,40,9},{20,40,9}, +{20,38,13},{34,24,53},{34,8,10},{22,10,5},{22,10,5},{22,10,5},{22,8,5},{24,4,50},{36,24,1},{36,24,1},{34,8,9},{11,0,50},{34,8,9},{26,4,72},{22,10,5},{22,40,5},{4,40,5},{26,4,72},{7,0,72},{4,40,5},{0,24,80},{7,0,72},{0,24,80},{6,0,80},{6,0,80},{6,0,80},{6,0,80},{20,10,4},{20,10,4},{20,10,4},{20,38,4},{34,8,1}, +{34,8,1},{22,46,146},{22,12,83},{38,26,122},{22,10,79},{6,30,245},{6,42,74},{6,10,10},{6,40,111},{4,42,247},{20,40,79},{38,14,76},{38,12,5},{8,10,21},{38,10,17},{44,0,243},{6,42,73},{6,10,9},{4,40,74},{26,8,243},{4,40,74},{22,28,75},{22,28,75},{22,28,75},{22,10,75},{6,28,51},{6,10,6},{6,10,6},{6,24,6},{4,10,53},{20,24,6},{38,12,4}, +{38,12,4},{38,12,4},{38,40,5},{38,10,50},{22,40,4},{22,40,4},{36,24,1},{23,0,50},{36,24,1},{42,6,72},{38,12,1},{8,10,5},{6,10,5},{42,6,72},{19,0,72},{6,10,5},{0,40,74},{19,0,72},{0,40,74},{22,0,74},{22,0,74},{22,0,74},{22,0,74},{6,26,2},{6,26,2},{6,26,2},{6,8,5},{20,24,5},{20,24,5},{8,46,154},{8,28,85},{8,42,106}, +{8,42,97},{38,46,247},{38,12,82},{38,42,10},{22,26,119},{4,28,247},{36,26,83},{24,14,75},{24,28,6},{24,42,6},{24,26,30},{30,0,243},{38,12,78},{38,42,6},{4,26,75},{33,0,243},{4,26,75},{8,44,80},{8,44,80},{8,44,80},{8,26,80},{38,28,56},{38,26,9},{38,26,9},{38,40,14},{20,42,54},{6,10,9},{24,12,1},{24,12,1},{24,12,1},{24,10,5},{28,2,50}, 
+{38,26,5},{38,26,5},{6,10,5},{42,8,50},{6,10,5},{30,2,72},{8,28,5},{24,42,5},{22,42,1},{30,2,72},{35,2,72},{22,42,1},{0,26,74},{35,2,72},{0,26,74},{8,0,80},{8,0,80},{8,0,80},{8,0,80},{38,26,5},{38,26,5},{38,26,5},{38,40,5},{6,40,4},{6,40,4},{24,15,146},{24,14,83},{24,28,126},{24,12,79},{8,31,243},{8,44,78},{8,12,9}, +{8,42,110},{36,44,245},{22,42,79},{40,46,75},{40,14,6},{10,12,21},{40,12,14},{47,0,243},{8,44,78},{8,12,9},{6,42,74},{27,6,243},{6,42,74},{24,30,75},{24,30,75},{24,30,75},{24,12,75},{8,14,52},{8,12,5},{8,12,5},{8,26,5},{6,12,53},{22,26,6},{40,14,2},{40,14,2},{40,14,2},{40,42,1},{16,3,50},{24,42,4},{24,42,4},{38,26,1},{17,0,50}, +{38,26,1},{47,2,72},{40,14,5},{10,12,5},{8,12,5},{47,2,72},{17,2,72},{8,12,5},{0,42,74},{17,2,72},{0,42,74},{24,0,74},{24,0,74},{24,0,74},{24,0,74},{8,28,0},{8,28,0},{8,28,0},{8,10,1},{22,26,5},{22,26,5},{10,15,152},{10,30,85},{10,44,106},{40,44,95},{40,31,252},{24,14,90},{40,44,15},{24,12,122},{6,30,247},{38,28,89},{26,47,73}, +{26,30,6},{26,44,6},{26,28,21},{34,3,243},{8,30,76},{40,44,6},{6,28,80},{35,4,243},{6,28,80},{10,46,80},{10,46,80},{10,46,80},{10,28,81},{40,30,59},{40,28,10},{40,28,10},{24,42,18},{22,44,51},{8,12,9},{26,14,1},{26,14,1},{26,14,1},{26,12,5},{30,4,50},{40,28,1},{40,28,1},{8,12,5},{15,8,50},{8,12,5},{15,2,72},{10,30,5},{26,44,5}, +{24,44,1},{15,2,72},{33,4,72},{24,44,1},{0,28,80},{33,4,72},{0,28,80},{10,0,80},{10,0,80},{10,0,80},{10,0,80},{40,28,9},{40,28,9},{40,28,9},{24,42,9},{8,12,5},{8,12,5},{26,13,152},{26,47,89},{42,30,122},{26,14,89},{10,29,245},{10,46,74},{10,30,15},{10,44,106},{8,46,247},{24,44,81},{42,45,76},{42,47,9},{12,30,18},{42,14,17},{4,1,243}, +{10,46,73},{26,14,10},{8,44,80},{1,4,243},{8,44,80},{26,15,80},{26,15,80},{26,15,80},{26,14,80},{10,31,51},{10,30,6},{10,30,6},{10,28,6},{24,14,54},{24,28,6},{42,47,5},{42,47,5},{42,47,5},{42,44,5},{45,0,50},{26,14,1},{26,14,1},{40,28,1},{27,8,50},{40,28,1},{46,10,72},{12,46,5},{12,30,9},{26,14,9},{46,10,72},{23,8,72},{26,14,9}, 
+{0,44,80},{23,8,72},{0,44,80},{26,0,80},{26,0,80},{26,0,80},{26,0,80},{10,30,2},{10,30,2},{10,30,2},{10,12,5},{24,44,1},{24,44,1},{12,13,148},{12,31,79},{12,46,110},{12,46,83},{42,29,248},{42,47,85},{42,46,5},{26,30,126},{8,31,247},{40,30,83},{28,29,75},{28,47,5},{28,46,5},{28,46,26},{27,0,243},{26,31,80},{42,46,1},{8,30,75},{37,8,243}, +{8,30,75},{12,15,74},{12,15,74},{12,15,74},{12,30,74},{42,15,56},{42,46,5},{42,46,5},{26,14,21},{24,46,56},{10,14,2},{28,47,1},{28,47,1},{28,47,1},{28,14,5},{29,2,50},{42,46,1},{42,46,1},{10,14,1},{35,6,50},{10,14,1},{38,3,72},{28,47,4},{44,46,1},{26,46,1},{38,3,72},{35,8,72},{26,46,1},{0,30,74},{35,8,72},{0,30,74},{12,0,74}, +{12,0,74},{12,0,74},{12,0,74},{42,46,4},{42,46,4},{42,46,4},{42,44,5},{10,14,1},{10,14,1},{28,11,146},{28,45,84},{44,47,121},{28,47,79},{12,27,244},{12,15,79},{12,47,6},{12,46,106},{40,15,244},{26,46,81},{44,43,76},{44,15,6},{14,47,11},{44,47,14},{28,13,243},{42,15,76},{12,47,5},{10,46,80},{27,12,243},{10,46,80},{28,13,75},{28,13,75},{28,13,75}, +{28,47,78},{12,29,51},{12,47,5},{12,47,5},{12,30,6},{10,47,51},{26,30,6},{44,15,5},{44,15,5},{44,15,5},{44,46,5},{43,2,50},{12,47,4},{12,47,4},{42,30,1},{21,8,50},{42,30,1},{41,2,72},{44,15,2},{14,47,2},{12,47,1},{41,2,72},{17,8,72},{12,47,1},{0,46,80},{17,8,72},{0,46,80},{28,0,74},{28,0,74},{28,0,74},{28,0,74},{12,31,1}, +{12,31,1},{12,31,1},{12,14,5},{26,46,1},{26,46,1},{14,11,148},{14,29,79},{14,15,114},{14,31,90},{44,27,248},{28,45,84},{28,15,18},{28,47,115},{10,29,247},{42,47,91},{30,27,75},{30,45,5},{30,15,6},{30,31,21},{40,3,243},{28,45,75},{28,15,9},{10,31,80},{16,9,243},{10,31,80},{14,13,74},{14,13,74},{14,13,74},{14,31,74},{44,13,56},{44,31,6},{44,31,6}, 
+{44,46,21},{42,31,52},{42,47,10},{30,45,1},{30,45,1},{30,45,1},{30,47,2},{38,1,50},{44,31,2},{44,31,2},{42,47,9},{43,28,50},{42,47,9},{29,12,72},{30,45,4},{30,15,5},{12,15,5},{29,12,72},{26,13,72},{12,15,5},{0,31,80},{26,13,72},{0,31,80},{14,0,74},{14,0,74},{14,0,74},{14,0,74},{44,15,5},{44,15,5},{44,15,5},{44,46,5},{42,47,1}, +{42,47,1},{30,9,154},{46,43,91},{46,29,115},{30,45,85},{14,9,247},{14,13,77},{14,29,18},{14,15,114},{12,13,247},{28,15,79},{46,25,83},{46,43,10},{47,45,21},{46,45,17},{10,1,243},{14,13,73},{30,45,6},{12,15,74},{4,9,243},{12,15,74},{30,11,80},{30,11,80},{30,11,80},{30,45,81},{14,11,56},{14,29,9},{14,29,9},{14,31,6},{12,45,56},{44,31,5},{46,43,9}, +{46,43,9},{46,43,9},{46,15,10},{39,0,50},{30,45,2},{30,45,2},{44,31,1},{9,28,50},{44,31,1},{9,6,72},{46,43,1},{47,45,5},{14,45,5},{9,6,72},{32,7,72},{14,45,5},{0,15,74},{32,7,72},{0,15,74},{30,0,80},{30,0,80},{30,0,80},{30,0,80},{14,13,5},{14,13,5},{14,13,5},{14,31,5},{44,31,4},{44,31,4},{47,9,154},{47,27,81},{47,13,106}, +{47,13,97},{46,9,244},{46,43,79},{46,13,6},{46,45,121},{12,27,248},{44,29,84},{31,25,75},{31,27,6},{31,13,6},{31,29,30},{21,0,243},{46,43,78},{46,13,5},{12,29,75},{18,7,243},{12,29,75},{47,11,80},{47,11,80},{47,11,80},{47,29,80},{46,11,51},{46,13,5},{46,13,5},{46,15,11},{28,13,54},{14,45,6},{31,43,1},{31,43,1},{31,43,1},{31,29,5},{23,2,50}, +{46,13,4},{46,13,4},{14,45,5},{20,9,50},{14,45,5},{21,2,72},{47,27,1},{15,13,5},{30,13,1},{21,2,72},{24,9,72},{30,13,1},{0,29,74},{24,9,72},{0,29,74},{47,0,80},{47,0,80},{47,0,80},{47,0,80},{46,13,1},{46,13,1},{46,13,1},{46,15,2},{14,45,2},{14,45,2},{31,7,146},{31,41,83},{31,27,126},{31,43,79},{47,23,243},{47,11,78},{47,43,5}, 
+{47,13,110},{14,11,247},{30,13,79},{15,9,75},{15,11,2},{15,27,21},{15,43,14},{14,3,243},{47,11,78},{47,43,5},{14,13,74},{32,5,243},{14,13,74},{31,9,75},{31,9,75},{31,9,75},{31,43,75},{47,25,52},{47,43,1},{47,43,1},{47,29,5},{14,43,56},{46,29,5},{15,11,1},{15,11,1},{15,11,1},{15,13,1},{37,2,50},{47,43,1},{47,43,1},{46,29,1},{34,7,50}, +{46,29,1},{35,2,72},{15,11,1},{45,43,5},{47,43,4},{35,2,72},{34,5,72},{47,43,4},{0,13,74},{34,5,72},{0,13,74},{31,0,74},{31,0,74},{31,0,74},{31,0,74},{47,27,1},{47,27,1},{47,27,1},{47,45,1},{46,29,4},{46,29,4},{45,7,152},{45,25,81},{45,11,106},{45,11,97},{15,23,253},{31,25,90},{31,11,15},{31,43,122},{14,25,247},{46,27,89},{29,39,73}, +{29,25,6},{29,11,6},{29,27,26},{46,3,243},{47,25,76},{31,11,6},{14,27,80},{16,3,243},{14,27,80},{45,9,80},{45,9,80},{45,9,80},{45,27,81},{15,25,59},{15,27,10},{15,27,10},{31,13,18},{30,11,54},{46,43,9},{29,41,1},{29,41,1},{29,41,1},{29,43,2},{44,1,50},{15,27,1},{15,27,1},{46,43,5},{26,9,50},{46,43,5},{46,1,72},{45,25,1},{13,11,5}, +{31,11,2},{46,1,72},{26,7,72},{31,11,2},{0,27,80},{26,7,72},{0,27,80},{45,0,80},{45,0,80},{45,0,80},{45,0,80},{15,27,9},{15,27,9},{15,27,9},{31,13,9},{47,13,5},{47,13,5},{29,5,154},{29,39,89},{13,25,122},{29,41,89},{45,21,245},{45,9,74},{45,41,15},{45,11,106},{47,9,247},{31,11,85},{13,37,76},{13,9,9},{43,25,18},{13,41,17},{1,0,243}, +{45,9,73},{29,41,10},{47,11,80},{0,1,243},{47,11,80},{29,7,80},{29,7,80},{29,7,80},{29,41,80},{45,23,51},{45,41,6},{45,41,6},{45,27,6},{31,41,54},{31,27,6},{13,9,5},{13,9,5},{13,9,5},{13,11,5},{31,5,50},{29,41,1},{29,41,1},{15,27,1},{14,9,50},{15,27,1},{27,9,72},{13,9,5},{43,25,9},{29,41,9},{27,9,72},{14,7,72},{29,41,9}, 
+{0,11,80},{14,7,72},{0,11,80},{29,0,80},{29,0,80},{29,0,80},{29,0,80},{45,25,1},{45,25,1},{45,25,1},{45,27,5},{31,11,5},{31,11,5},{43,5,148},{43,23,79},{43,9,110},{43,9,83},{13,21,248},{13,39,85},{13,9,9},{29,25,126},{47,23,248},{15,25,83},{27,21,75},{27,23,6},{27,9,5},{27,9,26},{15,1,243},{29,23,74},{13,9,5},{31,25,75},{9,11,243}, +{31,25,75},{43,7,74},{43,7,74},{43,7,74},{43,25,74},{13,7,56},{13,9,9},{13,9,9},{13,11,21},{15,9,56},{15,41,6},{27,39,1},{27,39,1},{27,39,1},{27,25,5},{17,2,50},{43,25,4},{43,25,4},{15,41,2},{16,1,50},{15,41,2},{17,6,72},{27,23,5},{11,9,1},{29,9,0},{17,6,72},{20,1,72},{29,9,0},{0,25,74},{20,1,72},{0,25,74},{43,0,74}, +{43,0,74},{43,0,74},{43,0,74},{13,9,5},{13,9,5},{13,9,5},{13,11,5},{15,41,5},{15,41,5},{27,3,146},{27,37,83},{27,23,119},{27,39,79},{43,19,244},{43,7,79},{43,39,10},{43,9,106},{15,7,245},{29,9,85},{11,35,76},{11,7,9},{41,39,14},{11,39,17},{27,5,243},{13,7,78},{27,39,9},{45,9,80},{6,1,243},{45,9,80},{27,5,75},{27,5,75},{27,5,75}, +{27,39,75},{43,21,51},{43,39,6},{43,39,6},{43,25,6},{45,39,53},{29,25,6},{11,7,5},{11,7,5},{11,7,5},{11,9,5},{29,3,50},{27,39,5},{27,39,5},{13,25,1},{43,9,50},{13,25,1},{27,3,72},{41,7,4},{41,39,5},{27,39,5},{27,3,72},{39,9,72},{27,39,5},{0,9,80},{39,9,72},{0,9,80},{27,0,74},{27,0,74},{27,0,74},{27,0,74},{43,23,1}, +{43,23,1},{43,23,1},{43,25,5},{29,9,5},{29,9,5},{41,3,148},{41,21,79},{41,7,111},{41,7,91},{11,19,248},{11,37,85},{11,7,10},{27,39,122},{45,21,248},{13,23,83},{25,19,75},{25,21,6},{25,7,6},{25,23,30},{17,10,243},{27,21,81},{11,7,6},{29,23,75},{24,1,243},{29,23,75},{41,5,74},{41,5,74},{41,5,74},{41,23,74},{11,5,56},{11,7,9},{11,7,9}, +{11,9,21},{29,7,54},{13,39,5},{25,37,1},{25,37,1},{25,37,1},{25,39,2},{39,11,50},{41,23,4},{41,23,4},{13,39,4},{22,1,50},{13,39,4},{39,7,72},{25,21,5},{9,7,5},{27,7,2},{39,7,72},{26,1,72},{27,7,2},{0,23,74},{26,1,72},{0,23,74},{41,0,74},{41,0,74},{41,0,74},{41,0,74},{11,7,5},{11,7,5},{11,7,5},{11,9,5},{13,39,1}, 
+{13,39,1},{25,1,154},{9,35,91},{9,21,110},{25,37,89},{41,1,247},{41,5,77},{41,21,13},{41,7,109},{43,5,247},{27,7,85},{39,19,81},{9,35,10},{39,21,13},{9,37,22},{41,1,243},{41,5,73},{41,21,9},{43,7,80},{47,3,243},{43,7,80},{25,3,80},{25,3,80},{25,3,80},{25,37,80},{41,3,56},{41,21,9},{41,21,9},{41,23,9},{27,37,54},{11,23,9},{39,21,9}, +{39,21,9},{39,21,9},{9,7,10},{25,5,50},{25,37,1},{25,37,1},{11,23,5},{10,1,50},{11,23,5},{23,5,72},{9,35,1},{39,21,4},{11,21,4},{23,5,72},{14,1,72},{11,21,4},{0,7,80},{14,1,72},{0,7,80},{25,0,80},{25,0,80},{25,0,80},{25,0,80},{41,5,5},{41,5,5},{41,5,5},{41,23,5},{27,7,5},{27,7,5},{39,1,148},{39,3,79},{39,5,110}, +{39,5,83},{9,1,244},{9,35,79},{9,5,5},{9,21,122},{43,19,248},{11,21,77},{23,17,72},{23,19,2},{23,5,4},{23,5,25},{9,1,243},{25,19,74},{9,5,4},{27,21,73},{46,1,243},{27,21,73},{39,3,75},{39,3,75},{39,3,75},{39,21,75},{9,3,51},{9,5,5},{9,5,5},{9,7,18},{11,5,53},{41,37,2},{23,19,1},{23,19,1},{23,19,1},{23,21,1},{9,3,50}, +{9,5,4},{9,5,4},{41,37,1},{31,3,50},{41,37,1},{23,17,72},{23,19,2},{7,5,4},{25,5,0},{23,17,72},{45,17,72},{25,5,0},{0,21,72},{45,17,72},{0,21,72},{39,0,74},{39,0,74},{39,0,74},{39,0,74},{9,5,1},{9,5,1},{9,5,1},{9,7,2},{41,37,1},{41,37,1},{7,1,184},{23,17,93},{7,19,122},{23,35,89},{23,1,260},{39,3,74},{39,35,15}, +{39,5,106},{41,3,247},{25,5,85},{37,17,81},{7,33,5},{37,19,19},{7,35,17},{19,9,243},{39,3,73},{23,35,10},{41,5,80},{45,1,243},{41,5,80},{23,1,80},{23,1,80},{23,1,80},{23,35,80},{39,17,51},{39,35,6},{39,35,6},{39,21,6},{25,35,54},{9,21,9},{7,33,4},{7,33,4},{7,33,4},{7,5,5},{23,3,50},{23,35,1},{23,35,1},{9,21,5},{43,3,50}, +{9,21,5},{21,3,72},{7,33,1},{37,19,10},{23,35,9},{21,3,72},{43,1,72},{23,35,9},{0,5,80},{43,1,72},{0,5,80},{23,0,80},{23,0,80},{23,0,80},{23,0,80},{39,19,1},{39,19,1},{39,19,1},{39,21,2},{25,5,5},{25,5,5},{21,1,234},{37,1,79},{37,3,110},{37,3,90},{37,1,300},{7,33,82},{7,3,9},{23,19,139},{41,17,248},{9,19,83},{21,1,90}, 
+{21,17,2},{21,3,4},{21,3,25},{21,17,243},{23,17,74},{7,3,5},{25,19,75},{17,21,243},{25,19,75},{37,1,75},{37,1,75},{37,1,75},{37,19,74},{7,1,56},{7,19,6},{7,19,6},{7,5,21},{9,3,53},{9,35,6},{21,17,1},{21,17,1},{21,17,1},{21,19,4},{3,25,50},{7,19,2},{7,19,2},{9,35,2},{29,1,50},{9,35,2},{17,39,72},{21,17,2},{5,3,4}, +{23,3,0},{17,39,72},{39,17,72},{23,3,0},{0,19,74},{39,17,72},{0,19,74},{37,0,74},{37,0,74},{37,0,74},{37,0,74},{7,3,5},{7,3,5},{7,3,5},{7,5,5},{9,35,5},{9,35,5},{5,1,290},{5,1,103},{5,17,110},{21,33,106},{5,1,345},{37,1,78},{37,17,13},{37,3,119},{39,1,248},{23,3,91},{35,1,126},{35,1,6},{35,17,8},{5,33,29},{35,1,243}, +{37,1,74},{37,17,9},{9,33,81},{1,35,243},{9,33,81},{5,1,94},{5,1,94},{5,1,94},{21,33,90},{21,1,61},{37,17,9},{37,17,9},{37,19,9},{23,33,53},{7,19,8},{35,17,4},{35,17,4},{35,17,4},{35,3,4},{19,5,50},{21,33,0},{21,33,0},{7,19,4},{37,3,50},{7,19,4},{17,5,72},{35,1,2},{35,17,4},{7,17,4},{17,5,72},{5,17,72},{7,17,4}, +{0,33,80},{5,17,72},{0,33,80},{5,0,90},{5,0,90},{5,0,90},{5,0,90},{37,1,4},{37,1,4},{37,1,4},{37,19,5},{23,3,1},{23,3,1},{19,1,349},{35,1,185},{35,1,121},{35,1,81},{19,1,398},{5,1,102},{5,1,2},{21,17,122},{37,1,270},{7,17,66},{3,1,126},{19,1,45},{19,1,9},{19,1,25},{33,33,221},{35,1,82},{5,1,1},{23,17,61},{33,33,221}, +{23,17,61},{35,1,121},{35,1,121},{35,1,121},{35,17,73},{35,1,94},{5,1,2},{5,1,2},{5,33,17},{7,1,53},{37,33,1},{19,1,9},{19,1,9},{19,1,9},{19,17,1},{3,3,50},{5,1,1},{5,1,1},{37,33,0},{3,3,50},{37,33,0},{1,3,61},{33,1,25},{3,1,1},{5,1,1},{1,3,61},{3,1,61},{5,1,1},{0,17,61},{3,1,61},{0,17,61},{35,0,72}, +{35,0,72},{35,0,72},{35,0,72},{5,1,1},{5,1,1},{5,1,1},{5,3,1},{37,33,1},{37,33,1},{3,1,239},{3,1,164},{3,1,139},{19,1,89},{3,1,239},{19,1,62},{19,1,26},{35,17,34},{35,1,163},{21,1,10},{33,1,69},{33,1,29},{33,1,20},{33,1,4},{1,3,93},{3,1,38},{19,1,10},{21,1,10},{3,1,93},{21,1,10},{3,1,139},{3,1,139},{3,1,139}, 
+{19,1,89},{19,1,138},{19,1,26},{19,1,26},{35,17,9},{5,1,74},{5,17,8},{33,1,20},{33,1,20},{33,1,20},{33,1,4},{17,3,50},{19,1,10},{19,1,10},{5,17,4},{35,1,50},{5,17,4},{1,17,5},{17,1,1},{17,1,0},{33,1,0},{1,17,5},{17,1,5},{33,1,0},{0,1,9},{17,1,5},{0,1,9},{19,0,80},{19,0,80},{19,0,80},{19,0,80},{19,1,17}, +{19,1,17},{19,1,17},{35,17,5},{21,1,1},{21,1,1},{17,1,162},{33,1,130},{33,1,121},{33,1,81},{33,1,138},{33,1,58},{33,1,49},{3,1,1},{19,1,82},{19,1,10},{17,1,18},{17,1,10},{17,1,9},{17,1,1},{1,17,18},{17,1,6},{17,1,5},{3,1,0},{17,1,18},{3,1,0},{33,1,121},{33,1,121},{33,1,121},{33,1,81},{33,1,89},{33,1,49},{33,1,49}, +{3,1,1},{19,1,46},{19,1,10},{17,1,9},{17,1,9},{17,1,9},{17,1,1},{17,1,13},{17,1,5},{17,1,5},{3,1,0},{33,1,13},{3,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{33,0,72},{33,0,72},{33,0,72},{33,0,72},{3,1,26},{3,1,26},{3,1,26},{3,1,1},{19,1,10}, +{19,1,10},{0,38,200},{0,20,25},{0,34,5},{0,34,85},{0,20,442},{0,34,266},{0,18,125},{0,2,318},{0,18,482},{0,2,343},{0,38,200},{0,20,25},{0,34,5},{0,34,85},{16,4,441},{0,34,266},{0,18,125},{0,2,318},{4,16,441},{0,2,318},{0,4,1},{0,4,1},{0,4,1},{0,2,1},{0,2,41},{0,32,13},{0,32,13},{0,16,25},{0,16,46},{0,16,26},{0,4,1}, +{0,4,1},{0,4,1},{0,2,1},{0,2,41},{0,32,13},{0,32,13},{0,16,25},{2,0,41},{0,16,25},{18,2,200},{0,20,25},{0,34,5},{0,34,85},{18,2,200},{38,0,200},{0,34,85},{0,18,208},{38,0,200},{0,18,208},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,10,202},{0,22,1},{0,20,26}, +{0,4,41},{0,22,689},{0,20,352},{0,4,141},{0,18,468},{0,34,750},{0,18,504},{0,10,202},{0,22,1},{0,20,26},{0,4,41},{34,0,686},{0,20,352},{0,4,141},{0,18,468},{0,34,686},{0,18,468},{0,22,0},{0,22,0},{0,22,0},{0,34,1},{0,34,145},{0,18,45},{0,18,45},{0,32,85},{0,32,158},{0,32,94},{0,22,0},{0,22,0},{0,22,0},{0,34,1},{16,2,145}, 
+{0,18,45},{0,18,45},{0,32,85},{34,0,145},{0,32,85},{20,16,200},{0,22,1},{16,20,5},{0,4,41},{20,16,200},{16,20,200},{0,4,41},{0,34,200},{16,20,200},{0,34,200},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{16,12,229},{16,8,30},{16,6,74},{16,36,58},{0,10,723},{0,6,282},{0,36,69}, +{0,4,414},{0,20,868},{0,34,513},{16,12,204},{16,8,5},{32,6,29},{16,36,33},{4,2,723},{0,6,282},{0,36,69},{0,4,414},{10,0,723},{0,4,414},{16,8,30},{16,8,30},{16,8,30},{16,4,33},{0,6,164},{0,4,25},{0,4,25},{0,18,65},{0,18,206},{0,18,101},{16,8,5},{16,8,5},{16,8,5},{16,4,8},{2,2,162},{0,4,25},{0,4,25},{0,18,65},{2,2,162}, +{0,18,65},{18,8,200},{16,8,1},{2,36,5},{0,36,20},{18,8,200},{44,0,200},{0,36,20},{0,4,218},{44,0,200},{0,4,218},{16,0,29},{16,0,29},{16,0,29},{16,0,29},{0,18,1},{0,18,1},{0,18,1},{0,32,0},{0,32,9},{0,32,9},{16,30,327},{32,40,139},{32,22,202},{16,6,151},{0,28,724},{0,8,236},{0,22,14},{0,20,350},{0,6,956},{0,20,494},{2,12,203}, +{2,24,2},{2,22,27},{2,6,42},{36,2,723},{0,8,236},{0,22,14},{0,20,350},{20,4,723},{0,20,350},{16,42,125},{16,42,125},{16,42,125},{16,6,126},{0,24,164},{0,6,1},{0,6,1},{0,4,37},{0,4,280},{0,34,109},{2,24,1},{2,24,1},{2,24,1},{2,36,2},{18,4,162},{0,6,1},{0,6,1},{0,4,37},{36,2,162},{0,4,37},{8,0,200},{2,24,1},{18,22,5}, +{0,22,10},{8,0,200},{47,0,200},{0,22,10},{0,36,200},{47,0,200},{0,36,200},{16,0,125},{16,0,125},{16,0,125},{16,0,125},{0,6,0},{0,6,0},{0,6,0},{0,18,1},{0,18,37},{0,18,37},{2,46,410},{2,26,218},{2,8,317},{2,38,226},{16,30,725},{16,40,217},{16,38,15},{0,6,312},{0,38,988},{0,6,417},{34,28,209},{18,26,13},{34,8,27},{18,38,34},{8,0,723}, +{0,40,203},{16,38,14},{0,6,296},{47,0,723},{0,6,296},{2,12,208},{2,12,208},{2,12,208},{2,22,209},{16,42,163},{16,38,6},{16,38,6},{16,20,26},{0,6,285},{0,20,81},{18,26,9},{18,26,9},{18,26,9},{34,6,10},{2,24,162},{32,22,5},{32,22,5},{0,20,17},{28,0,162},{0,20,17},{24,2,200},{18,26,4},{4,8,9},{0,8,10},{24,2,200},{13,0,200},{0,8,10}, 
+{0,22,208},{13,0,200},{0,22,208},{2,0,208},{2,0,208},{2,0,208},{2,0,208},{16,8,1},{16,8,1},{16,8,1},{16,4,2},{0,4,45},{0,4,45},{18,15,402},{18,12,222},{34,24,331},{18,24,218},{32,47,725},{32,26,212},{32,24,5},{32,38,324},{0,24,884},{0,38,300},{4,30,200},{4,26,5},{4,24,36},{4,8,41},{40,0,723},{16,26,203},{32,24,4},{0,38,251},{46,2,723}, +{0,38,251},{18,14,203},{18,14,203},{18,14,203},{18,8,202},{32,28,163},{32,24,5},{32,24,5},{32,6,21},{0,38,219},{0,6,13},{4,26,1},{4,26,1},{4,26,1},{4,38,0},{22,2,162},{2,8,1},{2,8,1},{0,6,4},{42,2,162},{0,6,4},{16,11,200},{34,42,4},{20,24,4},{32,24,0},{16,11,200},{25,0,200},{32,24,0},{0,38,202},{25,0,200},{0,38,202},{18,0,202}, +{18,0,202},{18,0,202},{18,0,202},{32,40,1},{32,40,1},{32,40,1},{32,36,1},{0,6,9},{0,6,9},{4,15,408},{4,28,221},{4,10,317},{4,40,221},{18,31,725},{18,42,217},{18,40,6},{2,8,315},{0,26,788},{0,8,228},{20,47,204},{20,12,6},{36,10,27},{20,40,33},{10,2,723},{32,12,203},{18,40,5},{0,8,227},{45,2,723},{0,8,227},{4,14,208},{4,14,208},{4,14,208}, +{4,24,209},{18,44,163},{18,40,2},{18,40,2},{18,22,26},{0,24,179},{0,38,3},{20,12,5},{20,12,5},{20,12,5},{20,24,5},{8,2,162},{18,40,1},{18,40,1},{0,38,2},{30,2,162},{0,38,2},{26,4,200},{20,12,2},{6,40,5},{18,40,4},{26,4,200},{7,0,200},{18,40,4},{0,24,208},{7,0,200},{0,24,208},{4,0,208},{4,0,208},{4,0,208},{4,0,208},{18,10,1}, +{18,10,1},{18,10,1},{18,6,2},{0,38,2},{0,38,2},{20,13,402},{20,14,220},{36,26,331},{20,26,216},{34,45,725},{34,12,213},{34,26,5},{34,24,337},{0,12,740},{16,40,216},{6,31,202},{6,28,1},{6,26,36},{6,10,41},{42,2,723},{18,28,200},{34,26,4},{0,40,209},{44,6,723},{0,40,209},{20,46,200},{20,46,200},{20,46,200},{20,10,201},{34,30,163},{34,26,5},{34,26,5}, 
+{34,8,18},{0,26,164},{32,8,2},{6,28,0},{6,28,0},{6,28,0},{6,40,1},{24,4,162},{4,10,1},{4,10,1},{2,8,2},{11,0,162},{2,8,2},{14,0,200},{6,28,1},{22,26,4},{34,26,0},{14,0,200},{8,10,200},{34,26,0},{0,40,200},{8,10,200},{0,40,200},{20,0,200},{20,0,200},{20,0,200},{20,0,200},{34,42,1},{34,42,1},{34,42,1},{34,38,1},{32,8,1}, +{32,8,1},{6,13,404},{6,30,219},{6,12,327},{6,42,215},{20,29,725},{20,44,217},{20,12,9},{4,10,321},{0,14,723},{32,26,222},{38,15,206},{38,14,8},{38,12,24},{22,42,34},{14,0,723},{4,14,203},{20,12,8},{0,26,203},{8,10,723},{0,26,203},{6,47,202},{6,47,202},{6,47,202},{6,26,206},{20,46,163},{20,12,5},{20,12,5},{20,24,30},{32,42,164},{2,40,6},{38,44,4}, +{38,44,4},{38,44,4},{38,10,5},{38,10,162},{20,12,4},{20,12,4},{2,40,2},{23,0,162},{2,40,2},{30,2,200},{38,14,4},{8,12,4},{4,12,4},{30,2,200},{35,2,200},{4,12,4},{0,26,202},{35,2,200},{0,26,202},{6,0,202},{6,0,202},{6,0,202},{6,0,202},{20,12,1},{20,12,1},{20,12,1},{20,8,2},{2,10,4},{2,10,4},{22,11,402},{22,47,227},{38,44,321}, +{22,28,218},{36,27,727},{36,30,215},{36,28,13},{36,42,327},{32,30,725},{18,42,212},{8,29,200},{8,30,4},{24,28,29},{8,12,45},{0,1,723},{20,30,203},{6,28,5},{32,42,202},{1,0,723},{32,42,202},{22,15,202},{22,15,202},{22,15,202},{22,12,202},{36,15,168},{36,28,13},{36,28,13},{36,10,17},{2,28,163},{34,10,9},{8,30,0},{8,30,0},{8,30,0},{8,42,0},{28,2,162}, +{6,12,5},{6,12,5},{4,10,4},{42,8,162},{4,10,4},{47,2,200},{8,30,4},{24,28,4},{36,28,0},{47,2,200},{17,2,200},{36,28,0},{0,42,202},{17,2,200},{0,42,202},{22,0,202},{22,0,202},{22,0,202},{22,0,202},{36,14,5},{36,14,5},{36,14,5},{36,40,4},{34,26,2},{34,26,2},{8,11,408},{8,31,221},{8,14,312},{8,44,226},{22,27,724},{22,46,217},{22,44,12}, 
+{6,12,321},{2,47,728},{34,28,234},{40,29,209},{24,47,10},{40,14,22},{24,44,34},{18,3,723},{36,47,203},{22,44,11},{32,28,208},{45,8,723},{32,28,208},{8,15,209},{8,15,209},{8,15,209},{8,28,209},{22,15,165},{22,44,3},{22,44,3},{22,26,30},{34,44,164},{4,42,6},{40,30,10},{40,30,10},{40,30,10},{40,12,10},{16,3,162},{22,44,2},{22,44,2},{4,42,2},{17,0,162}, +{4,42,2},{15,2,200},{24,47,1},{10,14,4},{6,14,5},{15,2,200},{33,4,200},{6,14,5},{0,28,208},{33,4,200},{0,28,208},{8,0,208},{8,0,208},{8,0,208},{8,0,208},{22,14,1},{22,14,1},{22,14,1},{22,10,2},{4,12,4},{4,12,4},{24,9,402},{24,45,227},{40,46,324},{24,30,218},{38,25,727},{38,47,213},{38,30,13},{38,28,325},{34,31,727},{20,44,218},{10,27,202}, +{10,31,1},{26,30,29},{10,14,45},{4,3,723},{22,31,200},{8,30,8},{34,44,202},{3,4,723},{34,44,202},{24,13,202},{24,13,202},{24,13,202},{24,14,202},{38,13,168},{38,30,13},{38,30,13},{38,12,17},{4,30,163},{36,12,9},{10,31,0},{10,31,0},{10,31,0},{10,44,0},{30,4,162},{8,14,4},{8,14,4},{6,12,4},{15,8,162},{6,12,4},{43,0,200},{10,31,1},{26,30,4}, +{38,30,0},{43,0,200},{19,6,200},{38,30,0},{0,44,202},{19,6,200},{0,44,202},{24,0,202},{24,0,202},{24,0,202},{24,0,202},{38,46,5},{38,46,5},{38,46,5},{38,42,4},{20,28,4},{20,28,4},{10,9,404},{10,29,219},{26,47,330},{10,46,222},{24,9,723},{24,15,209},{24,47,18},{8,14,324},{4,45,723},{36,30,222},{42,11,206},{42,15,8},{42,47,22},{26,46,41},{43,0,723}, +{8,15,202},{24,47,18},{4,30,202},{19,6,723},{4,30,202},{10,43,202},{10,43,202},{10,43,202},{10,30,206},{24,13,162},{24,46,8},{24,46,8},{24,44,25},{20,47,168},{22,44,4},{42,15,4},{42,15,4},{42,15,4},{42,14,8},{45,0,162},{40,46,4},{40,46,4},{22,44,0},{27,8,162},{22,44,0},{38,3,200},{42,15,4},{12,47,9},{8,47,10},{38,3,200},{35,8,200},{8,47,10}, 
+{0,30,202},{35,8,200},{0,30,202},{10,0,202},{10,0,202},{10,0,202},{10,0,202},{24,47,0},{24,47,0},{24,47,0},{24,12,4},{22,44,4},{22,44,4},{26,7,408},{42,13,236},{42,15,332},{26,31,224},{40,23,727},{40,45,213},{40,31,8},{40,46,312},{36,29,725},{22,46,218},{12,25,201},{12,13,6},{28,31,29},{12,47,42},{11,0,723},{24,29,203},{40,31,4},{6,46,209},{5,8,723}, +{6,46,209},{26,41,208},{26,41,208},{26,41,208},{26,47,208},{40,11,168},{40,31,8},{40,31,8},{40,14,22},{6,31,163},{38,30,10},{12,13,2},{12,13,2},{12,13,2},{12,46,2},{29,2,162},{10,47,2},{10,47,2},{22,30,9},{35,6,162},{22,30,9},{41,2,200},{42,13,4},{44,31,1},{40,31,0},{41,2,200},{17,8,200},{40,31,0},{0,46,208},{17,8,200},{0,46,208},{26,0,208}, +{26,0,208},{26,0,208},{26,0,208},{40,15,4},{40,15,4},{40,15,4},{40,44,5},{38,30,1},{38,30,1},{12,7,404},{12,27,212},{12,45,332},{12,15,215},{26,23,724},{26,13,216},{26,15,8},{26,47,318},{6,43,723},{8,47,227},{44,9,206},{44,13,8},{44,45,22},{28,15,38},{24,3,723},{10,13,202},{26,15,8},{6,31,209},{3,24,723},{6,31,209},{12,41,202},{12,41,202},{12,41,202}, +{12,31,203},{26,11,162},{26,15,4},{26,15,4},{26,46,25},{38,15,162},{24,46,4},{44,13,4},{44,13,4},{44,13,4},{44,47,5},{43,2,162},{26,15,4},{26,15,4},{24,46,0},{21,8,162},{24,46,0},{29,12,200},{28,43,2},{14,15,8},{26,15,4},{29,12,200},{26,13,200},{26,15,4},{0,31,208},{26,13,200},{0,31,208},{12,0,202},{12,0,202},{12,0,202},{12,0,202},{26,45,0}, +{26,45,0},{26,45,0},{26,14,4},{24,46,4},{24,46,4},{28,5,402},{28,41,222},{44,13,332},{28,29,218},{42,21,727},{42,43,213},{42,29,8},{42,31,340},{38,27,724},{24,15,217},{14,23,201},{14,27,3},{30,29,29},{14,45,42},{9,2,723},{26,27,201},{42,29,4},{22,15,201},{2,9,723},{22,15,201},{28,9,202},{28,9,202},{28,9,202},{28,45,203},{42,9,168},{42,29,8},{42,29,8}, 
+{42,47,21},{8,29,163},{40,47,2},{14,11,2},{14,11,2},{14,11,2},{14,15,2},{38,1,162},{12,45,2},{12,45,2},{10,47,2},{43,28,162},{10,47,2},{37,0,200},{14,27,2},{46,29,1},{42,29,0},{37,0,200},{36,9,200},{42,29,0},{0,15,200},{36,9,200},{0,15,200},{28,0,202},{28,0,202},{28,0,202},{28,0,202},{42,13,4},{42,13,4},{42,13,4},{42,46,5},{40,47,1}, +{40,47,1},{14,35,400},{14,25,217},{30,43,340},{14,13,213},{28,5,723},{28,11,209},{28,43,8},{12,45,332},{8,41,724},{40,29,222},{46,7,201},{46,41,2},{46,43,21},{30,13,41},{37,0,723},{12,41,203},{28,43,8},{8,29,202},{36,9,723},{8,29,202},{14,23,201},{14,23,201},{14,23,201},{14,13,204},{28,9,162},{28,43,4},{28,43,4},{28,31,29},{40,13,164},{26,15,3},{46,11,2}, +{46,11,2},{46,11,2},{46,29,5},{39,0,162},{44,13,2},{44,13,2},{10,15,2},{9,28,162},{10,15,2},{21,2,200},{46,41,1},{47,43,5},{12,43,4},{21,2,200},{24,9,200},{12,43,4},{0,29,202},{24,9,200},{0,29,202},{14,0,200},{14,0,200},{14,0,200},{14,0,200},{28,43,0},{28,43,0},{28,43,0},{28,47,1},{26,15,2},{26,15,2},{30,3,410},{46,9,227},{46,27,318}, +{30,27,224},{14,5,728},{44,25,215},{14,27,8},{44,13,332},{40,25,725},{26,13,212},{47,21,200},{47,25,4},{47,27,25},{47,43,52},{5,0,723},{28,25,203},{14,27,4},{40,13,202},{0,5,723},{40,13,202},{30,7,209},{30,7,209},{30,7,209},{30,43,208},{14,39,166},{14,27,8},{14,27,8},{44,45,22},{10,27,163},{12,45,8},{47,25,0},{47,25,0},{47,25,0},{47,13,0},{23,2,162}, +{14,27,4},{14,27,4},{12,45,4},{20,9,162},{12,45,4},{35,2,200},{47,25,4},{15,27,4},{44,27,0},{35,2,200},{34,5,200},{44,27,0},{0,13,202},{34,5,200},{0,13,202},{30,0,208},{30,0,208},{30,0,208},{30,0,208},{14,27,4},{14,27,4},{14,27,4},{14,15,8},{42,29,2},{42,29,2},{47,3,408},{47,23,218},{47,41,312},{47,11,226},{30,3,723},{30,9,209},{30,41,8}, 
+{14,43,332},{10,39,723},{12,43,236},{15,21,209},{31,39,10},{15,41,22},{31,11,43},{30,3,723},{44,39,204},{30,41,8},{40,27,208},{42,9,723},{40,27,208},{47,7,209},{47,7,209},{47,7,209},{47,27,208},{30,7,162},{30,41,4},{30,41,4},{30,29,29},{42,11,164},{12,13,6},{31,23,9},{31,23,9},{31,23,9},{31,27,10},{37,2,162},{46,11,2},{46,11,2},{12,13,2},{34,7,162}, +{12,13,2},{46,1,200},{31,39,1},{45,41,5},{14,41,4},{46,1,200},{26,7,200},{14,41,4},{0,27,208},{26,7,200},{0,27,208},{47,0,208},{47,0,208},{47,0,208},{47,0,208},{30,41,0},{30,41,0},{30,41,0},{30,45,1},{12,43,4},{12,43,4},{31,1,402},{31,37,222},{15,9,324},{31,25,218},{46,17,733},{46,23,215},{46,25,18},{46,27,330},{42,23,725},{28,11,219},{45,19,202}, +{45,23,4},{45,25,25},{45,41,52},{3,2,723},{30,23,203},{47,25,8},{42,11,202},{2,3,723},{42,11,202},{31,5,202},{31,5,202},{31,5,202},{31,41,202},{46,5,173},{46,25,18},{46,25,18},{46,43,22},{12,25,163},{14,43,8},{45,23,0},{45,23,0},{45,23,0},{45,11,0},{44,1,162},{47,41,4},{47,41,4},{14,43,4},{26,9,162},{14,43,4},{31,1,200},{45,23,4},{13,25,4}, +{46,25,0},{31,1,200},{32,1,200},{46,25,0},{0,11,202},{32,1,200},{0,11,202},{31,0,202},{31,0,202},{31,0,202},{31,0,202},{46,9,10},{46,9,10},{46,9,10},{46,13,9},{28,27,4},{28,27,4},{45,1,404},{45,21,218},{29,39,325},{45,9,222},{31,1,723},{31,7,209},{31,39,13},{47,41,324},{12,37,725},{44,25,227},{13,3,206},{13,37,9},{13,39,17},{29,9,48},{31,1,723}, +{47,7,208},{31,39,13},{12,25,202},{32,1,723},{12,25,202},{45,35,202},{45,35,202},{45,35,202},{45,25,203},{31,5,162},{31,9,8},{31,9,8},{31,27,29},{14,9,166},{30,11,1},{13,7,4},{13,7,4},{13,7,4},{13,25,8},{31,5,162},{15,9,4},{15,9,4},{30,11,0},{14,9,162},{30,11,0},{17,6,200},{29,21,4},{43,39,4},{46,39,5},{17,6,200},{20,1,200},{46,39,5}, 
+{0,25,202},{20,1,200},{0,25,202},{45,0,202},{45,0,202},{45,0,202},{45,0,202},{31,39,0},{31,39,0},{31,39,0},{31,27,4},{30,11,1},{30,11,1},{13,1,440},{29,35,234},{13,7,321},{29,23,218},{45,1,732},{15,21,215},{45,23,12},{15,9,312},{44,21,731},{30,9,221},{43,17,201},{43,5,6},{27,23,30},{43,39,46},{45,1,723},{31,21,203},{45,23,3},{14,9,209},{27,9,723}, +{14,9,209},{29,33,208},{29,33,208},{29,33,208},{29,39,208},{15,3,168},{45,23,11},{45,23,11},{15,41,22},{14,23,168},{46,25,10},{43,5,2},{43,5,2},{43,5,2},{43,9,2},{17,2,162},{45,23,2},{45,23,2},{30,25,10},{16,1,162},{30,25,10},{27,3,200},{13,5,4},{11,23,2},{15,23,1},{27,3,200},{39,9,200},{15,23,1},{0,9,208},{39,9,200},{0,9,208},{29,0,208}, +{29,0,208},{29,0,208},{29,0,208},{15,7,5},{15,7,5},{15,7,5},{15,11,4},{46,25,1},{46,25,1},{43,1,500},{43,19,212},{43,37,327},{43,7,220},{13,1,760},{29,5,209},{29,37,13},{45,39,321},{14,35,724},{46,23,227},{11,1,206},{11,35,9},{11,37,17},{27,7,43},{33,10,723},{15,35,204},{29,37,13},{14,23,202},{38,1,723},{14,23,202},{43,33,202},{43,33,202},{43,33,202}, +{43,23,203},{29,3,162},{29,7,5},{29,7,5},{29,25,29},{46,7,165},{31,9,4},{11,5,4},{11,5,4},{11,5,4},{11,39,5},{29,3,162},{13,7,5},{13,7,5},{31,9,0},{43,9,162},{31,9,0},{39,7,200},{27,35,2},{41,37,4},{15,37,5},{39,7,200},{26,1,200},{15,37,5},{0,23,202},{26,1,200},{0,23,202},{43,0,202},{43,0,202},{43,0,202},{43,0,202},{29,37,0}, +{29,37,0},{29,37,0},{29,25,4},{31,9,4},{31,9,4},{11,1,530},{27,33,222},{11,5,321},{27,21,219},{27,1,812},{13,19,215},{13,21,9},{13,7,327},{46,19,724},{31,7,219},{25,1,225},{41,3,6},{25,21,30},{41,37,46},{41,3,723},{29,19,203},{13,21,5},{46,7,202},{21,9,723},{46,7,202},{27,1,203},{27,1,203},{27,1,203},{27,37,202},{13,1,168},{13,21,8},{13,21,8}, 
+{13,39,24},{47,21,163},{15,39,8},{41,3,2},{41,3,2},{41,3,2},{41,7,1},{39,11,162},{13,21,4},{13,21,4},{45,39,4},{22,1,162},{45,39,4},{25,1,200},{11,3,4},{9,21,2},{13,21,1},{25,1,200},{33,9,200},{13,21,1},{0,7,202},{33,9,200},{0,7,202},{27,0,202},{27,0,202},{27,0,202},{27,0,202},{13,5,4},{13,5,4},{13,5,4},{13,9,4},{15,39,4}, +{15,39,4},{25,1,634},{41,17,216},{25,35,337},{41,5,220},{41,1,863},{27,3,211},{27,35,5},{27,37,331},{47,33,725},{15,21,220},{9,1,251},{9,33,2},{9,35,18},{25,5,48},{25,1,723},{43,33,203},{27,35,5},{47,21,200},{33,9,723},{47,21,200},{41,1,209},{41,1,209},{41,1,209},{41,5,204},{27,1,164},{27,35,4},{27,35,4},{27,7,36},{15,5,164},{29,7,1},{9,3,2}, +{9,3,2},{9,3,2},{9,21,2},{25,5,162},{11,5,1},{11,5,1},{29,7,0},{10,1,162},{29,7,0},{23,17,200},{9,33,1},{39,35,1},{43,35,1},{23,17,200},{45,17,200},{43,35,1},{0,21,200},{45,17,200},{0,21,200},{41,0,200},{41,0,200},{41,0,200},{41,0,200},{27,35,0},{27,35,0},{27,35,0},{27,23,4},{29,7,1},{29,7,1},{39,1,724},{9,1,228},{9,3,315}, +{25,19,218},{9,1,932},{11,17,213},{41,19,6},{11,5,317},{15,17,725},{29,5,221},{23,1,288},{39,1,3},{23,19,26},{39,19,51},{39,1,723},{27,17,203},{41,19,2},{15,5,208},{31,1,723},{15,5,208},{9,1,227},{9,1,227},{9,1,227},{25,35,209},{41,1,174},{41,19,5},{41,19,5},{11,37,27},{45,19,163},{13,21,6},{39,1,2},{39,1,2},{39,1,2},{39,5,2},{9,3,162}, +{41,19,1},{41,19,1},{13,21,5},{31,3,162},{13,21,5},{21,3,200},{39,1,2},{7,19,2},{11,19,1},{21,3,200},{43,1,200},{11,19,1},{0,5,208},{43,1,200},{0,5,208},{25,0,208},{25,0,208},{25,0,208},{25,0,208},{41,19,4},{41,19,4},{41,19,4},{41,7,5},{13,21,2},{13,21,2},{23,1,864},{39,1,300},{39,33,324},{39,3,222},{39,1,1020},{25,1,211},{25,33,5}, 
+{25,35,331},{29,1,732},{13,19,222},{37,1,347},{7,1,13},{7,33,21},{23,3,48},{1,13,723},{25,1,211},{25,33,5},{15,19,203},{13,1,723},{15,19,203},{39,1,251},{39,1,251},{39,1,251},{39,19,203},{9,1,195},{25,33,4},{25,33,4},{25,5,36},{13,3,164},{27,5,5},{7,1,4},{7,1,4},{7,1,4},{7,35,8},{23,3,162},{9,3,1},{9,3,1},{27,5,1},{43,3,162}, +{27,5,1},{17,39,200},{7,1,9},{37,33,1},{41,33,1},{17,39,200},{39,17,200},{41,33,1},{0,19,202},{39,17,200},{0,19,202},{39,0,202},{39,0,202},{39,0,202},{39,0,202},{25,33,0},{25,33,0},{25,33,0},{25,21,4},{43,35,4},{43,35,4},{37,1,1012},{7,1,417},{7,1,312},{23,17,218},{7,1,1144},{39,1,268},{39,17,15},{9,3,317},{27,1,804},{27,3,218},{5,1,398}, +{21,1,81},{21,17,26},{37,17,51},{35,3,723},{23,1,254},{39,17,6},{13,3,208},{25,1,723},{13,3,208},{7,1,296},{7,1,296},{7,1,296},{23,33,209},{39,1,243},{39,17,14},{39,17,14},{9,35,27},{43,17,163},{27,19,13},{21,1,17},{21,1,17},{21,1,17},{37,3,1},{3,25,162},{39,17,5},{39,17,5},{27,19,9},{29,1,162},{27,19,9},{19,1,200},{5,1,45},{5,17,2}, +{9,17,1},{19,1,200},{37,1,200},{9,17,1},{0,3,208},{37,1,200},{0,3,208},{23,0,208},{23,0,208},{23,0,208},{23,0,208},{9,1,10},{9,1,10},{9,1,10},{9,5,9},{27,19,4},{27,19,4},{5,1,919},{21,1,494},{21,1,350},{37,1,201},{21,1,1014},{7,1,251},{23,1,14},{23,33,202},{9,1,721},{41,33,139},{19,1,341},{35,1,109},{5,1,37},{5,1,26},{3,3,546}, +{37,1,213},{7,1,1},{43,17,125},{3,3,546},{43,17,125},{21,1,350},{21,1,350},{21,1,350},{37,1,201},{37,1,312},{23,1,14},{23,1,14},{23,3,27},{41,1,168},{25,3,2},{5,1,37},{5,1,37},{5,1,37},{5,17,5},{19,5,162},{7,1,1},{7,1,1},{25,3,1},{37,3,162},{25,3,1},{33,1,113},{19,1,37},{19,1,1},{7,1,0},{33,1,113},{35,1,113},{7,1,0}, +{0,17,125},{35,1,113},{0,17,125},{37,0,200},{37,0,200},{37,0,200},{37,0,200},{23,1,10},{23,1,10},{23,1,10},{23,19,5},{25,3,1},{25,3,1},{35,1,773},{35,1,513},{5,1,414},{5,1,227},{35,1,818},{21,1,218},{37,1,69},{7,17,74},{23,1,534},{9,17,30},{3,1,190},{19,1,101},{19,1,65},{35,1,5},{33,33,333},{35,1,114},{5,1,25},{9,17,30},{33,33,333}, 
+{9,17,30},{5,1,414},{5,1,414},{5,1,414},{5,1,227},{21,1,373},{37,1,69},{37,1,69},{7,33,29},{9,1,216},{9,17,5},{19,1,65},{19,1,65},{19,1,65},{35,1,5},{3,3,162},{5,1,25},{5,1,25},{9,17,5},{3,3,162},{9,17,5},{17,1,25},{33,1,9},{33,1,0},{19,1,1},{17,1,25},{33,1,25},{19,1,1},{0,17,29},{33,1,25},{0,17,29},{5,0,218}, +{5,0,218},{5,0,218},{5,0,218},{37,1,20},{37,1,20},{37,1,20},{37,3,5},{9,17,1},{9,17,1},{3,1,642},{19,1,504},{19,1,468},{35,1,264},{19,1,657},{35,1,229},{5,1,141},{21,1,26},{21,1,457},{23,1,1},{33,1,134},{33,1,94},{33,1,85},{3,1,26},{1,19,193},{19,1,81},{19,1,45},{23,1,0},{19,1,193},{23,1,0},{19,1,468},{19,1,468},{19,1,468}, +{35,1,264},{35,1,425},{5,1,141},{5,1,141},{21,1,26},{7,1,254},{23,1,1},{33,1,85},{33,1,85},{33,1,85},{3,1,26},{17,3,145},{19,1,45},{19,1,45},{23,1,0},{35,1,145},{23,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{35,0,200},{35,0,200},{35,0,200},{35,0,200},{5,1,41}, +{5,1,41},{5,1,41},{21,17,5},{23,1,1},{23,1,1},{3,1,418},{3,1,343},{3,1,318},{3,1,243},{3,1,370},{19,1,161},{19,1,125},{35,1,5},{35,1,250},{21,1,25},{17,1,34},{17,1,26},{17,1,25},{33,1,13},{17,1,54},{33,1,22},{33,1,13},{5,1,1},{33,1,54},{5,1,1},{3,1,318},{3,1,318},{3,1,318},{3,1,243},{3,1,270},{19,1,125},{19,1,125}, +{35,1,5},{5,1,165},{21,1,25},{17,1,25},{17,1,25},{17,1,25},{33,1,13},{1,3,41},{33,1,13},{33,1,13},{5,1,1},{3,1,41},{5,1,1},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{19,0,208},{19,0,208},{19,0,208},{19,0,208},{35,1,85},{35,1,85},{35,1,85},{35,1,5},{21,1,25}, +{21,1,25},{0,26,421},{0,38,45},{0,36,2},{0,20,160},{0,38,926},{0,20,577},{0,4,264},{0,18,701},{0,34,989},{0,18,737},{0,26,421},{0,38,45},{0,36,2},{0,20,160},{34,0,925},{0,20,577},{0,4,264},{0,18,701},{0,34,925},{0,18,701},{0,20,1},{0,20,1},{0,20,1},{0,18,1},{0,18,85},{0,2,34},{0,2,34},{0,32,53},{0,32,94},{0,32,62},{0,20,1}, 
+{0,20,1},{0,20,1},{0,18,1},{32,0,85},{0,2,34},{0,2,34},{0,32,53},{18,0,85},{0,32,53},{34,4,421},{0,38,45},{0,36,2},{0,20,160},{34,4,421},{26,0,421},{0,20,160},{0,34,421},{26,0,421},{0,34,421},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,44,421},{0,24,13},{0,22,32}, +{0,36,113},{0,24,1262},{0,6,670},{0,20,304},{0,4,886},{0,4,1382},{0,34,953},{0,44,421},{0,24,13},{0,22,32},{0,36,113},{18,4,1261},{0,6,670},{0,20,304},{0,4,886},{36,2,1261},{0,4,886},{0,8,0},{0,8,0},{0,8,0},{0,4,0},{0,4,221},{0,34,73},{0,34,73},{0,2,125},{0,2,246},{0,2,150},{0,8,0},{0,8,0},{0,8,0},{0,4,0},{2,0,221}, +{0,34,73},{0,34,73},{0,2,125},{4,0,221},{0,2,125},{22,0,421},{0,24,13},{16,6,8},{0,36,113},{22,0,421},{44,0,421},{0,36,113},{0,20,433},{44,0,421},{0,20,433},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,31,430},{0,26,10},{16,38,79},{0,22,74},{0,42,1514},{0,38,717},{0,6,253}, +{0,20,965},{0,36,1713},{0,4,1109},{16,46,425},{16,10,9},{16,38,54},{0,22,74},{20,2,1514},{0,38,717},{0,6,253},{0,20,965},{42,0,1514},{0,20,965},{0,26,10},{0,26,10},{0,26,10},{0,36,9},{0,6,340},{0,20,97},{0,20,97},{0,18,185},{0,18,382},{0,18,221},{16,10,5},{16,10,5},{16,10,5},{16,36,5},{2,2,338},{0,20,97},{0,20,97},{0,18,185},{2,2,338}, +{0,18,185},{8,0,421},{0,26,1},{2,38,2},{0,22,65},{8,0,421},{31,0,421},{0,22,65},{0,36,421},{31,0,421},{0,36,421},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,32,0},{0,32,0},{0,32,0},{0,16,0},{0,16,1},{0,16,1},{16,45,482},{16,12,66},{32,24,186},{16,8,126},{0,14,1517},{0,24,605},{0,38,117},{0,6,886},{0,22,1815},{0,36,1085},{32,31,421}, +{32,42,1},{2,8,51},{32,38,85},{22,16,1514},{0,24,605},{0,38,117},{0,6,886},{44,16,1514},{0,6,886},{16,28,61},{16,28,61},{16,28,61},{16,22,62},{0,24,340},{0,22,37},{0,22,37},{0,4,125},{0,4,456},{0,34,213},{32,42,0},{32,42,0},{32,42,0},{32,6,1},{18,4,338},{0,22,37},{0,22,37},{0,4,125},{36,2,338},{0,4,125},{40,0,421},{32,42,1},{18,8,5}, 
+{0,8,37},{40,0,421},{13,0,421},{0,8,37},{0,22,433},{13,0,421},{0,22,433},{16,0,61},{16,0,61},{16,0,61},{16,0,61},{0,4,1},{0,4,1},{0,4,1},{0,2,1},{0,32,25},{0,32,25},{32,43,623},{32,44,219},{32,10,382},{32,24,243},{0,15,1517},{0,26,497},{0,24,35},{0,22,761},{0,8,2003},{0,22,1050},{18,15,422},{18,28,6},{18,40,51},{2,24,77},{24,0,1514}, +{0,26,497},{0,24,35},{0,22,761},{32,8,1514},{0,22,761},{32,30,202},{32,30,202},{32,30,202},{32,8,203},{0,28,338},{0,8,10},{0,8,10},{0,20,89},{0,6,565},{0,20,233},{18,12,1},{18,12,1},{18,12,1},{18,38,1},{2,24,338},{0,8,10},{0,8,10},{0,20,89},{28,0,338},{0,20,89},{40,4,421},{2,28,5},{4,40,1},{0,24,26},{40,4,421},{25,0,421},{0,24,26}, +{0,38,425},{25,0,421},{0,38,425},{32,0,202},{32,0,202},{32,0,202},{32,0,202},{0,8,1},{0,8,1},{0,8,1},{0,4,1},{0,34,68},{0,34,68},{2,11,821},{2,30,429},{2,26,634},{2,10,429},{0,27,1514},{0,28,446},{0,10,8},{0,8,670},{0,40,2187},{0,38,1083},{34,29,422},{34,14,6},{4,26,50},{34,10,86},{38,6,1514},{0,28,446},{0,10,8},{0,8,670},{27,0,1514}, +{0,8,670},{2,31,401},{2,31,401},{2,31,401},{2,24,404},{0,46,340},{0,10,4},{0,10,4},{0,6,52},{0,22,677},{0,6,277},{34,14,2},{34,14,2},{34,14,2},{34,8,5},{22,2,338},{0,10,4},{0,10,4},{0,6,52},{42,2,338},{0,6,52},{28,0,421},{34,14,5},{36,10,5},{0,10,4},{28,0,421},{7,0,421},{0,10,4},{0,24,425},{7,0,421},{0,24,425},{2,0,400}, +{2,0,400},{2,0,400},{2,0,400},{0,26,0},{0,26,0},{0,26,0},{0,36,1},{0,4,149},{0,4,149},{18,9,842},{18,47,462},{34,12,661},{18,26,450},{16,25,1521},{16,14,441},{16,42,15},{16,24,653},{0,26,2030},{0,24,882},{20,13,425},{20,30,9},{20,42,54},{4,26,77},{26,2,1514},{0,14,422},{16,42,11},{0,24,626},{38,8,1514},{0,24,626},{18,45,421},{18,45,421},{18,45,421}, 
+{18,10,422},{16,15,344},{16,42,14},{16,42,14},{16,38,38},{0,24,602},{0,38,162},{20,14,4},{20,14,4},{20,14,4},{20,40,5},{8,2,338},{32,26,2},{32,26,2},{0,38,18},{30,2,338},{0,38,18},{14,0,421},{4,30,5},{6,42,1},{0,42,1},{14,0,421},{19,0,421},{0,42,1},{0,40,421},{19,0,421},{0,40,421},{18,0,421},{18,0,421},{18,0,421},{18,0,421},{16,12,5}, +{16,12,5},{16,12,5},{16,6,8},{0,6,80},{0,6,80},{4,9,854},{4,31,458},{4,28,674},{34,12,453},{2,25,1515},{32,30,445},{2,12,9},{32,10,657},{0,28,1850},{0,10,666},{36,27,422},{36,46,6},{6,28,48},{36,12,86},{16,7,1514},{16,46,421},{2,12,8},{0,10,545},{21,0,1514},{0,10,545},{34,13,433},{34,13,433},{34,13,433},{34,42,433},{2,15,339},{2,12,5},{2,12,5}, +{32,8,50},{0,10,477},{0,8,62},{36,46,2},{36,46,2},{36,46,2},{36,10,5},{24,4,338},{2,12,4},{2,12,4},{0,24,9},{11,0,338},{0,24,9},{0,1,421},{36,46,5},{38,12,5},{2,12,4},{0,1,421},{1,0,421},{2,12,4},{0,26,425},{1,0,421},{0,26,425},{4,0,433},{4,0,433},{4,0,433},{4,0,433},{2,28,1},{2,28,1},{2,28,1},{2,38,2},{0,8,37}, +{0,8,37},{20,7,842},{20,45,455},{36,14,655},{20,28,450},{34,9,1523},{18,47,450},{34,44,13},{18,42,659},{0,14,1731},{0,42,542},{22,11,422},{22,47,5},{22,44,50},{6,28,86},{30,0,1514},{2,47,421},{34,44,4},{0,42,506},{33,0,1514},{0,42,506},{20,43,421},{20,43,421},{20,43,421},{20,12,425},{18,13,347},{34,28,11},{34,28,11},{18,40,43},{0,12,389},{0,10,9},{22,47,1}, +{22,47,1},{22,47,1},{22,42,1},{38,10,338},{34,28,2},{34,28,2},{0,10,5},{23,0,338},{0,10,5},{2,1,421},{22,47,4},{8,44,0},{18,44,0},{2,1,421},{1,2,421},{18,44,0},{0,42,425},{1,2,421},{0,42,425},{20,0,421},{20,0,421},{20,0,421},{20,0,421},{18,30,9},{18,30,9},{18,30,9},{34,8,10},{0,10,5},{0,10,5},{6,37,846},{6,29,454},{6,46,685}, 
+{6,14,459},{4,7,1517},{4,31,446},{4,14,19},{34,12,655},{0,47,1635},{0,12,478},{38,9,425},{38,15,6},{8,30,41},{38,14,77},{47,0,1514},{18,15,422},{20,14,11},{0,12,477},{27,6,1514},{0,12,477},{6,27,426},{6,27,426},{6,27,426},{6,44,426},{4,13,340},{4,14,10},{4,14,10},{4,26,50},{0,44,355},{16,26,6},{38,15,5},{38,15,5},{38,15,5},{38,28,5},{28,2,338}, +{20,14,2},{20,14,2},{32,26,1},{42,8,338},{32,26,1},{29,0,421},{38,15,2},{40,14,10},{18,30,9},{29,0,421},{33,4,421},{18,30,9},{0,28,425},{33,4,421},{0,28,425},{6,0,425},{6,0,425},{6,0,425},{6,0,425},{4,30,1},{4,30,1},{4,30,1},{4,40,0},{16,26,5},{16,26,5},{22,5,842},{22,43,455},{38,47,679},{22,30,455},{20,21,1521},{20,15,438},{36,46,13}, +{20,28,653},{0,15,1553},{16,44,454},{24,9,422},{24,45,5},{24,46,50},{8,30,77},{34,3,1514},{4,45,422},{36,46,4},{0,44,434},{35,4,1514},{0,44,434},{22,41,421},{22,41,421},{22,41,421},{22,14,422},{20,11,342},{36,30,11},{36,30,11},{20,42,38},{0,46,340},{32,12,5},{24,45,1},{24,45,1},{24,45,1},{24,44,1},{16,3,338},{36,30,2},{36,30,2},{32,12,4},{17,0,338}, +{32,12,4},{43,0,421},{24,45,4},{10,46,0},{4,46,1},{43,0,421},{19,6,421},{4,46,1},{0,44,425},{19,6,421},{0,44,425},{22,0,421},{22,0,421},{22,0,421},{22,0,421},{20,31,5},{20,31,5},{20,31,5},{20,26,5},{32,12,1},{32,12,1},{8,5,854},{8,27,458},{24,31,670},{38,47,462},{6,5,1517},{6,29,446},{6,47,6},{36,14,655},{0,29,1530},{32,30,465},{40,23,422}, +{40,13,2},{10,31,48},{40,46,90},{45,2,1514},{20,13,425},{6,47,6},{0,30,434},{25,8,1514},{0,30,434},{8,25,434},{8,25,434},{8,25,434},{8,30,437},{6,11,340},{6,47,5},{6,47,5},{6,12,52},{16,47,342},{18,28,1},{40,13,1},{40,13,1},{40,13,1},{40,14,5},{30,4,338},{22,46,4},{22,46,4},{18,28,1},{15,8,338},{18,28,1},{11,0,421},{40,13,1},{26,47,5}, 
+{6,47,2},{11,0,421},{5,8,421},{6,47,2},{0,30,425},{5,8,421},{0,30,425},{8,0,433},{8,0,433},{8,0,433},{8,0,433},{6,31,0},{6,31,0},{6,31,0},{6,42,0},{18,28,1},{18,28,1},{24,33,846},{24,41,465},{40,15,658},{24,31,454},{38,5,1518},{22,13,446},{38,15,14},{22,46,667},{0,27,1518},{18,46,453},{26,7,421},{26,27,2},{42,15,53},{10,31,86},{27,0,1514}, +{6,43,425},{38,15,10},{16,46,434},{37,8,1514},{16,46,434},{24,23,426},{24,23,426},{24,23,426},{24,47,426},{38,25,344},{38,31,9},{38,31,9},{22,44,41},{18,31,341},{4,14,2},{26,27,1},{26,27,1},{26,27,1},{26,46,1},{45,0,338},{8,31,5},{8,31,5},{4,14,1},{27,8,338},{4,14,1},{8,1,421},{26,27,2},{12,15,1},{6,15,2},{8,1,421},{1,8,421},{6,15,2}, +{0,46,433},{1,8,421},{0,46,433},{24,0,425},{24,0,425},{24,0,425},{24,0,425},{38,15,4},{38,15,4},{38,15,4},{38,12,8},{4,14,1},{4,14,1},{10,33,846},{10,25,454},{26,29,658},{10,45,454},{8,3,1517},{8,27,446},{8,45,8},{38,47,650},{32,11,1518},{4,47,462},{42,5,425},{42,11,9},{12,29,43},{42,15,89},{28,13,1514},{22,11,422},{8,45,8},{16,31,426},{27,12,1514}, +{16,31,426},{10,23,425},{10,23,425},{10,23,425},{10,15,429},{8,9,340},{8,45,4},{8,45,4},{8,30,50},{34,45,341},{20,30,6},{42,11,5},{42,11,5},{42,11,5},{42,31,8},{29,2,338},{8,45,4},{8,45,4},{36,30,1},{35,6,338},{36,30,1},{23,0,421},{12,11,4},{44,45,5},{8,45,4},{23,0,421},{18,9,421},{8,45,4},{0,31,425},{18,9,421},{0,31,425},{10,0,425}, +{10,0,425},{10,0,425},{10,0,425},{8,13,1},{8,13,1},{8,13,1},{8,44,0},{20,30,5},{20,30,5},{26,1,846},{26,39,470},{42,13,658},{26,29,454},{40,3,1523},{24,11,446},{24,13,20},{24,31,658},{2,25,1521},{20,15,458},{28,5,422},{28,25,6},{28,13,51},{12,29,86},{40,3,1514},{8,41,421},{24,13,11},{18,15,421},{16,9,1514},{18,15,421},{26,21,426},{26,21,426},{26,21,426}, 
+{26,45,426},{24,7,347},{40,29,11},{40,29,11},{24,46,41},{20,29,342},{36,47,10},{28,41,1},{28,41,1},{28,41,1},{28,15,2},{43,2,338},{40,29,2},{40,29,2},{36,47,9},{21,8,338},{36,47,9},{37,0,421},{12,25,5},{14,13,1},{8,13,2},{37,0,421},{32,7,421},{8,13,2},{0,15,421},{32,7,421},{0,15,421},{26,0,425},{26,0,425},{26,0,425},{26,0,425},{24,27,9}, +{24,27,9},{24,27,9},{40,14,10},{36,47,1},{36,47,1},{12,1,850},{12,23,454},{12,27,666},{12,43,454},{10,1,1517},{10,25,446},{10,43,8},{40,45,666},{34,9,1515},{6,45,462},{44,3,425},{44,9,9},{14,27,50},{44,13,89},{39,2,1514},{24,9,422},{10,43,8},{18,29,426},{34,9,1514},{18,29,426},{12,21,425},{12,21,425},{12,21,425},{12,13,429},{10,7,340},{10,43,4},{10,43,4}, +{10,47,50},{36,43,341},{22,31,1},{44,9,5},{44,9,5},{44,9,5},{44,29,8},{38,1,338},{26,13,4},{26,13,4},{22,31,1},{43,28,338},{22,31,1},{5,0,421},{14,9,4},{46,43,5},{10,43,4},{5,0,421},{0,5,421},{10,43,4},{0,29,425},{0,5,421},{0,29,425},{12,0,425},{12,0,425},{12,0,425},{12,0,425},{10,27,0},{10,27,0},{10,27,0},{10,46,0},{22,31,1}, +{22,31,1},{44,1,890},{44,7,462},{44,41,666},{28,27,454},{42,1,1518},{26,39,446},{42,11,8},{26,13,666},{4,23,1523},{22,13,454},{30,33,421},{30,23,1},{46,11,50},{30,27,89},{21,0,1514},{40,23,425},{42,11,4},{20,13,425},{18,7,1514},{20,13,425},{28,19,426},{28,19,426},{28,19,426},{28,27,429},{42,21,344},{42,11,8},{42,11,8},{26,15,50},{6,11,341},{8,45,9},{30,23,1}, +{30,23,1},{30,23,1},{30,13,0},{39,0,338},{42,11,4},{42,11,4},{8,45,5},{9,28,338},{8,45,5},{14,1,421},{30,23,1},{47,11,0},{26,11,0},{14,1,421},{4,5,421},{26,11,0},{0,13,425},{4,5,421},{0,13,425},{28,0,425},{28,0,425},{28,0,425},{28,0,425},{42,11,4},{42,11,4},{42,11,4},{42,47,5},{8,15,4},{8,15,4},{30,1,950},{14,21,458},{30,25,658}, 
+{14,41,446},{28,1,1542},{12,23,438},{12,25,20},{12,43,658},{36,7,1518},{38,27,470},{46,1,430},{46,37,10},{47,25,41},{46,41,82},{14,3,1514},{26,7,422},{28,41,11},{20,27,426},{32,5,1514},{20,27,426},{14,19,421},{14,19,421},{14,19,421},{14,11,422},{12,35,339},{12,25,11},{12,25,11},{12,29,51},{38,41,338},{24,29,6},{46,37,9},{46,37,9},{46,37,9},{46,27,10},{23,2,338}, +{28,41,2},{28,41,2},{40,29,1},{20,9,338},{40,29,1},{17,0,421},{46,37,1},{31,25,10},{26,25,9},{17,0,421},{18,3,421},{26,25,9},{0,27,425},{18,3,421},{0,27,425},{14,0,421},{14,0,421},{14,0,421},{14,0,421},{12,9,2},{12,9,2},{12,9,2},{12,15,1},{24,13,5},{24,13,5},{46,1,1010},{46,5,462},{46,39,650},{30,25,454},{14,1,1575},{28,7,446},{44,9,8}, +{28,27,658},{6,21,1523},{24,11,454},{31,1,422},{31,21,6},{31,9,50},{47,25,77},{46,3,1514},{42,21,425},{44,9,4},{22,11,425},{16,3,1514},{22,11,425},{30,17,426},{30,17,426},{30,17,426},{30,25,429},{44,19,344},{44,9,8},{44,9,8},{28,13,43},{8,9,340},{10,43,9},{31,37,1},{31,37,1},{31,37,1},{31,11,2},{37,2,338},{44,9,4},{44,9,4},{10,43,5},{34,7,338}, +{10,43,5},{31,1,421},{31,21,5},{45,9,0},{12,9,1},{31,1,421},{32,1,421},{12,9,1},{0,11,425},{32,1,421},{0,11,425},{30,0,425},{30,0,425},{30,0,425},{30,0,425},{44,9,4},{44,9,4},{44,9,4},{44,45,5},{10,13,4},{10,13,4},{31,1,1098},{47,19,453},{47,23,667},{47,39,473},{46,1,1641},{14,21,438},{14,39,14},{14,41,658},{38,5,1518},{40,25,465},{45,1,437}, +{15,5,2},{45,23,41},{15,39,86},{31,3,1514},{28,5,422},{30,39,9},{22,25,426},{34,3,1514},{22,25,426},{47,17,434},{47,17,434},{47,17,434},{47,9,437},{14,33,339},{14,39,10},{14,39,10},{14,43,53},{40,39,341},{26,27,2},{15,5,1},{15,5,1},{15,5,1},{15,25,5},{44,1,338},{30,9,5},{30,9,5},{26,27,1},{26,9,338},{26,27,1},{45,1,421},{15,5,1},{13,39,8}, 
+{14,39,4},{45,1,421},{20,1,421},{14,39,4},{0,25,425},{20,1,421},{0,25,425},{47,0,433},{47,0,433},{47,0,433},{47,0,433},{14,7,2},{14,7,2},{14,7,2},{14,13,1},{26,27,2},{26,27,2},{45,1,1202},{31,33,465},{15,37,655},{31,7,461},{47,1,1725},{30,35,446},{46,7,6},{30,25,670},{24,19,1518},{26,9,458},{13,1,469},{29,19,1},{13,7,52},{29,23,85},{15,1,1514}, +{14,35,426},{46,7,5},{24,9,434},{9,11,1514},{24,9,434},{31,1,434},{31,1,434},{31,1,434},{31,39,426},{46,17,339},{46,7,6},{46,7,6},{30,11,48},{10,7,341},{12,41,2},{29,19,1},{29,19,1},{29,19,1},{29,9,0},{31,5,338},{47,23,4},{47,23,4},{12,41,1},{14,9,338},{12,41,1},{43,1,421},{29,19,1},{43,7,0},{30,7,0},{43,1,421},{8,1,421},{30,7,0}, +{0,9,433},{8,1,421},{0,9,433},{31,0,425},{31,0,425},{31,0,425},{31,0,425},{46,7,2},{46,7,2},{46,7,2},{46,27,5},{12,41,1},{12,41,1},{29,1,1346},{45,17,454},{29,21,653},{45,37,459},{45,1,1818},{47,19,446},{47,37,13},{46,39,679},{10,3,1518},{42,23,455},{27,1,526},{13,33,5},{43,21,38},{13,7,93},{27,5,1514},{30,3,425},{31,37,11},{40,23,421},{6,1,1514}, +{40,23,421},{45,1,434},{45,1,434},{45,1,434},{45,7,429},{47,1,340},{47,37,4},{47,37,4},{47,25,50},{42,37,338},{44,25,5},{13,33,4},{13,33,4},{13,33,4},{13,23,5},{17,2,338},{31,37,2},{31,37,2},{44,25,1},{16,1,338},{44,25,1},{11,1,421},{13,33,1},{27,21,5},{30,21,5},{11,1,421},{26,1,421},{30,21,5},{0,23,421},{26,1,421},{0,23,421},{45,0,425}, +{45,0,425},{45,0,425},{45,0,425},{47,5,1},{47,5,1},{47,5,1},{47,11,0},{44,25,4},{44,25,4},{43,1,1502},{13,1,478},{13,35,655},{29,5,461},{29,1,1926},{31,3,446},{15,5,19},{47,7,685},{26,17,1518},{28,7,454},{11,1,569},{27,17,6},{27,5,50},{43,21,97},{17,10,1514},{47,33,425},{15,5,10},{26,7,426},{24,1,1514},{26,7,426},{13,1,477},{13,1,477},{13,1,477}, 
+{29,37,426},{15,1,355},{15,21,11},{15,21,11},{31,9,41},{12,5,341},{14,39,6},{27,33,1},{27,33,1},{27,33,1},{27,7,2},{29,3,338},{15,21,2},{15,21,2},{14,39,5},{43,9,338},{14,39,5},{25,1,421},{43,1,5},{41,5,0},{31,5,1},{25,1,421},{14,1,421},{31,5,1},{0,7,425},{14,1,421},{0,7,425},{29,0,425},{29,0,425},{29,0,425},{29,0,425},{31,19,9}, +{31,19,9},{31,19,9},{31,25,10},{14,39,2},{14,39,2},{11,1,1634},{43,1,542},{43,19,659},{43,35,459},{43,1,2070},{45,17,446},{45,35,13},{15,37,655},{12,1,1518},{44,21,455},{25,1,646},{11,1,9},{41,19,43},{11,5,93},{25,3,1514},{31,1,422},{29,35,11},{42,21,421},{12,1,1514},{42,21,421},{43,1,506},{43,1,506},{43,1,506},{43,5,429},{29,1,381},{45,35,4},{45,35,4}, +{45,23,50},{44,35,338},{46,23,5},{11,1,5},{11,1,5},{11,1,5},{11,21,5},{39,11,338},{29,35,2},{29,35,2},{46,23,1},{22,1,338},{46,23,1},{39,1,421},{11,1,5},{25,19,10},{31,19,9},{39,1,421},{31,1,421},{31,19,9},{0,21,421},{31,1,421},{0,21,421},{43,0,425},{43,0,425},{43,0,425},{43,0,425},{45,19,0},{45,19,0},{45,19,0},{45,9,0},{46,23,4}, +{46,23,4},{25,1,1874},{11,1,666},{11,33,657},{27,3,451},{11,1,2201},{13,1,454},{13,3,9},{29,5,674},{14,1,1557},{30,5,458},{39,1,722},{9,1,62},{9,33,50},{25,19,85},{9,1,1514},{29,1,446},{13,3,5},{44,5,433},{46,1,1514},{44,5,433},{11,1,545},{11,1,545},{11,1,545},{27,19,429},{43,1,437},{13,3,8},{13,3,8},{29,7,48},{30,19,341},{47,37,6},{25,1,9}, +{25,1,9},{25,1,9},{25,5,0},{25,5,338},{13,3,4},{13,3,4},{47,37,2},{10,1,338},{47,37,2},{37,1,421},{9,1,37},{39,3,2},{29,3,1},{37,1,421},{43,1,421},{29,3,1},{0,5,433},{43,1,421},{0,5,433},{27,0,425},{27,0,425},{27,0,425},{27,0,425},{13,3,4},{13,3,4},{13,3,4},{13,39,5},{46,21,5},{46,21,5},{9,1,2030},{25,1,882},{25,17,653}, 
+{41,33,453},{9,1,2382},{27,1,546},{43,17,15},{13,35,661},{47,1,1653},{46,19,462},{7,1,837},{39,1,162},{39,17,38},{9,33,89},{19,9,1514},{27,1,521},{43,17,14},{44,19,421},{45,1,1514},{44,19,421},{25,1,626},{25,1,626},{25,1,626},{41,3,422},{11,1,488},{43,17,11},{43,17,11},{43,21,54},{46,33,340},{31,21,9},{39,1,18},{39,1,18},{39,1,18},{9,19,10},{9,3,338}, +{27,33,2},{27,33,2},{15,21,4},{31,3,338},{15,21,4},{5,1,421},{7,1,80},{7,17,8},{13,17,5},{5,1,421},{9,1,421},{13,17,5},{0,19,421},{9,1,421},{0,19,421},{41,0,421},{41,0,421},{41,0,421},{41,0,421},{43,1,1},{43,1,1},{43,1,1},{43,7,1},{31,5,5},{31,5,5},{23,1,2201},{39,1,1083},{9,1,670},{25,1,450},{39,1,2443},{11,1,684},{11,1,8}, +{27,3,634},{29,1,1751},{31,3,429},{21,1,890},{7,1,277},{7,1,52},{23,17,74},{21,17,1459},{25,1,603},{11,1,4},{30,3,401},{17,21,1459},{30,3,401},{9,1,670},{9,1,670},{9,1,670},{25,17,429},{25,1,579},{11,1,8},{11,1,8},{27,5,50},{47,1,340},{15,35,6},{7,1,52},{7,1,52},{7,1,52},{23,3,0},{23,3,338},{11,1,4},{11,1,4},{15,35,2},{43,3,338}, +{15,35,2},{19,1,392},{5,1,149},{37,1,1},{27,1,0},{19,1,392},{37,1,392},{27,1,0},{0,3,400},{37,1,392},{0,3,400},{25,0,425},{25,0,425},{25,0,425},{25,0,425},{11,1,4},{11,1,4},{11,1,4},{11,37,5},{15,35,5},{15,35,5},{7,1,1901},{23,1,1050},{23,1,761},{39,1,426},{7,1,2093},{9,1,551},{25,1,35},{11,33,382},{13,1,1407},{45,33,219},{5,1,638}, +{21,1,233},{21,1,89},{7,1,29},{17,39,1064},{23,1,398},{9,1,10},{31,33,202},{39,17,1064},{31,33,202},{23,1,761},{23,1,761},{23,1,761},{39,1,426},{9,1,638},{25,1,35},{25,1,35},{41,19,51},{45,1,355},{29,19,6},{21,1,89},{21,1,89},{21,1,89},{7,17,5},{3,25,338},{9,1,10},{9,1,10},{13,19,1},{29,1,338},{13,19,1},{3,1,200},{35,1,68},{5,1,1}, 
+{9,1,1},{3,1,200},{5,1,200},{9,1,1},{0,33,202},{5,1,200},{0,33,202},{39,0,425},{39,0,425},{39,0,425},{39,0,425},{25,1,26},{25,1,26},{25,1,26},{41,5,1},{29,3,5},{29,3,5},{21,1,1606},{37,1,1085},{7,1,886},{7,1,461},{21,1,1749},{23,1,470},{39,1,117},{25,33,186},{11,1,1166},{13,17,66},{19,1,461},{35,1,213},{5,1,125},{21,1,2},{3,3,722}, +{7,1,266},{23,1,37},{29,17,61},{3,3,722},{29,17,61},{7,1,886},{7,1,886},{7,1,886},{7,1,461},{23,1,770},{39,1,117},{39,1,117},{9,3,51},{43,1,426},{43,33,1},{5,1,125},{5,1,125},{5,1,125},{21,1,2},{19,5,338},{23,1,37},{23,1,37},{43,33,0},{37,3,338},{43,33,0},{1,3,61},{33,1,25},{3,1,1},{5,1,1},{1,3,61},{3,1,61},{5,1,1}, +{0,17,61},{3,1,61},{0,17,61},{23,0,433},{23,0,433},{23,0,433},{23,0,433},{9,1,37},{9,1,37},{9,1,37},{9,19,5},{43,33,1},{43,33,1},{5,1,1450},{5,1,1109},{21,1,965},{37,1,542},{5,1,1505},{7,1,478},{7,1,253},{39,17,79},{9,1,1054},{27,1,10},{3,1,318},{19,1,221},{19,1,185},{35,1,29},{33,33,509},{35,1,210},{21,1,97},{27,1,10},{33,33,509}, +{27,1,10},{21,1,965},{21,1,965},{21,1,965},{37,1,542},{21,1,900},{7,1,253},{7,1,253},{39,17,54},{41,1,549},{11,17,9},{19,1,185},{19,1,185},{19,1,185},{35,1,29},{3,3,338},{21,1,97},{21,1,97},{11,17,5},{3,3,338},{11,17,5},{1,17,5},{17,1,1},{17,1,0},{33,1,0},{1,17,5},{17,1,5},{33,1,0},{0,1,9},{17,1,5},{0,1,9},{37,0,421}, +{37,0,421},{37,0,421},{37,0,421},{23,1,65},{23,1,65},{23,1,65},{39,3,2},{27,1,1},{27,1,1},{35,1,1213},{35,1,953},{5,1,886},{5,1,545},{35,1,1186},{21,1,448},{21,1,304},{23,1,32},{23,1,790},{25,1,13},{33,1,198},{3,1,150},{3,1,125},{19,1,36},{33,1,294},{19,1,121},{35,1,73},{9,1,0},{1,33,294},{9,1,0},{5,1,886},{5,1,886},{5,1,886}, +{5,1,545},{5,1,765},{21,1,304},{21,1,304},{23,1,32},{39,1,486},{25,1,13},{3,1,125},{3,1,125},{3,1,125},{19,1,36},{3,1,221},{35,1,73},{35,1,73},{9,1,0},{5,1,221},{9,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{21,0,433},{21,0,433},{21,0,433},{21,0,433},{37,1,113}, 
+{37,1,113},{37,1,113},{7,17,8},{25,1,13},{25,1,13},{3,1,885},{19,1,737},{19,1,701},{35,1,485},{19,1,834},{5,1,364},{5,1,264},{37,1,2},{7,1,554},{39,1,45},{17,1,82},{33,1,62},{33,1,53},{33,1,13},{1,3,114},{33,1,54},{3,1,34},{21,1,1},{3,1,114},{21,1,1},{19,1,701},{19,1,701},{19,1,701},{35,1,485},{35,1,574},{5,1,264},{5,1,264}, +{37,1,2},{7,1,329},{39,1,45},{33,1,53},{33,1,53},{33,1,53},{33,1,13},{33,1,85},{3,1,34},{3,1,34},{21,1,1},{19,1,85},{21,1,1},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{35,0,421},{35,0,421},{35,0,421},{35,0,421},{21,1,160},{21,1,160},{21,1,160},{37,1,2},{39,1,45}, +{39,1,45},{0,30,882},{0,10,100},{0,38,2},{0,6,340},{0,10,1896},{0,22,1189},{0,6,565},{0,4,1421},{0,20,2043},{0,4,1521},{0,30,882},{0,10,100},{0,38,2},{0,6,340},{4,2,1896},{0,22,1189},{0,6,565},{0,4,1421},{10,0,1896},{0,4,1421},{0,22,1},{0,22,1},{0,22,1},{0,34,0},{0,4,164},{0,18,52},{0,18,52},{0,32,98},{0,32,179},{0,32,107},{0,22,1}, +{0,22,1},{0,22,1},{0,34,0},{32,16,162},{0,18,52},{0,18,52},{0,32,98},{16,32,162},{0,32,98},{38,0,882},{0,10,100},{0,38,2},{0,6,340},{38,0,882},{30,0,882},{0,6,340},{0,20,884},{30,0,882},{0,20,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,15,884},{0,12,34},{0,24,13}, +{0,38,250},{0,12,2355},{0,8,1355},{0,22,585},{0,20,1669},{0,36,2567},{0,20,1813},{0,15,884},{0,12,34},{0,24,13},{0,38,250},{0,12,2355},{0,8,1355},{0,22,585},{0,20,1669},{12,0,2355},{0,20,1669},{0,10,0},{0,10,0},{0,10,0},{0,20,1},{0,20,340},{0,4,125},{0,4,125},{0,2,200},{0,18,376},{0,2,225},{0,10,0},{0,10,0},{0,10,0},{0,20,1},{16,4,338}, +{0,4,125},{0,4,125},{0,2,200},{4,16,338},{0,2,200},{24,0,882},{0,12,34},{16,24,4},{0,38,250},{24,0,882},{32,8,882},{0,38,250},{0,6,884},{32,8,882},{0,6,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,27,882},{0,14,9},{0,40,81},{0,8,202},{0,14,2899},{0,24,1539},{0,8,643}, 
+{0,6,2004},{0,6,3189},{0,36,2207},{0,27,882},{0,14,9},{16,40,61},{0,8,202},{6,2,2899},{0,24,1539},{0,8,643},{0,6,2004},{14,0,2899},{0,6,2004},{0,28,1},{0,28,1},{0,28,1},{0,6,4},{0,22,578},{0,36,221},{0,36,221},{0,18,365},{0,18,632},{0,18,401},{0,28,1},{0,28,1},{0,28,1},{0,6,4},{32,4,578},{0,36,221},{0,36,221},{0,18,365},{22,0,578}, +{0,18,365},{38,6,882},{0,14,9},{2,40,1},{0,8,202},{38,6,882},{27,0,882},{0,8,202},{0,22,884},{27,0,882},{0,22,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{16,25,910},{16,46,31},{16,26,151},{0,40,181},{0,31,3048},{0,26,1416},{0,40,506},{0,22,1944},{0,38,3484},{0,6,2233},{16,25,885}, +{16,46,6},{32,26,78},{16,40,165},{36,6,3048},{0,26,1416},{0,40,506},{0,22,1944},{31,0,3048},{0,22,1944},{16,30,26},{16,30,26},{16,30,26},{16,38,27},{0,24,650},{0,22,157},{0,22,157},{0,4,325},{0,4,766},{0,4,425},{16,30,1},{16,30,1},{16,30,1},{16,38,2},{18,4,648},{0,22,157},{0,22,157},{0,4,325},{36,2,648},{0,4,325},{26,2,882},{0,46,5},{18,26,4}, +{0,40,145},{26,2,882},{38,8,882},{0,40,145},{0,8,890},{38,8,882},{0,8,890},{16,0,26},{16,0,26},{16,0,26},{16,0,26},{0,18,0},{0,18,0},{0,18,0},{0,32,1},{0,16,10},{0,16,10},{16,7,1009},{16,31,134},{32,12,297},{16,26,238},{0,43,3048},{0,28,1224},{0,26,267},{0,8,1764},{0,24,3685},{0,38,2157},{2,9,885},{2,47,5},{18,12,77},{32,26,165},{6,8,3048}, +{0,28,1224},{0,26,267},{0,8,1764},{43,0,3048},{0,8,1764},{16,45,125},{16,45,125},{16,45,125},{16,40,126},{0,28,648},{0,24,80},{0,24,80},{0,36,260},{0,6,875},{0,20,413},{2,46,1},{2,46,1},{2,46,1},{2,24,2},{2,24,648},{0,24,80},{0,24,80},{0,36,260},{28,0,648},{0,36,260},{44,0,882},{2,47,4},{4,42,1},{0,26,98},{44,0,882},{26,8,882},{0,26,98}, 
+{0,24,890},{26,8,882},{0,24,890},{16,0,125},{16,0,125},{16,0,125},{16,0,125},{0,6,0},{0,6,0},{0,6,0},{0,18,1},{0,18,37},{0,18,37},{32,5,1147},{32,45,282},{2,44,523},{32,12,381},{0,9,3051},{0,14,1110},{0,12,116},{0,24,1658},{0,26,3859},{0,8,2173},{18,23,886},{18,15,6},{34,28,69},{18,42,166},{40,4,3048},{0,14,1110},{0,12,116},{0,24,1658},{22,8,3048}, +{0,24,1658},{32,13,265},{32,13,265},{32,13,265},{32,10,266},{0,46,650},{0,26,26},{0,26,26},{0,22,194},{0,22,987},{0,6,427},{18,15,5},{18,15,5},{18,15,5},{18,40,5},{22,2,648},{0,26,26},{0,26,26},{0,22,194},{42,2,648},{0,22,194},{30,0,882},{18,15,2},{20,28,4},{0,12,52},{30,0,882},{33,0,882},{0,12,52},{0,10,884},{33,0,882},{0,10,884},{32,0,265}, +{32,0,265},{32,0,265},{32,0,265},{0,24,0},{0,24,0},{0,24,0},{0,4,4},{0,34,89},{0,34,89},{2,3,1365},{2,13,510},{18,14,813},{2,28,566},{0,37,3048},{0,46,1013},{0,44,42},{0,10,1509},{0,12,4057},{0,10,2185},{4,7,885},{4,45,10},{20,44,86},{34,28,173},{28,0,3048},{0,46,1013},{0,44,42},{0,10,1509},{37,0,3048},{0,10,1509},{2,11,482},{2,11,482},{2,11,482}, +{2,42,481},{0,29,650},{0,12,4},{0,12,4},{0,38,128},{0,8,1161},{0,22,483},{4,15,2},{4,15,2},{4,15,2},{4,26,2},{8,2,648},{0,12,4},{0,12,4},{0,38,128},{30,2,648},{0,38,128},{47,0,882},{34,45,2},{6,44,1},{0,44,26},{47,0,882},{27,6,882},{0,44,26},{0,26,890},{27,6,882},{0,26,890},{2,0,481},{2,0,481},{2,0,481},{2,0,481},{0,12,0}, +{0,12,0},{0,12,0},{0,6,1},{0,20,180},{0,20,180},{18,17,1647},{18,27,810},{34,46,1178},{2,14,837},{0,19,3048},{0,15,933},{0,14,21},{0,26,1443},{0,28,4329},{0,26,2227},{20,21,885},{20,29,5},{36,30,69},{20,44,182},{42,6,3048},{0,15,933},{0,14,21},{0,26,1443},{19,0,3048},{0,26,1443},{18,25,765},{18,25,765},{18,25,765},{18,28,766},{0,11,648},{0,14,5},{0,14,5}, 
+{0,24,89},{0,10,1342},{0,8,557},{20,29,1},{20,29,1},{20,29,1},{20,42,2},{24,4,648},{16,44,2},{16,44,2},{0,24,89},{11,0,648},{0,24,89},{34,3,882},{4,13,4},{22,30,4},{0,14,17},{34,3,882},{35,4,882},{0,14,17},{0,12,884},{35,4,882},{0,12,884},{18,0,765},{18,0,765},{18,0,765},{18,0,765},{0,30,0},{0,30,0},{0,30,0},{0,38,0},{0,6,274}, +{0,6,274},{34,1,1782},{34,41,940},{4,31,1356},{34,46,950},{16,1,3052},{16,29,910},{16,46,22},{0,12,1335},{0,46,4231},{0,12,1924},{6,5,882},{6,43,5},{22,47,86},{36,30,174},{32,3,3048},{0,13,900},{32,46,13},{0,12,1299},{39,4,3048},{0,12,1299},{34,23,882},{34,23,882},{34,23,882},{34,14,883},{16,9,651},{16,46,18},{16,46,18},{16,10,74},{0,42,1256},{0,40,434},{6,13,0}, +{6,13,0},{6,13,0},{6,28,1},{38,10,648},{2,30,2},{2,30,2},{0,10,45},{23,0,648},{0,10,45},{4,1,882},{36,27,4},{8,46,4},{0,46,5},{4,1,882},{1,4,882},{0,46,5},{0,28,890},{1,4,882},{0,28,890},{34,0,882},{34,0,882},{34,0,882},{34,0,882},{16,31,1},{16,31,1},{16,31,1},{16,24,1},{0,8,250},{0,8,250},{20,1,1814},{20,25,945},{36,15,1363}, +{4,47,945},{2,1,3052},{32,43,916},{2,47,23},{16,44,1337},{0,31,3975},{0,28,1612},{22,19,886},{22,11,6},{38,31,77},{22,46,185},{2,1,3048},{0,27,883},{2,47,19},{0,44,1188},{1,2,3048},{0,44,1188},{20,7,901},{20,7,901},{20,7,901},{20,30,900},{2,9,654},{2,47,14},{2,47,14},{32,26,70},{0,28,1059},{0,26,236},{22,27,4},{22,27,4},{22,27,4},{22,14,8},{28,2,648}, +{18,46,5},{18,46,5},{0,42,36},{42,8,648},{0,42,36},{27,0,882},{22,11,2},{24,31,4},{0,31,5},{27,0,882},{37,8,882},{0,31,5},{0,14,882},{37,8,882},{0,14,882},{4,0,900},{4,0,900},{4,0,900},{4,0,900},{2,15,5},{2,15,5},{2,15,5},{2,40,4},{0,40,146},{0,40,146},{6,1,1864},{36,39,947},{6,29,1356},{36,31,943},{34,1,3073},{18,27,910},{18,15,21}, 
+{2,14,1335},{0,45,3751},{0,14,1379},{8,3,885},{8,41,5},{24,15,85},{38,31,182},{29,0,3048},{32,11,884},{34,15,16},{0,30,1146},{37,6,3048},{0,30,1146},{36,21,882},{36,21,882},{36,21,882},{36,47,886},{18,7,649},{18,15,21},{18,15,21},{18,12,74},{0,14,945},{0,12,91},{8,11,2},{8,11,2},{8,11,2},{8,30,2},{16,3,648},{34,31,1},{34,31,1},{0,12,10},{17,0,648}, +{0,12,10},{28,13,882},{38,25,4},{10,15,1},{2,15,1},{28,13,882},{27,12,882},{2,15,1},{0,30,890},{27,12,882},{0,30,890},{36,0,882},{36,0,882},{36,0,882},{36,0,882},{18,29,1},{18,29,1},{18,29,1},{18,26,1},{0,42,68},{0,42,68},{22,1,1944},{22,23,945},{38,13,1363},{6,45,945},{20,1,3115},{34,41,916},{4,45,18},{18,46,1337},{0,13,3580},{0,46,1153},{24,17,886}, +{24,9,6},{40,29,69},{24,15,168},{46,10,3048},{2,25,885},{4,45,14},{0,46,1053},{23,8,3048},{0,46,1053},{22,5,901},{22,5,901},{22,5,901},{6,31,901},{4,7,654},{4,45,9},{4,45,9},{34,28,70},{0,47,825},{0,44,25},{24,25,4},{24,25,4},{24,25,4},{24,46,8},{30,4,648},{20,15,0},{20,15,0},{0,44,0},{15,8,648},{0,44,0},{40,3,882},{24,9,2},{26,29,4}, +{2,29,5},{40,3,882},{16,9,882},{2,29,5},{0,47,890},{16,9,882},{0,47,890},{6,0,900},{6,0,900},{6,0,900},{6,0,900},{4,13,5},{4,13,5},{4,13,5},{4,42,4},{0,28,18},{0,28,18},{8,1,2056},{38,21,956},{8,43,1348},{38,29,949},{6,1,3156},{20,25,910},{20,13,22},{4,47,1318},{0,11,3384},{0,31,1015},{10,1,882},{10,39,5},{26,13,75},{40,29,174},{27,2,3048}, +{34,9,885},{36,13,10},{0,31,990},{39,10,3048},{0,31,990},{38,3,885},{38,3,885},{38,3,885},{38,45,885},{20,5,649},{20,13,21},{20,13,21},{20,14,66},{0,15,729},{0,30,5},{10,9,0},{10,9,0},{10,9,0},{10,31,1},{45,0,648},{6,29,2},{6,29,2},{16,30,4},{27,8,648},{16,30,4},{10,1,882},{40,23,1},{12,13,1},{4,13,1},{10,1,882},{4,9,882},{4,13,1}, 
+{0,31,890},{4,9,882},{0,31,890},{38,0,884},{38,0,884},{38,0,884},{38,0,884},{20,27,1},{20,27,1},{20,27,1},{20,28,1},{0,30,1},{0,30,1},{40,1,2134},{24,21,943},{40,11,1348},{8,43,952},{38,1,3240},{36,39,914},{6,27,22},{36,15,1330},{0,25,3244},{0,15,951},{42,1,891},{26,7,10},{42,27,77},{26,13,171},{8,1,3048},{4,23,886},{6,27,21},{0,15,950},{1,8,3048}, +{0,15,950},{24,3,890},{24,3,890},{24,3,890},{24,29,890},{6,35,649},{6,43,10},{6,43,10},{36,30,77},{0,13,675},{32,46,6},{26,7,10},{26,7,10},{26,7,10},{26,15,10},{29,2,648},{22,13,2},{22,13,2},{2,46,2},{35,6,648},{2,46,2},{21,0,882},{26,7,1},{28,27,4},{4,27,5},{21,0,882},{18,7,882},{4,27,5},{0,45,884},{18,7,882},{0,45,884},{24,0,890}, +{24,0,890},{24,0,890},{24,0,890},{6,11,1},{6,11,1},{6,11,1},{6,44,2},{16,47,2},{16,47,2},{26,1,2252},{40,35,940},{10,41,1348},{40,27,954},{24,1,3321},{22,23,910},{22,11,22},{6,45,1318},{0,39,3156},{0,29,940},{28,1,920},{12,37,5},{28,41,77},{12,27,180},{23,0,3048},{36,7,884},{38,11,10},{0,29,915},{18,9,3048},{0,29,915},{40,17,882},{40,17,882},{40,17,882}, +{40,43,883},{22,3,649},{38,27,20},{38,27,20},{22,47,86},{0,27,656},{2,31,9},{12,7,0},{12,7,0},{12,7,0},{12,29,1},{43,2,648},{38,27,4},{38,27,4},{34,47,4},{21,8,648},{34,47,4},{14,3,882},{42,21,1},{14,11,1},{6,11,1},{14,3,882},{32,5,882},{6,11,1},{0,29,890},{32,5,882},{0,29,890},{40,0,882},{40,0,882},{40,0,882},{40,0,882},{22,25,1}, +{22,25,1},{22,25,1},{22,30,2},{2,31,0},{2,31,0},{42,1,2404},{26,19,935},{42,9,1348},{10,41,952},{40,1,3409},{38,37,914},{8,41,30},{38,29,1354},{0,37,3087},{2,13,951},{14,1,954},{28,5,6},{44,25,77},{28,11,185},{9,6,3048},{6,21,886},{24,41,21},{0,43,900},{32,7,3048},{0,43,900},{26,1,891},{26,1,891},{26,1,891},{26,27,891},{8,3,654},{8,41,14},{8,41,14}, 
+{38,31,77},{16,41,651},{34,15,3},{28,5,5},{28,5,5},{28,5,5},{28,13,5},{38,1,648},{24,11,2},{24,11,2},{4,15,2},{43,28,648},{4,15,2},{46,3,882},{28,5,2},{30,25,4},{6,25,5},{46,3,882},{16,3,882},{6,25,5},{0,43,884},{16,3,882},{0,43,884},{26,0,890},{26,0,890},{26,0,890},{26,0,890},{8,9,5},{8,9,5},{8,9,5},{8,46,4},{34,15,2}, +{34,15,2},{28,1,2612},{12,3,951},{28,39,1354},{42,9,952},{26,1,3544},{24,21,910},{40,9,30},{8,43,1348},{0,35,3060},{18,27,935},{30,1,1005},{14,35,3},{30,39,77},{44,25,185},{41,12,3048},{38,35,890},{40,9,14},{0,27,891},{16,5,3048},{0,27,891},{42,1,900},{42,1,900},{42,1,900},{42,41,885},{24,1,654},{40,25,21},{40,25,21},{24,45,77},{2,9,651},{4,29,6},{14,5,2}, +{14,5,2},{14,5,2},{14,27,2},{39,0,648},{10,25,2},{10,25,2},{4,29,5},{9,28,648},{4,29,5},{1,0,882},{44,19,2},{47,9,4},{8,9,5},{1,0,882},{0,1,882},{8,9,5},{0,27,890},{0,1,882},{0,27,890},{42,0,884},{42,0,884},{42,0,884},{42,0,884},{24,7,5},{24,7,5},{24,7,5},{24,31,4},{4,29,2},{4,29,2},{14,1,2774},{28,1,940},{44,7,1318}, +{12,39,966},{12,1,3700},{40,35,916},{10,23,22},{40,11,1348},{16,19,3051},{34,41,940},{47,1,1061},{30,3,9},{46,23,86},{30,9,190},{14,1,3048},{8,19,886},{26,39,20},{16,41,882},{4,5,3048},{16,41,882},{28,1,915},{28,1,915},{28,1,915},{28,25,890},{10,1,657},{10,39,10},{10,39,10},{40,29,77},{18,39,654},{36,13,5},{46,35,4},{46,35,4},{46,35,4},{46,11,4},{23,2,648}, +{42,9,4},{42,9,4},{6,13,0},{20,9,648},{6,13,0},{15,1,882},{30,3,0},{31,23,2},{24,23,1},{15,1,882},{9,11,882},{24,23,1},{0,41,882},{9,11,882},{0,41,882},{28,0,890},{28,0,890},{28,0,890},{28,0,890},{10,7,1},{10,7,1},{10,7,1},{10,15,1},{20,43,1},{20,43,1},{46,1,3014},{14,1,951},{14,37,1330},{44,23,958},{14,1,3865},{26,19,913},{26,7,22}, 
+{10,41,1348},{32,33,3049},{20,25,943},{15,1,1154},{47,33,6},{31,37,77},{46,23,185},{17,0,3048},{40,3,885},{42,7,10},{2,25,890},{18,3,3048},{2,25,890},{14,1,950},{14,1,950},{14,1,950},{44,39,885},{42,1,672},{26,7,21},{26,7,21},{26,43,77},{34,7,652},{6,27,10},{47,3,2},{47,3,2},{47,3,2},{47,25,2},{37,2,648},{12,23,2},{12,23,2},{6,27,10},{34,7,648}, +{6,27,10},{27,5,882},{46,17,2},{45,7,2},{10,7,1},{27,5,882},{6,1,882},{10,7,1},{0,25,890},{6,1,882},{0,25,890},{44,0,884},{44,0,884},{44,0,884},{44,0,884},{26,5,5},{26,5,5},{26,5,5},{26,29,4},{6,27,1},{6,27,1},{47,1,3214},{30,1,1015},{46,5,1318},{14,37,966},{30,1,4009},{42,33,916},{12,21,22},{42,9,1348},{18,17,3051},{20,39,956},{29,1,1240}, +{31,1,5},{15,21,66},{31,7,185},{27,9,3048},{10,17,886},{12,21,21},{2,39,885},{14,7,3048},{2,39,885},{30,1,990},{30,1,990},{30,1,990},{30,23,890},{28,1,715},{12,37,10},{12,37,10},{12,27,75},{20,37,651},{38,11,5},{31,17,4},{31,17,4},{31,17,4},{31,9,8},{44,1,648},{28,7,2},{28,7,2},{8,11,0},{26,9,648},{8,11,0},{17,10,882},{31,1,1},{29,21,1}, +{26,21,1},{17,10,882},{24,1,882},{26,21,1},{0,39,884},{24,1,882},{0,39,884},{30,0,890},{30,0,890},{30,0,890},{30,0,890},{12,5,1},{12,5,1},{12,5,1},{12,13,1},{22,41,1},{22,41,1},{15,1,3526},{47,1,1153},{47,19,1337},{46,5,958},{47,1,4231},{28,17,910},{44,5,18},{12,39,1363},{20,1,3067},{22,23,945},{43,1,1380},{45,1,25},{29,35,70},{45,21,185},{13,3,3048}, +{12,1,888},{44,5,9},{18,7,901},{44,5,3048},{18,7,901},{47,1,1053},{47,1,1053},{47,1,1053},{46,37,890},{14,1,762},{44,5,14},{44,5,14},{28,41,69},{6,5,650},{8,25,6},{45,1,0},{45,1,0},{45,1,0},{45,23,0},{31,5,648},{14,21,0},{14,21,0},{24,25,4},{14,9,648},{24,25,4},{41,1,882},{29,1,18},{43,5,4},{12,5,5},{41,1,882},{47,3,882},{12,5,5}, 
+{0,7,900},{47,3,882},{0,7,900},{46,0,890},{46,0,890},{46,0,890},{46,0,890},{28,3,5},{28,3,5},{28,3,5},{28,27,4},{8,25,2},{8,25,2},{29,1,3764},{15,1,1379},{15,3,1335},{47,35,950},{15,1,4477},{14,1,925},{14,19,21},{28,7,1356},{22,1,3145},{38,37,947},{11,1,1485},{13,1,91},{13,19,74},{29,35,178},{43,1,3048},{14,1,925},{14,19,21},{20,37,882},{8,1,3048}, +{20,37,882},{31,1,1146},{31,1,1146},{31,1,1146},{31,21,890},{46,1,841},{14,35,16},{14,35,16},{14,25,85},{38,35,649},{40,9,5},{13,1,10},{13,1,10},{13,1,10},{13,7,9},{17,2,648},{30,35,1},{30,35,1},{10,9,2},{16,1,648},{10,9,2},{9,1,882},{43,1,68},{27,19,1},{28,19,1},{9,1,882},{46,1,882},{28,19,1},{0,37,882},{46,1,882},{0,37,882},{31,0,890}, +{31,0,890},{31,0,890},{31,0,890},{14,3,1},{14,3,1},{14,3,1},{14,11,1},{40,9,4},{40,9,4},{13,1,4076},{29,1,1612},{45,17,1337},{15,3,950},{29,1,4684},{46,1,1035},{46,3,23},{14,37,1363},{24,1,3256},{24,21,945},{41,1,1650},{27,1,236},{27,33,70},{13,19,174},{11,1,3048},{47,1,1013},{46,3,14},{20,5,901},{5,9,3048},{20,5,901},{45,1,1188},{45,1,1188},{45,1,1188}, +{15,35,883},{47,1,910},{46,3,19},{46,3,19},{30,39,77},{8,3,651},{10,23,6},{43,1,36},{43,1,36},{43,1,36},{43,21,1},{29,3,648},{47,19,5},{47,19,5},{26,23,4},{43,9,648},{26,23,4},{19,9,882},{41,1,146},{41,3,4},{14,3,5},{19,9,882},{45,1,882},{14,3,5},{0,5,900},{45,1,882},{0,5,900},{15,0,882},{15,0,882},{15,0,882},{15,0,882},{30,1,5}, +{30,1,5},{30,1,5},{30,25,4},{10,23,2},{10,23,2},{11,1,4374},{13,1,1924},{13,1,1335},{45,33,961},{13,1,4972},{47,1,1225},{47,17,22},{30,5,1356},{26,1,3460},{40,35,940},{9,1,1755},{41,1,434},{11,17,74},{27,3,185},{23,5,3048},{15,1,1146},{47,17,18},{22,35,882},{14,1,3048},{22,35,882},{13,1,1299},{13,1,1299},{13,1,1299},{29,19,890},{15,1,1017},{47,33,13},{47,33,13}, 
+{46,23,86},{40,33,652},{42,7,5},{11,1,45},{11,1,45},{11,1,45},{27,5,5},{39,11,648},{31,3,2},{31,3,2},{12,7,0},{22,1,648},{12,7,0},{21,17,882},{9,1,250},{25,17,1},{30,17,1},{21,17,882},{17,21,882},{30,17,1},{0,35,882},{17,21,882},{0,35,882},{29,0,890},{29,0,890},{29,0,890},{29,0,890},{47,1,5},{47,1,5},{47,1,5},{47,9,4},{26,37,4}, +{26,37,4},{41,1,4427},{27,1,2227},{27,1,1443},{13,17,925},{11,1,4878},{45,1,1331},{15,1,21},{47,35,1178},{44,1,3438},{26,19,810},{23,1,1746},{9,1,557},{25,1,89},{11,17,142},{23,17,2814},{13,1,1125},{15,1,5},{24,19,765},{45,17,2814},{24,19,765},{27,1,1443},{27,1,1443},{27,1,1443},{13,33,884},{29,1,1132},{15,1,21},{15,1,21},{31,37,69},{10,1,648},{28,21,5},{25,1,89}, +{25,1,89},{25,1,89},{41,19,1},{25,5,648},{45,17,2},{45,17,2},{28,21,1},{10,1,648},{28,21,1},{19,3,761},{7,1,274},{39,1,0},{31,1,0},{19,3,761},{39,1,761},{31,1,0},{0,19,765},{39,1,761},{0,19,765},{13,0,884},{13,0,884},{13,0,884},{13,0,884},{15,1,17},{15,1,17},{15,1,17},{31,23,4},{12,5,4},{12,5,4},{9,1,3933},{11,1,2185},{11,1,1509}, +{27,1,899},{41,1,4346},{13,1,1109},{45,1,42},{15,19,813},{30,1,2958},{12,3,510},{7,1,1386},{23,1,483},{39,1,128},{25,17,59},{3,25,2249},{11,1,870},{13,1,4},{10,3,482},{29,1,2249},{10,3,482},{11,1,1509},{11,1,1509},{11,1,1509},{27,17,890},{43,1,1275},{45,1,42},{45,1,42},{45,21,86},{12,1,675},{44,5,10},{39,1,128},{39,1,128},{39,1,128},{9,3,5},{9,3,648}, +{13,1,4},{13,1,4},{14,5,2},{31,3,648},{14,5,2},{3,3,481},{21,1,180},{7,1,1},{13,1,0},{3,3,481},{7,1,481},{13,1,0},{0,3,481},{7,1,481},{0,3,481},{27,0,890},{27,0,890},{27,0,890},{27,0,890},{45,1,26},{45,1,26},{45,1,26},{45,7,1},{44,35,2},{44,35,2},{39,1,3541},{9,1,2173},{25,1,1658},{11,1,893},{9,1,3801},{27,1,1035},{13,1,116}, 
+{45,3,523},{47,1,2574},{44,33,282},{21,1,1070},{7,1,427},{23,1,194},{39,1,16},{21,17,1769},{25,1,673},{27,1,26},{12,33,265},{17,21,1769},{12,33,265},{25,1,1658},{25,1,1658},{25,1,1658},{11,1,893},{11,1,1386},{13,1,116},{13,1,116},{29,35,69},{14,1,734},{14,19,6},{23,1,194},{23,1,194},{23,1,194},{39,17,1},{23,3,648},{27,1,26},{27,1,26},{14,19,5},{43,3,648}, +{14,19,5},{33,3,265},{35,1,89},{5,1,4},{25,1,0},{33,3,265},{21,1,265},{25,1,0},{0,33,265},{21,1,265},{0,33,265},{11,0,884},{11,0,884},{11,0,884},{11,0,884},{13,1,52},{13,1,52},{13,1,52},{29,21,4},{14,19,2},{14,19,2},{23,1,3267},{39,1,2157},{9,1,1764},{25,1,954},{39,1,3397},{11,1,950},{27,1,267},{13,33,297},{29,1,2313},{30,17,134},{5,1,838}, +{21,1,413},{37,1,260},{7,1,9},{17,39,1374},{23,1,518},{25,1,80},{44,17,125},{39,17,1374},{44,17,125},{9,1,1764},{9,1,1764},{9,1,1764},{25,1,954},{25,1,1569},{27,1,267},{27,1,267},{13,19,77},{47,1,846},{46,3,5},{37,1,260},{37,1,260},{37,1,260},{7,1,9},{3,25,648},{25,1,80},{25,1,80},{47,3,1},{29,1,648},{47,3,1},{33,1,113},{19,1,37},{19,1,1}, +{7,1,0},{33,1,113},{35,1,113},{7,1,0},{0,17,125},{35,1,113},{0,17,125},{25,0,890},{25,0,890},{25,0,890},{25,0,890},{27,1,98},{27,1,98},{27,1,98},{43,5,1},{46,3,4},{46,3,4},{7,1,3032},{7,1,2233},{23,1,1944},{39,1,1083},{7,1,3096},{9,1,1028},{41,1,506},{27,17,151},{13,1,2068},{47,17,31},{35,1,693},{5,1,425},{5,1,325},{21,1,52},{3,3,1032}, +{7,1,406},{23,1,157},{31,17,26},{3,3,1032},{31,17,26},{23,1,1944},{23,1,1944},{23,1,1944},{39,1,1083},{39,1,1723},{41,1,506},{41,1,506},{27,33,78},{45,1,1034},{47,17,6},{5,1,325},{5,1,325},{5,1,325},{21,1,52},{19,5,648},{23,1,157},{23,1,157},{31,17,1},{37,3,648},{31,17,1},{17,1,18},{17,1,10},{33,1,1},{19,1,0},{17,1,18},{33,1,18},{19,1,0}, 
+{0,17,26},{33,1,18},{0,17,26},{9,0,890},{9,0,890},{9,0,890},{9,0,890},{41,1,145},{41,1,145},{41,1,145},{27,19,4},{47,1,5},{47,1,5},{21,1,2710},{37,1,2207},{7,1,2004},{23,1,1173},{21,1,2775},{39,1,1036},{9,1,643},{41,1,81},{11,1,1900},{15,1,9},{3,1,524},{19,1,401},{19,1,365},{5,1,104},{33,33,771},{5,1,369},{37,1,221},{29,1,1},{33,33,771}, +{29,1,1},{7,1,2004},{7,1,2004},{7,1,2004},{23,1,1173},{23,1,1784},{9,1,643},{9,1,643},{41,17,61},{13,1,1091},{15,1,9},{19,1,365},{19,1,365},{19,1,365},{5,1,104},{33,5,578},{37,1,221},{37,1,221},{29,1,1},{23,1,578},{29,1,1},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{23,0,884}, +{23,0,884},{23,0,884},{23,0,884},{9,1,202},{9,1,202},{9,1,202},{41,3,1},{15,1,9},{15,1,9},{5,1,2214},{21,1,1813},{21,1,1669},{7,1,1109},{21,1,2151},{23,1,874},{23,1,585},{25,1,13},{25,1,1508},{13,1,34},{3,1,300},{3,1,225},{3,1,200},{19,1,61},{33,17,451},{35,1,192},{5,1,125},{11,1,0},{17,33,451},{11,1,0},{21,1,1669},{21,1,1669},{21,1,1669}, +{7,1,1109},{7,1,1460},{23,1,585},{23,1,585},{25,1,13},{11,1,872},{13,1,34},{3,1,200},{3,1,200},{3,1,200},{19,1,61},{17,5,338},{5,1,125},{5,1,125},{11,1,0},{5,17,338},{11,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{7,0,884},{7,0,884},{7,0,884},{7,0,884},{39,1,250}, +{39,1,250},{39,1,250},{25,17,4},{13,1,34},{13,1,34},{35,1,1818},{5,1,1521},{5,1,1421},{21,1,1028},{5,1,1675},{7,1,790},{7,1,565},{39,1,2},{23,1,1155},{11,1,100},{33,1,147},{33,1,107},{33,1,98},{3,1,29},{1,19,216},{19,1,88},{19,1,52},{23,1,1},{19,1,216},{23,1,1},{5,1,1421},{5,1,1421},{5,1,1421},{21,1,1028},{21,1,1158},{7,1,565},{7,1,565}, +{39,1,2},{25,1,723},{11,1,100},{33,1,98},{33,1,98},{33,1,98},{3,1,29},{33,17,162},{19,1,52},{19,1,52},{23,1,1},{17,33,162},{23,1,1},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{21,0,884},{21,0,884},{21,0,884},{21,0,884},{7,1,340},{7,1,340},{7,1,340},{39,1,2},{11,1,100}, 
+{11,1,100},{0,13,1568},{0,14,185},{0,10,5},{0,8,586},{0,44,3371},{0,24,2147},{0,8,1027},{0,36,2571},{0,6,3617},{0,20,2729},{0,13,1568},{0,14,185},{0,10,5},{0,8,586},{18,8,3371},{0,24,2147},{0,8,1027},{0,36,2571},{44,0,3371},{0,36,2571},{0,24,1},{0,24,1},{0,24,1},{0,20,1},{0,20,288},{0,34,100},{0,34,100},{0,2,164},{0,2,321},{0,2,189},{0,24,1}, +{0,24,1},{0,24,1},{0,20,1},{32,2,288},{0,34,100},{0,34,100},{0,2,164},{20,0,288},{0,2,164},{24,2,1568},{0,14,185},{0,10,5},{0,8,586},{24,2,1568},{13,0,1568},{0,8,586},{0,22,1576},{13,0,1568},{0,22,1576},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,25,1568},{0,46,89},{0,26,20}, +{0,40,505},{0,46,3968},{0,10,2316},{0,24,1078},{0,6,2880},{0,22,4305},{0,6,3105},{0,25,1568},{0,46,89},{0,26,20},{0,40,505},{18,10,3968},{0,10,2316},{0,24,1078},{0,6,2880},{46,0,3968},{0,6,2880},{0,12,1},{0,12,1},{0,12,1},{0,6,0},{0,6,514},{0,20,193},{0,20,193},{0,18,317},{0,18,556},{0,18,353},{0,12,1},{0,12,1},{0,12,1},{0,6,0},{2,2,512}, +{0,20,193},{0,20,193},{0,18,317},{2,2,512},{0,18,317},{16,11,1568},{0,46,89},{16,26,5},{0,40,505},{16,11,1568},{25,0,1568},{0,40,505},{0,38,1570},{25,0,1568},{0,38,1570},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,7,1568},{0,15,37},{16,12,76},{0,10,421},{0,15,4652},{0,42,2540},{0,10,1097}, +{0,38,3251},{0,8,5108},{0,22,3545},{0,7,1568},{0,15,37},{16,12,51},{0,10,421},{38,2,4651},{0,42,2540},{0,10,1097},{0,38,3251},{16,8,4651},{0,38,3251},{0,46,0},{0,46,0},{0,46,0},{0,38,1},{0,8,802},{0,6,289},{0,6,289},{0,34,493},{0,34,872},{0,34,557},{0,46,0},{0,46,0},{0,46,0},{0,38,1},{4,0,802},{0,6,289},{0,6,289},{0,34,493},{8,0,802}, 
+{0,34,493},{26,4,1568},{0,15,37},{32,12,2},{0,10,421},{26,4,1568},{7,0,1568},{0,10,421},{0,24,1576},{7,0,1568},{0,24,1576},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,19,1570},{0,29,8},{16,44,166},{0,12,338},{0,29,5420},{0,28,2755},{0,26,1208},{0,8,3659},{0,24,5988},{0,38,4028},{0,19,1570}, +{0,29,8},{16,44,141},{0,12,338},{40,0,5419},{0,28,2755},{0,26,1208},{0,8,3659},{46,2,5419},{0,8,3659},{0,45,0},{0,45,0},{0,45,0},{0,24,1},{0,24,1154},{0,22,433},{0,22,433},{0,4,697},{0,4,1270},{0,4,797},{0,45,0},{0,45,0},{0,45,0},{0,24,1},{18,4,1152},{0,22,433},{0,22,433},{0,4,697},{36,2,1152},{0,4,697},{14,0,1568},{0,29,8},{18,28,1}, +{0,12,338},{14,0,1568},{8,10,1568},{0,12,338},{0,40,1568},{8,10,1568},{0,40,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{16,17,1609},{16,43,45},{32,30,282},{0,28,325},{0,25,5419},{0,14,2514},{0,12,804},{0,24,3462},{0,10,6191},{0,8,3965},{32,3,1574},{32,13,17},{2,14,130},{16,28,312},{16,11,5419}, +{0,14,2514},{0,12,804},{0,24,3462},{25,0,5419},{0,24,3462},{16,43,41},{16,43,41},{16,43,41},{16,10,42},{0,28,1152},{0,40,292},{0,40,292},{0,36,596},{0,6,1379},{0,20,761},{32,29,4},{32,29,4},{32,29,4},{32,10,5},{2,24,1152},{0,40,292},{0,40,292},{0,36,596},{28,0,1152},{0,36,596},{30,2,1568},{0,27,4},{4,14,1},{0,28,244},{30,2,1568},{35,2,1568},{0,28,244}, +{0,26,1570},{35,2,1568},{0,26,1570},{16,0,41},{16,0,41},{16,0,41},{16,0,41},{0,4,1},{0,4,1},{0,4,1},{0,2,1},{0,32,13},{0,32,13},{32,1,1733},{32,11,159},{32,46,455},{16,14,377},{0,7,5419},{0,47,2294},{0,44,542},{0,10,3225},{0,12,6401},{0,10,3901},{2,17,1569},{2,27,6},{18,46,125},{32,14,296},{26,4,5419},{0,47,2294},{0,44,542},{0,10,3225},{7,0,5419}, 
+{0,10,3225},{32,11,158},{32,11,158},{32,11,158},{32,42,157},{0,46,1154},{0,26,170},{0,26,170},{0,22,482},{0,22,1491},{0,6,739},{2,27,2},{2,27,2},{2,27,2},{2,26,5},{22,2,1152},{0,26,170},{0,26,170},{0,22,482},{42,2,1152},{0,22,482},{47,2,1568},{32,11,2},{20,30,5},{0,14,185},{47,2,1568},{17,2,1568},{0,14,185},{0,42,1570},{17,2,1568},{0,42,1570},{32,0,157}, +{32,0,157},{32,0,157},{32,0,157},{0,22,0},{0,22,0},{0,22,0},{0,34,1},{0,18,45},{0,18,45},{2,1,2003},{32,9,330},{2,31,710},{32,30,517},{0,35,5420},{0,15,2081},{0,30,345},{0,42,3099},{0,28,6641},{0,10,3885},{34,1,1574},{34,11,17},{4,47,134},{18,30,312},{14,0,5419},{0,15,2081},{0,30,345},{0,42,3099},{8,10,5419},{0,42,3099},{32,23,317},{32,23,317},{32,23,317}, +{32,28,317},{0,29,1154},{0,28,80},{0,28,80},{0,8,388},{0,8,1665},{0,38,753},{34,27,4},{34,27,4},{34,27,4},{34,12,5},{8,2,1152},{0,28,80},{0,28,80},{0,8,388},{30,2,1152},{0,8,388},{15,2,1568},{18,41,4},{36,47,5},{0,46,125},{15,2,1568},{33,4,1568},{0,46,125},{0,28,1576},{33,4,1568},{0,28,1576},{32,0,317},{32,0,317},{32,0,317},{32,0,317},{0,40,0}, +{0,40,0},{0,40,0},{0,20,0},{0,34,113},{0,34,113},{18,1,2395},{2,23,570},{18,45,1046},{2,47,725},{0,1,5419},{0,13,1947},{0,47,185},{0,12,2880},{0,14,6964},{0,12,3841},{20,1,1593},{4,25,6},{20,15,131},{34,46,326},{0,1,5419},{0,13,1947},{0,47,185},{0,12,2880},{1,0,5419},{0,12,2880},{2,21,546},{2,21,546},{2,21,546},{2,14,545},{0,11,1152},{0,30,37},{0,30,37}, +{0,24,317},{0,10,1846},{0,8,797},{4,25,2},{4,25,2},{4,25,2},{4,28,2},{24,4,1152},{0,30,37},{0,30,37},{0,24,317},{11,0,1152},{0,24,317},{43,0,1568},{34,9,1},{22,31,1},{0,47,85},{43,0,1568},{19,6,1568},{0,47,85},{0,44,1570},{19,6,1568},{0,44,1570},{2,0,545},{2,0,545},{2,0,545},{2,0,545},{0,28,0},{0,28,0},{0,28,0},{0,6,1},{0,20,208}, 
+{0,20,208},{4,1,3030},{18,37,927},{34,29,1474},{18,15,1042},{16,1,5540},{0,27,1787},{0,15,57},{0,44,2668},{0,46,7299},{0,28,3836},{6,1,1612},{36,39,10},{6,29,136},{20,31,312},{47,2,5419},{0,27,1787},{0,15,57},{0,44,2668},{17,2,5419},{0,44,2668},{18,19,883},{18,19,883},{18,19,883},{18,46,882},{0,23,1152},{0,31,5},{0,31,5},{0,10,225},{0,26,2064},{0,10,901},{36,9,2}, +{36,9,2},{36,9,2},{36,14,1},{38,10,1152},{0,31,5},{0,31,5},{0,10,225},{23,0,1152},{0,10,225},{38,3,1568},{20,39,4},{8,45,5},{0,15,41},{38,3,1568},{35,8,1568},{0,15,41},{0,30,1570},{35,8,1568},{0,30,1570},{18,0,882},{18,0,882},{18,0,882},{18,0,882},{0,47,1},{0,47,1},{0,47,1},{0,8,1},{0,6,325},{0,6,325},{20,1,3734},{34,5,1332},{4,43,1958}, +{18,45,1395},{2,1,5808},{0,25,1676},{0,29,20},{0,30,2566},{0,15,7631},{0,44,3925},{22,1,1656},{6,7,9},{22,13,132},{6,45,309},{15,2,5419},{0,25,1676},{0,29,20},{0,30,2566},{33,4,5419},{0,30,2566},{34,33,1258},{34,33,1258},{34,33,1258},{34,47,1259},{0,5,1154},{0,45,5},{0,45,5},{0,42,180},{0,12,2275},{0,10,981},{6,23,4},{6,23,4},{6,23,4},{6,46,5},{28,2,1152}, +{16,15,1},{16,15,1},{0,42,180},{42,8,1152},{0,42,180},{41,2,1568},{36,7,2},{24,29,5},{0,29,20},{41,2,1568},{17,8,1568},{0,29,20},{0,46,1576},{17,8,1568},{0,46,1576},{34,0,1258},{34,0,1258},{34,0,1258},{34,0,1258},{0,29,0},{0,29,0},{0,29,0},{0,40,1},{0,38,482},{0,38,482},{36,1,4356},{4,19,1676},{20,27,2370},{34,13,1683},{18,1,6121},{0,39,1616},{16,43,36}, +{0,46,2397},{0,45,7815},{0,46,3837},{8,1,1715},{38,37,10},{8,43,131},{22,29,312},{43,0,5419},{0,39,1612},{16,43,27},{0,46,2393},{19,6,5419},{0,46,2393},{4,1,1577},{4,1,1577},{4,1,1577},{4,15,1576},{0,17,1156},{16,13,22},{16,13,22},{0,12,134},{0,44,2441},{0,12,971},{38,23,1},{38,23,1},{38,23,1},{38,47,2},{16,3,1152},{32,29,1},{32,29,1},{0,12,130},{17,0,1152}, 
+{0,12,130},{29,12,1568},{22,37,4},{10,43,5},{0,43,10},{29,12,1568},{26,13,1568},{0,43,10},{0,31,1576},{26,13,1568},{0,31,1576},{4,0,1576},{4,0,1576},{4,0,1576},{4,0,1576},{0,41,4},{0,41,4},{0,41,4},{0,26,4},{0,24,562},{0,24,562},{22,1,4616},{20,3,1677},{6,41,2378},{20,43,1681},{4,1,6311},{16,7,1620},{32,27,20},{0,31,2365},{0,43,7444},{0,47,3383},{40,1,1766}, +{8,5,11},{24,11,132},{8,13,314},{11,0,5419},{0,37,1577},{32,27,19},{0,31,2265},{5,8,5419},{0,31,2265},{20,1,1593},{20,1,1593},{20,1,1593},{20,29,1569},{32,1,1155},{32,27,19},{32,27,19},{16,44,125},{0,46,2150},{0,28,659},{8,21,2},{8,21,2},{8,21,2},{8,31,2},{30,4,1152},{18,13,1},{18,13,1},{0,44,72},{15,8,1152},{0,44,72},{37,0,1568},{38,5,2},{26,27,2}, +{0,27,2},{37,0,1568},{36,9,1568},{0,27,2},{0,15,1568},{36,9,1568},{0,15,1568},{20,0,1568},{20,0,1568},{20,0,1568},{20,0,1568},{32,41,1},{32,41,1},{32,41,1},{32,42,2},{0,26,388},{0,26,388},{8,1,4936},{6,17,1676},{22,9,2363},{36,11,1689},{36,1,6476},{2,37,1620},{18,41,24},{32,15,2370},{0,41,7036},{0,15,2859},{26,1,1851},{40,35,11},{10,25,136},{24,27,321},{41,2,5419}, +{16,5,1571},{18,41,20},{0,15,2130},{17,8,5419},{0,15,2130},{6,1,1619},{6,1,1619},{6,1,1619},{6,13,1571},{18,1,1158},{18,11,21},{18,11,21},{2,30,125},{0,31,1905},{0,14,425},{40,5,2},{40,5,2},{40,5,2},{40,45,2},{45,0,1152},{4,27,5},{4,27,5},{0,30,41},{27,8,1152},{0,30,41},{21,2,1568},{24,19,2},{12,41,1},{16,41,1},{21,2,1568},{24,9,1568},{16,41,1}, +{0,29,1570},{24,9,1568},{0,29,1570},{6,0,1570},{6,0,1570},{6,0,1570},{6,0,1570},{18,25,4},{18,25,4},{18,25,4},{18,28,5},{0,28,232},{0,28,232},{24,1,5154},{38,1,1680},{8,39,2378},{22,41,1683},{22,1,6708},{18,5,1628},{34,25,24},{2,29,2363},{0,9,6740},{0,45,2553},{12,1,1964},{10,3,8},{26,9,139},{40,41,309},{29,12,5419},{2,35,1572},{4,25,21},{0,29,2027},{26,13,5419}, 
+{0,29,2027},{38,1,1664},{38,1,1664},{38,1,1664},{22,27,1570},{4,1,1185},{34,25,20},{34,25,20},{34,46,130},{0,45,1721},{0,46,218},{10,19,4},{10,19,4},{10,19,4},{10,13,8},{29,2,1152},{20,11,5},{20,11,5},{0,47,25},{35,6,1152},{0,47,25},{35,2,1568},{10,3,4},{28,25,5},{18,25,4},{35,2,1568},{34,5,1568},{18,25,4},{0,13,1570},{34,5,1568},{0,13,1570},{22,0,1570}, +{22,0,1570},{22,0,1570},{22,0,1570},{34,23,1},{34,23,1},{34,23,1},{34,14,4},{0,14,149},{0,14,149},{26,1,5444},{8,1,1724},{24,23,2378},{38,9,1689},{8,1,6964},{4,35,1620},{20,39,36},{34,13,2370},{0,23,6513},{0,13,2244},{44,1,2099},{42,33,11},{12,23,136},{26,25,321},{37,0,5419},{18,3,1571},{20,39,27},{0,43,1924},{36,9,5419},{0,43,1924},{8,1,1720},{8,1,1720},{8,1,1720}, +{8,11,1577},{36,1,1224},{20,9,19},{20,9,19},{4,47,122},{0,43,1548},{0,31,90},{42,19,1},{42,19,1},{42,19,1},{42,43,2},{43,2,1152},{6,25,5},{6,25,5},{0,31,9},{21,8,1152},{0,31,9},{46,1,1568},{26,17,2},{14,39,2},{18,39,1},{46,1,1568},{26,7,1568},{18,39,1},{0,27,1576},{26,7,1568},{0,27,1576},{8,0,1576},{8,0,1576},{8,0,1576},{8,0,1576},{20,23,9}, +{20,23,9},{20,23,9},{4,30,10},{0,47,73},{0,47,73},{42,1,5700},{40,1,1798},{40,37,2375},{24,39,1683},{40,1,7153},{20,3,1626},{36,23,24},{4,27,2386},{0,21,6243},{0,27,1980},{30,1,2210},{12,1,8},{28,7,132},{42,39,323},{5,0,5419},{4,33,1572},{6,23,21},{0,27,1836},{0,5,5419},{0,27,1836},{40,1,1762},{40,1,1762},{40,1,1762},{24,25,1570},{6,1,1275},{36,23,20},{36,23,20}, +{36,15,125},{0,41,1395},{0,45,25},{12,17,4},{12,17,4},{12,17,4},{12,11,8},{38,1,1152},{22,9,4},{22,9,4},{0,45,0},{43,28,1152},{0,45,0},{31,1,1568},{42,1,2},{30,23,5},{4,23,5},{31,1,1568},{32,1,1568},{4,23,5},{0,11,1570},{32,1,1568},{0,11,1570},{24,0,1570},{24,0,1570},{24,0,1570},{24,0,1570},{36,21,1},{36,21,1},{36,21,1},{36,46,4},{0,15,25}, 
+{0,15,25},{28,1,6116},{26,1,1980},{26,5,2386},{40,7,1689},{26,1,7408},{6,17,1620},{22,37,24},{36,41,2375},{0,35,6044},{0,41,1798},{47,1,2385},{44,1,25},{14,37,125},{28,23,315},{35,2,5419},{20,1,1569},{22,37,20},{0,41,1762},{34,5,5419},{0,41,1762},{26,1,1836},{26,1,1836},{26,1,1836},{10,9,1574},{38,1,1363},{22,7,21},{22,7,21},{6,29,132},{0,9,1284},{0,13,8},{44,1,0}, +{44,1,0},{44,1,0},{44,41,0},{39,0,1152},{8,23,4},{8,23,4},{16,13,4},{9,28,1152},{16,13,4},{17,6,1568},{14,1,25},{47,37,4},{20,37,1},{17,6,1568},{20,1,1568},{20,37,1},{0,25,1570},{20,1,1568},{0,25,1570},{10,0,1570},{10,0,1570},{10,0,1570},{10,0,1570},{22,5,5},{22,5,5},{22,5,5},{22,31,5},{0,43,2},{0,43,2},{14,1,6434},{12,1,2244},{12,35,2370}, +{26,21,1685},{12,1,7724},{38,1,1620},{38,21,36},{22,25,2378},{0,3,5839},{0,9,1724},{31,1,2546},{30,1,90},{46,5,122},{14,7,322},{46,1,5419},{22,1,1602},{8,21,19},{0,9,1720},{26,7,5419},{0,9,1720},{42,1,1924},{42,1,1924},{42,1,1924},{26,23,1577},{24,1,1414},{38,21,27},{38,21,27},{22,13,136},{0,7,1218},{32,43,11},{30,1,9},{30,1,9},{30,1,9},{14,9,10},{23,2,1152}, +{24,7,5},{24,7,5},{18,43,1},{20,9,1152},{18,43,1},{27,3,1568},{46,1,73},{31,5,10},{36,5,9},{27,3,1568},{39,9,1568},{36,5,9},{0,9,1576},{39,9,1568},{0,9,1576},{26,0,1576},{26,0,1576},{26,0,1576},{26,0,1576},{38,19,1},{38,19,1},{38,19,1},{38,15,2},{16,27,2},{16,27,2},{30,1,6786},{44,1,2553},{28,3,2363},{42,5,1689},{28,1,8052},{24,1,1671},{24,35,24}, +{38,9,2378},{0,1,5715},{0,39,1680},{45,1,2675},{47,1,218},{47,35,130},{30,21,315},{31,1,5419},{24,1,1667},{24,35,20},{0,39,1664},{32,1,5419},{0,39,1664},{28,1,2027},{28,1,2027},{28,1,2027},{12,7,1571},{10,1,1521},{24,5,21},{24,5,21},{8,27,139},{0,21,1169},{2,11,8},{46,1,25},{46,1,25},{46,1,25},{46,39,1},{37,2,1152},{10,21,5},{10,21,5},{18,11,4},{34,7,1152}, 
+{18,11,4},{39,7,1568},{15,1,149},{15,35,4},{22,35,1},{39,7,1568},{26,1,1568},{22,35,1},{0,23,1570},{26,1,1568},{0,23,1570},{12,0,1570},{12,0,1570},{12,0,1570},{12,0,1570},{24,19,4},{24,19,4},{24,19,4},{24,29,5},{2,11,4},{2,11,4},{47,1,7186},{14,1,2859},{14,33,2370},{28,35,1683},{30,1,8313},{40,1,1819},{40,19,24},{8,23,2363},{16,1,5820},{16,7,1676},{13,1,2892}, +{15,1,425},{31,3,125},{47,35,321},{45,1,5419},{26,1,1796},{10,19,21},{0,7,1619},{27,9,5419},{0,7,1619},{14,1,2130},{14,1,2130},{14,1,2130},{28,21,1570},{42,1,1608},{40,19,20},{40,19,20},{24,11,136},{0,19,1155},{34,41,11},{31,1,41},{31,1,41},{31,1,41},{47,7,5},{44,1,1152},{26,5,5},{26,5,5},{4,41,2},{26,9,1152},{4,41,2},{25,1,1568},{29,1,232},{29,19,5}, +{24,19,4},{25,1,1568},{33,9,1568},{24,19,4},{0,7,1570},{33,9,1568},{0,7,1570},{28,0,1570},{28,0,1570},{28,0,1570},{28,0,1570},{40,17,1},{40,17,1},{40,17,1},{40,13,1},{18,25,2},{18,25,2},{15,1,7706},{46,1,3383},{30,1,2365},{14,3,1685},{47,1,8695},{42,1,2092},{26,33,20},{40,7,2378},{4,1,6099},{2,21,1677},{27,1,3152},{29,1,659},{45,17,125},{31,19,315},{27,3,5419}, +{44,1,1993},{26,33,19},{0,21,1593},{39,9,5419},{0,21,1593},{30,1,2265},{30,1,2265},{30,1,2265},{14,5,1572},{28,1,1764},{26,33,19},{26,33,19},{10,25,132},{32,3,1155},{4,9,11},{45,1,72},{45,1,72},{45,1,72},{15,37,1},{31,5,1152},{12,19,1},{12,19,1},{20,9,2},{14,9,1152},{20,9,2},{23,17,1568},{27,1,388},{43,33,2},{40,33,1},{23,17,1568},{45,17,1568},{40,33,1}, +{0,21,1568},{45,17,1568},{0,21,1568},{14,0,1568},{14,0,1568},{14,0,1568},{14,0,1568},{26,1,2},{26,1,2},{26,1,2},{26,27,2},{4,39,2},{4,39,2},{45,1,8016},{47,1,3837},{47,1,2397},{30,17,1685},{31,1,9093},{28,1,2484},{42,17,36},{26,21,2370},{36,1,6379},{18,5,1676},{11,1,3345},{13,1,971},{13,1,134},{15,33,326},{39,7,5419},{30,1,2185},{12,17,22},{0,5,1577},{26,1,5419}, 
+{0,5,1577},{47,1,2393},{47,1,2393},{47,1,2393},{30,19,1576},{14,1,1890},{42,17,27},{42,17,27},{42,9,131},{2,17,1152},{36,39,10},{13,1,130},{13,1,130},{13,1,130},{45,5,5},{17,2,1152},{28,33,1},{28,33,1},{22,39,1},{16,1,1152},{22,39,1},{21,3,1568},{25,1,562},{27,1,4},{40,1,4},{21,3,1568},{43,1,1568},{40,1,4},{0,5,1576},{43,1,1568},{0,5,1576},{30,0,1576}, +{30,0,1576},{30,0,1576},{30,0,1576},{42,1,10},{42,1,10},{42,1,10},{42,11,5},{36,23,4},{36,23,4},{13,1,7700},{45,1,3925},{31,1,2566},{47,17,1637},{45,1,8460},{14,1,2285},{28,1,20},{42,5,1958},{38,1,5932},{4,35,1332},{9,1,2987},{11,1,981},{43,1,180},{29,17,219},{9,3,4803},{47,1,1925},{44,1,5},{32,35,1258},{31,3,4803},{32,35,1258},{31,1,2566},{31,1,2566},{31,1,2566}, +{47,3,1577},{30,1,2054},{28,1,20},{28,1,20},{12,23,132},{34,1,1155},{6,7,9},{43,1,180},{43,1,180},{43,1,180},{13,35,1},{29,3,1152},{14,17,1},{14,17,1},{22,7,4},{43,9,1152},{22,7,4},{21,1,1250},{39,1,482},{41,1,1},{28,1,0},{21,1,1250},{41,1,1250},{28,1,0},{0,35,1258},{41,1,1250},{0,35,1258},{47,0,1576},{47,0,1576},{47,0,1576},{47,0,1576},{28,1,20}, +{28,1,20},{28,1,20},{28,25,5},{6,37,2},{6,37,2},{43,1,7164},{29,1,3836},{45,1,2668},{31,1,1579},{13,1,7780},{30,1,2041},{14,1,57},{28,35,1474},{24,1,5308},{36,19,927},{9,1,2475},{11,1,901},{11,1,225},{43,17,110},{37,7,4056},{15,1,1590},{30,1,5},{18,19,883},{30,1,4056},{18,19,883},{45,1,2668},{45,1,2668},{45,1,2668},{31,17,1570},{47,1,2214},{14,1,57},{14,1,57}, +{28,7,136},{36,1,1186},{38,37,10},{11,1,225},{11,1,225},{11,1,225},{43,3,5},{39,11,1152},{30,1,5},{30,1,5},{8,37,2},{22,1,1152},{8,37,2},{5,1,882},{7,1,325},{9,1,1},{46,1,1},{5,1,882},{9,1,882},{46,1,1},{0,19,882},{9,1,882},{0,19,882},{31,0,1570},{31,0,1570},{31,0,1570},{31,0,1570},{14,1,41},{14,1,41},{14,1,41},{44,9,5},{38,21,4}, 
+{38,21,4},{11,1,6493},{13,1,3841},{13,1,2880},{45,1,1574},{43,1,7071},{47,1,1822},{46,1,185},{44,19,1046},{26,1,4761},{22,3,570},{23,1,2034},{9,1,797},{25,1,317},{11,17,34},{23,17,3318},{13,1,1221},{31,1,37},{20,3,546},{45,17,3318},{20,3,546},{13,1,2880},{13,1,2880},{13,1,2880},{45,1,1574},{45,1,2443},{46,1,185},{46,1,185},{14,21,131},{38,1,1275},{24,5,6},{25,1,317}, +{25,1,317},{25,1,317},{11,33,1},{25,5,1152},{31,1,37},{31,1,37},{24,5,2},{10,1,1152},{24,5,2},{3,3,545},{21,1,208},{7,1,1},{29,1,0},{3,3,545},{23,1,545},{29,1,0},{0,3,545},{23,1,545},{0,3,545},{45,0,1570},{45,0,1570},{45,0,1570},{45,0,1570},{46,1,85},{46,1,85},{46,1,85},{30,23,1},{8,35,1},{8,35,1},{41,1,6095},{11,1,3885},{43,1,3099}, +{13,1,1636},{11,1,6422},{45,1,1767},{31,1,345},{30,3,710},{44,1,4358},{8,33,330},{7,1,1698},{39,1,753},{9,1,388},{41,1,9},{3,25,2753},{27,1,1018},{29,1,80},{22,33,317},{29,1,2753},{22,33,317},{43,1,3099},{43,1,3099},{43,1,3099},{13,1,1636},{29,1,2628},{31,1,345},{31,1,345},{46,5,134},{40,1,1395},{10,35,17},{9,1,388},{9,1,388},{9,1,388},{41,1,9},{9,3,1152}, +{29,1,80},{29,1,80},{26,35,4},{31,3,1152},{26,35,4},{33,3,313},{35,1,113},{21,1,0},{41,1,0},{33,3,313},{21,1,313},{41,1,0},{0,33,317},{21,1,313},{0,33,317},{29,0,1576},{29,0,1576},{29,0,1576},{29,0,1576},{47,1,125},{47,1,125},{47,1,125},{46,37,5},{40,19,4},{40,19,4},{9,1,5661},{11,1,3901},{11,1,3225},{43,1,1739},{41,1,5978},{29,1,1685},{45,1,542}, +{47,33,455},{14,1,3994},{10,33,159},{21,1,1418},{7,1,739},{23,1,482},{9,1,16},{21,17,2273},{11,1,878},{27,1,170},{10,33,158},{17,21,2273},{10,33,158},{11,1,3225},{11,1,3225},{11,1,3225},{43,1,1739},{43,1,2875},{45,1,542},{45,1,542},{47,19,125},{42,1,1584},{26,3,6},{23,1,482},{23,1,482},{23,1,482},{9,1,16},{23,3,1152},{27,1,170},{27,1,170},{26,3,2},{43,3,1152}, 
+{26,3,2},{17,3,145},{19,1,45},{35,1,1},{23,1,0},{17,3,145},{35,1,145},{23,1,0},{0,33,157},{35,1,145},{0,33,157},{43,0,1570},{43,0,1570},{43,0,1570},{43,0,1570},{15,1,185},{15,1,185},{15,1,185},{31,21,5},{10,33,2},{10,33,2},{39,1,5341},{9,1,3965},{25,1,3462},{11,1,1889},{9,1,5505},{13,1,1765},{13,1,804},{31,33,282},{47,1,3750},{42,17,45},{5,1,1210}, +{21,1,761},{37,1,596},{23,1,73},{17,39,1878},{9,1,781},{41,1,292},{42,17,41},{39,17,1878},{42,17,41},{25,1,3462},{25,1,3462},{25,1,3462},{11,1,1889},{11,1,3058},{13,1,804},{13,1,804},{15,3,130},{44,1,1798},{12,33,17},{37,1,596},{37,1,596},{37,1,596},{23,1,73},{3,25,1152},{41,1,292},{41,1,292},{28,33,4},{29,1,1152},{28,33,4},{1,3,41},{33,1,13},{3,1,1}, +{5,1,1},{1,3,41},{3,1,41},{5,1,1},{0,17,41},{3,1,41},{0,17,41},{27,0,1570},{27,0,1570},{27,0,1570},{27,0,1570},{29,1,244},{29,1,244},{29,1,244},{15,5,1},{26,1,4},{26,1,4},{23,1,5128},{39,1,4028},{9,1,3659},{25,1,2169},{39,1,5148},{11,1,1917},{27,1,1208},{45,17,166},{29,1,3628},{28,1,8},{35,1,1089},{5,1,797},{5,1,697},{37,1,221},{3,3,1536}, +{7,1,706},{23,1,433},{44,1,0},{3,3,1536},{44,1,0},{9,1,3659},{9,1,3659},{9,1,3659},{25,1,2169},{25,1,3366},{27,1,1208},{27,1,1208},{45,17,141},{47,1,2105},{28,1,8},{5,1,697},{5,1,697},{5,1,697},{37,1,221},{19,5,1152},{23,1,433},{23,1,433},{44,1,0},{37,3,1152},{44,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0}, +{0,1,0},{1,1,0},{0,1,0},{41,0,1568},{41,0,1568},{41,0,1568},{41,0,1568},{13,1,338},{13,1,338},{13,1,338},{29,19,1},{28,1,8},{28,1,8},{7,1,4416},{23,1,3545},{39,1,3251},{9,1,2027},{23,1,4372},{41,1,1771},{11,1,1097},{13,17,76},{29,1,2956},{14,1,37},{19,1,753},{35,1,557},{35,1,493},{21,1,148},{17,5,1067},{21,1,513},{7,1,289},{47,1,0},{5,17,1067}, 
+{47,1,0},{39,1,3251},{39,1,3251},{39,1,3251},{9,1,2027},{9,1,2819},{11,1,1097},{11,1,1097},{13,17,51},{47,1,1769},{14,1,37},{35,1,493},{35,1,493},{35,1,493},{21,1,148},{5,1,802},{7,1,289},{7,1,289},{47,1,0},{9,1,802},{47,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{25,0,1576}, +{25,0,1576},{25,0,1576},{25,0,1576},{11,1,421},{11,1,421},{11,1,421},{13,33,2},{14,1,37},{14,1,37},{21,1,3786},{7,1,3105},{7,1,2880},{23,1,1929},{7,1,3648},{9,1,1532},{25,1,1078},{27,1,20},{13,1,2452},{47,1,89},{3,1,456},{19,1,353},{19,1,317},{35,1,89},{33,33,683},{5,1,321},{21,1,193},{13,1,1},{33,33,683},{13,1,1},{7,1,2880},{7,1,2880},{7,1,2880}, +{23,1,1929},{39,1,2411},{25,1,1078},{25,1,1078},{27,1,20},{29,1,1451},{47,1,89},{19,1,317},{19,1,317},{19,1,317},{35,1,89},{3,3,512},{21,1,193},{21,1,193},{13,1,1},{3,3,512},{13,1,1},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{39,0,1570},{39,0,1570},{39,0,1570},{39,0,1570},{41,1,505}, +{41,1,505},{41,1,505},{27,17,5},{47,1,89},{47,1,89},{21,1,3210},{21,1,2729},{37,1,2571},{7,1,1825},{21,1,3015},{39,1,1388},{9,1,1027},{11,1,5},{11,1,2032},{15,1,185},{3,1,264},{3,1,189},{3,1,164},{19,1,45},{17,3,384},{35,1,164},{35,1,100},{25,1,1},{35,1,384},{25,1,1},{37,1,2571},{37,1,2571},{37,1,2571},{7,1,1825},{7,1,2112},{9,1,1027},{9,1,1027}, +{11,1,5},{13,1,1235},{15,1,185},{3,1,164},{3,1,164},{3,1,164},{19,1,45},{33,3,288},{35,1,100},{35,1,100},{25,1,1},{21,1,288},{25,1,1},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{23,0,1576},{23,0,1576},{23,0,1576},{23,0,1576},{9,1,586},{9,1,586},{9,1,586},{11,1,5},{15,1,185}, 
+{15,1,185},{0,37,2665},{0,45,274},{0,44,8},{0,26,1025},{0,15,5885},{0,12,3666},{0,10,1742},{0,38,4406},{0,8,6359},{0,22,4730},{0,37,2665},{0,45,274},{0,44,8},{0,26,1025},{22,4,5885},{0,12,3666},{0,10,1742},{0,38,4406},{15,0,5885},{0,38,4406},{0,28,0},{0,28,0},{0,28,0},{0,6,1},{0,22,545},{0,20,208},{0,20,208},{0,18,340},{0,18,593},{0,18,376},{0,28,0}, +{0,28,0},{0,28,0},{0,6,1},{2,2,545},{0,20,208},{0,20,208},{0,18,340},{22,0,545},{0,18,340},{28,0,2665},{0,45,274},{0,44,8},{0,26,1025},{28,0,2665},{37,0,2665},{0,26,1025},{0,24,2665},{37,0,2665},{0,24,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,3,2665},{0,43,169},{0,14,17}, +{0,12,865},{0,13,6669},{0,28,3898},{0,42,1825},{0,8,4826},{0,24,7263},{0,38,5231},{0,3,2665},{0,43,169},{0,14,17},{0,12,865},{24,2,6669},{0,28,3898},{0,42,1825},{0,8,4826},{13,0,6669},{0,8,4826},{0,46,1},{0,46,1},{0,46,1},{0,8,4},{0,8,841},{0,6,306},{0,6,306},{0,34,520},{0,34,917},{0,34,584},{0,46,1},{0,46,1},{0,46,1},{0,8,4},{4,0,841}, +{0,6,306},{0,6,306},{0,34,520},{8,0,841},{0,34,520},{44,2,2665},{0,43,169},{16,14,2},{0,12,865},{44,2,2665},{3,0,2665},{0,12,865},{0,40,2669},{3,0,2665},{0,40,2669},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,1,2669},{0,41,85},{16,46,79},{0,44,725},{0,11,7538},{0,14,4214},{0,12,1842}, +{0,8,5354},{0,10,8241},{0,8,5795},{0,1,2669},{0,41,85},{16,46,54},{0,44,725},{24,4,7538},{0,14,4214},{0,12,1842},{0,8,5354},{11,0,7538},{0,8,5354},{0,29,1},{0,29,1},{0,29,1},{0,24,4},{0,40,1201},{0,22,458},{0,22,458},{0,4,730},{0,4,1325},{0,4,830},{0,29,1},{0,29,1},{0,29,1},{0,24,4},{20,0,1201},{0,22,458},{0,22,458},{0,4,730},{40,0,1201}, 
+{0,4,730},{30,2,2665},{0,41,85},{32,46,5},{0,44,725},{30,2,2665},{35,2,2665},{0,44,725},{0,26,2665},{35,2,2665},{0,26,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{16,1,2799},{0,9,29},{16,31,167},{0,14,650},{0,9,8493},{0,46,4485},{0,44,1934},{0,40,5878},{0,26,9333},{0,40,6503},{16,1,2774}, +{0,9,29},{16,31,142},{0,14,650},{42,0,8493},{0,46,4485},{0,44,1934},{0,40,5878},{9,0,8493},{0,40,5878},{0,11,0},{0,11,0},{0,11,0},{0,26,1},{0,26,1625},{0,24,629},{0,24,629},{0,20,986},{0,20,1793},{0,20,1130},{0,11,0},{0,11,0},{0,11,0},{0,26,1},{36,0,1625},{0,24,629},{0,24,629},{0,20,986},{26,0,1625},{0,20,986},{2,1,2665},{0,9,29},{18,47,1}, +{0,14,650},{2,1,2665},{1,2,2665},{0,14,650},{0,42,2669},{1,2,2665},{0,42,2669},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{32,1,3171},{0,7,4},{16,15,315},{0,46,514},{0,7,9669},{0,31,4865},{0,46,2114},{0,10,6505},{0,12,10663},{0,10,7181},{32,1,3050},{0,7,4},{32,15,274},{0,46,514},{26,4,9669}, +{0,31,4865},{0,46,2114},{0,10,6505},{7,0,9669},{0,10,6505},{0,23,0},{0,23,0},{0,23,0},{0,12,1},{0,28,2178},{0,10,820},{0,10,820},{0,6,1348},{0,6,2405},{0,20,1553},{0,23,0},{0,23,0},{0,23,0},{0,12,1},{2,24,2178},{0,10,820},{0,10,820},{0,6,1348},{28,0,2178},{0,6,1348},{29,0,2665},{0,7,4},{4,31,9},{0,46,514},{29,0,2665},{17,4,2665},{0,46,514}, +{0,28,2665},{17,4,2665},{0,28,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,1,3529},{16,21,35},{32,29,444},{0,31,474},{0,19,9670},{0,45,4529},{0,46,1634},{0,42,6265},{0,28,10919},{0,26,7149},{18,1,3146},{16,21,10},{2,29,285},{0,31,474},{14,0,9669},{0,45,4529},{0,46,1634},{0,42,6265},{8,10,9669}, 
+{0,42,6265},{16,21,34},{16,21,34},{16,21,34},{16,44,34},{0,46,2180},{0,12,610},{0,12,610},{0,22,1184},{0,22,2517},{0,22,1473},{16,21,9},{16,21,9},{16,21,9},{16,44,9},{22,2,2178},{0,12,610},{0,12,610},{0,22,1184},{42,2,2178},{0,22,1184},{13,2,2665},{16,21,1},{20,45,0},{0,31,410},{13,2,2665},{3,6,2665},{0,31,410},{0,14,2677},{3,6,2665},{0,14,2677},{16,0,34}, +{16,0,34},{16,0,34},{16,0,34},{0,34,0},{0,34,0},{0,34,0},{0,32,1},{0,32,10},{0,32,10},{18,1,3971},{32,35,150},{2,43,644},{16,45,534},{0,1,9669},{0,43,4214},{0,31,1226},{0,28,5954},{0,14,11246},{0,12,6951},{4,1,3285},{2,5,4},{34,13,274},{32,15,483},{0,1,9669},{0,43,4214},{0,31,1226},{0,28,5954},{1,0,9669},{0,28,5954},{16,3,146},{16,3,146},{16,3,146}, +{16,30,147},{0,29,2180},{0,28,458},{0,28,458},{0,8,1018},{0,8,2691},{0,38,1419},{2,21,0},{2,21,0},{2,21,0},{2,14,1},{8,2,2178},{0,28,458},{0,28,458},{0,8,1018},{30,2,2178},{0,8,1018},{27,2,2665},{2,5,4},{6,29,9},{0,15,338},{27,2,2665},{35,8,2665},{0,15,338},{0,30,2665},{35,8,2665},{0,30,2665},{16,0,146},{16,0,146},{16,0,146},{16,0,146},{0,6,1}, +{0,6,1},{0,6,1},{0,18,4},{0,18,40},{0,18,40},{34,1,4603},{32,3,307},{2,11,925},{32,29,667},{16,1,9779},{0,41,3905},{0,45,913},{0,44,5653},{0,46,11530},{0,44,6878},{36,1,3390},{18,19,11},{4,27,269},{2,29,474},{18,3,9669},{0,41,3905},{0,45,913},{0,44,5653},{45,8,9669},{0,44,5653},{32,1,291},{32,1,291},{32,1,291},{32,47,291},{0,11,2178},{0,46,305},{0,46,305}, +{0,40,925},{0,10,2872},{0,8,1427},{34,5,10},{34,5,10},{34,5,10},{18,46,9},{24,4,2178},{0,46,305},{0,46,305},{0,40,925},{11,0,2178},{0,40,925},{8,1,2665},{18,19,2},{22,43,0},{0,29,265},{8,1,2665},{1,8,2665},{0,29,265},{0,46,2677},{1,8,2665},{0,46,2677},{32,0,290},{32,0,290},{32,0,290},{32,0,290},{0,24,1},{0,24,1},{0,24,1},{0,20,1},{0,34,100}, 
+{0,34,100},{20,1,5538},{2,17,582},{34,41,1298},{2,43,882},{2,1,10086},{0,9,3618},{0,43,581},{0,30,5418},{0,15,11905},{0,14,6895},{22,1,3586},{4,3,3},{36,11,273},{34,43,478},{15,2,9669},{0,9,3618},{0,43,581},{0,30,5418},{33,4,9669},{0,30,5418},{2,1,570},{2,1,570},{2,1,570},{2,15,549},{0,23,2178},{0,15,185},{0,15,185},{0,10,765},{0,26,3090},{0,10,1441},{4,3,2}, +{4,3,2},{4,3,2},{4,47,2},{38,10,2178},{0,15,185},{0,15,185},{0,10,765},{23,0,2178},{0,10,765},{23,0,2665},{4,3,2},{8,11,8},{0,43,181},{23,0,2665},{18,9,2665},{0,43,181},{0,31,2665},{18,9,2665},{0,31,2665},{2,0,545},{2,0,545},{2,0,545},{2,0,545},{0,28,0},{0,28,0},{0,28,0},{0,6,1},{0,20,208},{0,20,208},{36,1,6378},{18,1,926},{4,9,1734}, +{18,27,1131},{18,1,10495},{0,7,3434},{0,27,353},{0,47,5186},{0,45,12293},{0,46,6789},{8,1,3785},{20,17,10},{6,25,270},{4,27,491},{43,0,9669},{0,7,3434},{0,27,353},{0,47,5186},{19,6,9669},{0,47,5186},{18,1,922},{18,1,922},{18,1,922},{18,45,842},{0,5,2180},{0,29,106},{0,29,106},{0,42,666},{0,12,3301},{0,26,1514},{36,3,4},{36,3,4},{36,3,4},{36,15,8},{28,2,2178}, +{0,29,106},{0,29,106},{0,42,666},{42,8,2178},{0,42,666},{7,2,2665},{20,17,1},{24,41,2},{0,11,136},{7,2,2665},{2,7,2665},{0,11,136},{0,15,2669},{2,7,2665},{0,15,2669},{18,0,841},{18,0,841},{18,0,841},{18,0,841},{0,46,1},{0,46,1},{0,46,1},{0,8,4},{0,6,306},{0,6,306},{6,1,7490},{34,1,1446},{4,39,2218},{18,41,1450},{34,1,11103},{0,21,3209},{0,41,187}, +{0,31,4909},{0,43,12686},{0,31,6845},{40,1,3938},{6,1,3},{38,9,273},{36,41,478},{11,0,9669},{0,21,3209},{0,41,187},{0,31,4909},{5,8,9669},{0,31,4909},{34,1,1382},{34,1,1382},{34,1,1382},{34,13,1213},{0,17,2178},{0,27,40},{0,27,40},{0,28,544},{0,44,3603},{0,12,1541},{6,1,2},{6,1,2},{6,1,2},{6,45,2},{16,3,2178},{0,27,40},{0,27,40},{0,28,544},{17,0,2178}, 
+{0,28,544},{21,2,2665},{6,1,2},{10,9,8},{0,25,90},{21,2,2665},{16,5,2665},{0,25,90},{0,29,2665},{16,5,2665},{0,29,2665},{34,0,1213},{34,0,1213},{34,0,1213},{34,0,1213},{0,29,1},{0,29,1},{0,29,1},{0,24,4},{0,22,458},{0,22,458},{22,1,8710},{20,1,2145},{20,7,2826},{34,25,1850},{20,1,11913},{0,35,3073},{0,9,86},{0,15,4721},{0,41,13118},{0,15,6837},{26,1,4118}, +{38,1,29},{8,23,258},{6,25,491},{24,3,9669},{0,35,3073},{0,9,86},{0,15,4721},{3,24,9669},{0,15,4721},{4,1,1973},{4,1,1973},{4,1,1973},{34,27,1630},{16,1,2221},{0,25,10},{0,25,10},{0,44,450},{0,46,3876},{0,28,1633},{38,1,4},{38,1,4},{38,1,4},{38,29,8},{30,4,2178},{0,25,10},{0,25,10},{0,44,450},{15,8,2178},{0,44,450},{14,1,2665},{8,1,25},{26,39,1}, +{0,9,61},{14,1,2665},{4,5,2665},{0,9,61},{0,13,2669},{4,5,2665},{0,13,2669},{34,0,1629},{34,0,1629},{34,0,1629},{34,0,1629},{0,11,0},{0,11,0},{0,11,0},{0,26,1},{0,24,629},{0,24,629},{8,1,10335},{36,1,3100},{36,21,3546},{4,39,2361},{36,1,12883},{0,33,2901},{0,23,25},{0,13,4485},{0,25,13589},{0,45,6982},{42,1,4353},{24,1,117},{40,37,270},{38,39,491},{29,12,9669}, +{0,33,2901},{0,23,25},{0,13,4485},{26,13,9669},{0,13,4485},{36,1,2739},{36,1,2739},{36,1,2739},{4,41,2181},{32,1,2427},{0,39,4},{0,39,4},{0,46,353},{0,31,4242},{0,14,1830},{24,1,17},{24,1,17},{24,1,17},{8,43,2},{45,0,2178},{16,39,2},{16,39,2},{0,46,353},{27,8,2178},{0,46,353},{17,0,2665},{40,1,73},{12,7,5},{0,23,25},{17,0,2665},{18,3,2665},{0,23,25}, +{0,27,2665},{18,3,2665},{0,27,2665},{4,0,2180},{4,0,2180},{4,0,2180},{4,0,2180},{0,23,0},{0,23,0},{0,23,0},{0,12,1},{0,10,820},{0,10,820},{24,1,11582},{6,1,4137},{6,35,4199},{20,7,2845},{6,1,13958},{0,1,2826},{0,37,31},{0,43,4255},{0,39,13958},{0,43,6958},{28,1,4610},{10,1,278},{10,21,261},{8,23,481},{37,0,9669},{0,1,2825},{16,37,18},{0,43,4254},{36,9,9669}, 
+{0,43,4254},{36,1,3454},{36,1,3454},{36,1,3454},{20,9,2665},{18,1,2740},{16,7,26},{16,7,26},{0,47,278},{0,15,4491},{0,46,1858},{40,1,29},{40,1,29},{40,1,29},{40,27,5},{29,2,2178},{32,23,2},{32,23,2},{0,47,277},{35,6,2178},{0,47,277},{1,2,2665},{42,1,157},{28,37,2},{0,37,5},{1,2,2665},{2,1,2665},{0,37,5},{0,11,2677},{2,1,2665},{0,11,2677},{20,0,2665}, +{20,0,2665},{20,0,2665},{20,0,2665},{0,5,2},{0,5,2},{0,5,2},{0,14,5},{0,12,981},{0,12,981},{40,1,12090},{38,1,4554},{22,19,4203},{36,21,2837},{38,1,14410},{32,1,2930},{32,21,37},{0,27,4187},{0,37,13477},{0,27,6222},{14,1,4826},{42,1,465},{42,35,270},{40,7,488},{5,0,9669},{2,1,2921},{2,21,25},{0,27,4106},{0,5,9669},{0,27,4106},{22,1,3593},{22,1,3593},{22,1,3593}, +{6,39,2677},{34,1,2840},{32,21,21},{32,21,21},{16,15,277},{0,43,4186},{0,31,1450},{26,1,52},{26,1,52},{26,1,52},{10,41,2},{43,2,2178},{18,7,4},{18,7,4},{0,15,205},{21,8,2178},{0,15,205},{13,3,2665},{44,1,260},{14,5,5},{0,5,8},{13,3,2665},{20,1,2665},{0,5,8},{0,25,2665},{20,1,2665},{0,25,2665},{6,0,2677},{6,0,2677},{6,0,2677},{6,0,2677},{32,35,4}, +{32,35,4},{32,35,4},{32,30,5},{0,28,745},{0,28,745},{26,1,12542},{24,1,4990},{8,33,4178},{22,5,2845},{24,1,14719},{2,1,3162},{18,35,34},{16,41,4197},{0,5,13013},{0,41,5610},{46,1,5121},{28,1,754},{12,19,270},{26,21,484},{30,3,9669},{4,1,3110},{18,35,18},{0,41,3929},{42,9,9669},{0,41,3929},{8,1,3770},{8,1,3770},{8,1,3770},{22,7,2666},{20,1,3011},{18,5,26},{18,5,26}, +{32,45,261},{0,27,3822},{0,15,1062},{12,1,98},{12,1,98},{12,1,98},{42,25,5},{38,1,2178},{34,21,2},{34,21,2},{0,29,160},{43,28,2178},{0,29,160},{43,1,2665},{30,1,388},{30,35,2},{16,35,2},{43,1,2665},{8,1,2665},{16,35,2},{0,9,2677},{8,1,2665},{0,9,2677},{22,0,2665},{22,0,2665},{22,0,2665},{22,0,2665},{2,3,2},{2,3,2},{2,3,2},{2,46,5},{0,46,578}, 
+{0,46,578},{12,1,13222},{40,1,5610},{40,17,4197},{38,19,2849},{26,1,15194},{4,1,3497},{34,19,34},{32,9,4178},{0,3,12493},{0,25,4990},{47,1,5429},{14,1,1062},{44,33,261},{42,5,499},{46,1,9669},{36,1,3341},{4,19,26},{0,9,3770},{26,7,9669},{0,9,3770},{40,1,3929},{40,1,3929},{40,1,3929},{8,37,2678},{6,1,3174},{34,19,18},{34,19,18},{18,13,270},{0,25,3462},{0,29,754},{28,1,160}, +{28,1,160},{28,1,160},{12,39,5},{39,0,2178},{20,35,2},{20,35,2},{0,13,98},{9,28,2178},{0,13,98},{11,1,2665},{47,1,578},{47,3,5},{2,3,2},{11,1,2665},{42,1,2665},{2,3,2},{0,23,2665},{42,1,2665},{0,23,2665},{8,0,2677},{8,0,2677},{8,0,2677},{8,0,2677},{34,17,2},{34,17,2},{34,17,2},{34,31,2},{0,31,388},{0,31,388},{28,1,13826},{26,1,6222},{26,1,4187}, +{24,3,2835},{42,1,15614},{36,1,3886},{20,33,37},{18,23,4203},{0,17,12134},{0,39,4554},{45,1,5669},{30,1,1450},{14,17,277},{28,19,484},{31,1,9669},{38,1,3605},{20,33,21},{0,23,3593},{32,1,9669},{0,23,3593},{26,1,4106},{26,1,4106},{26,1,4106},{24,5,2665},{22,1,3378},{20,3,25},{20,3,25},{34,43,270},{0,39,3206},{0,43,465},{14,1,205},{14,1,205},{14,1,205},{44,7,5},{23,2,2178}, +{36,3,4},{36,3,4},{0,27,52},{20,9,2178},{0,27,52},{39,3,2665},{29,1,745},{31,33,5},{34,33,4},{39,3,2665},{14,1,2665},{34,33,4},{0,7,2677},{14,1,2665},{0,7,2677},{24,0,2665},{24,0,2665},{24,0,2665},{24,0,2665},{4,1,8},{4,1,8},{4,1,8},{4,15,5},{0,45,260},{0,45,260},{14,1,14322},{42,1,6958},{42,1,4255},{40,17,2837},{28,1,16150},{38,1,4422},{36,1,31}, +{34,7,4199},{0,1,11889},{0,7,4137},{29,1,6018},{47,1,1858},{46,1,278},{44,3,499},{45,1,9669},{40,1,3905},{6,17,26},{0,37,3454},{27,9,9669},{0,37,3454},{42,1,4254},{42,1,4254},{42,1,4254},{40,19,2678},{24,1,3540},{36,17,18},{36,17,18},{20,11,261},{0,37,2979},{0,11,278},{46,1,277},{46,1,277},{46,1,277},{14,37,5},{37,2,2178},{22,33,2},{22,33,2},{0,41,29},{34,7,2178}, 
+{0,41,29},{7,3,2665},{13,1,981},{15,1,5},{4,1,2},{7,3,2665},{15,1,2665},{4,1,2},{0,21,2665},{15,1,2665},{0,21,2665},{10,0,2677},{10,0,2677},{10,0,2677},{10,0,2677},{36,1,5},{36,1,5},{36,1,5},{36,29,2},{0,43,157},{0,43,157},{30,1,13683},{44,1,6982},{12,1,4485},{26,17,2739},{14,1,15204},{24,1,4100},{22,1,25},{20,37,3546},{0,1,10840},{0,37,3100},{13,1,5451}, +{15,1,1830},{47,1,353},{30,33,333},{39,11,8712},{26,1,3507},{38,1,4},{0,37,2739},{22,1,8712},{0,37,2739},{12,1,4485},{12,1,4485},{12,1,4485},{26,3,2665},{40,1,3736},{22,1,25},{22,1,25},{36,41,270},{0,5,2779},{0,25,117},{47,1,353},{47,1,353},{47,1,353},{46,5,5},{44,1,2178},{38,17,2},{38,17,2},{0,25,17},{26,9,2178},{0,25,17},{3,25,2178},{11,1,820},{13,1,1}, +{22,1,0},{3,25,2178},{29,1,2178},{22,1,0},{0,5,2180},{29,1,2178},{0,5,2180},{26,0,2665},{26,0,2665},{26,0,2665},{26,0,2665},{22,1,25},{22,1,25},{22,1,25},{6,13,5},{0,41,73},{0,41,73},{47,1,12750},{14,1,6837},{14,1,4721},{12,1,2694},{30,1,14061},{40,1,3663},{8,1,86},{6,21,2826},{2,1,9775},{0,21,2145},{11,1,4689},{29,1,1633},{45,1,450},{47,17,195},{17,10,7578}, +{44,1,2961},{24,1,10},{0,5,1973},{24,1,7578},{0,5,1973},{14,1,4721},{14,1,4721},{14,1,4721},{12,17,2673},{26,1,3965},{8,1,86},{8,1,86},{22,9,258},{0,3,2571},{0,39,29},{45,1,450},{45,1,450},{45,1,450},{47,19,5},{31,5,2178},{24,1,10},{24,1,10},{0,39,4},{14,9,2178},{0,39,4},{37,1,1625},{25,1,629},{27,1,1},{10,1,0},{37,1,1625},{27,1,1625},{10,1,0}, +{0,35,1629},{27,1,1625},{0,35,1629},{12,0,2669},{12,0,2669},{12,0,2669},{12,0,2669},{8,1,61},{8,1,61},{8,1,61},{38,27,1},{0,9,25},{0,9,25},{15,1,12134},{30,1,6845},{30,1,4909},{28,1,2666},{47,1,13165},{26,1,3426},{40,1,187},{38,5,2218},{2,1,9023},{0,35,1446},{11,1,4097},{13,1,1541},{29,1,544},{31,17,90},{39,7,6661},{14,1,2525},{26,1,40},{0,35,1382},{26,1,6661}, 
+{0,35,1382},{30,1,4909},{30,1,4909},{30,1,4909},{28,1,2666},{12,1,4230},{40,1,187},{40,1,187},{8,39,273},{0,17,2453},{0,7,3},{29,1,544},{29,1,544},{29,1,544},{15,3,5},{17,2,2178},{26,1,40},{26,1,40},{0,7,2},{16,1,2178},{0,7,2},{21,1,1201},{23,1,458},{25,1,4},{28,1,1},{21,1,1201},{41,1,1201},{28,1,1},{0,35,1213},{41,1,1201},{0,35,1213},{28,0,2665}, +{28,0,2665},{28,0,2665},{28,0,2665},{24,1,90},{24,1,90},{24,1,90},{8,11,8},{0,7,2},{0,7,2},{45,1,11330},{47,1,6789},{46,1,5186},{14,1,2694},{47,1,12365},{42,1,3246},{26,1,353},{8,5,1734},{20,1,8393},{0,19,926},{25,1,3614},{27,1,1514},{43,1,666},{45,17,35},{9,3,5829},{47,1,2177},{28,1,106},{0,19,922},{31,3,5829},{0,19,922},{46,1,5186},{46,1,5186},{46,1,5186}, +{14,1,2694},{14,1,4504},{26,1,353},{26,1,353},{24,7,270},{16,1,2450},{16,21,10},{43,1,666},{43,1,666},{43,1,666},{45,33,2},{29,3,2178},{28,1,106},{28,1,106},{2,37,4},{43,9,2178},{2,37,4},{5,1,841},{7,1,306},{9,1,4},{47,1,1},{5,1,841},{9,1,841},{47,1,1},{0,19,841},{9,1,841},{0,19,841},{14,0,2669},{14,0,2669},{14,0,2669},{14,0,2669},{10,1,136}, +{10,1,136},{10,1,136},{40,25,2},{16,21,1},{16,21,1},{29,1,10834},{15,1,6895},{31,1,5418},{30,1,2786},{45,1,11530},{44,1,3154},{42,1,581},{40,35,1298},{36,1,7857},{16,3,582},{9,1,3105},{11,1,1441},{11,1,765},{13,1,8},{37,7,5082},{47,1,1905},{14,1,185},{0,3,570},{30,1,5082},{0,3,570},{31,1,5418},{31,1,5418},{31,1,5418},{30,1,2786},{30,1,4724},{42,1,581},{42,1,581}, +{10,37,273},{2,1,2587},{2,5,3},{11,1,765},{11,1,765},{11,1,765},{13,1,8},{39,11,2178},{14,1,185},{14,1,185},{2,5,2},{22,1,2178},{2,5,2},{3,3,545},{21,1,208},{7,1,1},{29,1,0},{3,3,545},{23,1,545},{29,1,0},{0,3,545},{23,1,545},{0,3,545},{30,0,2665},{30,0,2665},{30,0,2665},{30,0,2665},{42,1,181},{42,1,181},{42,1,181},{10,9,8},{2,5,2}, 
+{2,5,2},{13,1,10311},{45,1,6878},{45,1,5653},{47,1,2933},{29,1,10827},{14,1,3066},{44,1,913},{10,3,925},{8,1,7297},{2,33,307},{39,1,2707},{9,1,1427},{41,1,925},{27,1,32},{23,17,4344},{29,1,1611},{47,1,305},{0,33,291},{45,17,4344},{0,33,291},{45,1,5653},{45,1,5653},{45,1,5653},{47,1,2933},{47,1,5051},{44,1,913},{44,1,913},{26,5,269},{4,1,2859},{18,19,11},{41,1,925}, +{41,1,925},{41,1,925},{27,1,32},{25,5,2178},{47,1,305},{47,1,305},{18,19,10},{10,1,2178},{18,19,10},{33,3,288},{35,1,100},{21,1,1},{25,1,1},{33,3,288},{21,1,288},{25,1,1},{0,33,290},{21,1,288},{0,33,290},{47,0,2677},{47,0,2677},{47,0,2677},{47,0,2677},{28,1,265},{28,1,265},{28,1,265},{42,23,0},{18,19,2},{18,19,2},{11,1,9837},{13,1,6951},{29,1,5954}, +{15,1,3166},{13,1,10279},{47,1,3138},{30,1,1226},{42,3,644},{40,1,6929},{34,33,150},{7,1,2436},{39,1,1419},{9,1,1018},{11,1,101},{3,25,3779},{13,1,1475},{29,1,458},{2,17,146},{29,1,3779},{2,17,146},{29,1,5954},{29,1,5954},{29,1,5954},{15,1,3166},{31,1,5396},{30,1,1226},{30,1,1226},{12,35,274},{36,1,3147},{4,3,4},{9,1,1018},{9,1,1018},{9,1,1018},{11,1,101},{9,3,2178}, +{29,1,458},{29,1,458},{20,3,0},{31,3,2178},{20,3,0},{17,3,128},{19,1,40},{19,1,4},{7,1,1},{17,3,128},{35,1,128},{7,1,1},{0,17,146},{35,1,128},{0,17,146},{31,0,2665},{31,0,2665},{31,0,2665},{31,0,2665},{14,1,338},{14,1,338},{14,1,338},{28,7,9},{4,3,4},{4,3,4},{11,1,9437},{27,1,7149},{43,1,6265},{29,1,3402},{11,1,9788},{47,1,3234},{47,1,1634}, +{28,33,444},{42,1,6719},{20,17,35},{21,1,2210},{23,1,1473},{23,1,1184},{25,1,241},{21,17,3299},{11,1,1400},{13,1,610},{20,17,34},{17,21,3299},{20,17,34},{43,1,6265},{43,1,6265},{43,1,6265},{29,1,3402},{45,1,5621},{47,1,1634},{47,1,1634},{28,3,285},{8,1,3421},{20,17,10},{23,1,1184},{23,1,1184},{23,1,1184},{25,1,241},{23,3,2178},{13,1,610},{13,1,610},{20,17,9},{43,3,2178}, 
+{20,17,9},{17,1,34},{33,1,10},{33,1,1},{35,1,0},{17,1,34},{3,1,34},{35,1,0},{0,17,34},{3,1,34},{0,17,34},{15,0,2677},{15,0,2677},{15,0,2677},{15,0,2677},{30,1,410},{30,1,410},{30,1,410},{44,21,0},{20,17,1},{20,17,1},{9,1,9175},{11,1,7181},{11,1,6505},{13,1,3686},{11,1,9340},{45,1,3447},{47,1,2114},{14,17,315},{44,1,6532},{6,1,4},{21,1,2034}, +{21,1,1553},{7,1,1348},{39,1,410},{17,39,2904},{9,1,1411},{11,1,820},{22,1,0},{39,17,2904},{22,1,0},{11,1,6505},{11,1,6505},{11,1,6505},{13,1,3686},{13,1,5990},{47,1,2114},{47,1,2114},{14,33,274},{40,1,3789},{6,1,4},{7,1,1348},{7,1,1348},{7,1,1348},{39,1,410},{3,25,2178},{11,1,820},{11,1,820},{22,1,0},{29,1,2178},{22,1,0},{1,1,0},{1,1,0},{1,1,0}, +{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{29,0,2665},{29,0,2665},{29,0,2665},{29,0,2665},{47,1,514},{47,1,514},{47,1,514},{30,5,9},{6,1,4},{6,1,4},{9,1,7987},{41,1,6503},{41,1,5878},{27,1,3561},{25,1,8118},{29,1,3051},{45,1,1934},{30,17,167},{30,1,5562},{8,1,29},{5,1,1507},{21,1,1130},{21,1,986},{23,1,298},{19,3,2166}, +{23,1,1019},{25,1,629},{10,1,0},{39,1,2166},{10,1,0},{41,1,5878},{41,1,5878},{41,1,5878},{27,1,3561},{27,1,5301},{45,1,1934},{45,1,1934},{30,17,142},{42,1,3266},{8,1,29},{21,1,986},{21,1,986},{21,1,986},{23,1,298},{37,1,1625},{25,1,629},{25,1,629},{10,1,0},{27,1,1625},{10,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0}, +{0,1,0},{1,1,0},{0,1,0},{43,0,2669},{43,0,2669},{43,0,2669},{43,0,2669},{15,1,650},{15,1,650},{15,1,650},{46,19,1},{8,1,29},{8,1,29},{39,1,7111},{9,1,5795},{9,1,5354},{11,1,3381},{9,1,6983},{13,1,2803},{13,1,1842},{47,17,79},{47,1,4802},{40,1,85},{35,1,1132},{5,1,830},{5,1,730},{7,1,226},{3,3,1601},{7,1,739},{23,1,458},{28,1,1},{3,3,1601}, 
+{28,1,1},{9,1,5354},{9,1,5354},{9,1,5354},{11,1,3381},{11,1,4622},{13,1,1842},{13,1,1842},{47,17,54},{44,1,2834},{40,1,85},{5,1,730},{5,1,730},{5,1,730},{7,1,226},{21,1,1201},{23,1,458},{23,1,458},{28,1,1},{41,1,1201},{28,1,1},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{27,0,2665}, +{27,0,2665},{27,0,2665},{27,0,2665},{45,1,725},{45,1,725},{45,1,725},{47,33,5},{40,1,85},{40,1,85},{23,1,6361},{39,1,5231},{9,1,4826},{41,1,3294},{39,1,6071},{11,1,2610},{43,1,1825},{15,1,17},{47,1,4162},{42,1,169},{19,1,792},{35,1,584},{35,1,520},{21,1,153},{19,1,1121},{7,1,531},{7,1,306},{47,1,1},{37,1,1121},{47,1,1},{9,1,4826},{9,1,4826},{9,1,4826}, +{41,1,3294},{41,1,4145},{43,1,1825},{43,1,1825},{15,1,17},{30,1,2474},{42,1,169},{35,1,520},{35,1,520},{35,1,520},{21,1,153},{5,1,841},{7,1,306},{7,1,306},{47,1,1},{9,1,841},{47,1,1},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{41,0,2669},{41,0,2669},{41,0,2669},{41,0,2669},{13,1,865}, +{13,1,865},{13,1,865},{15,17,2},{42,1,169},{42,1,169},{7,1,5637},{23,1,4730},{39,1,4406},{9,1,3146},{39,1,5287},{11,1,2418},{11,1,1742},{45,1,8},{29,1,3547},{44,1,274},{3,1,489},{19,1,376},{19,1,340},{35,1,100},{33,33,726},{5,1,344},{21,1,208},{29,1,0},{33,33,726},{29,1,0},{39,1,4406},{39,1,4406},{39,1,4406},{9,1,3146},{9,1,3630},{11,1,1742},{11,1,1742}, +{45,1,8},{47,1,2178},{44,1,274},{19,1,340},{19,1,340},{19,1,340},{35,1,100},{3,3,545},{21,1,208},{21,1,208},{29,1,0},{23,1,545},{29,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{25,0,2665},{25,0,2665},{25,0,2665},{25,0,2665},{27,1,1025},{27,1,1025},{27,1,1025},{45,1,8},{44,1,274}, 
+{44,1,274},{36,1,50644},{0,1,2121},{0,25,169},{0,41,4591},{20,1,59804},{0,3,19310},{0,11,7401},{0,43,24008},{0,9,65535},{0,15,40741},{18,1,10267},{0,3,1445},{0,41,137},{0,15,3985},{42,6,18065},{0,45,12064},{0,31,6081},{0,12,14121},{19,0,18065},{0,12,14121},{0,15,1},{0,15,1},{0,15,1},{0,24,0},{0,24,1105},{0,22,410},{0,22,410},{0,4,666},{0,4,1217},{0,4,766},{0,15,1}, +{0,15,1},{0,15,1},{0,24,0},{34,2,1105},{0,22,410},{0,22,410},{0,4,666},{24,0,1105},{0,4,666},{43,2,9248},{0,3,1445},{0,41,137},{0,15,3985},{43,2,9248},{21,8,9248},{0,15,3985},{0,14,9256},{21,8,9248},{0,14,9256},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{36,1,53600},{16,1,2998},{0,9,44}, +{0,25,3941},{36,1,62123},{0,1,18506},{0,25,6750},{0,27,23131},{0,39,65535},{0,29,40569},{34,1,10859},{0,1,1241},{0,9,50},{0,13,3690},{16,3,19334},{0,43,12449},{0,15,6117},{0,28,14809},{17,0,19334},{0,28,14809},{0,27,0},{0,27,0},{0,27,0},{0,10,1},{0,26,1513},{0,8,585},{0,8,585},{0,20,914},{0,20,1669},{0,20,1058},{0,27,0},{0,27,0},{0,27,0},{0,10,1},{34,4,1513}, +{0,8,585},{0,8,585},{0,20,914},{26,0,1513},{0,20,914},{38,1,9248},{0,1,1241},{0,9,50},{0,13,3690},{38,1,9248},{43,28,9248},{0,13,3690},{0,46,9250},{43,28,9248},{0,46,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{36,1,56716},{16,1,4497},{0,23,30},{0,39,3500},{36,1,64625},{0,1,18101},{0,9,6313}, +{0,11,22459},{0,37,65535},{0,43,39632},{4,1,11624},{0,1,1225},{0,39,10},{0,27,3400},{16,1,20689},{0,41,12854},{0,43,6221},{0,44,15490},{19,2,20689},{0,44,15490},{0,9,1},{0,9,1},{0,9,1},{0,42,1},{0,12,1985},{0,10,757},{0,10,757},{0,36,1241},{0,36,2193},{0,20,1394},{0,9,1},{0,9,1},{0,9,1},{0,42,1},{6,0,1985},{0,10,757},{0,10,757},{0,36,1241},{12,0,1985}, 
+{0,36,1241},{24,1,9248},{0,1,1225},{0,39,10},{0,27,3400},{24,1,9248},{32,9,9248},{0,27,3400},{0,47,9248},{32,9,9248},{0,47,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{6,1,58324},{32,1,6344},{0,7,6},{0,23,2873},{36,1,65535},{16,1,17968},{0,23,5727},{0,25,21128},{0,21,63585},{0,27,38087},{36,1,12449}, +{16,1,1437},{0,7,9},{0,41,3185},{47,2,22129},{0,25,13298},{0,43,6189},{0,44,16354},{17,2,22129},{0,44,16354},{0,21,0},{0,21,0},{0,21,0},{0,44,1},{0,44,2521},{0,26,953},{0,26,953},{0,6,1553},{0,6,2770},{0,6,1778},{0,21,0},{0,21,0},{0,21,0},{0,44,1},{22,0,2521},{0,26,953},{0,26,953},{0,6,1553},{44,0,2521},{0,6,1553},{7,0,9248},{16,1,1412},{16,7,0}, +{0,41,3185},{7,0,9248},{0,7,9248},{0,41,3185},{0,15,9266},{0,7,9248},{0,15,9266},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{22,1,58878},{32,1,8497},{0,21,52},{0,37,2302},{22,1,65535},{16,1,18091},{0,37,5062},{0,9,19646},{0,5,60796},{0,41,35977},{36,1,13547},{16,1,1923},{16,21,62},{0,25,2897},{45,0,23851}, +{0,39,13856},{0,27,6323},{0,46,17289},{27,8,23851},{0,46,17289},{0,33,0},{0,33,0},{0,33,0},{0,30,0},{0,46,3200},{0,12,1186},{0,12,1186},{0,38,1962},{0,22,3521},{0,22,2261},{0,33,0},{0,33,0},{0,33,0},{0,30,0},{18,10,3200},{0,12,1186},{0,12,1186},{0,38,1962},{46,0,3200},{0,38,1962},{37,2,9248},{2,1,1717},{2,21,5},{0,25,2897},{37,2,9248},{34,7,9248},{0,25,2897}, +{0,45,9250},{34,7,9248},{0,45,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{22,1,59528},{2,1,10468},{0,5,122},{0,21,1965},{22,1,65535},{16,1,18728},{0,21,4602},{0,23,18427},{0,3,58418},{0,25,34396},{6,1,14752},{2,1,2501},{16,35,141},{0,39,2720},{45,2,25472},{0,37,14401},{0,41,6413},{0,46,18185},{25,8,25472}, 
+{0,46,18185},{0,1,16},{0,1,16},{0,1,16},{0,47,0},{0,31,3872},{0,28,1450},{0,28,1450},{0,38,2362},{0,38,4283},{0,38,2723},{0,1,16},{0,1,16},{0,1,16},{0,47,0},{36,6,3872},{0,28,1450},{0,28,1450},{0,38,2362},{31,0,3872},{0,38,2362},{44,1,9248},{20,1,2041},{18,5,2},{0,39,2720},{44,1,9248},{26,9,9248},{0,39,2720},{0,13,9256},{26,9,9248},{0,13,9256},{0,0,0}, +{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{24,1,60070},{2,1,12544},{0,19,188},{0,35,1604},{22,1,65535},{32,1,19503},{0,5,4081},{0,7,17117},{0,3,56204},{0,25,32856},{22,1,15824},{2,1,3225},{32,19,229},{0,7,2478},{46,10,26744},{0,21,14657},{0,25,6357},{0,31,18737},{23,8,26744},{0,31,18737},{16,1,115},{16,1,115},{16,1,115}, +{0,15,5},{0,29,4420},{0,46,1613},{0,46,1613},{0,8,2642},{0,8,4931},{0,8,3083},{16,1,90},{16,1,90},{16,1,90},{0,15,5},{8,2,4418},{0,46,1613},{0,46,1613},{0,8,2642},{30,2,4418},{0,8,2642},{33,0,9248},{36,1,2405},{34,19,5},{0,7,2474},{33,0,9248},{32,3,9248},{0,7,2474},{0,43,9250},{32,3,9248},{0,43,9250},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,16,1}, +{0,16,1},{0,16,1},{0,16,1},{0,16,2},{0,16,2},{24,1,60699},{2,1,14864},{16,3,314},{0,19,1400},{24,1,65535},{2,1,20230},{0,19,3323},{0,37,15488},{0,17,54456},{0,39,31364},{24,1,16210},{4,1,3849},{2,33,221},{0,37,2328},{38,3,26744},{0,3,14114},{0,23,5618},{0,15,18273},{35,8,26744},{0,15,18273},{32,1,291},{32,1,291},{32,1,291},{16,29,50},{0,11,4418},{0,31,1325},{0,31,1325}, +{0,10,2465},{0,10,5112},{0,8,3051},{2,1,136},{2,1,136},{2,1,136},{32,29,5},{24,4,4418},{0,31,1325},{0,31,1325},{0,10,2465},{11,0,4418},{0,10,2465},{47,1,9248},{8,1,2738},{20,3,1},{0,37,2228},{47,1,9248},{31,9,9248},{0,37,2228},{0,11,9256},{31,9,9248},{0,11,9256},{16,0,50},{16,0,50},{16,0,50},{16,0,50},{0,4,0},{0,4,0},{0,4,0},{0,2,0},{0,32,18}, 
+{0,32,18},{24,1,61549},{2,1,17597},{16,17,476},{0,33,1268},{24,1,65535},{2,1,21346},{0,33,2615},{0,5,13900},{0,17,52724},{0,37,29656},{40,1,16729},{36,1,4545},{34,1,221},{32,21,2384},{8,1,26744},{0,3,13529},{0,37,4710},{0,29,17819},{1,8,26744},{0,29,17819},{2,1,626},{2,1,626},{2,1,626},{32,43,185},{0,23,4418},{0,45,1037},{0,45,1037},{0,10,2249},{0,26,5330},{0,10,2925},{34,1,185}, +{34,1,185},{34,1,185},{18,13,4},{38,10,4418},{0,45,1037},{0,45,1037},{0,10,2249},{23,0,4418},{0,10,2249},{29,3,9248},{40,1,3188},{6,17,2},{0,35,2041},{29,3,9248},{43,9,9248},{0,35,2041},{0,41,9256},{43,9,9248},{0,41,9256},{32,0,185},{32,0,185},{32,0,185},{32,0,185},{0,38,0},{0,38,0},{0,38,0},{0,34,1},{0,18,61},{0,18,61},{40,1,62083},{4,1,19345},{16,1,697}, +{0,17,1188},{24,1,65535},{2,1,22086},{0,17,2057},{0,35,12551},{0,1,51532},{0,37,28158},{26,1,16691},{36,1,5081},{20,1,265},{32,35,2281},{29,12,26259},{0,17,12803},{0,5,3981},{0,43,16952},{26,13,26259},{0,43,16952},{18,1,1006},{18,1,1006},{18,1,1006},{2,11,378},{0,5,4420},{0,43,820},{0,43,820},{0,12,2020},{0,12,5541},{0,42,2966},{20,1,265},{20,1,265},{20,1,265},{34,27,1},{28,2,4418}, +{0,43,820},{0,43,820},{0,12,2020},{42,8,4418},{0,12,2020},{43,3,8978},{42,1,3434},{22,1,1},{0,19,1737},{43,3,8978},{45,7,8978},{0,19,1737},{0,9,8986},{45,7,8978},{0,9,8986},{2,0,377},{2,0,377},{2,0,377},{2,0,377},{0,10,1},{0,10,1},{0,10,1},{0,20,4},{0,4,136},{0,4,136},{40,1,62361},{4,1,19653},{16,1,1095},{0,17,1126},{24,1,65535},{2,1,21601},{0,17,1502}, +{0,19,11253},{0,1,50904},{0,37,27226},{42,1,15419},{38,1,4708},{36,1,320},{18,19,1862},{23,2,24371},{0,1,11221},{0,35,2905},{0,27,15080},{20,9,24371},{0,27,15080},{34,1,1522},{34,1,1522},{34,1,1522},{2,25,618},{0,17,4418},{0,41,610},{0,41,610},{0,44,1810},{0,44,5843},{0,12,2885},{36,1,320},{36,1,320},{36,1,320},{20,11,9},{16,3,4418},{0,41,610},{0,41,610},{0,44,1810},{17,0,4418}, 
+{0,44,1810},{27,3,7938},{44,1,3033},{8,1,4},{0,19,1225},{27,3,7938},{39,9,7938},{0,19,1225},{0,9,7946},{39,9,7938},{0,9,7946},{2,0,617},{2,0,617},{2,0,617},{2,0,617},{0,44,0},{0,44,0},{0,44,0},{0,22,1},{0,36,232},{0,36,232},{40,1,62690},{4,1,20049},{32,1,1585},{16,17,1131},{40,1,65535},{2,1,21187},{0,17,1035},{0,19,10004},{0,1,50320},{0,37,26351},{28,1,14260}, +{24,1,4365},{22,1,410},{4,19,1523},{47,11,22568},{0,1,9893},{0,19,1997},{0,27,13320},{22,9,22568},{0,27,13320},{4,1,2169},{4,1,2169},{4,1,2169},{18,9,929},{16,1,4461},{0,9,442},{0,9,442},{0,14,1665},{0,46,6116},{0,44,2907},{22,1,410},{22,1,410},{22,1,410},{36,25,1},{30,4,4418},{0,9,442},{0,9,442},{0,14,1665},{15,8,4418},{0,14,1665},{11,3,6962},{14,1,2645},{10,1,1}, +{0,3,832},{11,3,6962},{44,3,6962},{0,3,832},{0,39,6962},{44,3,6962},{0,39,6962},{18,0,925},{18,0,925},{18,0,925},{18,0,925},{0,47,0},{0,47,0},{0,47,0},{0,8,0},{0,22,338},{0,22,338},{40,1,63078},{4,1,20586},{32,1,2208},{16,1,1221},{40,1,65535},{2,1,20797},{0,17,616},{0,19,8676},{0,1,49684},{0,37,25424},{14,1,12942},{40,1,4061},{8,1,530},{36,19,1147},{21,2,20642}, +{16,1,8678},{0,3,1157},{0,41,11489},{24,9,20642},{0,41,11489},{20,1,3009},{20,1,3009},{20,1,3009},{34,7,1358},{32,1,4667},{0,7,305},{0,7,305},{0,46,1445},{0,31,6482},{0,14,3034},{8,1,530},{8,1,530},{8,1,530},{22,9,5},{45,0,4418},{0,7,305},{0,7,305},{0,46,1445},{27,8,4418},{0,46,1445},{11,1,5941},{30,1,2260},{42,1,0},{0,33,445},{11,1,5941},{26,1,5941},{0,33,445}, +{0,23,5941},{26,1,5941},{0,23,5941},{34,0,1354},{34,0,1354},{34,0,1354},{34,0,1354},{0,13,0},{0,13,0},{0,13,0},{0,40,1},{0,8,522},{0,8,522},{40,1,63433},{4,1,21145},{32,1,2873},{16,1,1404},{40,1,65535},{2,1,20517},{0,1,339},{0,19,7570},{0,1,49136},{0,37,24652},{30,1,11862},{26,1,3845},{40,1,617},{22,3,868},{44,1,19021},{2,1,7769},{0,33,621},{0,25,9957},{26,9,19021}, 
+{0,25,9957},{36,1,3819},{36,1,3819},{36,1,3819},{4,37,1809},{2,1,5012},{0,5,185},{0,5,185},{0,31,1285},{0,15,6822},{0,46,3029},{40,1,617},{40,1,617},{40,1,617},{38,23,2},{29,2,4418},{0,5,185},{0,5,185},{0,31,1285},{35,6,4418},{0,31,1285},{41,1,5101},{47,1,1924},{28,1,1},{0,17,221},{41,1,5101},{28,1,5101},{0,17,221},{0,7,5113},{28,1,5101},{0,7,5113},{4,0,1808}, +{4,0,1808},{4,0,1808},{4,0,1808},{0,25,0},{0,25,0},{0,25,0},{0,26,4},{0,24,698},{0,24,698},{26,1,63733},{4,1,21777},{32,1,3641},{16,1,1687},{40,1,65535},{2,1,20303},{0,1,133},{0,3,6539},{0,1,48607},{0,37,23935},{30,1,10886},{42,1,3641},{26,1,724},{8,33,659},{46,3,17485},{2,1,6985},{0,17,257},{0,25,8565},{16,3,17485},{0,25,8565},{6,1,4820},{6,1,4820},{6,1,4820}, +{4,5,2324},{18,1,5437},{0,3,101},{0,3,101},{0,15,1129},{0,45,7234},{0,31,3141},{26,1,724},{26,1,724},{26,1,724},{24,7,4},{43,2,4418},{0,3,101},{0,3,101},{0,15,1129},{21,8,4418},{0,15,1129},{25,1,4325},{15,1,1658},{14,1,4},{0,17,61},{25,1,4325},{14,1,4325},{0,17,61},{0,7,4329},{14,1,4325},{0,7,4329},{4,0,2320},{4,0,2320},{4,0,2320},{4,0,2320},{0,7,0}, +{0,7,0},{0,7,0},{0,28,1},{0,10,872},{0,10,872},{26,1,63992},{4,1,22482},{2,1,4507},{32,1,2059},{40,1,65535},{2,1,20157},{0,1,26},{0,3,5537},{0,1,48100},{0,21,23272},{47,1,9918},{28,1,3518},{42,1,832},{24,33,446},{46,1,16034},{4,1,6314},{0,1,65},{0,39,7293},{26,7,16034},{0,39,7293},{22,1,5900},{22,1,5900},{22,1,5900},{20,19,2888},{34,1,6029},{0,17,40},{0,17,40}, +{0,29,1000},{0,43,7619},{0,15,3261},{42,1,832},{42,1,832},{42,1,832},{40,21,1},{38,1,4418},{0,17,40},{0,17,40},{0,29,1000},{43,28,4418},{0,29,1000},{9,1,3613},{29,1,1345},{47,1,4},{0,1,1},{9,1,3613},{46,1,3613},{0,1,1},{0,37,3613},{46,1,3613},{0,37,3613},{20,0,2888},{20,0,2888},{20,0,2888},{20,0,2888},{0,19,1},{0,19,1},{0,19,1},{0,14,0},{0,12,1082}, 
+{0,12,1082},{26,1,64289},{20,1,23310},{2,1,5546},{32,1,2553},{40,1,65535},{2,1,20076},{0,1,26},{0,3,4514},{0,1,47560},{0,5,22518},{31,1,9017},{14,1,3261},{28,1,1000},{10,33,281},{17,2,14504},{36,1,5594},{16,1,40},{0,23,5900},{16,1,14504},{0,23,5900},{38,1,7293},{38,1,7293},{38,1,7293},{36,33,3614},{4,1,6900},{0,1,65},{0,1,65},{0,43,832},{0,41,8070},{0,29,3518},{28,1,1000}, +{28,1,1000},{28,1,1000},{26,5,2},{39,0,4418},{16,1,40},{16,1,40},{0,43,832},{9,28,4418},{0,43,832},{23,17,2888},{13,1,1082},{15,1,0},{18,1,1},{23,17,2888},{45,17,2888},{18,1,1},{0,21,2888},{45,17,2888},{0,21,2888},{36,0,3613},{36,0,3613},{36,0,3613},{36,0,3613},{0,1,1},{0,1,1},{0,1,1},{0,46,4},{0,28,1345},{0,28,1345},{26,1,64605},{36,1,24062},{2,1,6574}, +{32,1,3098},{26,1,65535},{2,1,20094},{0,1,133},{0,33,3661},{0,1,47145},{0,5,21893},{45,1,8116},{30,1,3141},{14,1,1129},{42,17,147},{43,7,13235},{38,1,5012},{2,1,101},{0,7,4820},{18,1,13235},{0,7,4820},{24,1,8565},{24,1,8565},{24,1,8565},{6,1,4329},{36,1,7725},{16,1,257},{16,1,257},{0,27,724},{0,25,8530},{0,43,3641},{14,1,1129},{14,1,1129},{14,1,1129},{42,19,2},{23,2,4418}, +{2,1,101},{2,1,101},{0,27,724},{20,9,4418},{0,27,724},{37,3,2312},{11,1,872},{29,1,1},{6,1,0},{37,3,2312},{21,5,2312},{6,1,0},{0,5,2320},{21,5,2312},{0,5,2320},{6,0,4329},{6,0,4329},{6,0,4329},{6,0,4329},{16,1,61},{16,1,61},{16,1,61},{0,15,4},{0,14,1658},{0,14,1658},{26,1,64960},{36,1,24888},{18,1,7643},{32,1,3742},{26,1,65535},{4,1,20161},{0,1,342}, +{0,33,2900},{0,1,46786},{0,5,21347},{29,1,7443},{47,1,3029},{30,1,1285},{12,17,66},{17,6,12051},{24,1,4500},{4,1,185},{0,37,3819},{20,1,12051},{0,37,3819},{24,1,9957},{24,1,9957},{24,1,9957},{22,1,5161},{36,1,8717},{32,1,621},{32,1,621},{0,41,617},{0,39,9026},{0,27,3845},{30,1,1285},{30,1,1285},{30,1,1285},{28,3,4},{37,2,4418},{4,1,185},{4,1,185},{0,41,617},{34,7,4418}, 
+{0,41,617},{21,3,1800},{25,1,698},{27,1,4},{24,1,0},{21,3,1800},{43,1,1800},{24,1,0},{0,5,1808},{43,1,1800},{0,5,1808},{6,0,5113},{6,0,5113},{6,0,5113},{6,0,5113},{16,1,221},{16,1,221},{16,1,221},{0,29,1},{0,46,1924},{0,46,1924},{26,1,65314},{36,1,25774},{18,1,8796},{32,1,4480},{26,1,65535},{4,1,20229},{16,1,625},{0,33,2238},{0,1,46456},{0,5,20870},{13,1,6795}, +{15,1,3034},{47,1,1445},{44,1,17},{39,11,10952},{40,1,4076},{6,1,305},{0,21,3009},{22,1,10952},{0,21,3009},{40,1,11489},{40,1,11489},{40,1,11489},{38,1,6125},{6,1,9922},{2,1,1157},{2,1,1157},{0,9,530},{0,37,9571},{0,41,4061},{47,1,1445},{47,1,1445},{47,1,1445},{44,17,2},{44,1,4418},{6,1,305},{6,1,305},{0,9,530},{26,9,4418},{0,9,530},{5,3,1352},{9,1,522},{41,1,1}, +{12,1,0},{5,3,1352},{11,1,1352},{12,1,0},{0,35,1354},{11,1,1352},{0,35,1354},{22,0,5941},{22,0,5941},{22,0,5941},{22,0,5941},{32,1,445},{32,1,445},{32,1,445},{0,43,0},{0,31,2260},{0,31,2260},{26,1,65535},{36,1,26766},{18,1,10162},{2,1,5359},{26,1,65359},{4,1,20334},{16,1,1051},{0,33,1610},{0,1,45998},{0,5,20364},{11,1,6173},{45,1,2907},{15,1,1665},{30,1,2},{17,10,9818}, +{42,1,3693},{8,1,442},{0,5,2169},{24,1,9818},{0,5,2169},{26,1,13320},{26,1,13320},{26,1,13320},{8,1,7395},{22,1,11384},{18,1,1997},{18,1,1997},{0,23,410},{0,21,10181},{0,25,4365},{15,1,1665},{15,1,1665},{15,1,1665},{30,1,2},{31,5,4418},{8,1,442},{8,1,442},{0,23,410},{14,9,4418},{0,23,410},{5,1,925},{23,1,338},{9,1,0},{46,1,0},{5,1,925},{9,1,925},{46,1,0}, +{0,19,925},{9,1,925},{0,19,925},{38,0,6962},{38,0,6962},{38,0,6962},{38,0,6962},{2,1,832},{2,1,832},{2,1,832},{0,11,1},{0,15,2645},{0,15,2645},{26,1,65535},{36,1,27616},{18,1,11415},{2,1,6203},{26,1,65014},{4,1,20439},{16,1,1524},{0,17,1111},{0,1,45494},{0,5,19935},{11,1,5581},{13,1,2885},{45,1,1810},{47,1,50},{39,7,8901},{44,1,3373},{40,1,610},{0,35,1522},{26,1,8901}, 
+{0,35,1522},{26,1,15080},{26,1,15080},{26,1,15080},{24,1,8661},{24,1,12846},{34,1,2905},{34,1,2905},{0,37,320},{0,3,10790},{0,39,4708},{45,1,1810},{45,1,1810},{45,1,1810},{47,1,50},{17,2,4418},{40,1,610},{40,1,610},{0,37,320},{16,1,4418},{0,37,320},{35,1,613},{37,1,232},{23,1,1},{45,1,0},{35,1,613},{23,1,613},{45,1,0},{0,3,617},{23,1,613},{0,3,617},{8,0,7946}, +{8,0,7946},{8,0,7946},{8,0,7946},{18,1,1225},{18,1,1225},{18,1,1225},{0,9,4},{0,45,3033},{0,45,3033},{26,1,65535},{36,1,28505},{34,1,12706},{2,1,7117},{26,1,64677},{4,1,20609},{16,1,2082},{0,17,705},{0,1,45031},{0,5,19583},{41,1,5202},{43,1,2966},{13,1,2020},{31,1,148},{9,3,8069},{30,1,3125},{42,1,820},{0,19,1006},{31,3,8069},{0,19,1006},{42,1,16952},{42,1,16952},{42,1,16952}, +{24,1,10085},{24,1,14318},{4,1,3981},{4,1,3981},{0,21,265},{0,3,11302},{0,37,5081},{13,1,2020},{13,1,2020},{13,1,2020},{31,1,148},{29,3,4418},{42,1,820},{42,1,820},{0,21,265},{43,9,4418},{0,21,265},{19,1,365},{5,1,136},{21,1,4},{11,1,1},{19,1,365},{37,1,365},{11,1,1},{0,3,377},{37,1,365},{0,3,377},{8,0,8986},{8,0,8986},{8,0,8986},{8,0,8986},{18,1,1737}, +{18,1,1737},{18,1,1737},{0,23,1},{0,43,3434},{0,43,3434},{42,1,65535},{36,1,29412},{4,1,13785},{18,1,7875},{26,1,64490},{20,1,20801},{32,1,2593},{16,17,472},{0,1,43813},{0,3,17452},{9,1,4729},{11,1,2925},{11,1,2249},{45,1,281},{37,7,7322},{47,1,2941},{44,1,1037},{0,3,626},{30,1,7322},{0,3,626},{28,1,17819},{28,1,17819},{28,1,17819},{40,1,10777},{40,1,15150},{36,1,4710},{36,1,4710}, +{0,35,221},{0,17,11076},{0,37,4545},{11,1,2249},{11,1,2249},{11,1,2249},{45,1,281},{39,11,4418},{44,1,1037},{44,1,1037},{0,35,185},{22,1,4418},{0,35,185},{3,1,181},{19,1,61},{35,1,1},{39,1,0},{3,1,181},{5,1,181},{39,1,0},{0,33,185},{5,1,181},{0,33,185},{40,0,9256},{40,0,9256},{40,0,9256},{40,0,9256},{34,1,2041},{34,1,2041},{34,1,2041},{16,7,2},{0,41,3188}, 
+{0,41,3188},{42,1,65535},{38,1,30127},{36,1,14877},{4,1,8601},{42,1,64081},{36,1,20736},{18,1,3192},{2,17,302},{0,1,42247},{0,3,14278},{39,1,4387},{9,1,3051},{11,1,2465},{13,1,490},{23,17,6584},{29,1,2843},{30,1,1325},{0,33,291},{45,17,6584},{0,33,291},{14,1,18273},{14,1,18273},{14,1,18273},{42,1,11259},{26,1,15731},{22,1,5618},{22,1,5618},{32,3,221},{0,1,10637},{0,5,3849},{11,1,2465}, +{11,1,2465},{11,1,2465},{13,1,490},{25,5,4418},{30,1,1325},{30,1,1325},{0,3,136},{10,1,4418},{0,3,136},{1,3,50},{33,1,18},{3,1,0},{5,1,0},{1,3,50},{3,1,50},{5,1,0},{0,17,50},{3,1,50},{0,17,50},{10,0,9256},{10,0,9256},{10,0,9256},{10,0,9256},{36,1,2228},{36,1,2228},{36,1,2228},{2,21,1},{0,9,2738},{0,9,2738},{28,1,65535},{24,1,30766},{6,1,16028}, +{36,1,9391},{28,1,64158},{36,1,21269},{4,1,3821},{18,1,176},{0,1,41339},{0,3,11746},{23,1,4216},{9,1,3083},{9,1,2642},{27,1,776},{3,25,6019},{13,1,2763},{47,1,1613},{0,17,115},{29,1,6019},{0,17,115},{30,1,18737},{30,1,18737},{30,1,18737},{12,1,11820},{42,1,16379},{24,1,6357},{24,1,6357},{18,33,229},{0,1,10589},{0,3,3225},{9,1,2642},{9,1,2642},{9,1,2642},{27,1,776},{9,3,4418}, +{47,1,1613},{47,1,1613},{0,17,90},{31,3,4418},{0,17,90},{1,17,2},{17,1,2},{17,1,1},{17,1,1},{1,17,2},{17,1,2},{17,1,1},{0,1,4},{17,1,2},{0,1,4},{42,0,9250},{42,0,9250},{42,0,9250},{42,0,9250},{6,1,2474},{6,1,2474},{6,1,2474},{18,35,5},{0,37,2405},{0,37,2405},{14,1,65535},{24,1,31241},{22,1,16737},{6,1,10024},{14,1,64173},{38,1,21415},{20,1,4180}, +{4,1,111},{16,1,40689},{0,3,9508},{7,1,3648},{39,1,2723},{39,1,2362},{11,1,725},{37,1,5163},{13,1,2451},{29,1,1450},{0,1,16},{7,19,5163},{0,1,16},{47,1,18185},{47,1,18185},{47,1,18185},{44,1,11714},{28,1,15784},{40,1,6413},{40,1,6413},{34,17,141},{0,1,9881},{0,3,2501},{39,1,2362},{39,1,2362},{39,1,2362},{11,1,725},{37,7,3872},{29,1,1450},{29,1,1450},{0,1,16},{30,1,3872}, 
+{0,1,16},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{12,0,9256},{12,0,9256},{12,0,9256},{12,0,9256},{38,1,2720},{38,1,2720},{38,1,2720},{4,19,2},{0,21,2041},{0,21,2041},{30,1,65535},{40,1,31563},{8,1,17236},{38,1,10554},{14,1,63701},{24,1,21372},{36,1,4441},{20,1,45},{16,1,40151},{0,33,7454},{21,1,3014}, +{23,1,2261},{39,1,1962},{25,1,629},{21,17,4267},{11,1,2028},{13,1,1186},{32,1,0},{17,21,4267},{32,1,0},{47,1,17289},{47,1,17289},{47,1,17289},{14,1,11436},{14,1,14726},{26,1,6323},{26,1,6323},{20,17,62},{16,1,9032},{0,17,1923},{39,1,1962},{39,1,1962},{39,1,1962},{25,1,629},{19,11,3200},{13,1,1186},{13,1,1186},{32,1,0},{47,1,3200},{32,1,0},{1,1,0},{1,1,0},{1,1,0}, +{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{44,0,9250},{44,0,9250},{44,0,9250},{44,0,9250},{24,1,2897},{24,1,2897},{24,1,2897},{20,3,5},{0,3,1717},{0,3,1717},{30,1,65535},{26,1,31988},{24,1,17745},{8,1,11181},{30,1,63430},{40,1,21435},{22,1,4810},{6,1,5},{2,1,39477},{0,33,5328},{21,1,2339},{7,1,1778},{7,1,1553},{9,1,477},{19,5,3361}, +{25,1,1634},{27,1,953},{20,1,0},{37,3,3361},{20,1,0},{45,1,16354},{45,1,16354},{45,1,16354},{30,1,11202},{30,1,13722},{42,1,6189},{42,1,6189},{6,1,9},{2,1,8249},{0,17,1437},{7,1,1553},{7,1,1553},{7,1,1553},{9,1,477},{23,1,2521},{27,1,953},{27,1,953},{20,1,0},{45,1,2521},{20,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0}, +{0,1,0},{1,1,0},{0,1,0},{14,0,9266},{14,0,9266},{14,0,9266},{14,0,9266},{40,1,3185},{40,1,3185},{40,1,3185},{6,17,0},{0,17,1412},{0,17,1412},{47,1,65535},{42,1,32389},{10,1,18354},{40,1,11850},{30,1,63370},{40,1,21737},{8,1,5159},{22,1,24},{2,1,39380},{0,17,3675},{5,1,1843},{21,1,1394},{37,1,1241},{23,1,370},{5,1,2646},{9,1,1282},{11,1,757},{8,1,1},{9,1,2646}, 
+{8,1,1},{45,1,15490},{45,1,15490},{45,1,15490},{47,1,10946},{47,1,12914},{42,1,6221},{42,1,6221},{38,1,10},{2,1,7753},{0,1,1225},{37,1,1241},{37,1,1241},{37,1,1241},{23,1,370},{7,1,1985},{11,1,757},{11,1,757},{8,1,1},{13,1,1985},{8,1,1},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{46,0,9248}, +{46,0,9248},{46,0,9248},{46,0,9248},{26,1,3400},{26,1,3400},{26,1,3400},{38,1,10},{0,1,1225},{0,1,1225},{47,1,65535},{28,1,33179},{26,1,18917},{10,1,12588},{47,1,62997},{26,1,21996},{24,1,5521},{8,1,36},{4,1,39403},{0,17,2452},{5,1,1411},{21,1,1058},{21,1,914},{7,1,274},{35,1,2017},{23,1,939},{9,1,585},{26,1,0},{1,35,2017},{26,1,0},{29,1,14809},{29,1,14809},{29,1,14809}, +{31,1,10801},{47,1,12162},{14,1,6117},{14,1,6117},{8,1,50},{20,1,7322},{0,1,1241},{21,1,914},{21,1,914},{21,1,914},{7,1,274},{35,5,1513},{9,1,585},{9,1,585},{26,1,0},{27,1,1513},{26,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{47,0,9250},{47,0,9250},{47,0,9250},{47,0,9250},{12,1,3690}, +{12,1,3690},{12,1,3690},{8,1,50},{0,1,1241},{0,1,1241},{45,1,65535},{14,1,33274},{42,1,19608},{42,1,13375},{47,1,62627},{42,1,22211},{10,1,6045},{24,1,138},{36,1,39015},{0,1,1732},{35,1,1048},{5,1,766},{5,1,666},{37,1,212},{3,3,1473},{7,1,675},{23,1,410},{14,1,1},{3,3,1473},{14,1,1},{13,1,14121},{13,1,14121},{13,1,14121},{45,1,10571},{45,1,11434},{30,1,6081},{30,1,6081}, +{40,1,137},{36,1,6926},{2,1,1445},{5,1,666},{5,1,666},{5,1,666},{37,1,212},{35,3,1105},{23,1,410},{23,1,410},{14,1,1},{25,1,1105},{14,1,1},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{15,0,9256},{15,0,9256},{15,0,9256},{15,0,9256},{14,1,3985},{14,1,3985},{14,1,3985},{40,1,137},{2,1,1445}, +{2,1,1445}, diff --git a/thirdparty/basisu/transcoder/basisu_transcoder_tables_astc_0_255.inc b/thirdparty/basisu/transcoder/basisu_transcoder_tables_astc_0_255.inc new file mode 100644 index 000000000..da4e7fee9 --- /dev/null +++ 
b/thirdparty/basisu/transcoder/basisu_transcoder_tables_astc_0_255.inc @@ -0,0 +1,481 @@ +{0,16,18},{0,12,1},{0,8,0},{0,7,5},{0,10,35},{0,6,21},{0,6,9},{0,4,24},{1,4,36},{0,4,25},{0,16,18},{0,12,1},{0,8,0},{0,7,5},{5,0,35},{0,6,21},{0,6,9},{0,4,24},{10,0,35},{0,4,24},{0,7,0},{0,7,0},{0,7,0},{0,3,0},{0,4,2},{0,3,0},{0,3,0},{0,1,1},{0,2,2},{0,1,1},{0,7,0}, +{0,7,0},{0,7,0},{0,3,0},{2,0,2},{0,3,0},{0,3,0},{0,1,1},{4,0,2},{0,1,1},{8,0,18},{0,12,1},{0,8,0},{0,7,5},{8,0,18},{16,0,18},{0,7,5},{0,5,18},{16,0,18},{0,5,18},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,34,36},{3,23,19},{4,17,25}, +{3,16,19},{0,34,51},{0,20,18},{0,16,1},{0,13,22},{0,15,68},{0,12,33},{6,28,18},{6,21,0},{6,16,1},{5,15,3},{17,0,51},{1,19,18},{2,15,1},{0,13,22},{34,0,51},{0,13,22},{3,25,18},{3,25,18},{3,25,18},{3,14,18},{0,25,8},{0,16,1},{0,16,1},{0,10,0},{0,12,17},{0,9,5},{6,19,0},{6,19,0},{6,19,0},{6,12,0},{12,0,8}, +{4,13,0},{4,13,0},{0,10,0},{25,0,8},{0,10,0},{20,0,18},{6,21,0},{8,16,0},{0,16,0},{20,0,18},{40,0,18},{0,16,0},{0,13,18},{40,0,18},{0,13,18},{3,0,18},{3,0,18},{3,0,18},{3,0,18},{0,19,0},{0,19,0},{0,19,0},{0,9,0},{0,9,4},{0,9,4},{11,42,36},{11,31,19},{12,24,24},{11,24,19},{8,42,51},{8,28,18},{8,24,1}, +{7,20,22},{0,27,52},{2,21,18},{14,36,18},{14,29,0},{14,24,1},{13,23,3},{29,0,51},{9,27,18},{10,23,1},{0,21,18},{58,0,51},{0,21,18},{11,33,18},{11,33,18},{11,33,18},{11,22,18},{8,33,8},{8,24,1},{8,24,1},{8,18,0},{2,23,8},{4,18,1},{14,26,0},{14,26,0},{14,26,0},{14,20,0},{24,0,8},{11,22,0},{11,22,0},{6,18,0},{49,0,8}, +{6,18,0},{32,0,18},{14,29,0},{15,24,0},{6,24,0},{32,0,18},{64,0,18},{6,24,0},{0,21,18},{64,0,18},{0,21,18},{11,0,18},{11,0,18},{11,0,18},{11,0,18},{8,27,0},{8,27,0},{8,27,0},{8,17,0},{4,19,0},{4,19,0},{19,50,36},{19,39,19},{20,32,24},{19,32,19},{16,50,51},{16,36,18},{16,32,1},{15,28,22},{7,37,51},{10,29,18},{22,43,18}, 
+{22,36,1},{22,32,1},{21,31,3},{41,0,51},{17,35,18},{18,31,1},{8,29,18},{82,0,51},{8,29,18},{19,41,18},{19,41,18},{19,41,18},{19,30,18},{16,41,8},{16,31,1},{16,31,1},{16,26,0},{10,31,8},{12,26,1},{22,34,0},{22,34,0},{22,34,0},{22,28,0},{36,0,8},{18,30,0},{18,30,0},{14,26,0},{73,0,8},{14,26,0},{43,0,18},{20,38,0},{23,32,0}, +{14,32,0},{43,0,18},{89,0,18},{14,32,0},{0,29,18},{89,0,18},{0,29,18},{19,0,18},{19,0,18},{19,0,18},{19,0,18},{16,35,0},{16,35,0},{16,35,0},{16,25,0},{12,27,0},{12,27,0},{28,59,36},{28,48,19},{29,41,24},{28,41,19},{25,59,51},{25,45,18},{25,41,1},{24,37,22},{16,46,51},{19,38,18},{31,52,18},{31,45,1},{31,41,1},{30,40,3},{54,0,51}, +{24,45,18},{27,40,1},{17,38,18},{110,0,51},{17,38,18},{28,49,18},{28,49,18},{28,49,18},{28,39,18},{25,49,8},{25,40,1},{25,40,1},{25,35,0},{19,40,8},{20,35,1},{31,43,0},{31,43,0},{31,43,0},{31,37,0},{49,0,8},{27,39,0},{27,39,0},{23,35,0},{101,0,8},{23,35,0},{57,0,18},{29,47,0},{32,41,0},{23,41,0},{57,0,18},{116,0,18},{23,41,0}, +{0,38,18},{116,0,18},{0,38,18},{28,0,18},{28,0,18},{28,0,18},{28,0,18},{25,43,0},{25,43,0},{25,43,0},{25,34,0},{19,37,0},{19,37,0},{36,66,36},{36,56,19},{37,49,24},{36,49,19},{33,66,51},{33,53,18},{33,49,1},{32,45,22},{23,54,51},{27,46,18},{39,60,18},{39,53,1},{39,49,1},{38,48,3},{66,0,51},{32,53,18},{35,48,1},{25,46,18},{134,0,51}, +{25,46,18},{36,57,18},{36,57,18},{36,57,18},{36,47,18},{33,57,8},{33,48,1},{33,48,1},{33,43,0},{25,49,8},{28,43,1},{39,51,0},{39,51,0},{39,51,0},{39,45,0},{61,0,8},{35,47,0},{35,47,0},{31,43,0},{125,0,8},{31,43,0},{69,0,18},{37,55,0},{40,49,0},{31,49,0},{69,0,18},{140,0,18},{31,49,0},{0,46,18},{140,0,18},{0,46,18},{36,0,18}, +{36,0,18},{36,0,18},{36,0,18},{33,51,0},{33,51,0},{33,51,0},{33,42,0},{27,45,0},{27,45,0},{44,74,36},{44,64,19},{45,57,24},{44,57,19},{41,74,51},{41,61,18},{41,57,1},{40,53,22},{31,62,51},{35,54,18},{47,68,18},{47,61,1},{47,57,1},{46,56,3},{78,0,51},{40,61,18},{43,56,1},{33,54,18},{158,0,51},{33,54,18},{44,65,18},{44,65,18},{44,65,18}, 
+{44,55,18},{41,65,8},{41,56,1},{41,56,1},{41,51,0},{33,57,8},{36,51,1},{47,59,0},{47,59,0},{47,59,0},{47,53,0},{73,0,8},{43,55,0},{43,55,0},{39,51,0},{149,0,8},{39,51,0},{81,0,18},{45,63,0},{48,57,0},{39,57,0},{81,0,18},{164,0,18},{39,57,0},{0,54,18},{164,0,18},{0,54,18},{44,0,18},{44,0,18},{44,0,18},{44,0,18},{41,59,0}, +{41,59,0},{41,59,0},{41,50,0},{35,53,0},{35,53,0},{52,82,36},{52,71,19},{53,65,24},{52,65,19},{49,82,51},{49,68,18},{49,65,1},{48,61,22},{39,70,51},{43,62,18},{55,76,18},{55,69,1},{55,65,1},{54,64,3},{89,0,51},{49,68,18},{51,64,1},{41,62,18},{183,0,51},{41,62,18},{52,73,18},{52,73,18},{52,73,18},{52,63,18},{49,73,8},{49,64,1},{49,64,1}, +{49,59,0},{42,64,8},{44,59,1},{55,67,0},{55,67,0},{55,67,0},{55,61,0},{85,0,8},{51,63,0},{51,63,0},{47,59,0},{174,0,8},{47,59,0},{92,0,18},{54,70,0},{56,65,0},{47,65,0},{92,0,18},{189,0,18},{47,65,0},{0,62,18},{189,0,18},{0,62,18},{52,0,18},{52,0,18},{52,0,18},{52,0,18},{49,67,0},{49,67,0},{49,67,0},{49,58,0},{43,61,0}, +{43,61,0},{61,91,36},{61,80,19},{62,74,24},{61,73,20},{58,91,51},{58,77,18},{58,73,2},{57,70,22},{48,79,51},{50,71,19},{64,85,18},{64,77,1},{64,74,1},{63,73,3},{103,0,51},{58,77,18},{59,74,1},{49,71,18},{210,0,51},{49,71,18},{61,82,18},{61,82,18},{61,82,18},{61,72,18},{58,82,8},{58,73,1},{58,73,1},{58,68,0},{51,73,8},{53,68,1},{64,76,0}, +{64,76,0},{64,76,0},{64,70,0},{98,0,8},{60,72,0},{60,72,0},{56,68,0},{201,0,8},{56,68,0},{106,0,18},{63,79,0},{65,74,0},{55,74,0},{106,0,18},{216,0,18},{55,74,0},{0,71,18},{216,0,18},{0,71,18},{61,0,18},{61,0,18},{61,0,18},{61,0,18},{58,76,0},{58,76,0},{58,76,0},{58,67,0},{53,69,0},{53,69,0},{69,99,36},{69,88,19},{70,82,24}, +{69,81,20},{66,99,51},{66,85,18},{66,81,2},{65,79,23},{56,87,51},{58,79,19},{72,93,18},{72,85,1},{72,82,1},{72,80,5},{115,0,51},{66,85,18},{67,82,1},{57,79,18},{234,0,51},{57,79,18},{69,90,18},{69,90,18},{69,90,18},{69,79,18},{66,90,8},{66,81,1},{66,81,1},{66,75,1},{59,81,8},{61,76,1},{72,84,0},{72,84,0},{72,84,0},{72,78,0},{110,0,8}, 
+{69,79,0},{69,79,0},{63,76,0},{225,0,8},{63,76,0},{118,0,18},{71,87,0},{73,82,0},{63,82,0},{118,0,18},{240,0,18},{63,82,0},{0,79,18},{240,0,18},{0,79,18},{69,0,18},{69,0,18},{69,0,18},{69,0,18},{66,84,0},{66,84,0},{66,84,0},{66,75,0},{61,77,0},{61,77,0},{77,107,36},{77,96,19},{78,90,24},{77,89,20},{74,107,51},{74,93,18},{74,89,2}, +{73,87,23},{64,95,51},{66,87,19},{80,101,18},{80,93,1},{80,90,1},{80,88,5},{127,0,51},{74,93,18},{75,90,1},{65,87,18},{254,2,51},{65,87,18},{77,98,18},{77,98,18},{77,98,18},{77,87,18},{74,98,8},{74,89,1},{74,89,1},{74,83,1},{67,89,8},{69,84,1},{80,92,0},{80,92,0},{80,92,0},{80,86,0},{122,0,8},{77,87,0},{77,87,0},{71,84,0},{249,0,8}, +{71,84,0},{129,0,18},{79,95,0},{81,90,0},{71,90,0},{129,0,18},{254,5,18},{71,90,0},{0,87,18},{254,5,18},{0,87,18},{77,0,18},{77,0,18},{77,0,18},{77,0,18},{74,92,0},{74,92,0},{74,92,0},{74,83,0},{69,85,0},{69,85,0},{85,115,36},{85,104,19},{86,98,24},{85,97,20},{82,115,51},{82,101,18},{82,97,2},{81,95,23},{72,103,51},{74,95,19},{88,109,18}, +{88,101,1},{88,98,1},{88,96,5},{138,0,51},{82,101,18},{83,98,1},{73,95,18},{254,14,51},{73,95,18},{85,106,18},{85,106,18},{85,106,18},{85,95,18},{82,106,8},{82,97,1},{82,97,1},{82,91,1},{75,97,8},{77,92,1},{88,100,0},{88,100,0},{88,100,0},{88,94,0},{134,0,8},{85,95,0},{85,95,0},{79,92,0},{255,9,8},{79,92,0},{141,0,18},{87,103,0},{89,98,0}, +{79,98,0},{141,0,18},{254,17,18},{79,98,0},{0,95,18},{254,17,18},{0,95,18},{85,0,18},{85,0,18},{85,0,18},{85,0,18},{82,100,0},{82,100,0},{82,100,0},{82,91,0},{77,93,0},{77,93,0},{94,124,36},{94,113,19},{95,107,24},{94,106,20},{91,124,51},{91,110,18},{91,106,2},{90,104,23},{81,112,51},{83,104,19},{97,118,18},{97,110,1},{97,107,1},{97,105,5},{152,0,51}, 
+{91,110,18},{92,107,1},{82,104,18},{255,27,51},{82,104,18},{94,115,18},{94,115,18},{94,115,18},{94,104,18},{91,115,8},{91,106,1},{91,106,1},{91,100,1},{84,106,8},{86,101,1},{97,108,0},{97,108,0},{97,108,0},{97,103,0},{147,0,8},{94,104,0},{94,104,0},{88,101,0},{254,23,8},{88,101,0},{155,0,18},{96,112,0},{98,107,0},{88,107,0},{155,0,18},{255,30,18},{88,107,0}, +{0,104,18},{255,30,18},{0,104,18},{94,0,18},{94,0,18},{94,0,18},{94,0,18},{91,109,0},{91,109,0},{91,109,0},{91,100,0},{86,102,0},{86,102,0},{102,132,36},{102,121,19},{102,116,23},{102,114,20},{99,132,51},{99,118,18},{99,114,2},{98,112,23},{89,120,51},{91,112,19},{105,125,18},{105,118,1},{105,115,1},{105,113,5},{164,0,51},{99,118,18},{100,114,1},{90,112,18},{255,39,51}, +{90,112,18},{102,123,18},{102,123,18},{102,123,18},{102,112,18},{99,123,8},{99,114,1},{99,114,1},{99,108,1},{92,114,8},{94,109,1},{105,116,0},{105,116,0},{105,116,0},{105,111,0},{159,0,8},{102,112,0},{102,112,0},{96,109,0},{254,35,8},{96,109,0},{167,0,18},{104,120,0},{106,115,0},{96,115,0},{167,0,18},{254,42,18},{96,115,0},{0,112,18},{254,42,18},{0,112,18},{102,0,18}, +{102,0,18},{102,0,18},{102,0,18},{99,117,0},{99,117,0},{99,117,0},{99,108,0},{94,110,0},{94,110,0},{110,140,36},{110,130,18},{110,124,23},{110,122,20},{107,140,51},{107,126,18},{107,122,2},{106,120,23},{97,128,51},{99,120,19},{113,133,18},{113,126,1},{113,123,1},{113,121,5},{175,0,51},{107,126,18},{108,122,1},{98,120,18},{254,51,51},{98,120,18},{110,130,18},{110,130,18},{110,130,18}, +{110,120,18},{107,131,8},{107,122,1},{107,122,1},{107,116,1},{100,122,8},{102,117,1},{113,124,0},{113,124,0},{113,124,0},{113,119,0},{171,0,8},{110,120,0},{110,120,0},{104,117,0},{255,46,8},{104,117,0},{178,0,18},{112,128,0},{114,123,0},{104,123,0},{178,0,18},{254,54,18},{104,123,0},{0,120,18},{254,54,18},{0,120,18},{110,0,18},{110,0,18},{110,0,18},{110,0,18},{107,124,0}, 
+{107,124,0},{107,124,0},{107,116,0},{102,118,0},{102,118,0},{118,147,36},{118,138,18},{118,132,23},{118,130,20},{115,148,51},{115,134,18},{115,130,2},{114,128,23},{105,136,51},{108,128,18},{121,141,18},{121,134,1},{121,131,1},{121,129,5},{187,0,51},{115,134,18},{116,130,1},{106,128,18},{254,63,51},{106,128,18},{118,138,18},{118,138,18},{118,138,18},{118,128,18},{115,138,8},{115,130,1},{115,130,1}, +{115,124,1},{108,130,8},{110,125,1},{121,132,0},{121,132,0},{121,132,0},{121,127,0},{183,0,8},{118,128,0},{118,128,0},{112,125,0},{255,58,8},{112,125,0},{190,0,18},{120,136,0},{122,131,0},{112,131,0},{190,0,18},{254,66,18},{112,131,0},{0,128,18},{254,66,18},{0,128,18},{118,0,18},{118,0,18},{118,0,18},{118,0,18},{115,132,0},{115,132,0},{115,132,0},{115,124,0},{110,126,0}, +{110,126,0},{127,156,36},{127,147,18},{127,141,23},{127,139,20},{124,156,51},{124,143,18},{124,139,2},{123,137,23},{114,145,51},{117,137,18},{130,150,18},{130,143,1},{130,140,1},{130,138,5},{201,0,51},{124,143,18},{125,139,1},{115,137,18},{255,76,51},{115,137,18},{127,147,18},{127,147,18},{127,147,18},{127,137,18},{124,147,8},{124,139,1},{124,139,1},{124,133,1},{117,139,8},{119,134,1},{130,141,0}, +{130,141,0},{130,141,0},{130,136,0},{196,0,8},{127,137,0},{127,137,0},{121,134,0},{254,72,8},{121,134,0},{204,0,18},{129,145,0},{131,140,0},{121,140,0},{204,0,18},{255,79,18},{121,140,0},{0,137,18},{255,79,18},{0,137,18},{127,0,18},{127,0,18},{127,0,18},{127,0,18},{124,141,0},{124,141,0},{124,141,0},{124,133,0},{119,135,0},{119,135,0},{135,164,36},{135,154,19},{135,149,23}, +{135,147,20},{132,164,51},{132,151,18},{132,147,2},{131,145,23},{121,153,51},{125,145,18},{138,158,18},{138,151,1},{138,148,1},{138,146,5},{213,0,51},{131,151,18},{133,147,1},{123,145,18},{254,88,51},{123,145,18},{135,155,18},{135,155,18},{135,155,18},{135,145,18},{132,155,8},{132,147,1},{132,147,1},{132,141,1},{125,147,8},{127,142,1},{138,149,0},{138,149,0},{138,149,0},{138,144,0},{208,0,8}, 
+{135,145,0},{135,145,0},{129,142,0},{254,84,8},{129,142,0},{215,0,18},{137,153,0},{139,148,0},{129,148,0},{215,0,18},{254,91,18},{129,148,0},{0,145,18},{254,91,18},{0,145,18},{135,0,18},{135,0,18},{135,0,18},{135,0,18},{132,149,0},{132,149,0},{132,149,0},{132,141,0},{127,143,0},{127,143,0},{143,172,36},{143,162,19},{143,157,23},{143,155,20},{140,172,51},{140,159,18},{140,155,2}, +{139,153,23},{129,161,51},{132,153,19},{146,166,18},{146,159,1},{146,156,1},{146,154,5},{224,0,51},{139,159,18},{141,155,1},{130,153,18},{254,100,51},{130,153,18},{143,163,18},{143,163,18},{143,163,18},{143,153,18},{140,163,8},{140,155,1},{140,155,1},{140,149,1},{132,155,8},{135,150,1},{146,157,0},{146,157,0},{146,157,0},{146,152,0},{220,0,8},{143,153,0},{143,153,0},{137,150,0},{255,95,8}, +{137,150,0},{227,0,18},{144,161,0},{147,156,0},{136,156,0},{227,0,18},{254,103,18},{136,156,0},{0,153,18},{254,103,18},{0,153,18},{143,0,18},{143,0,18},{143,0,18},{143,0,18},{140,157,0},{140,157,0},{140,157,0},{140,149,0},{135,151,0},{135,151,0},{151,180,36},{151,170,19},{151,165,23},{151,163,20},{148,180,51},{148,167,18},{148,163,2},{148,160,24},{137,169,51},{140,161,19},{154,174,18}, +{154,167,1},{154,164,1},{154,162,5},{236,0,51},{147,167,18},{149,164,1},{138,161,18},{254,112,51},{138,161,18},{151,171,18},{151,171,18},{151,171,18},{151,161,18},{148,171,8},{148,162,1},{148,162,1},{148,157,1},{140,163,8},{143,158,1},{154,165,0},{154,165,0},{154,165,0},{154,160,0},{232,0,8},{150,161,0},{150,161,0},{144,158,0},{255,107,8},{144,158,0},{239,0,18},{152,169,0},{155,164,0}, +{144,164,0},{239,0,18},{254,115,18},{144,164,0},{0,161,18},{254,115,18},{0,161,18},{151,0,18},{151,0,18},{151,0,18},{151,0,18},{148,165,0},{148,165,0},{148,165,0},{148,157,0},{142,159,0},{142,159,0},{160,189,36},{160,179,19},{160,174,23},{160,172,20},{157,189,51},{157,176,18},{157,172,2},{157,169,24},{146,178,51},{149,170,19},{163,183,18},{163,176,1},{163,173,1},{163,172,5},{250,0,51}, 
+{156,176,18},{158,173,1},{147,170,18},{255,125,51},{147,170,18},{160,180,18},{160,180,18},{160,180,18},{160,170,18},{157,180,8},{157,171,1},{157,171,1},{157,166,1},{149,172,8},{152,167,1},{163,174,0},{163,174,0},{163,174,0},{163,168,0},{245,0,8},{159,170,0},{159,170,0},{153,167,0},{254,121,8},{153,167,0},{253,0,18},{161,178,0},{164,173,0},{153,173,0},{253,0,18},{254,128,18},{153,173,0}, +{0,170,18},{254,128,18},{0,170,18},{160,0,18},{160,0,18},{160,0,18},{160,0,18},{157,174,0},{157,174,0},{157,174,0},{157,165,0},{151,168,0},{151,168,0},{168,197,36},{168,187,19},{168,182,23},{168,180,20},{165,197,51},{165,184,18},{165,180,2},{165,177,24},{154,186,51},{157,178,19},{171,191,18},{171,184,1},{171,181,1},{171,180,5},{255,13,51},{164,184,18},{166,181,1},{155,178,18},{254,137,51}, +{155,178,18},{168,188,18},{168,188,18},{168,188,18},{168,178,18},{165,188,8},{165,179,1},{165,179,1},{165,174,1},{157,180,8},{160,175,1},{171,182,0},{171,182,0},{171,182,0},{171,176,0},{255,4,8},{167,178,0},{167,178,0},{161,175,0},{255,132,8},{161,175,0},{255,19,18},{169,186,0},{172,181,0},{161,181,0},{255,19,18},{254,140,18},{161,181,0},{0,178,18},{254,140,18},{0,178,18},{168,0,18}, +{168,0,18},{168,0,18},{168,0,18},{165,182,0},{165,182,0},{165,182,0},{165,173,0},{159,176,0},{159,176,0},{176,205,36},{176,195,19},{176,190,23},{176,188,20},{173,205,51},{173,192,18},{173,188,2},{173,185,24},{162,194,51},{165,186,19},{179,199,18},{179,192,1},{179,189,1},{179,188,5},{255,37,51},{172,192,18},{174,189,1},{163,186,18},{254,149,51},{163,186,18},{176,196,18},{176,196,18},{176,196,18}, +{176,186,18},{173,196,8},{173,187,1},{173,187,1},{173,182,1},{165,188,8},{168,183,1},{179,190,0},{179,190,0},{179,190,0},{179,184,0},{255,28,8},{175,186,0},{175,186,0},{169,183,0},{255,144,8},{169,183,0},{255,43,18},{177,194,0},{180,189,0},{169,189,0},{255,43,18},{254,152,18},{169,189,0},{0,186,18},{254,152,18},{0,186,18},{176,0,18},{176,0,18},{176,0,18},{176,0,18},{173,190,0}, 
+{173,190,0},{173,190,0},{173,181,0},{167,184,0},{167,184,0},{184,213,36},{184,203,19},{184,197,22},{184,196,20},{181,213,51},{181,200,18},{181,196,2},{181,193,24},{170,202,51},{173,194,19},{187,207,18},{187,201,1},{187,197,0},{187,196,5},{255,61,51},{180,200,18},{182,197,1},{171,194,18},{254,161,51},{171,194,18},{184,204,18},{184,204,18},{184,204,18},{184,194,18},{181,204,8},{181,195,1},{181,195,1}, +{181,190,1},{173,196,8},{176,191,1},{187,197,0},{187,197,0},{187,197,0},{187,192,0},{255,52,8},{183,194,0},{183,194,0},{177,191,0},{255,156,8},{177,191,0},{255,67,18},{185,202,0},{187,197,0},{177,197,0},{255,67,18},{254,164,18},{177,197,0},{0,194,18},{254,164,18},{0,194,18},{184,0,18},{184,0,18},{184,0,18},{184,0,18},{181,198,0},{181,198,0},{181,198,0},{181,189,0},{175,192,0}, +{175,192,0},{193,222,36},{193,212,18},{193,206,22},{193,205,20},{190,222,51},{189,209,19},{190,206,1},{190,202,24},{179,211,51},{182,203,19},{196,215,18},{196,210,1},{196,206,0},{196,205,5},{255,89,51},{189,209,18},{190,206,1},{180,203,18},{254,174,51},{180,203,18},{193,212,18},{193,212,18},{193,212,18},{193,203,18},{190,213,8},{190,204,1},{190,204,1},{190,199,1},{182,205,8},{185,200,1},{196,206,0}, +{196,206,0},{196,206,0},{196,201,0},{255,79,8},{192,203,0},{192,203,0},{186,200,0},{253,170,8},{186,200,0},{255,95,18},{194,211,0},{196,206,0},{186,206,0},{255,95,18},{254,177,18},{186,206,0},{0,203,18},{254,177,18},{0,203,18},{193,0,18},{193,0,18},{193,0,18},{193,0,18},{190,206,0},{190,206,0},{190,206,0},{190,198,0},{184,201,0},{184,201,0},{201,229,36},{201,220,18},{201,214,22}, +{201,213,20},{198,230,51},{197,217,19},{198,214,1},{198,210,24},{187,219,51},{190,211,19},{204,223,18},{204,218,1},{204,214,0},{204,213,5},{255,113,51},{197,217,18},{198,214,1},{188,211,18},{254,186,51},{188,211,18},{201,220,18},{201,220,18},{201,220,18},{201,211,18},{198,220,8},{198,212,1},{198,212,1},{198,207,1},{190,213,8},{192,208,1},{204,214,0},{204,214,0},{204,214,0},{204,209,0},{255,104,8}, 
+{200,211,0},{200,211,0},{194,208,0},{255,181,8},{194,208,0},{255,119,18},{202,219,0},{204,214,0},{194,214,0},{255,119,18},{254,189,18},{194,214,0},{0,211,18},{254,189,18},{0,211,18},{201,0,18},{201,0,18},{201,0,18},{201,0,18},{198,214,0},{198,214,0},{198,214,0},{198,206,0},{192,209,0},{192,209,0},{209,237,36},{209,228,18},{209,222,22},{209,221,20},{206,237,51},{205,225,19},{206,222,1}, +{206,218,24},{196,226,51},{197,219,19},{212,231,18},{212,226,1},{212,222,0},{212,221,5},{255,137,51},{205,225,18},{206,222,1},{196,219,18},{254,198,51},{196,219,18},{209,228,18},{209,228,18},{209,228,18},{209,219,18},{206,228,8},{206,220,1},{206,220,1},{206,215,1},{198,221,8},{200,216,1},{212,222,0},{212,222,0},{212,222,0},{212,217,0},{255,128,8},{208,219,0},{208,219,0},{202,216,0},{255,193,8}, +{202,216,0},{255,143,18},{210,227,0},{212,222,0},{202,222,0},{255,143,18},{254,201,18},{202,222,0},{0,219,18},{254,201,18},{0,219,18},{209,0,18},{209,0,18},{209,0,18},{209,0,18},{206,222,0},{206,222,0},{206,222,0},{206,214,0},{200,217,0},{200,217,0},{217,245,36},{217,236,18},{217,230,22},{217,229,20},{214,245,51},{213,233,19},{214,230,1},{214,226,24},{204,234,51},{205,227,19},{220,239,18}, +{220,234,1},{220,230,0},{220,229,5},{255,161,51},{213,233,18},{214,230,1},{204,227,18},{254,210,51},{204,227,18},{217,236,18},{217,236,18},{217,236,18},{217,227,18},{214,236,8},{214,228,1},{214,228,1},{214,223,1},{206,229,8},{208,224,1},{220,230,0},{220,230,0},{220,230,0},{220,225,0},{255,152,8},{216,227,0},{216,227,0},{210,224,0},{255,205,8},{210,224,0},{255,167,18},{218,235,0},{220,230,0}, +{210,230,0},{255,167,18},{253,213,18},{210,230,0},{0,227,18},{253,213,18},{0,227,18},{217,0,18},{217,0,18},{217,0,18},{217,0,18},{214,230,0},{214,230,0},{214,230,0},{214,222,0},{208,225,0},{208,225,0},{226,254,36},{226,245,18},{226,239,22},{226,238,20},{223,254,51},{223,241,18},{223,239,1},{223,235,24},{213,243,51},{214,236,19},{229,248,18},{228,243,1},{229,239,0},{229,238,5},{255,189,51}, 
+{223,241,18},{223,239,1},{212,236,18},{254,223,51},{212,236,18},{226,245,18},{226,245,18},{226,245,18},{226,236,18},{223,245,8},{223,237,1},{223,237,1},{223,232,1},{216,237,8},{217,233,1},{229,239,0},{229,239,0},{229,239,0},{229,234,0},{255,180,8},{225,236,0},{225,236,0},{219,233,0},{255,218,8},{219,233,0},{255,195,18},{228,243,0},{229,239,0},{218,239,0},{255,195,18},{254,226,18},{218,239,0}, +{0,236,18},{254,226,18},{0,236,18},{226,0,18},{226,0,18},{226,0,18},{226,0,18},{223,239,0},{223,239,0},{223,239,0},{223,231,0},{217,234,0},{217,234,0},{235,254,46},{234,253,18},{234,247,22},{233,246,22},{233,255,56},{231,249,18},{231,247,1},{231,243,24},{221,251,51},{222,244,19},{238,254,19},{237,249,1},{237,247,0},{237,246,5},{255,213,51},{231,249,18},{231,247,1},{220,244,18},{254,235,51}, +{220,244,18},{234,253,18},{234,253,18},{234,253,18},{234,244,18},{231,253,8},{231,245,1},{231,245,1},{231,240,1},{224,245,8},{226,241,0},{237,247,0},{237,247,0},{237,247,0},{237,242,0},{255,204,8},{233,244,0},{233,244,0},{226,241,0},{255,230,8},{226,241,0},{255,219,18},{236,251,0},{237,247,0},{226,247,0},{255,219,18},{254,238,18},{226,247,0},{0,244,18},{254,238,18},{0,244,18},{234,0,18}, +{234,0,18},{234,0,18},{234,0,18},{231,247,0},{231,247,0},{231,247,0},{231,239,0},{226,241,0},{226,241,0},{245,255,72},{243,255,33},{242,255,22},{241,254,22},{242,255,81},{240,255,21},{239,255,1},{238,251,25},{234,255,56},{230,252,19},{248,255,29},{246,255,5},{245,255,0},{244,254,6},{255,237,51},{240,255,20},{239,255,1},{228,252,18},{254,247,51},{228,252,18},{242,255,22},{242,255,22},{242,255,22}, +{242,251,18},{241,254,14},{239,253,1},{239,253,1},{239,249,1},{232,253,8},{234,249,0},{245,255,0},{245,255,0},{245,255,0},{245,250,0},{255,228,8},{242,251,0},{242,251,0},{234,249,0},{255,242,8},{234,249,0},{255,243,18},{246,255,4},{245,255,0},{234,255,0},{255,243,18},{254,250,18},{234,255,0},{0,252,18},{254,250,18},{0,252,18},{242,0,18},{242,0,18},{242,0,18},{242,0,18},{239,255,0}, 
+{239,255,0},{239,255,0},{239,247,0},{234,249,0},{234,249,0},{251,255,28},{251,255,25},{251,255,24},{250,255,19},{251,255,24},{249,255,10},{248,255,9},{247,255,0},{246,255,12},{243,255,1},{254,255,1},{253,255,1},{253,255,1},{253,255,0},{255,249,3},{252,255,0},{252,255,0},{246,255,0},{254,253,3},{246,255,0},{251,255,24},{251,255,24},{251,255,24},{250,255,19},{250,254,19},{248,255,9},{248,255,9}, +{247,255,0},{246,255,8},{243,255,1},{253,254,1},{253,254,1},{253,254,1},{253,255,0},{255,249,2},{252,255,0},{252,255,0},{246,255,0},{254,253,2},{246,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{250,0,18},{250,0,18},{250,0,18},{250,0,18},{248,255,5},{248,255,5},{248,255,5},{247,255,0},{243,255,1}, +{243,255,1},{0,34,72},{0,24,5},{0,17,0},{0,13,25},{0,22,153},{0,15,90},{0,13,41},{0,10,110},{0,9,162},{0,8,119},{0,34,72},{0,24,5},{0,17,0},{0,13,25},{11,0,153},{0,15,90},{0,13,41},{0,10,110},{22,0,153},{0,10,110},{0,16,0},{0,16,0},{0,16,0},{0,8,0},{0,7,13},{0,6,2},{0,6,2},{0,4,5},{0,3,13},{0,4,6},{0,16,0}, +{0,16,0},{0,16,0},{0,8,0},{3,0,13},{0,6,2},{0,6,2},{0,4,5},{7,0,13},{0,4,5},{17,0,72},{0,24,5},{0,17,0},{0,13,25},{17,0,72},{34,0,72},{0,13,25},{0,11,72},{34,0,72},{0,11,72},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,54,77},{2,37,5},{3,25,18}, +{1,23,13},{0,43,243},{0,27,99},{0,23,24},{0,16,139},{0,18,276},{0,16,164},{3,52,72},{3,36,0},{4,26,6},{2,24,11},{21,0,243},{0,27,99},{0,23,24},{0,16,139},{43,0,243},{0,16,139},{2,36,5},{2,36,5},{2,36,5},{1,19,5},{0,25,50},{0,18,5},{0,18,5},{0,10,18},{0,12,59},{0,10,27},{3,34,0},{3,34,0},{3,34,0},{3,18,0},{12,0,50}, +{0,18,5},{0,18,5},{0,10,18},{25,0,50},{0,10,18},{29,0,72},{3,36,0},{8,25,0},{0,23,8},{29,0,72},{58,0,72},{0,23,8},{0,19,72},{58,0,72},{0,19,72},{1,0,5},{1,0,5},{1,0,5},{1,0,5},{0,10,0},{0,10,0},{0,10,0},{0,5,0},{0,3,1},{0,3,1},{6,70,133},{6,46,65},{7,35,94},{5,33,65},{0,67,243},{0,39,75},{0,32,2}, 
+{0,25,105},{0,30,332},{0,24,164},{11,60,72},{11,44,0},{12,34,6},{10,32,11},{33,0,243},{0,39,75},{1,32,1},{0,25,105},{67,0,243},{0,25,105},{6,52,61},{6,52,61},{6,52,61},{5,29,61},{0,49,50},{0,32,1},{0,32,1},{0,19,5},{0,21,94},{0,18,35},{11,42,0},{11,42,0},{11,42,0},{11,26,0},{24,0,50},{3,30,0},{3,30,0},{0,19,5},{49,0,50}, +{0,19,5},{41,0,72},{11,44,0},{16,33,0},{0,32,1},{41,0,72},{82,0,72},{0,32,1},{0,27,72},{82,0,72},{0,27,72},{5,0,61},{5,0,61},{5,0,61},{5,0,61},{0,34,0},{0,34,0},{0,34,0},{0,17,0},{0,15,17},{0,15,17},{13,80,144},{13,56,77},{15,43,109},{12,40,76},{7,77,243},{7,49,73},{7,41,1},{5,33,100},{0,42,287},{0,33,98},{19,68,72}, +{19,52,0},{20,42,6},{18,40,11},{45,0,243},{4,50,72},{9,40,1},{0,34,83},{92,0,243},{0,34,83},{13,62,72},{13,62,72},{13,62,72},{13,37,72},{7,59,50},{7,41,1},{7,41,1},{6,28,3},{0,36,66},{0,29,4},{19,50,0},{19,50,0},{19,50,0},{19,34,0},{36,0,50},{11,38,0},{11,38,0},{0,29,0},{73,0,50},{0,29,0},{52,0,72},{18,52,0},{23,41,0}, +{5,41,0},{52,0,72},{107,0,72},{5,41,0},{0,35,72},{107,0,72},{0,35,72},{13,0,72},{13,0,72},{13,0,72},{13,0,72},{7,44,0},{7,44,0},{7,44,0},{7,25,0},{0,27,2},{0,27,2},{22,89,144},{22,65,77},{24,52,109},{21,49,76},{16,86,243},{16,58,73},{16,50,1},{14,42,100},{0,57,248},{3,43,75},{28,77,72},{28,61,0},{29,50,5},{27,49,11},{58,0,243}, +{13,59,72},{18,49,1},{0,44,73},{119,0,243},{0,44,73},{22,71,72},{22,71,72},{22,71,72},{22,46,72},{16,68,50},{16,50,1},{16,50,1},{15,37,3},{0,49,50},{6,38,1},{28,59,0},{28,59,0},{28,59,0},{28,43,0},{49,0,50},{20,47,0},{20,47,0},{8,38,0},{101,0,50},{8,38,0},{66,0,72},{27,61,0},{32,50,0},{13,50,0},{66,0,72},{134,0,72},{13,50,0}, +{0,44,72},{134,0,72},{0,44,72},{22,0,72},{22,0,72},{22,0,72},{22,0,72},{16,53,0},{16,53,0},{16,53,0},{16,34,0},{6,39,0},{6,39,0},{30,97,144},{30,73,77},{32,59,106},{30,56,77},{24,94,243},{24,66,73},{24,58,2},{22,50,100},{2,69,243},{10,51,76},{36,85,72},{36,67,1},{37,58,5},{35,57,11},{70,0,243},{21,67,72},{26,58,1},{3,52,72},{143,0,243}, 
+{3,52,72},{30,79,72},{30,79,72},{30,79,72},{30,54,72},{24,76,50},{25,56,2},{25,56,2},{23,45,3},{8,57,50},{14,46,1},{36,66,0},{36,66,0},{36,66,0},{36,51,0},{61,0,50},{27,56,0},{27,56,0},{16,46,0},{125,0,50},{16,46,0},{78,0,72},{35,69,0},{40,58,0},{21,58,0},{78,0,72},{158,0,72},{21,58,0},{0,52,72},{158,0,72},{0,52,72},{30,0,72}, +{30,0,72},{30,0,72},{30,0,72},{24,61,0},{24,61,0},{24,61,0},{24,42,0},{14,47,0},{14,47,0},{38,105,144},{38,81,77},{40,67,106},{38,64,77},{32,102,243},{32,74,73},{32,66,2},{30,59,103},{10,77,243},{18,59,76},{44,93,72},{44,75,1},{45,66,5},{44,63,13},{82,0,243},{29,75,72},{33,66,1},{11,60,72},{167,0,243},{11,60,72},{38,87,72},{38,87,72},{38,87,72}, +{38,62,72},{32,84,50},{33,64,2},{33,64,2},{31,53,3},{16,65,50},{22,54,1},{44,74,0},{44,74,0},{44,74,0},{44,59,0},{73,0,50},{35,63,0},{35,63,0},{23,54,0},{149,0,50},{23,54,0},{89,0,72},{43,77,0},{48,66,0},{29,66,0},{89,0,72},{183,0,72},{29,66,0},{0,60,72},{183,0,72},{0,60,72},{38,0,72},{38,0,72},{38,0,72},{38,0,72},{32,69,0}, +{32,69,0},{32,69,0},{32,50,0},{21,55,0},{21,55,0},{46,113,144},{46,88,76},{48,75,106},{46,72,77},{40,110,243},{40,82,73},{40,73,2},{38,67,103},{18,85,243},{26,67,76},{52,100,72},{52,83,1},{53,74,5},{52,72,13},{94,0,243},{37,83,72},{41,74,1},{19,68,72},{192,0,243},{19,68,72},{46,95,72},{46,95,72},{46,95,72},{46,70,72},{40,92,50},{40,73,1},{40,73,1}, +{39,61,3},{24,73,50},{30,62,1},{52,82,0},{52,82,0},{52,82,0},{52,67,0},{85,0,50},{43,71,0},{43,71,0},{31,62,0},{174,0,50},{31,62,0},{101,0,72},{51,85,0},{56,74,0},{37,74,0},{101,0,72},{207,0,72},{37,74,0},{0,68,72},{207,0,72},{0,68,72},{46,0,72},{46,0,72},{46,0,72},{46,0,72},{40,76,0},{40,76,0},{40,76,0},{40,58,0},{29,63,0}, 
+{29,63,0},{55,122,144},{55,97,76},{57,84,106},{55,81,77},{49,119,243},{48,91,74},{49,82,2},{47,76,103},{27,94,243},{35,76,76},{61,109,72},{61,92,1},{62,83,5},{61,81,13},{107,0,243},{46,92,72},{50,83,1},{28,77,72},{219,0,243},{28,77,72},{55,103,72},{55,103,72},{55,103,72},{55,79,72},{49,101,50},{49,82,1},{49,82,1},{49,69,5},{33,82,50},{38,71,1},{61,91,0}, +{61,91,0},{61,91,0},{61,76,0},{98,0,50},{52,80,0},{52,80,0},{40,71,0},{201,0,50},{40,71,0},{115,0,72},{60,94,0},{65,83,0},{46,83,0},{115,0,72},{234,0,72},{46,83,0},{0,77,72},{234,0,72},{0,77,72},{55,0,72},{55,0,72},{55,0,72},{55,0,72},{49,85,0},{49,85,0},{49,85,0},{49,67,0},{38,72,0},{38,72,0},{63,130,144},{63,105,76},{65,92,106}, +{63,89,77},{57,127,243},{56,99,74},{57,90,2},{55,84,103},{35,102,243},{42,84,76},{69,117,72},{69,100,1},{70,91,5},{69,89,13},{119,0,243},{54,100,72},{58,91,1},{36,85,72},{243,0,243},{36,85,72},{63,111,72},{63,111,72},{63,111,72},{63,87,72},{57,108,50},{57,90,1},{57,90,1},{57,77,5},{41,90,50},{46,79,1},{69,99,0},{69,99,0},{69,99,0},{69,84,0},{110,0,50}, +{60,88,0},{60,88,0},{48,79,0},{225,0,50},{48,79,0},{127,0,72},{68,102,0},{73,91,0},{54,91,0},{127,0,72},{254,2,72},{54,91,0},{0,85,72},{254,2,72},{0,85,72},{63,0,72},{63,0,72},{63,0,72},{63,0,72},{57,93,0},{57,93,0},{57,93,0},{57,75,0},{46,80,0},{46,80,0},{71,137,144},{71,113,76},{73,100,106},{71,97,77},{65,135,243},{64,107,74},{65,98,2}, +{63,92,103},{44,109,243},{50,92,76},{77,125,72},{77,108,1},{78,99,5},{77,97,13},{131,0,243},{62,108,72},{66,99,1},{44,93,72},{255,6,243},{44,93,72},{71,119,72},{71,119,72},{71,119,72},{71,95,72},{65,116,50},{65,98,1},{65,98,1},{65,85,5},{49,98,50},{54,87,1},{77,107,0},{77,107,0},{77,107,0},{77,92,0},{122,0,50},{68,96,0},{68,96,0},{56,87,0},{249,0,50}, 
+{56,87,0},{138,0,72},{76,110,0},{81,99,0},{62,99,0},{138,0,72},{254,14,72},{62,99,0},{0,93,72},{254,14,72},{0,93,72},{71,0,72},{71,0,72},{71,0,72},{71,0,72},{65,101,0},{65,101,0},{65,101,0},{65,83,0},{54,88,0},{54,88,0},{79,145,144},{79,121,76},{81,108,106},{79,105,77},{73,142,243},{72,115,74},{73,106,2},{71,100,103},{52,117,243},{58,100,76},{85,133,72}, +{85,116,1},{86,107,5},{85,105,13},{143,0,243},{70,116,72},{74,107,1},{52,101,72},{255,18,243},{52,101,72},{79,127,72},{79,127,72},{79,127,72},{79,103,72},{73,124,50},{73,106,1},{73,106,1},{73,93,5},{57,106,50},{62,95,1},{85,115,0},{85,115,0},{85,115,0},{85,100,0},{134,0,50},{76,104,0},{76,104,0},{64,95,0},{255,9,50},{64,95,0},{150,0,72},{84,118,0},{89,107,0}, +{70,107,0},{150,0,72},{254,26,72},{70,107,0},{0,101,72},{254,26,72},{0,101,72},{79,0,72},{79,0,72},{79,0,72},{79,0,72},{73,109,0},{73,109,0},{73,109,0},{73,91,0},{62,96,0},{62,96,0},{88,154,144},{88,130,76},{90,117,106},{88,114,77},{82,151,243},{81,124,74},{82,115,2},{80,109,103},{61,126,243},{67,109,76},{94,142,72},{94,125,1},{95,116,5},{94,114,13},{156,0,243}, +{79,125,72},{83,116,1},{61,110,72},{254,32,243},{61,110,72},{88,136,72},{88,136,72},{88,136,72},{88,112,72},{82,133,50},{82,115,1},{82,115,1},{82,102,5},{66,115,50},{71,104,1},{94,124,0},{94,124,0},{94,124,0},{94,109,0},{147,0,50},{85,113,0},{85,113,0},{73,104,0},{254,23,50},{73,104,0},{164,0,72},{93,127,0},{98,116,0},{79,116,0},{164,0,72},{255,39,72},{79,116,0}, +{0,110,72},{255,39,72},{0,110,72},{88,0,72},{88,0,72},{88,0,72},{88,0,72},{82,118,0},{82,118,0},{82,118,0},{82,100,0},{71,105,0},{71,105,0},{96,162,144},{96,138,76},{98,125,106},{96,122,77},{90,159,243},{90,131,73},{90,123,2},{88,117,103},{69,134,243},{75,117,76},{102,150,72},{102,133,1},{103,124,5},{102,122,13},{168,0,243},{88,132,72},{91,124,1},{69,118,72},{255,43,243}, 
+{69,118,72},{96,144,72},{96,144,72},{96,144,72},{96,120,72},{90,141,50},{90,123,1},{90,123,1},{90,110,5},{74,123,50},{79,112,1},{102,132,0},{102,132,0},{102,132,0},{102,117,0},{159,0,50},{93,121,0},{93,121,0},{81,112,0},{254,35,50},{81,112,0},{175,0,72},{101,135,0},{106,124,0},{87,124,0},{175,0,72},{254,51,72},{87,124,0},{0,118,72},{254,51,72},{0,118,72},{96,0,72}, +{96,0,72},{96,0,72},{96,0,72},{90,126,0},{90,126,0},{90,126,0},{90,108,0},{79,113,0},{79,113,0},{104,170,144},{104,146,76},{106,133,106},{104,130,77},{98,167,243},{98,139,73},{98,131,2},{96,125,103},{77,142,243},{83,125,76},{110,158,72},{110,142,0},{111,132,5},{110,130,13},{180,0,243},{96,140,72},{99,132,1},{76,126,72},{255,55,243},{76,126,72},{104,152,72},{104,152,72},{104,152,72}, +{104,128,72},{98,149,50},{98,131,1},{98,131,1},{98,118,5},{83,130,50},{87,120,1},{110,140,0},{110,140,0},{110,140,0},{110,125,0},{171,0,50},{101,129,0},{101,129,0},{89,120,0},{255,46,50},{89,120,0},{187,0,72},{110,142,0},{114,132,0},{94,132,0},{187,0,72},{254,63,72},{94,132,0},{0,126,72},{254,63,72},{0,126,72},{104,0,72},{104,0,72},{104,0,72},{104,0,72},{98,134,0}, +{98,134,0},{98,134,0},{98,116,0},{87,121,0},{87,121,0},{112,178,144},{112,154,76},{114,140,105},{112,138,77},{106,175,243},{106,147,73},{106,139,2},{104,133,103},{85,150,243},{91,133,76},{118,166,72},{118,149,1},{119,140,5},{118,138,13},{192,0,243},{104,148,72},{107,140,1},{84,134,72},{255,67,243},{84,134,72},{112,160,72},{112,160,72},{112,160,72},{112,136,72},{106,157,50},{106,139,1},{106,139,1}, +{106,126,5},{91,138,50},{95,128,1},{118,147,0},{118,147,0},{118,147,0},{118,133,0},{183,0,50},{109,137,0},{109,137,0},{97,128,0},{255,58,50},{97,128,0},{199,0,72},{117,151,0},{122,140,0},{102,140,0},{199,0,72},{254,75,72},{102,140,0},{0,134,72},{254,75,72},{0,134,72},{112,0,72},{112,0,72},{112,0,72},{112,0,72},{106,142,0},{106,142,0},{106,142,0},{106,124,0},{95,129,0}, 
+{95,129,0},{121,187,144},{121,163,76},{122,151,103},{121,148,77},{115,184,243},{115,156,73},{115,148,2},{112,141,105},{94,159,243},{100,142,76},{127,175,72},{127,158,1},{128,149,5},{127,147,13},{205,0,243},{113,157,72},{116,149,1},{93,143,72},{254,81,243},{93,143,72},{121,169,72},{121,169,72},{121,169,72},{121,145,72},{115,166,50},{115,148,1},{115,148,1},{115,135,5},{100,147,50},{104,137,1},{127,156,0}, +{127,156,0},{127,156,0},{127,142,0},{196,0,50},{117,146,0},{117,146,0},{106,137,0},{254,72,50},{106,137,0},{213,0,72},{125,160,0},{131,149,0},{111,149,0},{213,0,72},{254,88,72},{111,149,0},{0,143,72},{254,88,72},{0,143,72},{121,0,72},{121,0,72},{121,0,72},{121,0,72},{115,151,0},{115,151,0},{115,151,0},{115,133,0},{104,138,0},{104,138,0},{129,195,144},{129,170,76},{130,159,103}, +{129,156,77},{123,192,243},{123,164,73},{123,156,2},{122,149,106},{102,167,243},{108,150,76},{135,182,72},{135,166,1},{136,157,5},{134,155,14},{217,0,243},{121,165,72},{124,157,1},{101,151,72},{255,92,243},{101,151,72},{129,177,72},{129,177,72},{129,177,72},{129,152,72},{123,174,50},{123,155,1},{123,155,1},{123,143,5},{108,155,50},{113,145,0},{135,164,0},{135,164,0},{135,164,0},{135,150,0},{208,0,50}, +{125,154,0},{125,154,0},{113,145,0},{254,84,50},{113,145,0},{224,0,72},{133,168,0},{139,157,0},{119,157,0},{224,0,72},{254,100,72},{119,157,0},{0,151,72},{254,100,72},{0,151,72},{129,0,72},{129,0,72},{129,0,72},{129,0,72},{123,158,0},{123,158,0},{123,158,0},{123,141,0},{113,145,0},{113,145,0},{137,203,144},{137,178,76},{138,167,103},{137,164,77},{131,200,243},{130,173,74},{131,164,2}, +{130,157,106},{110,175,243},{116,158,76},{143,190,72},{143,174,1},{144,165,5},{142,163,14},{229,0,243},{129,173,72},{132,165,1},{109,159,72},{255,104,243},{109,159,72},{137,184,72},{137,184,72},{137,184,72},{137,160,72},{131,182,50},{131,163,1},{131,163,1},{131,152,5},{116,163,50},{120,153,1},{143,172,0},{143,172,0},{143,172,0},{143,157,0},{220,0,50},{133,162,0},{133,162,0},{121,153,0},{255,95,50}, 
+{121,153,0},{236,0,72},{141,176,0},{147,165,0},{127,165,0},{236,0,72},{254,112,72},{127,165,0},{0,159,72},{254,112,72},{0,159,72},{137,0,72},{137,0,72},{137,0,72},{137,0,72},{131,166,0},{131,166,0},{131,166,0},{131,149,0},{120,154,0},{120,154,0},{145,211,144},{145,186,76},{146,175,103},{145,172,77},{139,208,243},{138,181,74},{139,172,2},{138,165,106},{118,183,243},{124,166,76},{151,198,72}, +{151,182,1},{152,173,5},{150,171,14},{241,0,243},{135,182,72},{140,173,1},{117,167,72},{255,116,243},{117,167,72},{145,192,72},{145,192,72},{145,192,72},{145,168,72},{139,189,50},{139,171,1},{139,171,1},{139,160,5},{124,171,50},{128,161,1},{151,180,0},{151,180,0},{151,180,0},{151,165,0},{232,0,50},{141,170,0},{141,170,0},{129,161,0},{255,107,50},{129,161,0},{248,0,72},{149,184,0},{155,173,0}, +{135,173,0},{248,0,72},{254,124,72},{135,173,0},{0,167,72},{254,124,72},{0,167,72},{145,0,72},{145,0,72},{145,0,72},{145,0,72},{139,174,0},{139,174,0},{139,174,0},{139,156,0},{127,162,0},{127,162,0},{154,219,144},{154,195,76},{155,184,103},{154,181,77},{148,217,243},{147,190,74},{148,181,2},{147,174,106},{126,192,243},{133,175,76},{160,207,72},{160,191,1},{161,182,5},{159,180,14},{254,0,243}, +{144,191,72},{149,182,1},{126,176,72},{255,129,243},{126,176,72},{154,201,72},{154,201,72},{154,201,72},{154,177,72},{148,198,50},{148,180,1},{148,180,1},{148,169,5},{132,181,50},{137,170,1},{160,189,0},{160,189,0},{160,189,0},{160,174,0},{245,0,50},{150,179,0},{150,179,0},{138,170,0},{254,121,50},{138,170,0},{255,13,72},{158,193,0},{164,182,0},{144,182,0},{255,13,72},{254,137,72},{144,182,0}, +{0,176,72},{254,137,72},{0,176,72},{154,0,72},{154,0,72},{154,0,72},{154,0,72},{148,183,0},{148,183,0},{148,183,0},{148,165,0},{136,171,0},{136,171,0},{162,227,144},{162,203,76},{163,192,103},{162,189,77},{156,224,243},{155,198,74},{156,189,2},{155,182,106},{134,200,243},{141,183,76},{168,215,72},{168,199,1},{169,190,5},{167,188,14},{255,22,243},{152,199,72},{157,190,1},{134,184,72},{255,141,243}, 
+{134,184,72},{162,209,72},{162,209,72},{162,209,72},{162,185,72},{156,206,50},{156,188,1},{156,188,1},{156,177,5},{139,189,50},{145,178,1},{168,197,0},{168,197,0},{168,197,0},{168,182,0},{255,4,50},{158,187,0},{158,187,0},{146,178,0},{255,132,50},{146,178,0},{255,37,72},{166,201,0},{172,190,0},{152,190,0},{255,37,72},{254,149,72},{152,190,0},{0,184,72},{254,149,72},{0,184,72},{162,0,72}, +{162,0,72},{162,0,72},{162,0,72},{156,191,0},{156,191,0},{156,191,0},{156,173,0},{144,179,0},{144,179,0},{170,235,144},{170,211,76},{171,200,103},{170,197,77},{164,232,243},{163,206,74},{164,197,2},{163,190,106},{142,208,243},{149,191,76},{176,223,72},{176,207,1},{177,198,5},{175,196,14},{255,46,243},{160,207,72},{165,198,1},{142,192,72},{255,153,243},{142,192,72},{170,217,72},{170,217,72},{170,217,72}, +{170,193,72},{164,214,50},{164,196,1},{164,196,1},{164,185,5},{147,197,50},{153,186,1},{176,205,0},{176,205,0},{176,205,0},{176,190,0},{255,28,50},{166,195,0},{166,195,0},{154,186,0},{255,144,50},{154,186,0},{255,61,72},{174,209,0},{180,198,0},{160,198,0},{255,61,72},{254,161,72},{160,198,0},{0,192,72},{254,161,72},{0,192,72},{170,0,72},{170,0,72},{170,0,72},{170,0,72},{164,199,0}, +{164,199,0},{164,199,0},{164,181,0},{152,187,0},{152,187,0},{178,243,144},{178,219,76},{179,208,103},{178,205,77},{172,240,243},{171,214,74},{172,205,2},{171,198,106},{150,216,243},{157,199,76},{184,231,72},{184,215,1},{185,206,5},{183,204,14},{255,70,243},{169,214,72},{173,206,1},{150,200,72},{255,165,243},{150,200,72},{178,225,72},{178,225,72},{178,225,72},{178,201,72},{172,222,50},{172,204,1},{172,204,1}, +{172,193,5},{155,205,50},{161,194,1},{184,213,0},{184,213,0},{184,213,0},{184,198,0},{255,52,50},{174,203,0},{174,203,0},{162,194,0},{255,156,50},{162,194,0},{255,86,72},{182,217,0},{188,206,0},{168,206,0},{255,86,72},{255,172,72},{168,206,0},{0,200,72},{255,172,72},{0,200,72},{178,0,72},{178,0,72},{178,0,72},{178,0,72},{172,207,0},{172,207,0},{172,207,0},{172,189,0},{160,195,0}, 
+{160,195,0},{187,252,144},{187,228,76},{188,217,103},{187,214,77},{181,249,243},{180,222,74},{181,214,2},{180,207,106},{159,225,243},{166,208,76},{193,240,72},{193,224,1},{194,216,3},{192,213,14},{255,98,243},{178,223,72},{182,215,1},{158,209,72},{255,178,243},{158,209,72},{187,234,72},{187,234,72},{187,234,72},{187,210,72},{181,231,50},{181,213,1},{181,213,1},{181,202,5},{165,213,50},{170,203,1},{193,222,0}, +{193,222,0},{193,222,0},{193,207,0},{255,79,50},{183,212,0},{183,212,0},{171,203,0},{253,170,50},{171,203,0},{255,113,72},{192,225,0},{196,215,0},{177,215,0},{255,113,72},{254,186,72},{177,215,0},{0,209,72},{254,186,72},{0,209,72},{187,0,72},{187,0,72},{187,0,72},{187,0,72},{181,216,0},{181,216,0},{181,216,0},{181,198,0},{169,204,0},{169,204,0},{196,255,148},{195,236,76},{196,225,103}, +{195,222,77},{189,254,244},{188,230,74},{189,222,2},{188,215,106},{167,233,243},{173,217,77},{201,248,72},{201,231,1},{202,224,3},{200,221,14},{255,122,243},{186,231,72},{189,222,2},{166,217,72},{255,190,243},{166,217,72},{195,242,72},{195,242,72},{195,242,72},{195,218,72},{189,239,50},{189,221,1},{189,221,1},{189,210,5},{173,221,50},{178,211,1},{201,229,0},{201,229,0},{201,229,0},{201,215,0},{255,104,50}, +{191,220,0},{191,220,0},{179,211,0},{255,181,50},{179,211,0},{255,137,72},{200,233,0},{204,223,0},{184,223,0},{255,137,72},{254,198,72},{184,223,0},{0,217,72},{254,198,72},{0,217,72},{195,0,72},{195,0,72},{195,0,72},{195,0,72},{189,224,0},{189,224,0},{189,224,0},{189,206,0},{177,212,0},{177,212,0},{205,255,170},{203,244,76},{204,232,100},{202,230,79},{199,255,255},{196,238,74},{197,231,2}, 
+{196,223,106},{175,241,243},{181,225,77},{210,254,73},{209,239,1},{210,232,3},{208,229,14},{255,146,243},{194,239,72},{198,230,2},{174,225,72},{255,202,243},{174,225,72},{203,250,72},{203,250,72},{203,250,72},{203,226,72},{197,247,50},{197,229,1},{197,229,1},{197,218,5},{181,229,50},{186,219,1},{209,237,0},{209,237,0},{209,237,0},{209,223,0},{255,128,50},{199,228,0},{199,228,0},{187,219,0},{255,193,50}, +{187,219,0},{255,161,72},{208,241,0},{212,231,0},{192,231,0},{255,161,72},{254,210,72},{192,231,0},{0,225,72},{254,210,72},{0,225,72},{203,0,72},{203,0,72},{203,0,72},{203,0,72},{197,232,0},{197,232,0},{197,232,0},{197,214,0},{185,220,0},{185,220,0},{215,255,208},{212,252,75},{212,240,100},{210,238,79},{211,255,287},{204,246,74},{205,239,1},{203,231,109},{183,249,243},{189,233,77},{218,255,81}, +{217,247,1},{218,240,3},{215,237,17},{255,171,243},{202,247,72},{205,239,1},{182,233,72},{254,214,243},{182,233,72},{211,255,73},{211,255,73},{211,255,73},{211,234,72},{205,255,50},{205,237,1},{205,237,1},{205,226,5},{189,237,50},{194,227,0},{217,245,0},{217,245,0},{217,245,0},{217,231,0},{255,152,50},{208,235,0},{208,235,0},{194,227,0},{255,205,50},{194,227,0},{255,186,72},{216,249,0},{220,239,0}, +{200,239,0},{255,186,72},{255,221,72},{200,239,0},{0,233,72},{255,221,72},{0,233,72},{211,0,72},{211,0,72},{211,0,72},{211,0,72},{205,239,0},{205,239,0},{205,239,0},{205,222,0},{194,227,0},{194,227,0},{224,255,274},{222,255,98},{221,249,100},{219,247,79},{221,255,332},{213,255,74},{214,248,1},{212,240,109},{195,255,245},{198,242,77},{230,255,106},{226,255,4},{227,249,3},{224,246,17},{255,198,243}, 
+{213,255,73},{214,248,1},{191,242,72},{255,227,243},{191,242,72},{221,255,83},{221,255,83},{221,255,83},{220,243,72},{215,255,59},{214,246,1},{214,246,1},{213,234,6},{198,246,50},{203,236,0},{226,254,0},{226,254,0},{226,254,0},{226,240,0},{255,180,50},{217,244,0},{217,244,0},{203,236,0},{255,218,50},{203,236,0},{255,213,72},{228,255,2},{229,248,0},{209,248,0},{255,213,72},{254,235,72},{209,248,0}, +{0,242,72},{254,235,72},{0,242,72},{220,0,72},{220,0,72},{220,0,72},{220,0,72},{214,248,0},{214,248,0},{214,248,0},{214,231,0},{203,236,0},{203,236,0},{233,255,327},{231,255,164},{230,255,105},{228,253,77},{230,255,370},{225,255,95},{222,254,2},{220,248,94},{213,255,262},{208,249,65},{239,255,126},{237,255,35},{235,255,5},{234,253,13},{255,219,221},{228,255,82},{223,254,1},{201,249,61},{254,238,221}, +{201,249,61},{230,255,105},{230,255,105},{230,255,105},{228,251,72},{227,255,83},{222,254,1},{222,254,1},{221,243,6},{206,254,50},{211,244,0},{235,254,5},{235,254,5},{235,254,5},{234,248,0},{255,204,50},{225,252,0},{225,252,0},{211,244,0},{255,230,50},{211,244,0},{255,234,61},{240,255,17},{238,255,0},{219,255,0},{255,234,61},{255,245,61},{219,255,0},{0,249,61},{255,245,61},{0,249,61},{228,0,72}, +{228,0,72},{228,0,72},{228,0,72},{222,253,1},{222,253,1},{222,253,1},{222,239,0},{211,244,0},{211,244,0},{242,255,233},{239,255,164},{239,255,139},{237,255,78},{239,255,239},{234,255,62},{232,255,24},{230,252,18},{225,255,158},{218,253,5},{248,255,53},{245,255,27},{245,255,18},{242,255,1},{255,237,93},{240,255,26},{237,255,5},{217,253,5},{254,247,93},{217,253,5},{239,255,139},{239,255,139},{239,255,139}, 
+{237,255,78},{236,255,118},{232,255,24},{232,255,24},{229,251,6},{222,255,67},{219,252,0},{245,255,18},{245,255,18},{245,255,18},{242,254,1},{255,228,50},{237,255,5},{237,255,5},{219,252,0},{255,242,50},{219,252,0},{255,246,5},{251,254,1},{250,255,0},{243,255,0},{255,246,5},{255,251,5},{243,255,0},{0,253,5},{255,251,5},{0,253,5},{236,0,72},{236,0,72},{236,0,72},{236,0,72},{232,254,8}, +{232,254,8},{232,254,8},{230,247,0},{219,252,0},{219,252,0},{248,255,136},{245,255,119},{245,255,110},{244,255,81},{245,255,122},{243,255,52},{241,255,41},{238,255,0},{237,255,75},{231,255,5},{251,255,9},{251,255,6},{251,255,5},{250,255,1},{255,246,17},{249,255,3},{249,255,2},{237,255,0},{255,251,17},{237,255,0},{245,255,110},{245,255,110},{245,255,110},{244,255,81},{245,255,86},{241,255,41},{241,255,41}, +{238,255,0},{234,255,46},{231,255,5},{251,255,5},{251,255,5},{251,255,5},{250,255,1},{255,243,13},{249,255,2},{249,255,2},{237,255,0},{254,250,13},{237,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{244,0,72},{244,0,72},{244,0,72},{244,0,72},{241,254,25},{241,254,25},{241,254,25},{238,255,0},{231,255,5}, +{231,255,5},{0,58,200},{0,42,17},{0,29,0},{0,25,65},{0,40,441},{0,27,266},{0,23,121},{0,16,318},{0,18,467},{0,16,343},{0,58,200},{0,42,17},{0,29,0},{0,25,65},{20,0,441},{0,27,266},{0,23,121},{0,16,318},{40,0,441},{0,16,318},{0,28,0},{0,28,0},{0,28,0},{0,14,0},{0,13,41},{0,12,10},{0,12,10},{0,7,20},{0,6,42},{0,7,24},{0,28,0}, +{0,28,0},{0,28,0},{0,14,0},{6,0,41},{0,12,10},{0,12,10},{0,7,20},{13,0,41},{0,7,20},{29,0,200},{0,42,17},{0,29,0},{0,25,65},{29,0,200},{58,0,200},{0,25,65},{0,19,200},{58,0,200},{0,19,200},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,82,200},{0,54,1},{1,38,19}, 
+{0,34,34},{0,55,686},{0,36,339},{0,30,139},{0,22,446},{0,24,747},{0,22,495},{0,82,200},{0,54,1},{2,39,17},{0,34,34},{27,0,686},{0,36,339},{0,30,139},{0,22,446},{55,0,686},{0,22,446},{0,52,0},{0,52,0},{0,52,0},{0,26,0},{0,25,145},{0,21,45},{0,21,45},{0,13,80},{0,12,154},{0,10,94},{0,52,0},{0,52,0},{0,52,0},{0,26,0},{12,0,145}, +{0,21,45},{0,21,45},{0,13,80},{25,0,145},{0,13,80},{41,0,200},{0,54,1},{8,37,0},{0,34,34},{41,0,200},{82,0,200},{0,34,34},{0,27,200},{82,0,200},{0,27,200},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{4,99,225},{4,65,26},{6,46,74},{3,43,45},{0,79,723},{0,48,282},{0,42,54}, +{0,31,401},{0,36,852},{0,29,497},{7,92,200},{7,64,0},{9,46,21},{5,42,29},{39,0,723},{0,48,282},{0,42,54},{0,31,401},{79,0,723},{0,31,401},{4,68,25},{4,68,25},{4,68,25},{3,36,25},{0,49,162},{0,36,17},{0,36,17},{0,22,58},{0,21,206},{0,19,97},{7,62,0},{7,62,0},{7,62,0},{7,34,0},{24,0,162},{0,36,17},{0,36,17},{0,22,58},{49,0,162}, +{0,22,58},{52,0,200},{6,64,0},{16,45,0},{0,44,17},{52,0,200},{107,0,200},{0,44,17},{0,35,200},{107,0,200},{0,35,200},{3,0,25},{3,0,25},{3,0,25},{3,0,25},{0,22,0},{0,22,0},{0,22,0},{0,11,0},{0,9,5},{0,9,5},{8,115,313},{8,78,121},{10,56,198},{7,51,126},{0,104,723},{0,63,227},{0,51,6},{0,40,339},{0,48,956},{0,38,494},{15,100,200}, +{15,72,0},{17,54,21},{15,49,32},{51,0,723},{0,63,227},{0,51,6},{0,40,339},{104,0,723},{0,40,339},{8,84,113},{8,84,113},{8,84,113},{7,46,113},{0,73,162},{0,48,1},{0,48,1},{0,31,29},{0,33,270},{0,27,109},{15,70,0},{15,70,0},{15,70,0},{15,42,0},{36,0,162},{0,48,1},{0,48,1},{0,31,29},{73,0,162},{0,31,29},{64,0,200},{14,72,0},{24,53,0}, +{0,53,4},{64,0,200},{131,0,200},{0,53,4},{0,43,200},{131,0,200},{0,43,200},{7,0,113},{7,0,113},{7,0,113},{7,0,113},{0,46,0},{0,46,0},{0,46,0},{0,23,0},{0,18,34},{0,18,34},{14,130,400},{15,88,215},{17,66,315},{13,60,210},{4,123,723},{3,75,207},{4,62,2},{2,49,303},{0,60,969},{0,48,417},{24,109,200},{24,81,0},{26,63,21},{23,58,33},{64,0,723}, 
+{0,78,201},{6,62,1},{0,50,289},{131,0,723},{0,50,289},{14,99,200},{14,99,200},{14,99,200},{14,56,200},{4,92,162},{5,60,2},{5,60,2},{3,40,14},{0,45,280},{0,39,77},{24,79,0},{24,79,0},{24,79,0},{24,51,0},{49,0,162},{7,59,0},{7,59,0},{0,40,8},{101,0,162},{0,40,8},{78,0,200},{23,81,0},{32,62,0},{1,62,0},{78,0,200},{158,0,200},{1,62,0}, +{0,52,200},{158,0,200},{0,52,200},{14,0,200},{14,0,200},{14,0,200},{14,0,200},{4,65,0},{4,65,0},{4,65,0},{4,34,0},{0,33,40},{0,33,40},{22,138,400},{23,96,215},{25,74,315},{21,68,210},{12,131,723},{11,83,207},{12,70,2},{10,57,303},{0,72,865},{0,57,290},{32,117,200},{32,89,0},{34,71,21},{31,66,33},{76,0,723},{5,87,200},{14,70,1},{0,59,251},{155,0,723}, +{0,59,251},{22,107,200},{22,107,200},{22,107,200},{22,64,200},{12,100,162},{13,68,2},{13,68,2},{10,47,17},{0,60,213},{0,48,13},{32,87,0},{32,87,0},{32,87,0},{32,59,0},{61,0,162},{15,67,0},{15,67,0},{0,50,1},{125,0,162},{0,50,1},{89,0,200},{31,89,0},{40,70,0},{9,70,0},{89,0,200},{183,0,200},{9,70,0},{0,60,200},{183,0,200},{0,60,200},{22,0,200}, +{22,0,200},{22,0,200},{22,0,200},{12,73,0},{12,73,0},{12,73,0},{12,42,0},{0,45,8},{0,45,8},{30,145,400},{31,104,215},{33,82,315},{29,76,210},{20,139,723},{19,91,207},{20,78,2},{18,65,303},{0,88,787},{0,66,225},{40,125,200},{40,97,0},{43,79,19},{39,74,33},{88,0,723},{14,94,200},{22,78,1},{0,66,224},{180,0,723},{0,66,224},{30,115,200},{30,115,200},{30,115,200}, +{30,72,200},{20,108,162},{21,76,2},{21,76,2},{18,56,17},{0,72,173},{2,58,1},{40,95,0},{40,95,0},{40,95,0},{40,67,0},{73,0,162},{23,75,0},{23,75,0},{3,58,0},{149,0,162},{3,58,0},{101,0,200},{39,97,0},{48,78,0},{17,78,0},{101,0,200},{207,0,200},{17,78,0},{0,68,200},{207,0,200},{0,68,200},{30,0,200},{30,0,200},{30,0,200},{30,0,200},{20,81,0}, 
+{20,81,0},{20,81,0},{20,50,0},{1,59,0},{1,59,0},{38,153,400},{39,112,215},{41,90,315},{37,84,210},{28,147,723},{27,99,207},{28,86,2},{26,73,303},{0,100,739},{4,75,212},{48,133,200},{48,105,0},{51,87,19},{47,82,33},{100,0,723},{22,102,200},{30,86,1},{0,75,206},{204,0,723},{0,75,206},{38,123,200},{38,123,200},{38,123,200},{38,80,200},{28,116,162},{29,84,2},{29,84,2}, +{26,64,17},{0,85,162},{10,66,1},{48,103,0},{48,103,0},{48,103,0},{48,75,0},{85,0,162},{31,83,0},{31,83,0},{11,66,0},{174,0,162},{11,66,0},{113,0,200},{47,105,0},{56,86,0},{25,86,0},{113,0,200},{231,0,200},{25,86,0},{0,76,200},{231,0,200},{0,76,200},{38,0,200},{38,0,200},{38,0,200},{38,0,200},{28,89,0},{28,89,0},{28,89,0},{28,58,0},{9,67,0}, +{9,67,0},{47,162,400},{48,121,215},{50,98,308},{46,93,210},{37,155,723},{36,107,207},{37,95,2},{35,82,303},{0,113,723},{13,84,212},{57,142,200},{57,112,1},{60,96,19},{56,91,33},{113,0,723},{31,111,200},{39,95,1},{0,85,200},{231,0,723},{0,85,200},{47,132,200},{47,132,200},{47,132,200},{47,89,200},{37,125,162},{38,93,2},{38,93,2},{35,73,17},{10,93,162},{19,75,1},{57,111,0}, +{57,111,0},{57,111,0},{57,84,0},{98,0,162},{40,92,0},{40,92,0},{20,75,0},{201,0,162},{20,75,0},{127,0,200},{56,114,0},{65,95,0},{34,95,0},{127,0,200},{254,2,200},{34,95,0},{0,85,200},{254,2,200},{0,85,200},{47,0,200},{47,0,200},{47,0,200},{47,0,200},{37,98,0},{37,98,0},{37,98,0},{37,67,0},{18,76,0},{18,76,0},{55,170,400},{56,129,215},{58,106,308}, +{54,101,210},{45,163,723},{44,115,207},{45,103,2},{43,90,303},{8,121,723},{21,92,212},{65,150,200},{65,121,0},{68,104,19},{64,99,33},{125,0,723},{39,119,200},{47,103,1},{7,93,200},{255,0,723},{7,93,200},{55,140,200},{55,140,200},{55,140,200},{55,97,200},{45,133,162},{46,101,2},{46,101,2},{43,81,17},{18,101,162},{27,83,1},{65,119,0},{65,119,0},{65,119,0},{65,92,0},{110,0,162}, 
+{48,100,0},{48,100,0},{28,83,0},{225,0,162},{28,83,0},{138,0,200},{65,121,0},{73,103,0},{42,103,0},{138,0,200},{254,14,200},{42,103,0},{0,93,200},{254,14,200},{0,93,200},{55,0,200},{55,0,200},{55,0,200},{55,0,200},{45,106,0},{45,106,0},{45,106,0},{45,75,0},{26,84,0},{26,84,0},{63,178,400},{64,137,215},{66,114,308},{62,109,210},{53,171,723},{52,123,207},{53,111,2}, +{51,98,303},{16,129,723},{27,100,215},{73,158,200},{73,129,0},{76,112,19},{72,107,33},{137,0,723},{47,127,200},{55,111,1},{15,101,200},{255,12,723},{15,101,200},{63,148,200},{63,148,200},{63,148,200},{63,105,200},{53,141,162},{54,109,2},{54,109,2},{51,89,17},{26,109,162},{35,91,1},{73,127,0},{73,127,0},{73,127,0},{73,100,0},{122,0,162},{56,108,0},{56,108,0},{36,91,0},{249,0,162}, +{36,91,0},{150,0,200},{73,129,0},{81,111,0},{49,111,0},{150,0,200},{254,26,200},{49,111,0},{0,101,200},{254,26,200},{0,101,200},{63,0,200},{63,0,200},{63,0,200},{63,0,200},{53,114,0},{53,114,0},{53,114,0},{53,83,0},{34,92,0},{34,92,0},{71,186,400},{72,144,212},{74,122,308},{70,117,210},{61,179,723},{60,131,207},{61,118,2},{59,106,303},{24,137,723},{35,108,215},{81,166,200}, +{81,137,0},{84,120,19},{80,115,33},{149,0,723},{55,135,200},{62,119,1},{23,109,200},{255,24,723},{23,109,200},{71,156,200},{71,156,200},{71,156,200},{71,113,200},{61,149,162},{61,118,1},{61,118,1},{59,97,17},{34,117,162},{43,99,1},{81,135,0},{81,135,0},{81,135,0},{81,108,0},{134,0,162},{64,116,0},{64,116,0},{44,99,0},{255,9,162},{44,99,0},{162,0,200},{81,137,0},{89,119,0}, +{57,119,0},{162,0,200},{254,38,200},{57,119,0},{0,109,200},{254,38,200},{0,109,200},{71,0,200},{71,0,200},{71,0,200},{71,0,200},{61,121,0},{61,121,0},{61,121,0},{61,91,0},{42,100,0},{42,100,0},{80,195,400},{81,153,212},{83,131,308},{79,127,212},{70,188,723},{69,140,207},{70,127,2},{68,115,303},{34,145,723},{44,117,215},{90,174,200},{90,146,0},{93,129,19},{89,124,33},{162,0,723}, 
+{64,144,200},{71,128,1},{32,118,200},{254,38,723},{32,118,200},{80,165,200},{80,165,200},{80,165,200},{80,122,200},{70,158,162},{70,127,1},{70,127,1},{68,106,17},{43,126,162},{52,108,1},{90,144,0},{90,144,0},{90,144,0},{90,117,0},{147,0,162},{74,124,0},{74,124,0},{53,108,0},{254,23,162},{53,108,0},{175,0,200},{90,146,0},{98,128,0},{66,128,0},{175,0,200},{254,51,200},{66,128,0}, +{0,118,200},{254,51,200},{0,118,200},{80,0,200},{80,0,200},{80,0,200},{80,0,200},{70,130,0},{70,130,0},{70,130,0},{70,100,0},{51,109,0},{51,109,0},{88,203,400},{89,161,212},{91,139,308},{87,135,212},{78,196,723},{77,148,207},{78,135,2},{75,122,305},{42,153,723},{52,125,215},{98,182,200},{98,154,0},{101,137,19},{97,132,33},{174,0,723},{72,152,200},{79,136,1},{40,126,200},{255,49,723}, +{40,126,200},{88,172,200},{88,172,200},{88,172,200},{88,129,200},{78,166,162},{78,135,1},{78,135,1},{76,114,17},{51,134,162},{60,116,0},{98,152,0},{98,152,0},{98,152,0},{98,125,0},{159,0,162},{82,132,0},{82,132,0},{60,116,0},{254,35,162},{60,116,0},{187,0,200},{98,154,0},{106,136,0},{74,136,0},{187,0,200},{254,63,200},{74,136,0},{0,126,200},{254,63,200},{0,126,200},{88,0,200}, +{88,0,200},{88,0,200},{88,0,200},{78,138,0},{78,138,0},{78,138,0},{78,108,0},{60,116,0},{60,116,0},{96,211,400},{97,169,212},{99,147,308},{95,143,212},{86,204,723},{85,156,207},{86,143,2},{83,131,308},{50,161,723},{60,133,215},{106,190,200},{106,162,0},{109,145,19},{105,141,35},{186,0,723},{80,160,200},{87,144,1},{48,134,200},{255,61,723},{48,134,200},{96,180,200},{96,180,200},{96,180,200}, +{96,137,200},{86,174,162},{86,143,1},{86,143,1},{84,122,17},{59,142,162},{68,124,0},{106,160,0},{106,160,0},{106,160,0},{106,133,0},{171,0,162},{90,140,0},{90,140,0},{68,124,0},{255,46,162},{68,124,0},{199,0,200},{106,162,0},{114,144,0},{82,144,0},{199,0,200},{254,75,200},{82,144,0},{0,134,200},{254,75,200},{0,134,200},{96,0,200},{96,0,200},{96,0,200},{96,0,200},{86,146,0}, 
+{86,146,0},{86,146,0},{86,116,0},{68,124,0},{68,124,0},{104,219,400},{105,177,212},{107,155,308},{103,151,212},{94,212,723},{93,164,207},{94,151,2},{91,139,308},{58,169,723},{68,141,215},{114,198,200},{114,170,0},{117,153,19},{113,149,35},{198,0,723},{88,168,200},{95,152,1},{56,142,200},{255,73,723},{56,142,200},{104,188,200},{104,188,200},{104,188,200},{104,145,200},{94,181,162},{94,151,1},{94,151,1}, +{92,130,17},{67,150,162},{76,132,0},{114,168,0},{114,168,0},{114,168,0},{114,141,0},{183,0,162},{98,148,0},{98,148,0},{76,132,0},{255,58,162},{76,132,0},{211,0,200},{114,170,0},{122,152,0},{90,152,0},{211,0,200},{255,86,200},{90,152,0},{0,142,200},{255,86,200},{0,142,200},{104,0,200},{104,0,200},{104,0,200},{104,0,200},{94,154,0},{94,154,0},{94,154,0},{94,124,0},{76,132,0}, +{76,132,0},{113,228,400},{114,186,215},{116,164,308},{112,160,212},{103,221,723},{102,173,207},{103,160,2},{100,148,308},{67,178,723},{78,150,212},{123,207,200},{123,179,0},{125,163,17},{122,158,35},{211,0,723},{96,177,200},{104,161,1},{65,151,200},{255,86,723},{65,151,200},{113,197,200},{113,197,200},{113,197,200},{113,154,200},{103,190,162},{103,160,1},{103,160,1},{102,138,19},{76,159,162},{85,141,0},{123,177,0}, +{123,177,0},{123,177,0},{123,149,0},{196,0,162},{107,157,0},{107,157,0},{85,141,0},{254,72,162},{85,141,0},{224,0,200},{123,179,0},{131,161,0},{99,161,0},{224,0,200},{254,100,200},{99,161,0},{0,151,200},{254,100,200},{0,151,200},{113,0,200},{113,0,200},{113,0,200},{113,0,200},{103,163,0},{103,163,0},{103,163,0},{103,133,0},{85,141,0},{85,141,0},{121,235,400},{122,194,215},{124,172,308}, 
+{120,168,212},{111,229,723},{110,181,207},{111,168,2},{108,156,308},{75,186,723},{86,158,212},{131,215,200},{131,187,0},{133,171,17},{130,166,35},{223,0,723},{104,185,200},{112,169,1},{73,159,200},{255,98,723},{73,159,200},{121,205,200},{121,205,200},{121,205,200},{121,162,200},{111,198,162},{111,168,1},{111,168,1},{110,146,19},{84,167,162},{93,149,0},{131,185,0},{131,185,0},{131,185,0},{131,157,0},{208,0,162}, +{115,165,0},{115,165,0},{93,149,0},{254,84,162},{93,149,0},{236,0,200},{131,187,0},{139,169,0},{107,169,0},{236,0,200},{254,112,200},{107,169,0},{0,159,200},{254,112,200},{0,159,200},{121,0,200},{121,0,200},{121,0,200},{121,0,200},{111,171,0},{111,171,0},{111,171,0},{111,141,0},{93,149,0},{93,149,0},{129,243,400},{130,202,215},{133,180,305},{128,176,212},{119,236,723},{118,189,207},{119,176,2}, +{116,164,308},{83,194,723},{94,166,212},{139,223,200},{139,195,0},{141,179,17},{138,174,35},{235,0,723},{112,193,200},{120,177,1},{81,167,200},{255,110,723},{81,167,200},{129,213,200},{129,213,200},{129,213,200},{129,170,200},{119,206,162},{119,176,1},{119,176,1},{118,154,19},{91,175,162},{101,157,0},{139,192,0},{139,192,0},{139,192,0},{139,165,0},{220,0,162},{123,173,0},{123,173,0},{101,157,0},{255,95,162}, +{101,157,0},{248,0,200},{139,195,0},{147,177,0},{115,177,0},{248,0,200},{254,124,200},{115,177,0},{0,167,200},{254,124,200},{0,167,200},{129,0,200},{129,0,200},{129,0,200},{129,0,200},{119,179,0},{119,179,0},{119,179,0},{119,148,0},{101,157,0},{101,157,0},{137,251,400},{138,210,215},{140,187,303},{136,184,212},{127,244,723},{126,197,207},{127,184,2},{124,172,308},{91,202,723},{102,174,212},{147,231,200}, 
+{147,202,1},{149,187,17},{146,182,35},{247,0,723},{120,201,200},{128,185,1},{88,175,200},{255,122,723},{88,175,200},{137,221,200},{137,221,200},{137,221,200},{137,178,200},{127,214,162},{127,184,1},{127,184,1},{126,162,19},{99,183,162},{109,165,0},{147,200,0},{147,200,0},{147,200,0},{147,173,0},{232,0,162},{131,181,0},{131,181,0},{109,165,0},{255,107,162},{109,165,0},{255,10,200},{146,203,0},{155,185,0}, +{123,185,0},{255,10,200},{255,135,200},{123,185,0},{0,175,200},{255,135,200},{0,175,200},{137,0,200},{137,0,200},{137,0,200},{137,0,200},{127,187,0},{127,187,0},{127,187,0},{127,156,0},{109,165,0},{109,165,0},{147,255,404},{147,219,215},{149,196,303},{145,193,212},{136,253,723},{135,206,207},{136,193,2},{133,181,308},{100,211,723},{111,183,212},{156,240,200},{156,210,1},{158,196,17},{155,191,35},{255,10,723}, +{129,210,200},{137,194,1},{97,184,200},{255,135,723},{97,184,200},{146,230,200},{146,230,200},{146,230,200},{146,187,200},{136,223,162},{136,193,1},{136,193,1},{135,171,19},{108,192,162},{118,174,0},{156,209,0},{156,209,0},{156,209,0},{156,182,0},{245,0,162},{139,191,0},{139,191,0},{118,174,0},{254,121,162},{118,174,0},{255,37,200},{155,212,0},{164,194,0},{131,194,0},{255,37,200},{254,149,200},{131,194,0}, +{0,184,200},{254,149,200},{0,184,200},{146,0,200},{146,0,200},{146,0,200},{146,0,200},{136,196,0},{136,196,0},{136,196,0},{136,165,0},{118,174,0},{118,174,0},{156,255,426},{155,227,215},{157,204,303},{153,201,212},{146,254,728},{143,214,207},{144,202,2},{141,189,308},{108,219,723},{118,191,215},{164,248,200},{164,218,1},{166,204,17},{163,199,35},{255,34,723},{137,218,200},{145,201,2},{105,192,200},{255,147,723}, 
+{105,192,200},{154,238,200},{154,238,200},{154,238,200},{154,195,200},{144,231,162},{144,200,1},{144,200,1},{143,179,19},{116,200,162},{126,182,0},{164,217,0},{164,217,0},{164,217,0},{164,190,0},{255,4,162},{146,199,0},{146,199,0},{126,182,0},{255,132,162},{126,182,0},{255,61,200},{163,220,0},{172,202,0},{139,202,0},{255,61,200},{254,161,200},{139,202,0},{0,192,200},{254,161,200},{0,192,200},{154,0,200}, +{154,0,200},{154,0,200},{154,0,200},{144,204,0},{144,204,0},{144,204,0},{144,173,0},{126,182,0},{126,182,0},{165,255,468},{163,234,212},{165,212,303},{162,207,213},{156,255,747},{151,222,207},{152,210,2},{149,197,308},{115,228,723},{126,199,215},{172,255,200},{172,226,1},{174,212,17},{171,207,35},{255,58,723},{145,226,200},{153,209,2},{113,200,200},{255,159,723},{113,200,200},{162,246,200},{162,246,200},{162,246,200}, +{162,203,200},{152,239,162},{152,208,1},{152,208,1},{151,187,19},{124,208,162},{134,190,0},{172,225,0},{172,225,0},{172,225,0},{172,198,0},{255,28,162},{155,206,0},{155,206,0},{134,190,0},{255,144,162},{134,190,0},{255,86,200},{171,228,0},{180,210,0},{147,210,0},{255,86,200},{255,172,200},{147,210,0},{0,200,200},{255,172,200},{0,200,200},{162,0,200},{162,0,200},{162,0,200},{162,0,200},{152,211,0}, +{152,211,0},{152,211,0},{152,181,0},{134,190,0},{134,190,0},{175,255,522},{171,242,212},{173,220,303},{170,216,213},{165,255,788},{158,232,208},{160,218,2},{157,205,308},{123,235,723},{134,207,215},{181,255,209},{180,234,1},{182,220,17},{179,215,35},{255,82,723},{153,234,200},{161,217,2},{121,208,200},{254,171,723},{121,208,200},{170,253,200},{170,253,200},{170,253,200},{170,211,200},{160,247,162},{160,216,1},{160,216,1}, 
+{159,195,19},{132,216,162},{141,198,1},{180,233,0},{180,233,0},{180,233,0},{180,206,0},{255,52,162},{163,214,0},{163,214,0},{142,198,0},{255,156,162},{142,198,0},{255,110,200},{179,236,0},{188,218,0},{155,218,0},{255,110,200},{255,184,200},{155,218,0},{0,208,200},{255,184,200},{0,208,200},{170,0,200},{170,0,200},{170,0,200},{170,0,200},{160,219,0},{160,219,0},{160,219,0},{160,189,0},{141,199,0}, +{141,199,0},{184,255,612},{180,251,212},{182,229,303},{179,225,213},{178,255,844},{167,239,210},{169,227,2},{165,214,315},{132,244,723},{143,216,215},{193,255,234},{189,243,1},{191,229,17},{187,224,38},{255,110,723},{162,243,200},{170,226,2},{130,217,200},{255,184,723},{130,217,200},{180,254,206},{180,254,206},{180,254,206},{179,220,200},{170,253,163},{169,225,1},{169,225,1},{168,204,19},{141,225,162},{150,207,0},{189,242,0}, +{189,242,0},{189,242,0},{189,215,0},{255,79,162},{172,223,0},{172,223,0},{150,207,0},{253,170,162},{150,207,0},{255,137,200},{188,245,0},{197,227,0},{164,227,0},{255,137,200},{254,198,200},{164,227,0},{0,217,200},{254,198,200},{0,217,200},{179,0,200},{179,0,200},{179,0,200},{179,0,200},{169,228,0},{169,228,0},{169,228,0},{169,198,0},{150,207,0},{150,207,0},{193,255,714},{189,255,225},{190,237,303}, +{187,233,213},{187,255,919},{175,247,210},{177,235,2},{173,222,315},{140,252,723},{151,224,215},{202,255,275},{197,251,1},{199,237,17},{195,232,38},{255,134,723},{170,251,200},{178,234,2},{138,225,200},{255,196,723},{138,225,200},{189,254,224},{189,254,224},{189,254,224},{187,228,200},{178,255,171},{177,233,1},{177,233,1},{176,212,19},{149,233,162},{158,215,0},{197,250,0},{197,250,0},{197,250,0},{197,223,0},{255,104,162}, 
+{180,231,0},{180,231,0},{158,215,0},{255,181,162},{158,215,0},{255,161,200},{196,253,0},{204,235,0},{172,235,0},{255,161,200},{254,210,200},{172,235,0},{0,225,200},{254,210,200},{0,225,200},{187,0,200},{187,0,200},{187,0,200},{187,0,200},{177,236,0},{177,236,0},{177,236,0},{177,206,0},{158,215,0},{158,215,0},{202,255,836},{198,255,290},{198,245,303},{195,241,213},{199,255,1015},{183,255,210},{185,243,2}, +{181,230,315},{152,255,732},{159,232,215},{214,254,331},{207,255,13},{207,245,17},{203,240,38},{255,158,723},{183,255,206},{186,242,2},{146,233,200},{255,208,723},{146,233,200},{196,255,251},{196,255,251},{196,255,251},{195,236,200},{190,255,195},{185,241,1},{185,241,1},{184,221,21},{157,241,162},{166,223,0},{205,255,1},{205,255,1},{205,255,1},{205,231,0},{255,128,162},{188,239,0},{188,239,0},{166,223,0},{255,193,162}, +{166,223,0},{255,186,200},{210,255,8},{212,243,0},{180,243,0},{255,186,200},{255,221,200},{180,243,0},{0,233,200},{255,221,200},{0,233,200},{195,0,200},{195,0,200},{195,0,200},{195,0,200},{185,244,0},{185,244,0},{185,244,0},{185,214,0},{166,223,0},{166,223,0},{215,255,976},{207,255,417},{206,253,303},{203,249,213},{208,255,1124},{195,255,258},{193,251,2},{189,238,315},{167,255,797},{166,240,215},{221,255,392}, +{216,255,77},{215,252,14},{211,248,38},{255,183,723},{198,255,248},{194,250,2},{154,241,200},{254,220,723},{154,241,200},{205,255,289},{205,255,289},{205,255,289},{203,244,200},{199,255,230},{193,249,1},{193,249,1},{192,229,21},{165,249,162},{174,231,0},{215,255,8},{215,255,8},{215,255,8},{213,239,0},{255,152,162},{196,247,0},{196,247,0},{174,231,0},{255,205,162},{174,231,0},{255,210,200},{222,255,40},{220,251,0}, 
+{188,251,0},{255,210,200},{255,233,200},{188,251,0},{0,241,200},{255,233,200},{0,241,200},{203,0,200},{203,0,200},{203,0,200},{203,0,200},{193,252,0},{193,252,0},{193,252,0},{193,222,0},{174,231,0},{174,231,0},{221,255,895},{217,255,494},{215,255,339},{212,255,201},{218,255,994},{207,255,251},{204,255,6},{199,245,198},{189,255,702},{176,247,121},{233,255,318},{228,255,109},{224,255,29},{222,253,13},{255,204,546}, +{213,255,198},{205,255,1},{169,247,113},{255,230,546},{169,247,113},{215,255,339},{215,255,339},{215,255,339},{212,253,200},{211,255,293},{204,255,6},{204,255,6},{201,238,21},{177,255,165},{183,240,0},{224,255,29},{224,255,29},{224,255,29},{222,248,0},{255,180,162},{206,254,1},{206,254,1},{183,240,0},{255,218,162},{183,240,0},{255,228,113},{237,255,34},{232,255,0},{207,255,0},{255,228,113},{255,242,113},{207,255,0}, +{0,247,113},{255,242,113},{0,247,113},{212,0,200},{212,0,200},{212,0,200},{212,0,200},{202,255,4},{202,255,4},{202,255,4},{202,231,0},{183,240,0},{183,240,0},{230,255,737},{226,255,497},{224,255,401},{220,255,216},{227,255,783},{216,255,206},{213,255,54},{209,249,74},{201,255,534},{189,251,26},{239,255,190},{235,255,97},{233,255,58},{230,255,1},{255,219,333},{225,255,110},{219,255,17},{185,251,25},{254,238,333}, +{185,251,25},{224,255,401},{224,255,401},{224,255,401},{220,255,216},{218,255,354},{213,255,54},{213,255,54},{209,246,21},{192,255,203},{191,248,0},{233,255,58},{233,255,58},{233,255,58},{230,254,1},{255,204,162},{219,255,17},{219,255,17},{191,248,0},{255,230,162},{191,248,0},{255,240,25},{246,255,5},{244,255,0},{231,255,0},{255,240,25},{255,248,25},{231,255,0},{0,251,25},{255,248,25},{0,251,25},{220,0,200}, 
+{220,0,200},{220,0,200},{220,0,200},{211,255,17},{211,255,17},{211,255,17},{210,239,0},{191,248,0},{191,248,0},{236,255,616},{233,255,495},{233,255,446},{228,255,264},{233,255,626},{225,255,220},{225,255,139},{217,254,19},{216,255,434},{199,255,1},{245,255,121},{244,255,94},{242,255,80},{240,255,20},{255,234,193},{237,255,75},{234,255,45},{201,255,0},{255,245,193},{201,255,0},{233,255,446},{233,255,446},{233,255,446}, +{228,255,264},{230,255,401},{225,255,139},{225,255,139},{216,253,17},{207,255,254},{199,255,1},{242,255,80},{242,255,80},{242,255,80},{240,255,20},{255,225,145},{234,255,45},{234,255,45},{201,255,0},{254,241,145},{201,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{228,0,200},{228,0,200},{228,0,200},{228,0,200},{221,255,34}, +{221,255,34},{221,255,34},{218,247,0},{200,254,1},{200,254,1},{242,255,400},{239,255,343},{239,255,318},{237,255,227},{239,255,370},{234,255,161},{232,255,121},{226,255,0},{225,255,243},{213,255,17},{251,255,33},{248,255,24},{248,255,20},{246,255,4},{255,243,54},{246,255,17},{243,255,10},{225,255,0},{254,250,54},{225,255,0},{239,255,318},{239,255,318},{239,255,318},{237,255,227},{236,255,253},{232,255,121},{232,255,121}, +{226,255,0},{219,255,150},{213,255,17},{248,255,20},{248,255,20},{248,255,20},{246,255,4},{255,237,41},{243,255,10},{243,255,10},{225,255,0},{254,247,41},{225,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{236,0,200},{236,0,200},{236,0,200},{236,0,200},{230,255,65},{230,255,65},{230,255,65},{226,255,0},{213,255,17}, +{213,255,17},{0,82,421},{0,60,40},{0,42,0},{0,34,145},{0,55,925},{0,36,566},{0,33,262},{0,22,677},{0,27,989},{0,22,726},{0,82,421},{0,60,40},{0,42,0},{0,34,145},{27,0,925},{0,36,566},{0,33,262},{0,22,677},{55,0,925},{0,22,677},{0,40,0},{0,40,0},{0,40,0},{0,20,0},{0,19,85},{0,15,25},{0,15,25},{0,10,45},{0,9,89},{0,8,54},{0,40,0}, 
+{0,40,0},{0,40,0},{0,20,0},{9,0,85},{0,15,25},{0,15,25},{0,10,45},{19,0,85},{0,10,45},{41,0,421},{0,60,40},{0,42,0},{0,34,145},{41,0,421},{82,0,421},{0,34,145},{0,27,421},{82,0,421},{0,27,421},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,107,421},{0,72,8},{1,50,18}, +{0,44,100},{0,73,1261},{0,48,670},{0,42,282},{0,28,857},{0,33,1369},{0,28,938},{0,107,421},{0,72,8},{2,51,14},{0,44,100},{36,0,1261},{0,48,670},{0,42,282},{0,28,857},{73,0,1261},{0,28,857},{0,64,0},{0,64,0},{0,64,0},{0,32,0},{0,31,221},{0,27,73},{0,27,73},{0,16,125},{0,15,237},{0,13,144},{0,64,0},{0,64,0},{0,64,0},{0,32,0},{15,0,221}, +{0,27,73},{0,27,73},{0,16,125},{31,0,221},{0,16,125},{52,0,421},{0,72,8},{7,50,0},{0,44,100},{52,0,421},{107,0,421},{0,44,100},{0,35,421},{107,0,421},{0,35,421},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,127,426},{1,86,5},{4,59,67},{0,53,70},{0,92,1514},{0,60,701},{0,51,243}, +{0,37,946},{0,42,1695},{0,34,1083},{3,125,421},{3,84,1},{7,59,42},{1,54,69},{45,0,1514},{0,60,701},{0,51,243},{0,37,946},{92,0,1514},{0,37,946},{2,84,5},{2,84,5},{2,84,5},{1,43,5},{0,49,338},{0,36,89},{0,36,89},{0,22,170},{0,21,382},{0,19,217},{3,82,0},{3,82,0},{3,82,0},{3,42,0},{24,0,338},{0,36,89},{0,36,89},{0,22,170},{49,0,338}, +{0,22,170},{64,0,421},{1,86,0},{15,58,0},{0,53,61},{64,0,421},{131,0,421},{0,53,61},{0,43,421},{131,0,421},{0,43,421},{1,0,5},{1,0,5},{1,0,5},{1,0,5},{0,10,0},{0,10,0},{0,10,0},{0,5,0},{0,3,1},{0,3,1},{6,143,482},{6,97,63},{8,69,163},{4,63,110},{0,116,1514},{0,72,589},{0,60,109},{0,44,857},{0,54,1815},{0,44,1053},{11,133,421}, +{11,91,1},{15,67,42},{9,62,69},{57,0,1514},{0,72,589},{0,60,109},{0,44,857},{116,0,1514},{0,44,857},{6,101,61},{6,101,61},{6,101,61},{6,52,61},{0,73,338},{0,51,34},{0,51,34},{0,31,117},{0,33,446},{0,29,209},{11,90,0},{11,90,0},{11,90,0},{11,50,0},{36,0,338},{0,51,34},{0,51,34},{0,31,117},{73,0,338},{0,31,117},{76,0,421},{10,93,0},{23,66,0}, 
+{0,62,32},{76,0,421},{155,0,421},{0,62,32},{0,51,421},{155,0,421},{0,51,421},{5,0,61},{5,0,61},{5,0,61},{5,0,61},{0,34,0},{0,34,0},{0,34,0},{0,17,0},{0,15,17},{0,15,17},{10,162,621},{11,109,215},{14,78,362},{9,71,234},{0,143,1514},{0,88,489},{0,72,22},{0,56,750},{0,66,1982},{0,53,1047},{20,142,421},{20,100,1},{24,76,42},{18,71,69},{70,0,1514}, +{0,88,489},{0,72,22},{0,56,750},{143,0,1514},{0,56,750},{10,120,200},{10,120,200},{10,120,200},{10,64,200},{0,101,338},{0,69,4},{0,69,4},{0,40,72},{0,45,552},{0,39,229},{20,99,0},{20,99,0},{20,99,0},{20,59,0},{49,0,338},{0,69,4},{0,69,4},{0,40,72},{101,0,338},{0,40,72},{89,0,421},{19,102,0},{32,75,0},{0,74,10},{89,0,421},{183,0,421},{0,74,10}, +{0,60,421},{183,0,421},{0,60,421},{10,0,200},{10,0,200},{10,0,200},{10,0,200},{0,61,0},{0,61,0},{0,61,0},{0,30,0},{0,24,65},{0,24,65},{14,178,813},{15,119,423},{18,88,618},{13,81,414},{0,167,1514},{0,100,441},{0,83,2},{0,65,670},{0,75,2165},{0,60,1070},{28,150,421},{28,108,1},{32,84,42},{26,79,69},{82,0,1514},{0,100,441},{2,82,2},{0,65,670},{167,0,1514}, +{0,65,670},{14,136,392},{14,136,392},{14,136,392},{14,74,392},{0,125,338},{1,80,2},{1,80,2},{0,50,41},{0,54,677},{0,48,277},{28,107,0},{28,107,0},{28,107,0},{28,67,0},{61,0,338},{3,79,0},{3,79,0},{0,50,41},{125,0,338},{0,50,41},{101,0,421},{27,110,0},{40,83,0},{0,83,1},{101,0,421},{207,0,421},{0,83,1},{0,68,421},{207,0,421},{0,68,421},{14,0,392}, +{14,0,392},{14,0,392},{14,0,392},{0,85,0},{0,85,0},{0,85,0},{0,42,0},{0,33,136},{0,33,136},{22,186,842},{22,128,450},{26,96,655},{21,89,441},{7,177,1514},{5,111,434},{8,90,4},{4,72,639},{0,88,2003},{0,69,858},{36,157,421},{36,116,1},{40,92,42},{34,87,69},{94,0,1514},{0,113,422},{8,90,3},{0,71,602},{192,0,1514},{0,71,602},{22,144,421},{22,144,421},{22,144,421}, 
+{21,82,421},{7,135,338},{8,90,3},{8,90,3},{6,59,35},{0,69,581},{0,57,150},{36,115,0},{36,115,0},{36,115,0},{36,75,0},{73,0,338},{11,87,0},{11,87,0},{0,59,18},{149,0,338},{0,59,18},{113,0,421},{35,118,0},{48,91,0},{2,91,0},{113,0,421},{231,0,421},{2,91,0},{0,76,421},{231,0,421},{0,76,421},{21,0,421},{21,0,421},{21,0,421},{21,0,421},{7,95,0}, +{7,95,0},{7,95,0},{7,50,0},{0,48,80},{0,48,80},{30,194,842},{30,136,450},{34,104,655},{29,97,441},{15,185,1514},{13,119,434},{16,98,4},{12,80,639},{0,103,1850},{0,80,663},{44,165,421},{44,124,1},{48,100,42},{42,95,69},{106,0,1514},{6,122,421},{16,98,3},{0,80,542},{216,0,1514},{0,80,542},{30,152,421},{30,152,421},{30,152,421},{29,90,421},{15,143,338},{16,98,3},{16,98,3}, +{14,67,35},{0,81,477},{0,66,52},{44,123,0},{44,123,0},{44,123,0},{44,83,0},{85,0,338},{19,95,0},{19,95,0},{0,68,5},{174,0,338},{0,68,5},{125,0,421},{43,126,0},{56,99,0},{10,99,0},{125,0,421},{255,0,421},{10,99,0},{0,84,421},{255,0,421},{0,84,421},{29,0,421},{29,0,421},{29,0,421},{29,0,421},{15,103,0},{15,103,0},{15,103,0},{15,58,0},{0,63,29}, +{0,63,29},{39,203,842},{39,145,450},{43,113,655},{37,106,445},{24,194,1514},{22,126,438},{25,107,4},{21,89,639},{0,115,1710},{0,90,519},{53,174,421},{53,133,1},{56,110,38},{51,104,69},{119,0,1514},{15,131,421},{25,107,3},{0,90,494},{243,0,1514},{0,90,494},{39,160,421},{39,160,421},{39,160,421},{38,99,421},{24,152,338},{25,107,3},{25,107,3},{23,76,35},{0,97,389},{0,78,3},{53,132,0}, +{53,132,0},{53,132,0},{53,92,0},{98,0,338},{29,103,0},{29,103,0},{0,79,0},{201,0,338},{0,79,0},{138,0,421},{52,135,0},{65,108,0},{19,108,0},{138,0,421},{254,14,421},{19,108,0},{0,93,421},{254,14,421},{0,93,421},{38,0,421},{38,0,421},{38,0,421},{38,0,421},{24,112,0},{24,112,0},{24,112,0},{24,67,0},{0,78,2},{0,78,2},{47,211,842},{47,153,450},{51,121,655}, 
+{45,114,445},{32,202,1514},{30,134,438},{33,115,4},{29,97,639},{0,129,1617},{0,99,458},{61,182,421},{61,141,1},{64,118,38},{59,112,69},{131,0,1514},{23,139,421},{33,115,3},{0,99,458},{255,6,1514},{0,99,458},{47,168,421},{47,168,421},{47,168,421},{46,107,421},{32,160,338},{33,115,3},{33,115,3},{31,84,35},{0,109,349},{5,87,1},{61,140,0},{61,140,0},{61,140,0},{61,100,0},{110,0,338}, +{37,111,0},{37,111,0},{8,87,0},{225,0,338},{8,87,0},{150,0,421},{60,143,0},{73,116,0},{27,116,0},{150,0,421},{254,26,421},{27,116,0},{0,101,421},{254,26,421},{0,101,421},{46,0,421},{46,0,421},{46,0,421},{46,0,421},{32,120,0},{32,120,0},{32,120,0},{32,75,0},{5,88,0},{5,88,0},{55,219,842},{55,161,450},{59,129,655},{53,122,445},{40,210,1514},{38,142,438},{41,123,4}, +{36,105,646},{0,141,1553},{3,108,450},{69,190,421},{69,149,1},{72,126,38},{68,118,73},{143,0,1514},{32,146,421},{41,123,3},{0,108,434},{255,18,1514},{0,108,434},{55,176,421},{55,176,421},{55,176,421},{55,114,421},{40,168,338},{41,123,3},{41,123,3},{39,92,35},{0,122,338},{13,95,1},{69,148,0},{69,148,0},{69,148,0},{69,108,0},{122,0,338},{45,119,0},{45,119,0},{15,95,0},{249,0,338}, +{15,95,0},{162,0,421},{68,151,0},{81,124,0},{35,124,0},{162,0,421},{254,38,421},{35,124,0},{0,109,421},{254,38,421},{0,109,421},{54,0,421},{54,0,421},{54,0,421},{54,0,421},{40,128,0},{40,128,0},{40,128,0},{40,83,0},{13,96,0},{13,96,0},{63,227,842},{63,169,450},{68,134,654},{61,130,445},{48,218,1514},{46,151,434},{49,131,4},{44,113,646},{0,153,1521},{11,116,450},{77,198,421}, +{77,157,1},{80,134,38},{74,128,74},{155,0,1514},{40,154,421},{49,131,3},{0,117,422},{255,30,1514},{0,117,422},{63,184,421},{63,184,421},{63,184,421},{63,122,421},{48,175,338},{49,131,3},{49,131,3},{47,100,35},{8,130,338},{21,103,1},{77,156,0},{77,156,0},{77,156,0},{77,116,0},{134,0,338},{53,127,0},{53,127,0},{23,103,0},{255,9,338},{23,103,0},{174,0,421},{76,159,0},{89,132,0}, 
+{43,132,0},{174,0,421},{255,49,421},{43,132,0},{0,117,421},{255,49,421},{0,117,421},{62,0,421},{62,0,421},{62,0,421},{62,0,421},{48,136,0},{48,136,0},{48,136,0},{48,91,0},{21,104,0},{21,104,0},{72,236,842},{72,178,450},{76,145,646},{70,139,445},{57,227,1514},{55,160,434},{58,140,4},{53,122,646},{4,166,1514},{20,125,450},{86,207,421},{86,166,1},{89,143,38},{83,137,74},{168,0,1514}, +{49,163,421},{58,140,3},{4,126,421},{255,43,1514},{4,126,421},{72,193,421},{72,193,421},{72,193,421},{72,131,421},{57,184,338},{58,140,3},{58,140,3},{56,109,35},{18,138,338},{30,112,1},{86,164,0},{86,164,0},{86,164,0},{86,125,0},{147,0,338},{62,136,0},{62,136,0},{32,112,0},{254,23,338},{32,112,0},{187,0,421},{85,168,0},{98,141,0},{52,141,0},{187,0,421},{254,63,421},{52,141,0}, +{0,126,421},{254,63,421},{0,126,421},{71,0,421},{71,0,421},{71,0,421},{71,0,421},{57,145,0},{57,145,0},{57,145,0},{57,100,0},{30,113,0},{30,113,0},{80,243,842},{80,186,450},{84,153,646},{78,147,445},{65,235,1514},{63,168,434},{66,148,4},{61,130,646},{12,174,1514},{27,133,450},{94,215,421},{94,174,1},{97,151,38},{91,145,74},{180,0,1514},{57,171,421},{66,148,3},{11,134,421},{255,55,1514}, +{11,134,421},{80,201,421},{80,201,421},{80,201,421},{80,139,421},{65,192,338},{66,148,3},{66,148,3},{63,116,36},{26,146,338},{38,120,1},{94,172,0},{94,172,0},{94,172,0},{94,132,0},{159,0,338},{70,144,0},{70,144,0},{40,120,0},{254,35,338},{40,120,0},{199,0,421},{92,176,0},{106,149,0},{60,149,0},{199,0,421},{254,75,421},{60,149,0},{0,134,421},{254,75,421},{0,134,421},{79,0,421}, +{79,0,421},{79,0,421},{79,0,421},{65,153,0},{65,153,0},{65,153,0},{65,108,0},{38,121,0},{38,121,0},{88,251,842},{88,194,450},{92,161,646},{86,155,445},{73,243,1514},{71,176,434},{74,156,4},{69,138,646},{21,181,1514},{35,141,450},{102,223,421},{102,182,1},{105,159,38},{99,153,74},{192,0,1514},{65,179,421},{74,156,3},{19,142,421},{255,67,1514},{19,142,421},{88,209,421},{88,209,421},{88,209,421}, 
+{88,147,421},{73,200,338},{74,156,3},{74,156,3},{71,125,38},{34,154,338},{46,128,1},{102,180,0},{102,180,0},{102,180,0},{102,140,0},{171,0,338},{78,152,0},{78,152,0},{48,128,0},{255,46,338},{48,128,0},{211,0,421},{100,184,0},{114,157,0},{68,157,0},{211,0,421},{255,86,421},{68,157,0},{0,142,421},{255,86,421},{0,142,421},{87,0,421},{87,0,421},{87,0,421},{87,0,421},{73,161,0}, +{73,161,0},{73,161,0},{73,116,0},{46,129,0},{46,129,0},{97,254,850},{96,202,450},{100,169,646},{94,163,445},{81,251,1514},{79,184,434},{82,164,4},{77,146,646},{29,189,1514},{43,149,450},{110,231,421},{110,190,1},{113,167,38},{107,161,74},{204,0,1514},{73,187,421},{83,164,2},{27,150,421},{255,79,1514},{27,150,421},{96,217,421},{96,217,421},{96,217,421},{96,155,421},{81,208,338},{82,163,2},{82,163,2}, +{79,133,38},{42,162,338},{54,136,1},{110,188,0},{110,188,0},{110,188,0},{110,148,0},{183,0,338},{86,160,0},{86,160,0},{56,136,0},{255,58,338},{56,136,0},{223,0,421},{108,192,0},{122,165,0},{76,165,0},{223,0,421},{255,98,421},{76,165,0},{0,150,421},{255,98,421},{0,150,421},{95,0,421},{95,0,421},{95,0,421},{95,0,421},{81,169,0},{81,169,0},{81,169,0},{81,124,0},{54,137,0}, +{54,137,0},{107,255,878},{105,211,450},{109,178,646},{103,172,445},{91,254,1518},{88,193,434},{91,173,4},{86,155,646},{38,198,1514},{52,158,450},{119,240,421},{119,199,1},{122,176,38},{116,170,74},{217,0,1514},{82,196,421},{92,173,2},{36,159,421},{255,92,1514},{36,159,421},{105,226,421},{105,226,421},{105,226,421},{105,164,421},{90,217,338},{91,172,2},{91,172,2},{88,142,38},{51,171,338},{63,145,1},{119,197,0}, +{119,197,0},{119,197,0},{119,157,0},{196,0,338},{95,169,0},{95,169,0},{65,145,0},{254,72,338},{65,145,0},{236,0,421},{117,201,0},{130,174,0},{84,174,0},{236,0,421},{254,112,421},{84,174,0},{0,159,421},{254,112,421},{0,159,421},{104,0,421},{104,0,421},{104,0,421},{104,0,421},{90,177,0},{90,177,0},{90,177,0},{90,133,0},{63,146,0},{63,146,0},{116,255,926},{113,218,450},{117,186,646}, 
+{111,180,445},{101,255,1535},{96,201,434},{99,181,4},{94,163,646},{46,206,1514},{60,166,450},{127,247,421},{127,207,1},{130,184,38},{124,178,74},{229,0,1514},{90,204,421},{99,181,3},{44,167,421},{255,104,1514},{44,167,421},{113,234,421},{113,234,421},{113,234,421},{113,172,421},{98,225,338},{99,180,3},{99,180,3},{96,150,38},{59,179,338},{71,153,1},{127,205,0},{127,205,0},{127,205,0},{127,165,0},{208,0,338}, +{103,177,0},{103,177,0},{73,153,0},{254,84,338},{73,153,0},{248,0,421},{125,209,0},{138,182,0},{92,182,0},{248,0,421},{254,124,421},{92,182,0},{0,167,421},{254,124,421},{0,167,421},{112,0,421},{112,0,421},{112,0,421},{112,0,421},{98,185,0},{98,185,0},{98,185,0},{98,141,0},{71,154,0},{71,154,0},{125,255,994},{121,226,450},{125,194,646},{120,186,446},{113,255,1575},{104,209,434},{107,189,4}, +{102,171,646},{54,214,1514},{68,174,450},{135,255,421},{135,215,1},{139,192,36},{132,186,74},{241,0,1514},{98,212,421},{107,189,3},{52,175,421},{255,116,1514},{52,175,421},{121,241,421},{121,241,421},{121,241,421},{121,180,421},{106,233,338},{107,188,3},{107,188,3},{104,158,38},{67,187,338},{79,161,1},{135,213,0},{135,213,0},{135,213,0},{135,173,0},{220,0,338},{110,185,0},{110,185,0},{81,161,0},{255,95,338}, +{81,161,0},{255,10,421},{133,217,0},{146,190,0},{100,190,0},{255,10,421},{255,135,421},{100,190,0},{0,175,421},{255,135,421},{0,175,421},{120,0,421},{120,0,421},{120,0,421},{120,0,421},{106,193,0},{106,193,0},{106,193,0},{106,149,0},{79,162,0},{79,162,0},{135,255,1070},{129,234,450},{133,202,646},{128,195,446},{122,255,1626},{112,216,438},{115,197,4},{110,179,646},{62,222,1514},{76,182,450},{146,254,434}, 
+{143,223,1},{146,199,35},{140,194,74},{253,0,1514},{106,220,421},{115,197,3},{60,183,421},{254,128,1514},{60,183,421},{129,249,421},{129,249,421},{129,249,421},{129,188,421},{114,241,338},{115,196,3},{115,196,3},{112,166,38},{75,195,338},{87,169,1},{143,221,0},{143,221,0},{143,221,0},{143,181,0},{232,0,338},{118,193,0},{118,193,0},{89,169,0},{255,107,338},{89,169,0},{255,34,421},{141,225,0},{154,198,0}, +{108,198,0},{255,34,421},{255,147,421},{108,198,0},{0,183,421},{255,147,421},{0,183,421},{128,0,421},{128,0,421},{128,0,421},{128,0,421},{114,201,0},{114,201,0},{114,201,0},{114,157,0},{87,170,0},{87,170,0},{144,255,1190},{138,243,450},{142,211,646},{137,204,446},{132,255,1703},{121,225,438},{124,206,4},{121,187,654},{71,231,1514},{85,191,450},{156,255,461},{152,232,1},{155,208,35},{151,202,75},{255,22,1514}, +{114,229,421},{124,206,3},{69,192,421},{255,141,1514},{69,192,421},{138,255,422},{138,255,422},{138,255,422},{137,197,421},{123,250,338},{124,205,3},{124,205,3},{121,175,38},{84,204,338},{96,178,1},{152,230,0},{152,230,0},{152,230,0},{152,190,0},{245,0,338},{127,202,0},{127,202,0},{97,178,0},{254,121,338},{97,178,0},{255,61,421},{150,234,0},{163,207,0},{117,207,0},{255,61,421},{254,161,421},{117,207,0}, +{0,192,421},{254,161,421},{0,192,421},{137,0,421},{137,0,421},{137,0,421},{137,0,421},{123,210,0},{123,210,0},{123,210,0},{123,166,0},{95,179,0},{95,179,0},{153,255,1318},{146,251,450},{150,219,646},{145,212,446},{144,255,1791},{129,233,438},{132,214,4},{126,196,655},{78,240,1514},{93,199,450},{165,255,506},{160,240,1},{163,216,35},{158,210,78},{255,46,1514},{122,237,421},{132,214,3},{77,200,421},{255,153,1514}, 
+{77,200,421},{147,255,434},{147,255,434},{147,255,434},{145,205,421},{132,255,339},{132,213,3},{132,213,3},{129,183,38},{92,212,338},{104,186,1},{160,238,0},{160,238,0},{160,238,0},{160,198,0},{255,4,338},{135,210,0},{135,210,0},{105,186,0},{255,132,338},{105,186,0},{255,86,421},{158,242,0},{171,215,0},{125,215,0},{255,86,421},{255,172,421},{125,215,0},{0,200,421},{255,172,421},{0,200,421},{145,0,421}, +{145,0,421},{145,0,421},{145,0,421},{131,218,0},{131,218,0},{131,218,0},{131,174,0},{103,187,0},{103,187,0},{162,255,1466},{156,255,458},{158,226,639},{153,220,446},{153,255,1902},{137,240,438},{140,222,4},{134,204,655},{85,248,1514},{101,207,450},{175,255,554},{168,249,1},{171,224,35},{166,218,78},{255,70,1514},{130,245,421},{140,222,3},{85,208,421},{255,165,1514},{85,208,421},{156,255,458},{156,255,458},{156,255,458}, +{153,213,421},{141,255,350},{140,221,3},{140,221,3},{137,191,38},{99,220,338},{112,194,1},{168,245,0},{168,245,0},{168,245,0},{168,206,0},{255,28,338},{143,218,0},{143,218,0},{113,194,0},{255,144,338},{113,194,0},{255,110,421},{166,250,0},{179,223,0},{133,223,0},{255,110,421},{255,184,421},{133,223,0},{0,208,421},{255,184,421},{0,208,421},{153,0,421},{153,0,421},{153,0,421},{153,0,421},{139,226,0}, +{139,226,0},{139,226,0},{139,182,0},{111,195,0},{111,195,0},{172,255,1606},{165,255,519},{166,234,639},{161,228,446},{165,255,2030},{145,248,438},{148,230,4},{142,212,655},{95,255,1515},{109,215,450},{187,255,626},{177,255,3},{179,232,35},{174,226,78},{255,95,1514},{138,253,421},{148,230,3},{93,216,421},{254,177,1514},{93,216,421},{165,255,494},{165,255,494},{165,255,494},{161,221,421},{150,255,379},{148,229,3},{148,229,3}, 
+{145,199,38},{107,228,338},{120,202,1},{176,253,0},{176,253,0},{176,253,0},{176,214,0},{255,52,338},{151,226,0},{151,226,0},{121,202,0},{255,156,338},{121,202,0},{255,134,421},{177,255,2},{187,231,0},{141,231,0},{255,134,421},{255,196,421},{141,231,0},{0,216,421},{255,196,421},{0,216,421},{161,0,421},{161,0,421},{161,0,421},{161,0,421},{147,234,0},{147,234,0},{147,234,0},{147,190,0},{119,203,0}, +{119,203,0},{184,255,1818},{175,255,663},{175,243,639},{170,237,446},{175,255,2175},{155,255,443},{157,239,4},{151,221,655},{113,255,1557},{118,224,450},{196,255,722},{189,255,52},{188,241,35},{183,235,78},{255,122,1514},{155,255,442},{157,239,3},{101,225,421},{255,190,1514},{101,225,421},{175,255,542},{175,255,542},{175,255,542},{170,230,421},{162,255,424},{157,238,3},{157,238,3},{155,207,42},{116,237,338},{129,211,1},{186,254,5}, +{186,254,5},{186,254,5},{185,223,0},{255,79,338},{160,235,0},{160,235,0},{130,211,0},{253,170,338},{130,211,0},{255,161,421},{192,255,29},{196,240,0},{150,240,0},{255,161,421},{254,210,421},{150,240,0},{0,225,421},{254,210,421},{0,225,421},{170,0,421},{170,0,421},{170,0,421},{170,0,421},{156,243,0},{156,243,0},{156,243,0},{156,199,0},{128,212,0},{128,212,0},{193,255,2022},{184,255,858},{183,251,639}, +{178,245,446},{187,255,2343},{167,255,523},{165,247,4},{159,229,655},{131,255,1653},{126,232,450},{208,255,826},{198,255,150},{196,249,35},{191,243,78},{255,146,1514},{167,255,514},{165,247,3},{109,233,421},{255,202,1514},{109,233,421},{181,255,602},{181,255,602},{181,255,602},{178,238,421},{172,255,474},{165,246,3},{165,246,3},{163,215,42},{124,245,338},{137,219,1},{196,255,18},{196,255,18},{196,255,18},{193,231,0},{255,104,338}, 
+{168,243,0},{168,243,0},{138,219,0},{255,181,338},{138,219,0},{255,186,421},{207,255,80},{204,248,0},{158,248,0},{255,186,421},{255,221,421},{158,248,0},{0,233,421},{255,221,421},{0,233,421},{178,0,421},{178,0,421},{178,0,421},{178,0,421},{164,251,0},{164,251,0},{164,251,0},{164,207,0},{136,220,0},{136,220,0},{202,255,2175},{195,255,1070},{190,255,670},{186,253,441},{196,255,2443},{177,255,663},{172,255,2}, +{167,237,618},{146,255,1735},{135,240,423},{215,255,876},{207,255,277},{205,255,41},{199,251,69},{255,171,1459},{186,255,584},{174,254,2},{117,241,392},{254,214,1459},{117,241,392},{190,255,670},{190,255,670},{190,255,670},{186,246,421},{181,255,547},{173,253,2},{173,253,2},{171,223,42},{132,253,338},{145,227,1},{205,255,41},{205,255,41},{205,255,41},{201,239,0},{255,128,338},{176,251,0},{176,251,0},{146,227,0},{255,193,338}, +{146,227,0},{255,210,392},{222,255,136},{213,255,0},{167,255,0},{255,210,392},{255,233,392},{167,255,0},{0,241,392},{255,233,392},{0,241,392},{186,0,421},{186,0,421},{186,0,421},{186,0,421},{172,255,1},{172,255,1},{172,255,1},{172,214,0},{144,228,0},{144,228,0},{208,255,1867},{202,255,1047},{199,255,750},{194,255,421},{205,255,2052},{186,255,524},{183,255,22},{177,241,362},{161,255,1400},{145,244,215},{224,255,625}, +{216,255,229},{215,255,72},{207,253,17},{255,186,1064},{198,255,392},{186,255,4},{133,245,200},{255,221,1064},{133,245,200},{199,255,750},{199,255,750},{199,255,750},{194,254,421},{193,255,635},{183,255,22},{183,255,22},{179,231,42},{146,255,350},{153,235,1},{215,255,72},{215,255,72},{215,255,72},{209,247,0},{255,152,338},{186,255,4},{186,255,4},{154,235,0},{255,205,338},{154,235,0},{255,222,200},{231,255,65},{224,255,0}, 
+{192,255,0},{255,222,200},{255,239,200},{192,255,0},{0,245,200},{255,239,200},{0,245,200},{194,0,421},{194,0,421},{194,0,421},{194,0,421},{181,255,10},{181,255,10},{181,255,10},{180,222,0},{152,236,0},{152,236,0},{215,255,1586},{211,255,1053},{211,255,857},{204,255,446},{215,255,1698},{198,255,455},{195,255,109},{186,247,163},{177,255,1161},{158,249,63},{230,255,425},{226,255,209},{224,255,117},{218,255,1},{255,204,722}, +{210,255,254},{204,255,34},{152,249,61},{255,230,722},{152,249,61},{211,255,857},{211,255,857},{211,255,857},{204,255,446},{202,255,749},{195,255,109},{195,255,109},{188,240,42},{164,255,413},{162,244,1},{224,255,117},{224,255,117},{224,255,117},{218,254,1},{255,180,338},{204,255,34},{204,255,34},{163,244,0},{255,218,338},{163,244,0},{255,234,61},{240,255,17},{238,255,0},{219,255,0},{255,234,61},{255,245,61},{219,255,0}, +{0,249,61},{255,245,61},{0,249,61},{203,0,421},{203,0,421},{203,0,421},{203,0,421},{193,255,32},{193,255,32},{193,255,32},{189,231,0},{161,245,0},{161,245,0},{224,255,1422},{221,255,1083},{218,255,946},{213,255,525},{221,255,1470},{207,255,478},{204,255,243},{196,251,67},{189,255,1025},{169,253,5},{239,255,318},{235,255,217},{233,255,170},{228,255,29},{255,219,509},{222,255,198},{219,255,89},{169,253,5},{254,238,509}, +{169,253,5},{218,255,946},{218,255,946},{218,255,946},{213,255,525},{215,255,862},{204,255,243},{204,255,243},{196,248,42},{180,255,530},{169,252,1},{233,255,170},{233,255,170},{233,255,170},{228,255,29},{255,204,338},{219,255,89},{219,255,89},{171,252,0},{255,230,338},{171,252,0},{255,246,5},{251,254,1},{250,255,0},{243,255,0},{255,246,5},{255,251,5},{243,255,0},{0,253,5},{255,251,5},{0,253,5},{211,0,421}, 
+{211,0,421},{211,0,421},{211,0,421},{202,255,61},{202,255,61},{202,255,61},{197,239,0},{169,253,0},{169,253,0},{230,255,1153},{227,255,938},{227,255,857},{222,255,533},{227,255,1141},{216,255,434},{213,255,282},{204,253,18},{201,255,790},{183,255,8},{242,255,192},{242,255,144},{239,255,125},{237,255,34},{255,231,294},{234,255,121},{228,255,73},{189,255,0},{254,244,294},{189,255,0},{227,255,857},{227,255,857},{227,255,857}, +{222,255,533},{221,255,741},{213,255,282},{213,255,282},{204,253,14},{192,255,465},{183,255,8},{239,255,125},{239,255,125},{239,255,125},{237,255,34},{255,219,221},{228,255,73},{228,255,73},{189,255,0},{254,238,221},{189,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{219,0,421},{219,0,421},{219,0,421},{219,0,421},{211,255,100}, +{211,255,100},{211,255,100},{205,247,0},{183,255,8},{183,255,8},{236,255,853},{233,255,726},{233,255,677},{228,255,485},{233,255,793},{225,255,355},{222,255,262},{213,255,0},{213,255,534},{195,255,40},{248,255,68},{245,255,54},{245,255,45},{243,255,10},{255,240,113},{240,255,41},{240,255,25},{213,255,0},{255,248,113},{213,255,0},{233,255,677},{233,255,677},{233,255,677},{228,255,485},{227,255,545},{222,255,262},{222,255,262}, +{213,255,0},{204,255,329},{195,255,40},{245,255,45},{245,255,45},{245,255,45},{243,255,10},{255,231,85},{240,255,25},{240,255,25},{213,255,0},{254,244,85},{213,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{227,0,421},{227,0,421},{227,0,421},{227,0,421},{221,255,145},{221,255,145},{221,255,145},{213,255,0},{195,255,40}, 
+{195,255,40},{0,119,882},{0,84,97},{0,60,1},{0,50,325},{0,79,1896},{0,51,1188},{0,47,563},{0,31,1410},{0,36,2029},{0,31,1510},{0,119,882},{0,84,97},{0,60,1},{0,50,325},{39,0,1896},{0,51,1188},{0,47,563},{0,31,1410},{79,0,1896},{0,31,1410},{0,55,0},{0,55,0},{0,55,0},{0,27,0},{0,28,162},{0,21,52},{0,21,52},{0,13,89},{0,12,173},{0,13,105},{0,55,0}, +{0,55,0},{0,55,0},{0,27,0},{14,0,162},{0,21,52},{0,21,52},{0,13,89},{28,0,162},{0,13,89},{58,0,882},{0,84,97},{0,60,1},{0,50,325},{58,0,882},{119,0,882},{0,50,325},{0,39,882},{119,0,882},{0,39,882},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,143,882},{0,97,34},{1,68,11}, +{0,59,250},{0,95,2355},{0,63,1332},{0,54,585},{0,40,1656},{0,45,2556},{0,37,1802},{0,143,882},{0,97,34},{2,69,9},{0,59,250},{46,0,2355},{0,63,1332},{0,54,585},{0,40,1656},{95,0,2355},{0,40,1656},{0,79,0},{0,79,0},{0,79,0},{0,39,0},{0,40,338},{0,33,116},{0,33,116},{0,19,193},{0,18,365},{0,16,225},{0,79,0},{0,79,0},{0,79,0},{0,39,0},{20,0,338}, +{0,33,116},{0,33,116},{0,19,193},{40,0,338},{0,19,193},{70,0,882},{0,97,34},{6,68,0},{0,59,250},{70,0,882},{143,0,882},{0,59,250},{0,47,882},{143,0,882},{0,47,882},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,167,882},{0,112,5},{2,78,61},{0,68,185},{0,113,2899},{0,75,1508},{0,63,633}, +{0,44,1965},{0,51,3176},{0,44,2161},{0,167,882},{0,112,5},{4,77,53},{0,68,185},{55,0,2899},{0,75,1508},{0,63,633},{0,44,1965},{113,0,2899},{0,44,1965},{0,104,0},{0,104,0},{0,104,0},{0,51,0},{0,52,578},{0,42,205},{0,42,205},{0,25,337},{0,24,629},{0,22,389},{0,104,0},{0,104,0},{0,104,0},{0,51,0},{26,0,578},{0,42,205},{0,42,205},{0,25,337},{52,0,578}, 
+{0,25,337},{82,0,882},{0,112,5},{14,76,0},{0,68,185},{82,0,882},{167,0,882},{0,68,185},{0,55,882},{167,0,882},{0,55,882},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,186,900},{3,124,18},{6,88,133},{1,78,162},{0,134,3048},{0,88,1398},{0,72,469},{0,53,1923},{0,60,3457},{0,50,2205},{6,179,882}, +{6,121,2},{10,86,69},{3,77,158},{66,0,3048},{0,88,1398},{0,72,469},{0,53,1923},{134,0,3048},{0,53,1923},{3,122,18},{3,122,18},{3,122,18},{3,61,18},{0,73,648},{0,54,157},{0,54,157},{0,31,317},{0,33,756},{0,31,417},{6,116,0},{6,116,0},{6,116,0},{6,60,0},{36,0,648},{0,54,157},{0,54,157},{0,31,317},{73,0,648},{0,31,317},{94,0,882},{2,124,0},{22,84,0}, +{0,77,130},{94,0,882},{192,0,882},{0,77,130},{0,63,882},{192,0,882},{0,63,882},{3,0,18},{3,0,18},{3,0,18},{3,0,18},{0,19,0},{0,19,0},{0,19,0},{0,9,0},{0,9,4},{0,9,4},{8,203,995},{8,136,115},{11,96,290},{6,86,230},{0,161,3048},{0,100,1221},{0,84,250},{0,62,1758},{0,75,3651},{0,60,2156},{15,188,882},{15,130,2},{19,95,69},{12,86,158},{79,0,3048}, +{0,100,1221},{0,84,250},{0,62,1758},{161,0,3048},{0,62,1758},{8,139,113},{8,139,113},{8,139,113},{8,72,113},{0,101,648},{0,72,73},{0,72,73},{0,40,242},{0,45,862},{0,39,409},{15,125,0},{15,125,0},{15,125,0},{15,69,0},{49,0,648},{0,72,73},{0,72,73},{0,40,242},{101,0,648},{0,40,242},{107,0,882},{11,133,0},{31,93,0},{0,87,85},{107,0,882},{219,0,882},{0,87,85}, +{0,72,882},{219,0,882},{0,72,882},{7,0,113},{7,0,113},{7,0,113},{7,0,113},{0,46,0},{0,46,0},{0,46,0},{0,23,0},{0,18,34},{0,18,34},{12,219,1147},{12,146,275},{16,104,510},{11,95,365},{0,186,3048},{0,115,1096},{0,94,114},{0,71,1620},{0,88,3844},{0,69,2137},{23,196,882},{23,138,2},{27,103,69},{20,94,158},{91,0,3048},{0,115,1096},{0,94,114},{0,71,1620},{186,0,3048}, 
+{0,71,1620},{12,155,265},{12,155,265},{12,155,265},{12,82,265},{0,125,648},{0,84,25},{0,84,25},{0,53,180},{0,54,987},{0,48,427},{23,133,0},{23,133,0},{23,133,0},{23,77,0},{61,0,648},{0,84,25},{0,84,25},{0,53,180},{125,0,648},{0,53,180},{119,0,882},{20,140,0},{39,101,0},{0,96,50},{119,0,882},{243,0,882},{0,96,50},{0,80,882},{243,0,882},{0,80,882},{11,0,265}, +{11,0,265},{11,0,265},{11,0,265},{0,70,0},{0,70,0},{0,70,0},{0,35,0},{0,27,89},{0,27,89},{16,235,1363},{17,157,505},{20,114,802},{14,104,559},{0,210,3048},{0,127,1000},{0,105,34},{0,80,1494},{0,94,4056},{0,78,2148},{31,204,882},{31,145,2},{35,111,69},{27,102,165},{103,0,3048},{0,127,1000},{0,105,34},{0,80,1494},{210,0,3048},{0,80,1494},{16,171,481},{16,171,481},{16,171,481}, +{16,92,481},{0,149,648},{0,100,1},{0,100,1},{0,62,125},{0,66,1139},{0,57,473},{31,140,0},{31,140,0},{31,140,0},{31,85,0},{73,0,648},{0,100,1},{0,100,1},{0,62,125},{149,0,648},{0,62,125},{131,0,882},{28,148,0},{47,109,0},{0,105,25},{131,0,882},{255,6,882},{0,105,25},{0,88,882},{255,6,882},{0,88,882},{15,0,481},{15,0,481},{15,0,481},{15,0,481},{0,95,0}, +{0,95,0},{0,95,0},{0,46,0},{0,39,169},{0,39,169},{20,251,1643},{21,167,805},{25,122,1170},{19,112,822},{0,234,3048},{0,141,933},{1,115,10},{0,90,1395},{0,106,4312},{0,84,2188},{39,212,882},{39,153,2},{43,119,69},{35,110,165},{115,0,3048},{0,141,933},{1,115,9},{0,90,1395},{234,0,3048},{0,90,1395},{20,187,761},{20,187,761},{20,187,761},{19,102,761},{0,174,648},{1,111,5},{1,111,5}, +{0,71,80},{0,75,1322},{0,66,547},{39,148,0},{39,148,0},{39,148,0},{39,93,0},{85,0,648},{5,109,0},{5,109,0},{0,71,80},{174,0,648},{0,71,80},{143,0,882},{36,156,0},{55,117,0},{0,115,8},{143,0,882},{255,18,882},{0,115,8},{0,96,882},{255,18,882},{0,96,882},{19,0,761},{19,0,761},{19,0,761},{19,0,761},{0,119,0},{0,119,0},{0,119,0},{0,58,0},{0,48,274}, 
+{0,48,274},{29,254,1780},{29,178,935},{33,133,1327},{26,122,936},{6,249,3048},{3,153,909},{8,125,13},{0,99,1314},{0,121,4212},{0,97,1924},{48,221,882},{48,162,2},{52,128,69},{44,119,165},{128,0,3048},{0,156,891},{10,124,9},{0,99,1278},{255,3,3048},{0,99,1278},{27,200,882},{27,200,882},{27,200,882},{27,112,882},{6,189,648},{7,123,11},{7,123,11},{3,80,61},{0,91,1227},{0,75,409},{48,157,0}, +{48,157,0},{48,157,0},{48,102,0},{98,0,648},{14,118,0},{14,118,0},{0,80,41},{201,0,648},{0,80,41},{156,0,882},{45,165,0},{63,126,0},{0,126,0},{156,0,882},{254,32,882},{0,126,0},{0,105,882},{254,32,882},{0,105,882},{27,0,882},{27,0,882},{27,0,882},{27,0,882},{6,134,0},{6,134,0},{6,134,0},{6,69,0},{0,63,232},{0,63,232},{38,254,1814},{37,186,935},{41,141,1327}, +{34,130,936},{15,255,3049},{11,161,909},{16,133,13},{8,107,1314},{0,132,3964},{0,106,1605},{56,229,882},{56,170,2},{60,136,69},{52,127,165},{140,0,3048},{2,167,882},{18,132,9},{0,108,1188},{255,15,3048},{0,108,1188},{35,208,882},{35,208,882},{35,208,882},{35,120,882},{14,196,648},{15,131,11},{15,131,11},{11,88,61},{0,103,1059},{0,88,221},{56,165,0},{56,165,0},{56,165,0},{56,110,0},{110,0,648}, +{22,126,0},{22,126,0},{0,90,20},{225,0,648},{0,90,20},{168,0,882},{53,173,0},{71,134,0},{5,134,0},{168,0,882},{255,43,882},{5,134,0},{0,113,882},{255,43,882},{0,113,882},{35,0,882},{35,0,882},{35,0,882},{35,0,882},{14,142,0},{14,142,0},{14,142,0},{14,77,0},{0,75,136},{0,75,136},{46,255,1854},{45,194,935},{49,149,1327},{42,138,936},{24,255,3064},{19,169,909},{22,141,14}, +{16,115,1314},{0,144,3748},{0,115,1348},{64,237,882},{64,178,2},{68,144,69},{60,135,165},{152,0,3048},{10,175,882},{26,140,9},{0,117,1110},{255,27,3048},{0,117,1110},{43,216,882},{43,216,882},{43,216,882},{43,128,882},{22,204,648},{23,139,11},{23,139,11},{21,95,62},{0,118,922},{0,97,91},{64,173,0},{64,173,0},{64,173,0},{64,117,0},{122,0,648},{30,134,0},{30,134,0},{0,99,5},{249,0,648}, 
+{0,99,5},{180,0,882},{61,181,0},{79,142,0},{13,142,0},{180,0,882},{255,55,882},{13,142,0},{0,121,882},{255,55,882},{0,121,882},{43,0,882},{43,0,882},{43,0,882},{43,0,882},{22,150,0},{22,150,0},{22,150,0},{22,85,0},{0,91,58},{0,91,58},{55,255,1924},{53,202,935},{57,157,1327},{50,146,936},{36,255,3096},{27,177,909},{30,149,14},{24,123,1314},{0,159,3559},{0,124,1153},{72,245,882}, +{72,186,2},{77,152,65},{68,143,165},{164,0,3048},{19,182,882},{33,148,9},{0,126,1044},{255,39,3048},{0,126,1044},{51,224,882},{51,224,882},{51,224,882},{51,136,882},{30,212,648},{32,147,9},{32,147,9},{28,103,65},{0,129,810},{0,106,21},{72,181,0},{72,181,0},{72,181,0},{72,125,0},{134,0,648},{38,142,0},{38,142,0},{0,108,0},{255,9,648},{0,108,0},{192,0,882},{69,189,0},{87,150,0}, +{21,150,0},{192,0,882},{255,67,882},{21,150,0},{0,129,882},{255,67,882},{0,129,882},{51,0,882},{51,0,882},{51,0,882},{51,0,882},{30,158,0},{30,158,0},{30,158,0},{30,93,0},{0,106,17},{0,106,17},{67,255,2024},{62,211,935},{66,166,1327},{59,155,936},{46,255,3145},{36,186,909},{40,158,10},{33,132,1314},{0,172,3364},{0,133,1012},{81,254,882},{81,195,2},{86,161,65},{77,152,165},{177,0,3048}, +{28,191,882},{42,157,9},{0,136,990},{255,52,3048},{0,136,990},{60,233,882},{60,233,882},{60,233,882},{60,145,882},{39,221,648},{41,156,9},{41,156,9},{37,112,65},{0,144,720},{2,118,3},{81,190,0},{81,190,0},{81,190,0},{81,134,0},{147,0,648},{47,151,0},{47,151,0},{7,117,0},{254,23,648},{7,117,0},{205,0,882},{78,198,0},{96,159,0},{30,159,0},{205,0,882},{254,81,882},{30,159,0}, +{0,138,882},{254,81,882},{0,138,882},{60,0,882},{60,0,882},{60,0,882},{60,0,882},{39,167,0},{39,167,0},{39,167,0},{39,102,0},{0,121,0},{0,121,0},{76,255,2134},{70,218,935},{74,174,1327},{66,163,942},{58,255,3217},{45,193,904},{48,166,10},{41,140,1314},{0,184,3244},{0,145,948},{91,254,888},{89,203,2},{94,169,65},{85,160,165},{189,0,3048},{36,199,882},{50,165,9},{0,145,948},{255,64,3048}, 
+{0,145,948},{68,241,882},{68,241,882},{68,241,882},{68,153,882},{47,229,648},{49,164,9},{49,164,9},{45,120,65},{0,156,672},{10,126,3},{89,198,0},{89,198,0},{89,198,0},{89,142,0},{159,0,648},{56,158,0},{56,158,0},{15,125,0},{254,35,648},{15,125,0},{217,0,882},{86,206,0},{104,167,0},{38,167,0},{217,0,882},{255,92,882},{38,167,0},{0,146,882},{255,92,882},{0,146,882},{68,0,882}, +{68,0,882},{68,0,882},{68,0,882},{47,174,0},{47,174,0},{47,174,0},{47,109,0},{7,129,0},{7,129,0},{86,255,2252},{78,226,935},{82,182,1327},{74,171,942},{67,255,3300},{53,201,904},{56,174,10},{49,148,1314},{0,199,3151},{3,152,935},{101,255,906},{97,211,2},{102,177,65},{93,168,165},{201,0,3048},{44,207,882},{58,173,9},{0,152,915},{255,76,3048},{0,152,915},{76,249,882},{76,249,882},{76,249,882}, +{76,161,882},{55,237,648},{57,172,9},{57,172,9},{53,128,65},{0,171,649},{18,134,3},{97,206,0},{97,206,0},{97,206,0},{97,150,0},{171,0,648},{64,166,0},{64,166,0},{23,133,0},{255,46,648},{23,133,0},{229,0,882},{94,214,0},{112,175,0},{46,175,0},{229,0,882},{255,104,882},{46,175,0},{0,154,882},{255,104,882},{0,154,882},{76,0,882},{76,0,882},{76,0,882},{76,0,882},{55,182,0}, +{55,182,0},{55,182,0},{55,117,0},{16,136,0},{16,136,0},{95,255,2398},{86,234,935},{90,190,1327},{82,179,942},{76,255,3409},{61,209,904},{64,182,10},{56,155,1318},{0,211,3087},{11,160,935},{110,255,939},{105,219,2},{110,185,65},{101,176,165},{213,0,3048},{52,215,882},{66,181,9},{0,161,893},{254,88,3048},{0,161,893},{84,254,883},{84,254,883},{84,254,883},{84,169,882},{63,245,648},{65,180,9},{65,180,9}, +{61,136,65},{7,179,648},{27,141,2},{105,214,0},{105,214,0},{105,214,0},{105,158,0},{183,0,648},{72,174,0},{72,174,0},{31,141,0},{255,58,648},{31,141,0},{241,0,882},{101,222,0},{120,183,0},{54,183,0},{241,0,882},{255,116,882},{54,183,0},{0,162,882},{255,116,882},{0,162,882},{84,0,882},{84,0,882},{84,0,882},{84,0,882},{63,190,0},{63,190,0},{63,190,0},{63,125,0},{24,144,0}, 
+{24,144,0},{104,255,2584},{95,243,935},{100,199,1318},{91,188,942},{89,255,3529},{69,219,909},{73,191,10},{65,165,1327},{0,224,3052},{20,169,935},{122,255,996},{114,228,2},{119,194,65},{111,183,171},{226,0,3048},{61,224,882},{75,190,9},{0,171,883},{255,101,3048},{0,171,883},{94,254,893},{94,254,893},{94,254,893},{93,177,882},{72,254,648},{74,189,9},{74,189,9},{70,145,65},{16,188,648},{36,150,2},{114,222,0}, +{114,222,0},{114,222,0},{114,167,0},{196,0,648},{81,183,0},{81,183,0},{39,150,0},{254,72,648},{39,150,0},{254,0,882},{110,231,0},{129,192,0},{63,192,0},{254,0,882},{255,129,882},{63,192,0},{0,171,882},{255,129,882},{0,171,882},{93,0,882},{93,0,882},{93,0,882},{93,0,882},{72,199,0},{72,199,0},{72,199,0},{72,134,0},{33,153,0},{33,153,0},{113,255,2774},{103,251,935},{107,206,1314}, +{99,196,942},{98,255,3672},{77,227,909},{81,199,10},{73,173,1327},{5,235,3048},{28,177,935},{132,255,1054},{121,236,3},{127,202,65},{119,192,173},{238,0,3048},{69,232,882},{83,198,9},{4,179,882},{255,113,3048},{4,179,882},{103,254,915},{103,254,915},{103,254,915},{101,185,882},{82,255,654},{82,197,9},{82,197,9},{78,153,65},{24,196,648},{44,158,2},{122,230,0},{122,230,0},{122,230,0},{122,175,0},{208,0,648}, +{89,191,0},{89,191,0},{47,158,0},{254,84,648},{47,158,0},{255,22,882},{118,239,0},{137,200,0},{71,200,0},{255,22,882},{255,141,882},{71,200,0},{0,179,882},{255,141,882},{0,179,882},{101,0,882},{101,0,882},{101,0,882},{101,0,882},{80,207,0},{80,207,0},{80,207,0},{80,142,0},{41,161,0},{41,161,0},{122,255,2984},{110,255,948},{115,214,1314},{107,204,942},{110,255,3832},{85,235,909},{89,207,10}, 
+{81,181,1327},{13,243,3048},{36,185,935},{141,255,1131},{129,244,3},{135,210,65},{127,200,173},{250,0,3048},{77,240,882},{91,206,9},{12,187,882},{255,125,3048},{12,187,882},{110,255,948},{110,255,948},{110,255,948},{109,193,882},{92,255,672},{90,205,9},{90,205,9},{86,161,65},{32,204,648},{52,166,2},{130,238,0},{130,238,0},{130,238,0},{130,183,0},{220,0,648},{97,199,0},{97,199,0},{55,166,0},{255,95,648}, +{55,166,0},{255,46,882},{126,247,0},{145,208,0},{79,208,0},{255,46,882},{255,153,882},{79,208,0},{0,187,882},{255,153,882},{0,187,882},{109,0,882},{109,0,882},{109,0,882},{109,0,882},{88,215,0},{88,215,0},{88,215,0},{88,150,0},{49,169,0},{49,169,0},{132,255,3182},{122,255,1012},{123,222,1314},{115,212,942},{119,255,4009},{93,243,909},{97,215,10},{89,189,1327},{21,251,3048},{44,193,935},{150,255,1226}, +{137,252,3},{143,218,65},{135,208,173},{255,13,3048},{83,249,882},{99,214,9},{20,195,882},{254,137,3048},{20,195,882},{119,255,990},{119,255,990},{119,255,990},{117,201,882},{101,255,705},{98,213,9},{98,213,9},{94,169,65},{40,212,648},{60,174,2},{138,246,0},{138,246,0},{138,246,0},{138,191,0},{232,0,648},{104,208,0},{104,208,0},{63,174,0},{255,107,648},{63,174,0},{255,70,882},{134,255,0},{153,216,0}, +{86,216,0},{255,70,882},{255,165,882},{86,216,0},{0,195,882},{255,165,882},{0,195,882},{117,0,882},{117,0,882},{117,0,882},{117,0,882},{96,223,0},{96,223,0},{96,223,0},{96,158,0},{57,177,0},{57,177,0},{141,255,3464},{131,255,1153},{132,231,1314},{124,221,942},{132,255,4209},{102,252,909},{105,224,14},{98,198,1327},{37,255,3060},{53,202,935},{162,255,1349},{149,255,21},{152,227,65},{144,217,173},{255,40,3048}, 
+{95,255,885},{108,223,9},{29,204,882},{255,150,3048},{29,204,882},{129,255,1044},{129,255,1044},{129,255,1044},{126,210,882},{113,255,762},{107,222,9},{107,222,9},{103,178,65},{49,221,648},{69,183,2},{147,255,0},{147,255,0},{147,255,0},{147,200,0},{245,0,648},{112,217,0},{112,217,0},{72,183,0},{254,121,648},{72,183,0},{255,98,882},{149,255,17},{162,225,0},{95,225,0},{255,98,882},{255,178,882},{95,225,0}, +{0,204,882},{255,178,882},{0,204,882},{126,0,882},{126,0,882},{126,0,882},{126,0,882},{105,232,0},{105,232,0},{105,232,0},{105,167,0},{66,186,0},{66,186,0},{150,255,3734},{140,255,1348},{140,239,1314},{132,229,942},{141,255,4420},{113,255,925},{113,232,14},{106,206,1327},{52,255,3132},{61,210,935},{172,255,1459},{158,255,91},{160,234,62},{152,225,173},{255,64,3048},{110,255,923},{116,232,11},{37,212,882},{255,162,3048}, +{37,212,882},{138,255,1110},{138,255,1110},{138,255,1110},{134,218,882},{122,255,827},{115,229,9},{115,229,9},{111,187,69},{57,229,648},{77,191,2},{156,255,5},{156,255,5},{156,255,5},{155,208,0},{255,4,648},{120,225,0},{120,225,0},{80,191,0},{255,132,648},{80,191,0},{255,122,882},{164,255,58},{170,233,0},{103,233,0},{255,122,882},{255,190,882},{103,233,0},{0,212,882},{255,190,882},{0,212,882},{134,0,882}, +{134,0,882},{134,0,882},{134,0,882},{113,240,0},{113,240,0},{113,240,0},{113,175,0},{74,194,0},{74,194,0},{162,255,4022},{149,255,1605},{148,247,1314},{140,237,942},{150,255,4657},{122,255,1020},{122,239,13},{114,214,1327},{70,255,3256},{69,218,935},{181,255,1598},{167,255,221},{167,244,61},{160,233,173},{255,89,3048},{128,255,1003},{123,240,11},{45,220,882},{254,174,3048},{45,220,882},{147,255,1188},{147,255,1188},{147,255,1188}, 
+{142,226,882},{132,255,897},{123,237,9},{123,237,9},{119,195,69},{65,237,648},{85,199,2},{165,255,20},{165,255,20},{165,255,20},{163,216,0},{255,28,648},{128,233,0},{128,233,0},{88,199,0},{255,144,648},{88,199,0},{255,146,882},{180,255,136},{178,241,0},{111,241,0},{255,146,882},{255,202,882},{111,241,0},{0,220,882},{255,202,882},{0,220,882},{142,0,882},{142,0,882},{142,0,882},{142,0,882},{121,248,0}, +{121,248,0},{121,248,0},{121,183,0},{82,202,0},{82,202,0},{172,255,4300},{158,255,1924},{156,255,1314},{150,244,943},{162,255,4905},{134,255,1204},{130,247,13},{122,222,1327},{82,255,3448},{77,226,935},{190,255,1755},{180,255,409},{175,252,61},{168,241,173},{255,113,3048},{143,255,1125},{131,248,11},{53,228,882},{254,186,3048},{53,228,882},{156,255,1278},{156,255,1278},{156,255,1278},{150,234,882},{141,255,992},{131,245,9},{131,245,9}, +{127,203,69},{73,245,648},{93,207,2},{175,255,41},{175,255,41},{175,255,41},{171,224,0},{255,52,648},{137,240,0},{137,240,0},{96,207,0},{255,156,648},{96,207,0},{255,171,882},{192,255,232},{186,249,0},{119,249,0},{255,171,882},{254,214,882},{119,249,0},{0,228,882},{254,214,882},{0,228,882},{150,0,882},{150,0,882},{150,0,882},{150,0,882},{129,255,0},{129,255,0},{129,255,0},{129,191,0},{90,210,0}, +{90,210,0},{178,255,4349},{171,255,2188},{165,255,1395},{159,251,923},{172,255,4837},{146,255,1309},{140,254,10},{132,229,1170},{104,255,3433},{88,234,805},{202,255,1725},{189,255,547},{184,255,80},{176,249,133},{255,137,2814},{158,255,1125},{143,254,5},{66,235,761},{254,198,2814},{66,235,761},{165,255,1395},{165,255,1395},{165,255,1395},{159,243,882},{153,255,1115},{140,254,9},{140,254,9},{136,212,69},{81,254,648},{102,216,2},{184,255,80}, 
+{184,255,80},{184,255,80},{180,233,0},{255,79,648},{146,249,0},{146,249,0},{105,216,0},{253,170,648},{105,216,0},{255,192,761},{207,255,274},{196,255,0},{134,255,0},{255,192,761},{255,224,761},{134,255,0},{0,235,761},{255,224,761},{0,235,761},{159,0,882},{159,0,882},{159,0,882},{159,0,882},{140,254,8},{140,254,8},{140,254,8},{138,200,0},{99,219,0},{99,219,0},{187,255,3903},{177,255,2148},{175,255,1494}, +{167,253,887},{181,255,4274},{155,255,1106},{149,255,34},{140,234,802},{119,255,2958},{98,238,505},{208,255,1361},{198,255,473},{193,255,125},{187,251,53},{255,152,2249},{167,255,857},{155,255,1},{82,239,481},{255,205,2249},{82,239,481},{175,255,1494},{175,255,1494},{175,255,1494},{167,251,882},{162,255,1242},{149,255,34},{149,255,34},{144,220,69},{95,255,670},{110,224,2},{193,255,125},{193,255,125},{193,255,125},{188,241,0},{255,104,648}, +{155,255,1},{155,255,1},{113,224,0},{255,181,648},{113,224,0},{255,204,481},{216,255,169},{208,255,0},{158,255,0},{255,204,481},{255,230,481},{158,255,0},{0,239,481},{255,230,481},{0,239,481},{167,0,882},{167,0,882},{167,0,882},{167,0,882},{149,254,25},{149,254,25},{149,254,25},{146,208,0},{106,227,0},{106,227,0},{193,255,3535},{186,255,2137},{184,255,1620},{175,255,891},{187,255,3794},{167,255,978},{159,255,114}, +{150,238,510},{131,255,2574},{109,243,275},{215,255,1046},{207,255,427},{202,255,180},{195,253,9},{255,171,1769},{183,255,650},{167,255,25},{98,243,265},{254,214,1769},{98,243,265},{184,255,1620},{184,255,1620},{184,255,1620},{175,255,891},{172,255,1364},{159,255,114},{159,255,114},{152,228,69},{113,255,734},{117,232,2},{202,255,180},{202,255,180},{202,255,180},{196,249,0},{255,128,648},{167,255,25},{167,255,25},{120,232,0},{255,193,648}, 
+{120,232,0},{255,216,265},{228,255,89},{220,255,0},{183,255,0},{255,216,265},{255,236,265},{183,255,0},{0,243,265},{255,236,265},{0,243,265},{175,0,882},{175,0,882},{175,0,882},{175,0,882},{159,255,50},{159,255,50},{159,255,50},{154,216,0},{114,235,0},{114,235,0},{202,255,3229},{195,255,2156},{193,255,1758},{183,255,946},{196,255,3397},{174,255,950},{171,255,250},{158,243,290},{146,255,2281},{119,247,115},{221,255,822}, +{216,255,409},{215,255,242},{204,255,1},{255,186,1374},{195,255,498},{183,255,73},{114,247,113},{255,221,1374},{114,247,113},{193,255,1758},{193,255,1758},{193,255,1758},{183,255,946},{181,255,1521},{171,255,250},{171,255,250},{160,236,69},{128,255,840},{125,240,2},{215,255,242},{215,255,242},{215,255,242},{204,255,1},{255,152,648},{183,255,73},{183,255,73},{128,240,0},{255,205,648},{128,240,0},{255,228,113},{237,255,34},{232,255,0}, +{207,255,0},{255,228,113},{255,242,113},{207,255,0},{0,247,113},{255,242,113},{0,247,113},{183,0,882},{183,0,882},{183,0,882},{183,0,882},{168,255,85},{168,255,85},{168,255,85},{162,224,0},{122,243,0},{122,243,0},{211,255,2974},{205,255,2205},{202,255,1923},{195,255,1069},{205,255,3055},{186,255,981},{180,255,469},{167,249,133},{161,255,2061},{131,252,18},{230,255,645},{224,255,417},{224,255,317},{216,255,45},{255,204,1032}, +{210,255,404},{201,255,157},{131,252,18},{255,230,1032},{131,252,18},{202,255,1923},{202,255,1923},{202,255,1923},{195,255,1069},{193,255,1710},{180,255,469},{180,255,469},{169,245,69},{146,255,1011},{134,249,2},{224,255,317},{224,255,317},{224,255,317},{216,255,45},{255,180,648},{201,255,157},{201,255,157},{137,249,0},{255,218,648},{137,249,0},{255,243,18},{246,255,4},{245,255,0},{234,255,0},{255,243,18},{254,250,18},{234,255,0}, 
+{0,252,18},{254,250,18},{0,252,18},{192,0,882},{192,0,882},{192,0,882},{192,0,882},{178,255,130},{178,255,130},{178,255,130},{171,233,0},{131,252,0},{131,252,0},{218,255,2682},{211,255,2161},{211,255,1965},{204,255,1170},{215,255,2712},{195,255,1014},{192,255,633},{177,253,61},{167,255,1893},{143,255,5},{236,255,513},{233,255,389},{230,255,337},{225,255,97},{255,219,771},{219,255,342},{213,255,205},{149,255,0},{254,238,771}, +{149,255,0},{211,255,1965},{211,255,1965},{211,255,1965},{204,255,1170},{202,255,1755},{192,255,633},{192,255,633},{178,251,53},{161,255,1085},{143,255,5},{230,255,337},{230,255,337},{230,255,337},{225,255,97},{255,201,578},{213,255,205},{213,255,205},{149,255,0},{254,229,578},{149,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{200,0,882}, +{200,0,882},{200,0,882},{200,0,882},{187,255,185},{187,255,185},{187,255,185},{179,241,0},{143,255,5},{143,255,5},{221,255,2188},{218,255,1802},{215,255,1656},{210,255,1086},{218,255,2117},{204,255,865},{201,255,585},{187,254,11},{183,255,1467},{158,255,34},{242,255,297},{239,255,225},{236,255,193},{231,255,53},{255,225,451},{228,255,192},{222,255,116},{174,255,0},{254,241,451},{174,255,0},{215,255,1656},{215,255,1656},{215,255,1656}, +{210,255,1086},{211,255,1426},{201,255,585},{201,255,585},{186,253,9},{167,255,869},{158,255,34},{236,255,193},{236,255,193},{236,255,193},{231,255,53},{255,213,338},{222,255,116},{222,255,116},{174,255,0},{254,235,338},{174,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{208,0,882},{208,0,882},{208,0,882},{208,0,882},{196,255,250}, 
+{196,255,250},{196,255,250},{187,249,0},{158,255,34},{158,255,34},{227,255,1772},{224,255,1510},{224,255,1410},{219,255,1021},{224,255,1645},{210,255,761},{208,255,563},{195,255,1},{195,255,1123},{171,255,97},{245,255,136},{242,255,105},{242,255,89},{237,255,25},{255,234,216},{237,255,86},{234,255,52},{198,255,0},{255,245,216},{198,255,0},{224,255,1410},{224,255,1410},{224,255,1410},{219,255,1021},{215,255,1140},{208,255,563},{208,255,563}, +{195,255,1},{186,255,696},{171,255,97},{242,255,89},{242,255,89},{242,255,89},{237,255,25},{255,225,162},{234,255,52},{234,255,52},{198,255,0},{254,241,162},{198,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{216,0,882},{216,0,882},{216,0,882},{216,0,882},{205,255,325},{205,255,325},{205,255,325},{195,255,1},{171,255,97}, +{171,255,97},{0,158,1568},{0,112,169},{0,80,4},{0,68,585},{0,107,3371},{0,69,2124},{0,62,1013},{0,40,2532},{0,48,3617},{0,40,2701},{0,158,1568},{0,112,169},{0,80,4},{0,68,585},{52,0,3371},{0,69,2124},{0,62,1013},{0,40,2532},{107,0,3371},{0,40,2532},{0,73,0},{0,73,0},{0,73,0},{0,36,0},{0,37,288},{0,30,97},{0,30,97},{0,16,164},{0,15,312},{0,16,189},{0,73,0}, +{0,73,0},{0,73,0},{0,36,0},{18,0,288},{0,30,97},{0,30,97},{0,16,164},{37,0,288},{0,16,164},{78,0,1568},{0,112,169},{0,80,4},{0,68,585},{78,0,1568},{158,0,1568},{0,68,585},{0,52,1568},{158,0,1568},{0,52,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,183,1568},{0,124,89},{0,89,10}, +{0,77,482},{0,122,3968},{0,81,2316},{0,69,1041},{0,50,2857},{0,57,4304},{0,47,3092},{0,183,1568},{0,124,89},{2,88,8},{0,77,482},{60,0,3968},{0,81,2316},{0,69,1041},{0,50,2857},{122,0,3968},{0,50,2857},{0,98,0},{0,98,0},{0,98,0},{0,48,0},{0,49,512},{0,39,180},{0,39,180},{0,22,296},{0,21,556},{0,22,345},{0,98,0},{0,98,0},{0,98,0},{0,48,0},{24,0,512}, 
+{0,39,180},{0,39,180},{0,22,296},{49,0,512},{0,22,296},{89,0,1568},{0,124,89},{5,88,0},{0,77,482},{89,0,1568},{183,0,1568},{0,77,482},{0,60,1568},{183,0,1568},{0,60,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,207,1568},{0,138,34},{2,97,58},{0,83,394},{0,137,4651},{0,91,2507},{0,78,1093}, +{0,56,3225},{0,63,5084},{0,53,3532},{0,207,1568},{0,138,34},{3,98,49},{0,83,394},{67,0,4651},{0,91,2507},{0,78,1093},{0,56,3225},{137,0,4651},{0,56,3225},{0,122,0},{0,122,0},{0,122,0},{0,60,0},{0,61,800},{0,48,289},{0,48,289},{0,28,468},{0,27,872},{0,25,545},{0,122,0},{0,122,0},{0,122,0},{0,60,0},{30,0,800},{0,48,289},{0,48,289},{0,28,468},{61,0,800}, +{0,28,468},{101,0,1568},{0,138,34},{13,96,0},{0,83,394},{101,0,1568},{207,0,1568},{0,83,394},{0,68,1568},{207,0,1568},{0,68,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,231,1568},{0,153,5},{3,107,148},{0,93,317},{0,155,5419},{0,100,2754},{0,88,1161},{0,62,3641},{0,69,5968},{0,59,4028},{0,231,1568}, +{0,153,5},{6,107,126},{0,93,317},{76,0,5419},{0,100,2754},{0,88,1161},{0,62,3641},{155,0,5419},{0,62,3641},{0,146,0},{0,146,0},{0,146,0},{0,72,0},{0,73,1152},{0,57,424},{0,57,424},{0,34,680},{0,33,1260},{0,31,789},{0,146,0},{0,146,0},{0,146,0},{0,72,0},{36,0,1152},{0,57,424},{0,57,424},{0,34,680},{73,0,1152},{0,34,680},{113,0,1568},{0,153,5},{21,104,0}, +{0,93,317},{113,0,1568},{231,0,1568},{0,93,317},{0,76,1568},{231,0,1568},{0,76,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{5,248,1609},{4,166,41},{9,116,259},{2,104,303},{0,183,5419},{0,115,2480},{0,100,798},{0,71,3404},{0,84,6188},{0,68,3926},{9,240,1568},{9,162,5},{15,115,121},{5,104,293},{89,0,5419}, 
+{0,115,2480},{0,100,798},{0,71,3404},{183,0,5419},{0,71,3404},{5,163,41},{5,163,41},{5,163,41},{4,83,41},{0,101,1152},{0,75,274},{0,75,274},{0,44,569},{0,45,1366},{0,40,747},{9,155,0},{9,155,0},{9,155,0},{9,81,0},{49,0,1152},{0,75,274},{0,75,274},{0,44,569},{101,0,1152},{0,44,569},{127,0,1568},{4,166,0},{30,113,0},{0,105,233},{127,0,1568},{254,2,1568},{0,105,233}, +{0,85,1568},{254,2,1568},{0,85,1568},{4,0,41},{4,0,41},{4,0,41},{4,0,41},{0,28,0},{0,28,0},{0,28,0},{0,14,0},{0,12,10},{0,12,10},{9,255,1731},{9,176,146},{13,126,435},{7,112,374},{0,207,5419},{0,129,2265},{0,109,532},{0,80,3202},{0,94,6384},{0,77,3861},{17,248,1568},{17,170,5},{23,123,121},{13,112,293},{101,0,5419},{0,129,2265},{0,109,532},{0,80,3202},{207,0,5419}, +{0,80,3202},{9,179,145},{9,179,145},{9,179,145},{9,92,145},{0,125,1152},{0,88,157},{0,88,157},{0,53,468},{0,54,1491},{0,50,737},{17,163,0},{17,163,0},{17,163,0},{17,89,0},{61,0,1152},{0,88,157},{0,88,157},{0,53,468},{125,0,1152},{0,53,468},{138,0,1568},{12,174,0},{38,121,0},{0,114,170},{138,0,1568},{254,14,1568},{0,114,170},{0,93,1568},{254,14,1568},{0,93,1568},{8,0,145}, +{8,0,145},{8,0,145},{8,0,145},{0,52,0},{0,52,0},{0,52,0},{0,26,0},{0,21,45},{0,21,45},{15,255,1991},{13,189,321},{18,134,687},{10,121,510},{0,231,5419},{0,141,2081},{0,121,324},{0,90,3035},{0,106,6640},{0,87,3833},{26,254,1569},{25,178,5},{31,131,121},{21,120,293},{113,0,5419},{0,141,2081},{0,121,324},{0,90,3035},{231,0,5419},{0,90,3035},{13,195,313},{13,195,313},{13,195,313}, +{13,102,313},{0,149,1152},{0,103,80},{0,103,80},{0,62,377},{0,66,1643},{0,57,749},{25,171,0},{25,171,0},{25,171,0},{25,97,0},{73,0,1152},{0,103,80},{0,103,80},{0,62,377},{149,0,1152},{0,62,377},{150,0,1568},{21,181,0},{46,129,0},{0,123,117},{150,0,1568},{254,26,1568},{0,123,117},{0,101,1568},{254,26,1568},{0,101,1568},{12,0,313},{12,0,313},{12,0,313},{12,0,313},{0,76,0}, 
+{0,76,0},{0,76,0},{0,38,0},{0,30,106},{0,30,106},{24,255,2387},{17,199,565},{23,142,1015},{15,129,713},{0,255,5419},{0,156,1924},{0,130,166},{0,99,2857},{0,115,6921},{0,94,3818},{36,255,1579},{33,187,4},{39,139,121},{29,128,293},{125,0,5419},{0,156,1924},{0,130,166},{0,99,2857},{255,0,5419},{0,99,2857},{17,212,545},{17,212,545},{17,212,545},{17,112,545},{0,174,1152},{0,118,29},{0,118,29}, +{0,71,296},{0,75,1826},{0,66,787},{33,179,0},{33,179,0},{33,179,0},{33,104,0},{85,0,1152},{0,118,29},{0,118,29},{0,71,296},{174,0,1152},{0,71,296},{162,0,1568},{29,189,0},{54,137,0},{0,133,80},{162,0,1568},{254,38,1568},{0,133,80},{0,109,1568},{254,38,1568},{0,109,1568},{16,0,545},{16,0,545},{16,0,545},{16,0,545},{0,101,0},{0,101,0},{0,101,0},{0,49,0},{0,39,193}, +{0,39,193},{30,255,3004},{22,210,924},{28,153,1470},{19,140,1026},{6,255,5520},{0,172,1772},{0,142,57},{0,108,2668},{0,127,7276},{0,103,3836},{46,255,1602},{41,195,3},{48,148,121},{38,137,293},{138,0,5419},{0,172,1772},{0,142,57},{0,108,2668},{254,14,5419},{0,108,2668},{21,231,882},{21,231,882},{21,231,882},{21,124,882},{0,201,1152},{0,132,2},{0,132,2},{0,83,218},{0,88,2034},{0,75,865},{42,188,0}, +{42,188,0},{42,188,0},{42,113,0},{98,0,1152},{0,132,2},{0,132,2},{0,83,218},{201,0,1152},{0,83,218},{175,0,1568},{38,198,0},{63,146,0},{0,142,41},{175,0,1568},{254,51,1568},{0,142,41},{0,118,1568},{254,51,1568},{0,118,1568},{21,0,882},{21,0,882},{21,0,882},{21,0,882},{0,128,0},{0,128,0},{0,128,0},{0,63,0},{0,51,320},{0,51,320},{36,255,3708},{27,221,1316},{33,161,1956}, +{24,148,1369},{15,255,5777},{0,184,1676},{0,151,19},{0,117,2514},{0,138,7620},{0,112,3881},{55,255,1643},{49,203,3},{56,156,121},{46,145,293},{150,0,5419},{0,184,1676},{0,151,19},{0,117,2514},{254,26,5419},{0,117,2514},{25,247,1250},{25,247,1250},{25,247,1250},{25,134,1250},{0,225,1152},{1,147,3},{1,147,3},{0,90,164},{0,100,2274},{0,84,961},{50,196,0},{50,196,0},{50,196,0},{50,121,0},{110,0,1152}, 
+{5,143,0},{5,143,0},{0,90,164},{225,0,1152},{0,90,164},{187,0,1568},{46,206,0},{71,154,0},{0,151,18},{187,0,1568},{254,63,1568},{0,151,18},{0,126,1568},{254,63,1568},{0,126,1568},{25,0,1250},{25,0,1250},{25,0,1250},{25,0,1250},{0,152,0},{0,152,0},{0,152,0},{0,75,0},{0,60,461},{0,60,461},{43,255,4356},{32,232,1665},{38,169,2370},{29,156,1670},{21,255,6121},{0,196,1616},{3,161,20}, +{0,126,2376},{0,150,7804},{0,121,3804},{67,255,1699},{57,211,3},{64,164,121},{54,153,293},{162,0,5419},{0,196,1612},{5,160,17},{0,126,2372},{254,38,5419},{0,126,2372},{30,255,1572},{30,255,1572},{30,255,1572},{30,143,1568},{2,245,1152},{4,159,17},{4,159,17},{0,99,117},{0,109,2403},{0,94,953},{58,204,0},{58,204,0},{58,204,0},{58,129,0},{122,0,1152},{13,151,0},{13,151,0},{0,99,113},{249,0,1152}, +{0,99,113},{199,0,1568},{54,214,0},{79,162,0},{0,160,5},{199,0,1568},{254,75,1568},{0,160,5},{0,134,1568},{254,75,1568},{0,134,1568},{30,0,1568},{30,0,1568},{30,0,1568},{30,0,1568},{2,172,0},{2,172,0},{2,172,0},{2,86,0},{0,72,541},{0,72,541},{52,255,4586},{40,240,1665},{47,177,2355},{37,164,1670},{33,255,6289},{7,206,1615},{11,169,20},{1,134,2353},{0,162,7444},{0,130,3321},{76,255,1766}, +{65,219,3},{72,172,121},{61,161,302},{174,0,5419},{0,211,1577},{13,168,17},{0,133,2259},{255,49,5419},{0,133,2259},{39,255,1586},{39,255,1586},{39,255,1586},{38,151,1568},{10,253,1152},{12,167,17},{12,167,17},{7,109,115},{0,124,2150},{0,103,659},{66,211,0},{66,211,0},{66,211,0},{66,137,0},{134,0,1152},{21,159,0},{21,159,0},{0,108,72},{255,9,1152},{0,108,72},{211,0,1568},{62,222,0},{86,170,0}, +{0,170,0},{211,0,1568},{255,86,1568},{0,170,0},{0,142,1568},{255,86,1568},{0,142,1568},{38,0,1568},{38,0,1568},{38,0,1568},{38,0,1568},{10,180,0},{10,180,0},{10,180,0},{10,93,0},{0,88,373},{0,88,373},{64,255,4866},{49,249,1665},{55,188,2353},{46,173,1670},{43,255,6476},{16,215,1615},{20,178,20},{10,143,2353},{0,178,7036},{0,139,2856},{86,255,1851},{75,228,5},{81,181,121},{70,170,302},{187,0,5419}, 
+{4,222,1568},{22,177,17},{0,145,2124},{254,63,5419},{0,145,2124},{49,255,1612},{49,255,1612},{49,255,1612},{47,160,1568},{21,255,1158},{21,176,17},{21,176,17},{16,118,115},{0,138,1900},{0,115,392},{75,220,0},{75,220,0},{75,220,0},{75,146,0},{147,0,1152},{30,168,0},{30,168,0},{0,120,34},{254,23,1152},{0,120,34},{224,0,1568},{71,231,0},{95,179,0},{7,179,0},{224,0,1568},{254,100,1568},{7,179,0}, +{0,151,1568},{254,100,1568},{0,151,1568},{47,0,1568},{47,0,1568},{47,0,1568},{47,0,1568},{19,189,0},{19,189,0},{19,189,0},{19,102,0},{0,103,232},{0,103,232},{73,255,5136},{57,254,1666},{63,196,2353},{54,181,1670},{55,255,6684},{24,223,1615},{28,186,20},{18,151,2353},{0,190,6740},{0,151,2504},{95,255,1954},{83,236,5},{89,189,121},{78,178,302},{199,0,5419},{12,230,1568},{30,185,17},{0,154,2018},{254,75,5419}, +{0,154,2018},{58,255,1650},{58,255,1650},{58,255,1650},{55,168,1568},{30,255,1179},{29,184,17},{29,184,17},{24,126,115},{0,150,1708},{0,127,216},{83,228,0},{83,228,0},{83,228,0},{83,154,0},{159,0,1152},{38,176,0},{38,176,0},{0,130,17},{254,35,1152},{0,130,17},{236,0,1568},{78,240,0},{103,187,0},{15,187,0},{236,0,1568},{254,112,1568},{15,187,0},{0,159,1568},{254,112,1568},{0,159,1568},{55,0,1568}, +{55,0,1568},{55,0,1568},{55,0,1568},{27,197,0},{27,197,0},{27,197,0},{27,110,0},{0,115,136},{0,115,136},{82,255,5426},{67,255,1701},{71,204,2353},{62,189,1670},{64,255,6905},{32,231,1615},{36,194,20},{26,159,2353},{0,202,6476},{0,160,2211},{107,255,2066},{91,244,5},{97,197,121},{86,186,302},{211,0,5419},{20,238,1568},{38,193,17},{0,160,1922},{255,86,5419},{0,160,1922},{67,255,1700},{67,255,1700},{67,255,1700}, 
+{63,176,1568},{39,255,1218},{37,192,17},{37,192,17},{32,134,115},{0,165,1545},{0,136,90},{91,236,0},{91,236,0},{91,236,0},{91,162,0},{171,0,1152},{46,184,0},{46,184,0},{0,139,4},{255,46,1152},{0,139,4},{248,0,1568},{85,248,0},{111,195,0},{23,195,0},{248,0,1568},{254,124,1568},{23,195,0},{0,167,1568},{254,124,1568},{0,167,1568},{63,0,1568},{63,0,1568},{63,0,1568},{63,0,1568},{35,205,0}, +{35,205,0},{35,205,0},{35,118,0},{0,129,65},{0,129,65},{92,255,5700},{76,255,1798},{79,212,2353},{70,197,1670},{73,255,7152},{40,239,1615},{44,202,20},{34,167,2353},{0,215,6213},{0,169,1980},{116,255,2195},{99,252,5},{104,206,115},{94,194,302},{223,0,5419},{28,246,1568},{46,201,17},{0,169,1836},{255,98,5419},{0,169,1836},{76,255,1762},{76,255,1762},{76,255,1762},{71,184,1568},{49,255,1260},{45,200,17},{45,200,17}, +{40,142,115},{0,178,1395},{0,145,24},{99,244,0},{99,244,0},{99,244,0},{99,170,0},{183,0,1152},{54,192,0},{54,192,0},{0,147,0},{255,58,1152},{0,147,0},{255,10,1568},{94,254,1},{119,203,0},{31,203,0},{255,10,1568},{255,135,1568},{31,203,0},{0,175,1568},{255,135,1568},{0,175,1568},{71,0,1568},{71,0,1568},{71,0,1568},{71,0,1568},{43,213,0},{43,213,0},{43,213,0},{43,126,0},{0,144,20}, +{0,144,20},{101,255,6066},{86,255,1980},{88,221,2353},{79,206,1670},{86,255,7408},{49,248,1615},{53,211,20},{43,176,2353},{0,230,5988},{0,179,1798},{129,255,2347},{110,255,24},{113,215,115},{103,203,302},{236,0,5419},{37,255,1568},{55,210,17},{0,179,1762},{254,112,5419},{0,179,1762},{86,255,1836},{86,255,1836},{86,255,1836},{80,193,1568},{61,255,1331},{54,209,17},{54,209,17},{49,151,115},{0,193,1281},{3,156,5},{108,253,0}, 
+{108,253,0},{108,253,0},{108,179,0},{196,0,1152},{63,201,0},{63,201,0},{9,156,0},{254,72,1152},{9,156,0},{255,37,1568},{110,255,20},{128,212,0},{40,212,0},{255,37,1568},{254,149,1568},{40,212,0},{0,184,1568},{254,149,1568},{0,184,1568},{80,0,1568},{80,0,1568},{80,0,1568},{80,0,1568},{52,222,0},{52,222,0},{52,222,0},{52,135,0},{0,159,1},{0,159,1},{110,255,6416},{95,255,2211},{96,229,2353}, +{86,213,1674},{95,255,7689},{58,254,1616},{61,219,20},{51,184,2353},{0,242,5820},{0,188,1701},{138,255,2502},{119,255,90},{121,223,115},{111,211,302},{248,0,5419},{52,255,1595},{63,218,17},{0,188,1700},{254,124,5419},{0,188,1700},{95,255,1922},{95,255,1922},{95,255,1922},{88,201,1568},{70,255,1414},{62,217,17},{62,217,17},{58,158,121},{0,205,1209},{11,164,5},{116,255,4},{116,255,4},{116,255,4},{116,187,0},{208,0,1152}, +{70,209,0},{70,209,0},{17,164,0},{254,84,1152},{17,164,0},{255,61,1568},{125,255,65},{136,220,0},{48,220,0},{255,61,1568},{254,161,1568},{48,220,0},{0,192,1568},{254,161,1568},{0,192,1568},{88,0,1568},{88,0,1568},{88,0,1568},{88,0,1568},{60,230,0},{60,230,0},{60,230,0},{60,143,0},{7,169,0},{7,169,0},{119,255,6786},{104,255,2504},{104,237,2353},{94,222,1676},{107,255,7985},{68,255,1665},{69,227,20}, +{59,192,2353},{0,254,5684},{0,197,1666},{147,255,2675},{128,255,216},{129,231,115},{119,219,302},{255,10,5419},{67,255,1665},{71,226,17},{0,197,1650},{255,135,5419},{0,197,1650},{101,255,2018},{101,255,2018},{101,255,2018},{96,209,1568},{82,255,1510},{70,224,17},{70,224,17},{66,166,121},{0,218,1163},{19,172,5},{125,255,17},{125,255,17},{125,255,17},{124,195,0},{220,0,1152},{78,217,0},{78,217,0},{25,172,0},{255,95,1152}, 
+{25,172,0},{255,86,1568},{140,255,136},{144,228,0},{56,228,0},{255,86,1568},{255,172,1568},{56,228,0},{0,200,1568},{255,172,1568},{0,200,1568},{96,0,1568},{96,0,1568},{96,0,1568},{96,0,1568},{68,238,0},{68,238,0},{68,238,0},{68,151,0},{15,177,0},{15,177,0},{129,255,7124},{116,255,2856},{112,245,2353},{102,230,1676},{116,255,8300},{79,255,1802},{77,235,20},{67,200,2353},{7,255,5788},{5,206,1665},{156,255,2866}, +{140,255,392},{137,239,115},{127,227,302},{255,34,5419},{82,255,1779},{79,234,17},{0,206,1612},{255,147,5419},{0,206,1612},{110,255,2124},{110,255,2124},{110,255,2124},{104,217,1568},{92,255,1608},{78,232,17},{78,232,17},{74,174,121},{1,231,1152},{27,180,5},{135,255,34},{135,255,34},{135,255,34},{132,203,0},{232,0,1152},{86,225,0},{86,225,0},{33,180,0},{255,107,1152},{33,180,0},{255,110,1568},{152,255,232},{152,236,0}, +{64,236,0},{255,110,1568},{255,184,1568},{64,236,0},{0,208,1568},{255,184,1568},{0,208,1568},{104,0,1568},{104,0,1568},{104,0,1568},{104,0,1568},{76,246,0},{76,246,0},{76,246,0},{76,159,0},{24,184,0},{24,184,0},{138,255,7586},{125,255,3321},{121,254,2353},{111,239,1676},{129,255,8636},{92,255,2092},{86,244,20},{78,208,2355},{28,255,6049},{14,215,1665},{168,255,3097},{152,255,659},{146,248,115},{136,236,302},{255,61,5419}, +{101,255,1977},{88,243,17},{0,216,1586},{254,161,5419},{0,216,1586},{122,255,2259},{122,255,2259},{122,255,2259},{113,226,1568},{101,255,1746},{87,241,17},{87,241,17},{83,183,121},{10,240,1152},{36,190,3},{147,255,72},{147,255,72},{147,255,72},{141,212,0},{245,0,1152},{95,234,0},{95,234,0},{42,189,0},{254,121,1152},{42,189,0},{255,137,1568},{167,255,373},{161,245,0},{73,245,0},{255,137,1568},{254,198,1568},{73,245,0}, 
+{0,217,1568},{254,198,1568},{0,217,1568},{113,0,1568},{113,0,1568},{113,0,1568},{113,0,1568},{85,254,0},{85,254,0},{85,254,0},{85,168,0},{33,193,0},{33,193,0},{147,255,8016},{134,255,3804},{129,255,2376},{119,247,1676},{138,255,8985},{104,255,2436},{94,252,20},{85,216,2370},{40,255,6353},{22,223,1665},{178,255,3291},{161,255,953},{156,255,117},{146,243,305},{255,86,5419},{119,255,2185},{96,251,17},{0,225,1572},{255,172,5419}, +{0,225,1572},{129,255,2372},{129,255,2372},{129,255,2372},{121,233,1568},{113,255,1890},{95,249,17},{95,249,17},{91,191,121},{18,248,1152},{44,198,3},{156,255,113},{156,255,113},{156,255,113},{149,220,0},{255,4,1152},{103,242,0},{103,242,0},{49,197,0},{255,132,1152},{49,197,0},{255,161,1568},{183,255,541},{169,253,0},{80,253,0},{255,161,1568},{254,210,1568},{80,253,0},{0,225,1568},{254,210,1568},{0,225,1568},{121,0,1568}, +{121,0,1568},{121,0,1568},{121,0,1568},{94,254,5},{94,254,5},{94,254,5},{93,176,0},{41,201,0},{41,201,0},{156,255,7638},{143,255,3881},{138,255,2514},{128,250,1620},{147,255,8460},{113,255,2285},{104,255,19},{94,222,1956},{58,255,5932},{34,228,1316},{184,255,2947},{171,255,961},{165,255,164},{152,246,206},{255,104,4803},{131,255,1925},{108,254,3},{6,230,1250},{255,181,4803},{6,230,1250},{138,255,2514},{138,255,2514},{138,255,2514}, +{129,241,1568},{122,255,2043},{104,255,19},{104,255,19},{99,199,121},{28,255,1153},{52,206,3},{165,255,164},{165,255,164},{165,255,164},{157,228,0},{255,28,1152},{111,250,0},{111,250,0},{57,205,0},{255,144,1152},{57,205,0},{255,177,1250},{195,255,461},{180,255,0},{101,255,0},{255,177,1250},{254,217,1250},{101,255,0},{0,230,1250},{254,217,1250},{0,230,1250},{129,0,1568},{129,0,1568},{129,0,1568},{129,0,1568},{104,255,18}, 
+{104,255,18},{104,255,18},{101,184,0},{49,209,0},{49,209,0},{165,255,7060},{152,255,3836},{147,255,2668},{137,253,1576},{156,255,7717},{122,255,2020},{113,255,57},{102,227,1470},{73,255,5307},{44,232,924},{193,255,2466},{180,255,865},{172,255,218},{162,250,98},{255,119,4056},{143,255,1557},{122,255,2},{22,234,882},{254,189,4056},{22,234,882},{147,255,2668},{147,255,2668},{147,255,2668},{137,249,1568},{132,255,2193},{113,255,57},{113,255,57}, +{107,207,121},{40,255,1185},{60,214,3},{172,255,218},{172,255,218},{172,255,218},{165,236,0},{255,52,1152},{122,255,2},{122,255,2},{65,213,0},{255,156,1152},{65,213,0},{255,189,882},{204,255,320},{192,255,0},{125,255,0},{255,189,882},{254,223,882},{125,255,0},{0,234,882},{254,223,882},{0,234,882},{137,0,1568},{137,0,1568},{137,0,1568},{137,0,1568},{113,255,41},{113,255,41},{113,255,41},{109,192,0},{57,217,0}, +{57,217,0},{172,255,6429},{161,255,3818},{156,255,2857},{146,255,1572},{165,255,6979},{134,255,1813},{125,255,166},{113,232,1015},{82,255,4731},{56,238,565},{199,255,2010},{189,255,787},{184,255,296},{173,252,26},{255,137,3318},{155,255,1221},{137,255,29},{41,238,545},{254,198,3318},{41,238,545},{156,255,2857},{156,255,2857},{156,255,2857},{146,255,1572},{141,255,2403},{125,255,166},{125,255,166},{116,216,121},{61,255,1273},{68,222,4},{184,255,296}, +{184,255,296},{184,255,296},{174,245,0},{255,79,1152},{137,255,29},{137,255,29},{74,222,0},{253,170,1152},{74,222,0},{255,201,545},{216,255,193},{205,255,0},{152,255,0},{255,201,545},{254,229,545},{152,255,0},{0,238,545},{254,229,545},{0,238,545},{146,0,1568},{146,0,1568},{146,0,1568},{146,0,1568},{122,255,80},{122,255,80},{122,255,80},{118,201,0},{66,226,0},{66,226,0},{178,255,5997},{168,255,3833},{165,255,3035}, 
+{155,255,1619},{172,255,6365},{143,255,1710},{134,255,324},{121,237,687},{101,255,4330},{66,242,321},{208,255,1673},{198,255,749},{193,255,377},{182,255,1},{255,152,2753},{167,255,989},{152,255,80},{57,242,313},{255,205,2753},{57,242,313},{165,255,3035},{165,255,3035},{165,255,3035},{155,255,1619},{153,255,2603},{134,255,324},{134,255,324},{124,224,121},{76,255,1395},{76,230,5},{193,255,377},{193,255,377},{193,255,377},{182,253,0},{255,104,1152}, +{152,255,80},{152,255,80},{82,230,0},{255,181,1152},{82,230,0},{255,213,313},{225,255,106},{217,255,0},{177,255,0},{255,213,313},{254,235,313},{177,255,0},{0,242,313},{254,235,313},{0,242,313},{154,0,1568},{154,0,1568},{154,0,1568},{154,0,1568},{132,255,117},{132,255,117},{132,255,117},{126,209,0},{74,234,0},{74,234,0},{187,255,5627},{178,255,3861},{175,255,3202},{164,255,1720},{178,255,5889},{152,255,1685},{146,255,532}, +{129,242,435},{116,255,3993},{79,246,146},{215,255,1382},{205,255,737},{202,255,468},{192,255,13},{255,171,2273},{180,255,850},{167,255,157},{74,246,145},{254,214,2273},{74,246,145},{175,255,3202},{175,255,3202},{175,255,3202},{164,255,1720},{162,255,2818},{146,255,532},{146,255,532},{132,232,121},{92,255,1584},{84,238,5},{202,255,468},{202,255,468},{202,255,468},{192,255,13},{255,128,1152},{167,255,157},{167,255,157},{90,238,0},{255,193,1152}, +{90,238,0},{255,225,145},{234,255,45},{229,255,0},{201,255,0},{255,225,145},{254,241,145},{201,255,0},{0,246,145},{254,241,145},{0,246,145},{162,0,1568},{162,0,1568},{162,0,1568},{162,0,1568},{141,255,170},{141,255,170},{141,255,170},{134,217,0},{80,243,0},{80,243,0},{193,255,5331},{187,255,3926},{184,255,3404},{174,255,1889},{187,255,5490},{161,255,1738},{155,255,798},{139,246,259},{128,255,3745},{88,251,41},{221,255,1182}, 
+{215,255,747},{211,255,569},{201,255,73},{255,186,1878},{192,255,746},{180,255,274},{90,250,41},{255,221,1878},{90,250,41},{184,255,3404},{184,255,3404},{184,255,3404},{174,255,1889},{172,255,3020},{155,255,798},{155,255,798},{140,240,121},{107,255,1798},{92,246,5},{211,255,569},{211,255,569},{211,255,569},{201,255,73},{255,152,1152},{180,255,274},{180,255,274},{98,246,0},{255,205,1152},{98,246,0},{255,237,41},{243,255,10},{241,255,0}, +{225,255,0},{255,237,41},{254,247,41},{225,255,0},{0,250,41},{254,247,41},{0,250,41},{170,0,1568},{170,0,1568},{170,0,1568},{170,0,1568},{150,255,233},{150,255,233},{150,255,233},{142,225,0},{88,251,0},{88,251,0},{202,255,5076},{196,255,4028},{193,255,3641},{183,255,2129},{196,255,5148},{174,255,1917},{167,255,1161},{147,251,148},{143,255,3577},{101,255,5},{230,255,1041},{224,255,789},{221,255,680},{210,255,205},{255,204,1536}, +{207,255,706},{198,255,424},{107,255,0},{255,230,1536},{107,255,0},{193,255,3641},{193,255,3641},{193,255,3641},{183,255,2129},{184,255,3299},{167,255,1161},{167,255,1161},{148,249,126},{125,255,2089},{101,255,5},{221,255,680},{221,255,680},{221,255,680},{210,255,205},{255,180,1152},{198,255,424},{198,255,424},{107,255,0},{255,218,1152},{107,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0}, +{0,255,0},{255,254,0},{0,255,0},{179,0,1568},{179,0,1568},{179,0,1568},{179,0,1568},{162,255,317},{162,255,317},{162,255,317},{151,234,0},{101,255,5},{101,255,5},{208,255,4372},{202,255,3532},{199,255,3225},{189,255,2017},{202,255,4324},{180,255,1693},{177,255,1093},{157,252,58},{155,255,2953},{116,255,34},{233,255,716},{230,255,545},{227,255,468},{219,255,137},{255,213,1067},{213,255,482},{207,255,289},{131,255,0},{254,235,1067}, 
+{131,255,0},{199,255,3225},{199,255,3225},{199,255,3225},{189,255,2017},{190,255,2819},{177,255,1093},{177,255,1093},{157,252,49},{137,255,1737},{116,255,34},{227,255,468},{227,255,468},{227,255,468},{219,255,137},{255,192,800},{207,255,289},{207,255,289},{131,255,0},{255,224,800},{131,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{187,0,1568}, +{187,0,1568},{187,0,1568},{187,0,1568},{172,255,394},{172,255,394},{172,255,394},{159,242,0},{116,255,34},{116,255,34},{215,255,3720},{208,255,3092},{205,255,2857},{198,255,1910},{208,255,3604},{189,255,1510},{183,255,1041},{165,254,10},{164,255,2420},{131,255,89},{239,255,456},{233,255,345},{233,255,296},{225,255,85},{255,219,683},{222,255,300},{216,255,180},{155,255,0},{254,238,683},{155,255,0},{205,255,2857},{205,255,2857},{205,255,2857}, +{198,255,1910},{196,255,2411},{183,255,1041},{183,255,1041},{167,253,8},{149,255,1449},{131,255,89},{233,255,296},{233,255,296},{233,255,296},{225,255,85},{255,204,512},{216,255,180},{216,255,180},{155,255,0},{255,230,512},{155,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{195,0,1568},{195,0,1568},{195,0,1568},{195,0,1568},{178,255,482}, +{178,255,482},{178,255,482},{167,250,0},{131,255,89},{131,255,89},{218,255,3170},{215,255,2701},{215,255,2532},{207,255,1825},{215,255,2956},{198,255,1373},{192,255,1013},{175,255,4},{167,255,2025},{143,255,169},{242,255,249},{239,255,189},{239,255,164},{234,255,45},{255,228,384},{231,255,162},{225,255,97},{180,255,0},{255,242,384},{180,255,0},{215,255,2532},{215,255,2532},{215,255,2532},{207,255,1825},{205,255,2070},{192,255,1013},{192,255,1013}, 
+{175,255,4},{161,255,1225},{143,255,169},{239,255,164},{239,255,164},{239,255,164},{234,255,45},{255,216,288},{225,255,97},{225,255,97},{180,255,0},{255,236,288},{180,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{203,0,1568},{203,0,1568},{203,0,1568},{203,0,1568},{187,255,585},{187,255,585},{187,255,585},{175,255,4},{143,255,169}, +{143,255,169},{0,210,2665},{0,147,274},{0,106,1},{0,90,985},{0,140,5885},{0,94,3649},{0,81,1742},{0,56,4398},{0,63,6341},{0,56,4722},{0,210,2665},{0,147,274},{0,106,1},{0,90,985},{69,0,5885},{0,94,3649},{0,81,1742},{0,56,4398},{140,0,5885},{0,56,4398},{0,101,0},{0,101,0},{0,101,0},{0,49,0},{0,49,545},{0,39,193},{0,39,193},{0,22,317},{0,21,593},{0,22,366},{0,101,0}, +{0,101,0},{0,101,0},{0,49,0},{24,0,545},{0,39,193},{0,39,193},{0,22,317},{49,0,545},{0,22,317},{103,0,2665},{0,147,274},{0,106,1},{0,90,985},{103,0,2665},{210,0,2665},{0,90,985},{0,69,2665},{210,0,2665},{0,69,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,234,2665},{0,162,169},{1,114,11}, +{0,99,850},{0,158,6669},{0,103,3898},{0,91,1770},{0,62,4826},{0,72,7238},{0,62,5226},{0,234,2665},{0,162,169},{2,115,9},{0,99,850},{78,0,6669},{0,103,3898},{0,91,1770},{0,62,4826},{158,0,6669},{0,62,4826},{0,125,0},{0,125,0},{0,125,0},{0,61,0},{0,61,841},{0,51,305},{0,51,305},{0,28,493},{0,27,917},{0,28,574},{0,125,0},{0,125,0},{0,125,0},{0,61,0},{30,0,841}, +{0,51,305},{0,51,305},{0,28,493},{61,0,841},{0,28,493},{115,0,2665},{0,162,169},{6,114,0},{0,99,850},{115,0,2665},{234,0,2665},{0,99,850},{0,77,2665},{234,0,2665},{0,77,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,255,2669},{0,175,80},{2,124,61},{0,108,725},{0,174,7538},{0,115,4178},{0,100,1832}, 
+{0,71,5294},{0,78,8238},{0,68,5786},{2,254,2669},{0,175,80},{4,123,53},{0,108,725},{85,0,7538},{0,115,4178},{0,100,1832},{0,71,5294},{174,0,7538},{0,71,5294},{0,149,0},{0,149,0},{0,149,0},{0,73,0},{0,73,1201},{0,60,442},{0,60,442},{0,34,709},{0,33,1313},{0,31,824},{0,149,0},{0,149,0},{0,149,0},{0,73,0},{36,0,1201},{0,60,442},{0,60,442},{0,34,709},{73,0,1201}, +{0,34,709},{127,0,2665},{0,175,80},{14,122,0},{0,108,725},{127,0,2665},{254,2,2665},{0,108,725},{0,85,2665},{254,2,2665},{0,85,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,255,2795},{0,190,29},{3,134,155},{0,117,610},{0,189,8493},{0,124,4485},{0,109,1922},{0,77,5810},{0,88,9310},{0,74,6402},{6,255,2761}, +{0,190,29},{6,134,133},{0,117,610},{92,0,8493},{0,124,4485},{0,109,1922},{0,77,5810},{189,0,8493},{0,77,5810},{0,174,0},{0,174,0},{0,174,0},{0,85,0},{0,85,1625},{0,69,605},{0,69,605},{0,40,965},{0,39,1781},{0,37,1120},{0,174,0},{0,174,0},{0,174,0},{0,85,0},{42,0,1625},{0,69,605},{0,69,605},{0,40,965},{85,0,1625},{0,40,965},{138,0,2665},{0,190,29},{22,130,0}, +{0,117,610},{138,0,2665},{254,14,2665},{0,117,610},{0,93,2665},{254,14,2665},{0,93,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{9,255,3139},{0,205,2},{5,145,311},{0,126,493},{0,207,9669},{0,135,4865},{0,118,2054},{0,83,6450},{0,94,10654},{0,80,7162},{12,255,3029},{0,205,2},{9,143,266},{0,126,493},{101,0,9669}, +{0,135,4865},{0,118,2054},{0,83,6450},{207,0,9669},{0,83,6450},{0,201,0},{0,201,0},{0,201,0},{0,98,0},{0,101,2178},{0,81,820},{0,81,820},{0,47,1322},{0,45,2392},{0,40,1521},{0,201,0},{0,201,0},{0,201,0},{0,98,0},{49,0,2178},{0,81,820},{0,81,820},{0,47,1322},{101,0,2178},{0,47,1322},{152,0,2665},{0,205,2},{30,139,0},{0,126,493},{152,0,2665},{255,27,2665},{0,126,493}, 
+{0,102,2665},{255,27,2665},{0,102,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{15,255,3483},{4,217,32},{10,153,429},{2,135,473},{0,231,9669},{0,150,4526},{0,127,1610},{0,93,6193},{0,106,10910},{0,90,7050},{24,255,3141},{8,213,2},{17,151,266},{2,135,469},{113,0,9669},{0,150,4526},{0,127,1610},{0,93,6193},{231,0,9669}, +{0,93,6193},{4,217,32},{4,217,32},{4,217,32},{4,108,32},{0,125,2178},{0,94,605},{0,94,605},{0,56,1165},{0,54,2517},{0,50,1457},{8,209,0},{8,209,0},{8,209,0},{8,106,0},{61,0,2178},{0,94,605},{0,94,605},{0,56,1165},{125,0,2178},{0,56,1165},{164,0,2665},{5,216,0},{38,147,0},{0,136,410},{164,0,2665},{255,39,2665},{0,136,410},{0,110,2665},{255,39,2665},{0,110,2665},{4,0,32}, +{4,0,32},{4,0,32},{4,0,32},{0,25,0},{0,25,0},{0,25,0},{0,12,0},{0,9,8},{0,9,8},{21,255,3971},{8,227,132},{15,161,623},{5,144,523},{0,255,9669},{0,162,4214},{0,139,1218},{0,102,5913},{0,118,11198},{0,96,6942},{33,255,3266},{16,222,1},{25,159,266},{10,143,469},{125,0,9669},{0,162,4214},{0,139,1218},{0,102,5913},{255,0,9669},{0,102,5913},{8,233,128},{8,233,128},{8,233,128}, +{8,118,128},{0,149,2178},{0,109,442},{0,109,442},{0,65,1018},{0,66,2669},{0,59,1419},{16,217,0},{16,217,0},{16,217,0},{16,114,0},{73,0,2178},{0,109,442},{0,109,442},{0,65,1018},{149,0,2178},{0,65,1018},{175,0,2665},{14,223,0},{46,155,0},{0,145,325},{175,0,2665},{254,51,2665},{0,145,325},{0,118,2665},{254,51,2665},{0,118,2665},{8,0,128},{8,0,128},{8,0,128},{8,0,128},{0,49,0}, +{0,49,0},{0,49,0},{0,24,0},{0,21,40},{0,21,40},{27,255,4603},{13,238,300},{19,171,891},{10,152,642},{6,255,9761},{0,175,3898},{0,148,882},{0,111,5645},{0,127,11511},{0,105,6861},{43,255,3390},{24,230,1},{33,167,266},{18,151,469},{137,0,9669},{0,175,3898},{0,148,882},{0,111,5645},{255,12,9669},{0,111,5645},{12,249,288},{12,249,288},{12,249,288},{12,128,288},{0,174,2178},{0,124,305},{0,124,305}, 
+{0,74,881},{0,75,2852},{0,68,1409},{24,225,0},{24,225,0},{24,225,0},{24,122,0},{85,0,2178},{0,124,305},{0,124,305},{0,74,881},{174,0,2178},{0,74,881},{187,0,2665},{22,231,0},{54,163,0},{0,154,250},{187,0,2665},{254,63,2665},{0,154,250},{0,126,2665},{254,63,2665},{0,126,2665},{12,0,288},{12,0,288},{12,0,288},{12,0,288},{0,73,0},{0,73,0},{0,73,0},{0,36,0},{0,30,97}, +{0,30,97},{36,255,5482},{17,251,574},{24,179,1282},{14,162,853},{15,255,10055},{0,190,3618},{0,160,569},{0,120,5354},{0,138,11902},{0,114,6807},{55,255,3569},{33,239,1},{42,176,266},{27,160,469},{150,0,9669},{0,190,3618},{0,160,569},{0,120,5354},{254,26,9669},{0,120,5354},{18,255,558},{18,255,558},{18,255,558},{16,139,545},{0,201,2178},{0,138,180},{0,138,180},{0,83,740},{0,88,3060},{0,78,1427},{33,233,0}, +{33,233,0},{33,233,0},{33,131,0},{98,0,2178},{0,138,180},{0,138,180},{0,83,740},{201,0,2178},{0,83,740},{201,0,2665},{31,240,0},{63,172,0},{0,166,180},{201,0,2665},{255,76,2665},{0,166,180},{0,135,2665},{255,76,2665},{0,135,2665},{16,0,545},{16,0,545},{16,0,545},{16,0,545},{0,101,0},{0,101,0},{0,101,0},{0,49,0},{0,39,193},{0,39,193},{43,255,6378},{24,255,915},{29,187,1710}, +{18,172,1113},{21,255,10495},{0,205,3401},{0,169,353},{0,126,5126},{0,150,12278},{0,123,6786},{64,255,3738},{41,246,2},{50,183,259},{35,168,469},{162,0,9669},{0,205,3401},{0,169,353},{0,126,5126},{254,38,9669},{0,126,5126},{24,255,914},{24,255,914},{24,255,914},{20,149,841},{0,225,2178},{0,153,97},{0,153,97},{0,93,637},{0,100,3300},{0,88,1469},{41,241,0},{41,241,0},{41,241,0},{41,139,0},{110,0,2178}, +{0,153,97},{0,153,97},{0,93,637},{225,0,2178},{0,93,637},{213,0,2665},{38,249,0},{71,180,0},{0,173,130},{213,0,2665},{254,88,2665},{0,173,130},{0,143,2665},{254,88,2665},{0,143,2665},{20,0,841},{20,0,841},{20,0,841},{20,0,841},{0,125,0},{0,125,0},{0,125,0},{0,61,0},{0,51,305},{0,51,305},{49,255,7446},{30,255,1431},{33,197,2210},{22,179,1438},{30,255,11102},{0,218,3189},{0,182,185}, 
+{0,136,4909},{0,162,12686},{0,130,6797},{73,255,3925},{49,254,2},{57,193,258},{43,176,469},{174,0,9669},{0,218,3189},{0,182,185},{0,136,4909},{255,49,9669},{0,136,4909},{27,255,1382},{27,255,1382},{27,255,1382},{24,159,1201},{0,249,2178},{0,168,40},{0,168,40},{0,102,530},{0,109,3565},{0,94,1537},{49,249,0},{49,249,0},{49,249,0},{49,147,0},{122,0,2178},{0,168,40},{0,168,40},{0,102,530},{249,0,2178}, +{0,102,530},{224,0,2665},{46,255,1},{79,188,0},{0,182,85},{224,0,2665},{254,100,2665},{0,182,85},{0,151,2665},{254,100,2665},{0,151,2665},{24,0,1201},{24,0,1201},{24,0,1201},{24,0,1201},{0,149,0},{0,149,0},{0,149,0},{0,73,0},{0,60,442},{0,60,442},{55,255,8658},{36,255,2131},{38,205,2786},{27,188,1837},{36,255,11866},{0,230,3029},{0,191,75},{0,145,4685},{0,172,13066},{0,139,6826},{86,255,4118}, +{58,255,26},{65,201,258},{51,184,469},{186,0,9669},{0,230,3029},{0,191,75},{0,145,4685},{255,61,9669},{0,145,4685},{33,255,1954},{33,255,1954},{33,255,1954},{28,169,1625},{3,255,2219},{0,181,5},{0,181,5},{0,111,433},{0,121,3861},{0,103,1633},{57,254,1},{57,254,1},{57,254,1},{57,155,0},{134,0,2178},{0,181,5},{0,181,5},{0,111,433},{255,9,2178},{0,111,433},{236,0,2665},{61,255,20},{87,196,0}, +{0,191,50},{236,0,2665},{254,112,2665},{0,191,50},{0,159,2665},{254,112,2665},{0,159,2665},{28,0,1625},{28,0,1625},{28,0,1625},{28,0,1625},{0,174,0},{0,174,0},{0,174,0},{0,85,0},{0,69,605},{0,69,605},{61,255,10195},{43,255,3100},{43,216,3523},{31,198,2356},{43,255,12883},{0,245,2885},{0,203,20},{0,157,4450},{0,184,13589},{0,148,6898},{95,255,4346},{70,255,117},{74,210,258},{60,193,469},{199,0,9669}, +{0,245,2885},{0,203,20},{0,157,4450},{254,75,9669},{0,157,4450},{39,255,2734},{39,255,2734},{39,255,2734},{33,180,2178},{12,255,2420},{1,196,2},{1,196,2},{0,123,337},{0,132,4227},{0,112,1777},{67,255,10},{67,255,10},{67,255,10},{66,164,0},{147,0,2178},{4,194,0},{4,194,0},{0,123,337},{254,23,2178},{0,123,337},{250,0,2665},{76,255,73},{96,205,0},{0,203,20},{250,0,2665},{255,125,2665},{0,203,20}, 
+{0,168,2665},{255,125,2665},{0,168,2665},{33,0,2178},{33,0,2178},{33,0,2178},{33,0,2178},{0,201,0},{0,201,0},{0,201,0},{0,98,0},{0,81,820},{0,81,820},{67,255,11582},{49,255,4083},{48,224,4162},{36,206,2818},{49,255,13898},{1,255,2805},{2,212,20},{0,163,4255},{0,196,13958},{0,157,6886},{104,255,4577},{79,255,259},{82,218,258},{68,201,469},{211,0,9669},{1,255,2805},{4,211,17},{0,163,4254},{255,86,9669}, +{0,163,4254},{46,255,3434},{46,255,3434},{46,255,3434},{37,190,2665},{18,255,2709},{3,210,17},{3,210,17},{0,130,270},{0,141,4491},{0,121,1854},{76,255,29},{76,255,29},{76,255,29},{74,172,0},{159,0,2178},{12,202,0},{12,202,0},{0,130,269},{254,35,2178},{0,130,269},{255,13,2665},{92,255,157},{104,213,0},{0,212,5},{255,13,2665},{254,137,2665},{0,212,5},{0,176,2665},{254,137,2665},{0,176,2665},{37,0,2665}, +{37,0,2665},{37,0,2665},{37,0,2665},{1,223,0},{1,223,0},{1,223,0},{1,110,0},{0,91,953},{0,91,953},{79,255,12086},{58,255,4502},{56,232,4162},{44,214,2818},{61,255,14298},{10,255,2910},{10,220,20},{0,173,4166},{0,208,13470},{0,166,6215},{113,255,4826},{89,255,465},{90,226,258},{78,208,474},{223,0,9669},{10,255,2909},{12,219,17},{0,173,4085},{255,98,9669},{0,173,4085},{55,255,3574},{55,255,3574},{55,255,3574}, +{46,197,2665},{27,255,2840},{11,218,17},{11,218,17},{4,140,258},{0,156,4142},{0,133,1430},{86,255,52},{86,255,52},{86,255,52},{82,180,0},{171,0,2178},{20,210,0},{20,210,0},{0,139,202},{255,46,2178},{0,139,202},{255,37,2665},{107,255,260},{112,221,0},{0,220,1},{255,37,2665},{254,149,2665},{0,220,1},{0,184,2665},{254,149,2665},{0,184,2665},{45,0,2665},{45,0,2665},{45,0,2665},{45,0,2665},{9,231,0}, 
+{9,231,0},{9,231,0},{9,118,0},{0,103,745},{0,103,745},{86,255,12542},{67,255,4983},{64,240,4162},{52,222,2818},{70,255,14719},{22,255,3118},{18,228,20},{5,181,4162},{0,221,12955},{0,176,5593},{125,255,5090},{101,255,713},{98,234,258},{84,218,481},{235,0,9669},{28,255,3073},{20,227,17},{0,182,3909},{255,110,9669},{0,182,3909},{64,255,3726},{64,255,3726},{64,255,3726},{54,205,2665},{36,255,2989},{19,226,17},{19,226,17}, +{12,148,258},{0,172,3797},{0,142,1062},{95,255,89},{95,255,89},{95,255,89},{90,188,0},{183,0,2178},{28,218,0},{28,218,0},{0,148,145},{255,58,2178},{0,148,145},{255,61,2665},{119,255,388},{120,229,0},{5,229,0},{255,61,2665},{254,161,2665},{5,229,0},{0,192,2665},{254,161,2665},{0,192,2665},{53,0,2665},{53,0,2665},{53,0,2665},{53,0,2665},{17,239,0},{17,239,0},{17,239,0},{17,126,0},{0,118,562}, +{0,118,562},{98,255,13154},{79,255,5593},{73,249,4162},{61,231,2818},{86,255,15194},{34,255,3462},{27,237,20},{14,190,4162},{0,236,12478},{0,188,4983},{135,255,5365},{113,255,1062},{107,243,258},{93,227,481},{248,0,9669},{40,255,3330},{29,236,17},{0,191,3726},{254,124,9669},{0,191,3726},{73,255,3909},{73,255,3909},{73,255,3909},{63,214,2665},{46,255,3156},{28,235,17},{28,235,17},{21,157,258},{0,184,3462},{0,154,713},{107,255,145}, +{107,255,145},{107,255,145},{99,197,0},{196,0,2178},{37,227,0},{37,227,0},{0,160,89},{254,72,2178},{0,160,89},{255,89,2665},{137,255,562},{129,238,0},{14,238,0},{255,89,2665},{254,174,2665},{14,238,0},{0,201,2665},{254,174,2665},{0,201,2665},{62,0,2665},{62,0,2665},{62,0,2665},{62,0,2665},{26,248,0},{26,248,0},{26,248,0},{26,135,0},{0,135,388},{0,135,388},{107,255,13718},{89,255,6215},{82,255,4166}, 
+{69,239,2818},{92,255,15614},{46,255,3885},{35,245,20},{22,198,4162},{0,248,12086},{0,197,4502},{144,255,5658},{122,255,1430},{115,251,258},{101,235,481},{255,10,9669},{61,255,3601},{37,244,17},{0,200,3574},{255,135,9669},{0,200,3574},{82,255,4085},{82,255,4085},{82,255,4085},{71,222,2665},{58,255,3332},{36,242,17},{36,242,17},{29,165,258},{0,196,3206},{0,163,465},{116,255,202},{116,255,202},{116,255,202},{107,205,0},{208,0,2178}, +{44,235,0},{44,235,0},{0,169,52},{254,84,2178},{0,169,52},{255,113,2665},{152,255,745},{137,246,0},{22,246,0},{255,113,2665},{254,186,2665},{22,246,0},{0,209,2665},{254,186,2665},{0,209,2665},{70,0,2665},{70,0,2665},{70,0,2665},{70,0,2665},{34,253,1},{34,253,1},{34,253,1},{34,143,0},{0,147,260},{0,147,260},{116,255,14302},{98,255,6886},{89,255,4255},{77,247,2818},{104,255,16094},{55,255,4382},{43,253,20}, +{30,206,4162},{0,254,11806},{0,206,4083},{153,255,5969},{134,255,1854},{125,255,270},{109,243,481},{255,34,9669},{76,255,3905},{45,252,17},{0,209,3434},{255,147,9669},{0,209,3434},{89,255,4254},{89,255,4254},{89,255,4254},{79,230,2665},{67,255,3525},{44,250,17},{44,250,17},{37,173,258},{0,211,2979},{0,176,259},{125,255,269},{125,255,269},{125,255,269},{115,213,0},{220,0,2178},{52,243,0},{52,243,0},{0,179,29},{255,95,2178}, +{0,179,29},{255,137,2665},{164,255,953},{145,254,0},{30,254,0},{255,137,2665},{254,198,2665},{30,254,0},{0,217,2665},{254,198,2665},{0,217,2665},{78,0,2665},{78,0,2665},{78,0,2665},{78,0,2665},{43,255,5},{43,255,5},{43,255,5},{42,150,0},{0,162,157},{0,162,157},{122,255,13635},{107,255,6898},{98,255,4450},{86,250,2739},{110,255,15195},{67,255,4071},{52,255,20},{39,212,3523},{1,255,10735},{0,212,3100},{162,255,5381}, 
+{143,255,1777},{132,255,337},{118,246,331},{255,52,8712},{82,255,3492},{59,254,2},{0,216,2734},{255,156,8712},{0,216,2734},{98,255,4450},{98,255,4450},{98,255,4450},{87,238,2665},{76,255,3736},{52,255,20},{52,255,20},{45,181,258},{0,224,2757},{0,185,117},{132,255,337},{132,255,337},{132,255,337},{123,221,0},{232,0,2178},{60,251,0},{60,251,0},{0,188,10},{255,107,2178},{0,188,10},{255,152,2178},{174,255,820},{156,255,0}, +{52,255,0},{255,152,2178},{255,205,2178},{52,255,0},{0,222,2178},{255,205,2178},{0,222,2178},{86,0,2665},{86,0,2665},{86,0,2665},{86,0,2665},{52,255,20},{52,255,20},{52,255,20},{50,158,0},{0,178,73},{0,178,73},{132,255,12678},{116,255,6826},{110,255,4685},{95,253,2678},{119,255,14061},{76,255,3663},{64,255,75},{49,216,2786},{10,255,9739},{0,219,2131},{172,255,4629},{152,255,1633},{144,255,433},{129,248,179},{255,70,7578}, +{101,255,2949},{73,255,5},{0,222,1954},{255,165,7578},{0,222,1954},{110,255,4685},{110,255,4685},{110,255,4685},{96,247,2665},{89,255,3960},{64,255,75},{64,255,75},{54,190,258},{0,239,2571},{0,197,26},{144,255,433},{144,255,433},{144,255,433},{132,230,0},{245,0,2178},{73,255,5},{73,255,5},{0,197,1},{254,121,2178},{0,197,1},{255,164,1625},{186,255,605},{170,255,0},{79,255,0},{255,164,1625},{255,211,1625},{79,255,0}, +{0,226,1625},{255,211,1625},{0,226,1625},{95,0,2665},{95,0,2665},{95,0,2665},{95,0,2665},{64,255,50},{64,255,50},{64,255,50},{59,167,0},{0,193,20},{0,193,20},{138,255,11970},{125,255,6797},{119,255,4909},{104,255,2665},{129,255,13086},{82,255,3411},{73,255,185},{57,221,2210},{22,255,8987},{0,225,1431},{178,255,4049},{161,255,1537},{153,255,530},{137,250,83},{255,86,6661},{113,255,2525},{86,255,40},{0,225,1382},{255,172,6661}, 
+{0,225,1382},{119,255,4909},{119,255,4909},{119,255,4909},{104,255,2665},{98,255,4197},{73,255,185},{73,255,185},{62,198,258},{0,251,2435},{1,206,2},{153,255,530},{153,255,530},{153,255,530},{140,238,0},{255,4,2178},{86,255,40},{86,255,40},{4,206,0},{255,132,2178},{4,206,0},{255,177,1201},{195,255,442},{181,255,0},{104,255,0},{255,177,1201},{254,217,1201},{104,255,0},{0,230,1201},{254,217,1201},{0,230,1201},{103,0,2665}, +{103,0,2665},{103,0,2665},{103,0,2665},{73,255,85},{73,255,85},{73,255,85},{67,175,0},{0,208,1},{0,208,1},{147,255,11330},{132,255,6786},{129,255,5126},{113,255,2694},{135,255,12250},{95,255,3225},{86,255,353},{67,225,1710},{37,255,8326},{0,231,915},{184,255,3541},{167,255,1469},{162,255,637},{146,253,24},{255,104,5829},{125,255,2165},{101,255,97},{0,231,914},{255,181,5829},{0,231,914},{129,255,5126},{129,255,5126},{129,255,5126}, +{113,255,2694},{107,255,4452},{86,255,353},{86,255,353},{72,205,259},{1,255,2421},{9,214,2},{162,255,637},{162,255,637},{162,255,637},{148,245,0},{255,28,2178},{101,255,97},{101,255,97},{12,214,0},{255,144,2178},{12,214,0},{255,189,841},{204,255,305},{193,255,0},{128,255,0},{255,189,841},{254,223,841},{128,255,0},{0,234,841},{254,223,841},{0,234,841},{111,0,2665},{111,0,2665},{111,0,2665},{111,0,2665},{82,255,130}, +{82,255,130},{82,255,130},{75,183,0},{6,217,0},{6,217,0},{153,255,10758},{141,255,6807},{135,255,5354},{122,255,2777},{144,255,11503},{104,255,3102},{95,255,569},{75,230,1282},{49,255,7825},{3,237,574},{190,255,3105},{177,255,1427},{172,255,740},{156,254,1},{255,119,5082},{137,255,1869},{116,255,180},{0,237,558},{254,189,5082},{0,237,558},{135,255,5354},{135,255,5354},{135,255,5354},{122,255,2777},{119,255,4724},{95,255,569},{95,255,569}, 
+{79,213,266},{16,255,2587},{16,222,1},{172,255,740},{172,255,740},{172,255,740},{156,253,0},{255,52,2178},{116,255,180},{116,255,180},{20,222,0},{255,156,2178},{20,222,0},{255,201,545},{216,255,193},{205,255,0},{152,255,0},{255,201,545},{254,229,545},{152,255,0},{0,238,545},{254,229,545},{0,238,545},{119,0,2665},{119,0,2665},{119,0,2665},{119,0,2665},{89,255,180},{89,255,180},{89,255,180},{83,191,0},{15,224,0}, +{15,224,0},{162,255,10197},{150,255,6861},{144,255,5645},{131,255,2933},{153,255,10765},{116,255,3051},{107,255,882},{84,236,891},{64,255,7297},{17,242,300},{199,255,2694},{186,255,1409},{181,255,881},{167,255,20},{255,137,4344},{152,255,1611},{131,255,305},{4,243,288},{254,198,4344},{4,243,288},{144,255,5645},{144,255,5645},{144,255,5645},{131,255,2933},{129,255,5005},{107,255,882},{107,255,882},{88,222,266},{34,255,2824},{25,231,1},{181,255,881}, +{181,255,881},{181,255,881},{167,255,20},{255,79,2178},{131,255,305},{131,255,305},{28,231,0},{253,170,2178},{28,231,0},{255,216,288},{225,255,97},{218,255,0},{180,255,0},{255,216,288},{255,236,288},{180,255,0},{0,243,288},{255,236,288},{0,243,288},{128,0,2665},{128,0,2665},{128,0,2665},{128,0,2665},{101,255,250},{101,255,250},{101,255,250},{92,200,0},{24,233,0},{24,233,0},{172,255,9731},{159,255,6942},{153,255,5913}, +{140,255,3130},{162,255,10204},{125,255,3090},{116,255,1218},{94,240,623},{79,255,6924},{27,246,132},{205,255,2402},{196,255,1419},{190,255,1018},{177,255,97},{255,152,3779},{164,255,1451},{146,255,442},{20,247,128},{255,205,3779},{20,247,128},{153,255,5913},{153,255,5913},{153,255,5913},{140,255,3130},{138,255,5304},{116,255,1218},{116,255,1218},{96,230,266},{49,255,3115},{33,239,1},{190,255,1018},{190,255,1018},{190,255,1018},{177,255,97},{255,104,2178}, 
+{146,255,442},{146,255,442},{36,239,0},{255,181,2178},{36,239,0},{255,228,128},{234,255,40},{230,255,0},{204,255,0},{255,228,128},{255,242,128},{204,255,0},{0,247,128},{255,242,128},{0,247,128},{136,0,2665},{136,0,2665},{136,0,2665},{136,0,2665},{110,255,325},{110,255,325},{110,255,325},{100,208,0},{32,241,0},{32,241,0},{175,255,9359},{165,255,7050},{162,255,6193},{149,255,3381},{172,255,9691},{134,255,3207},{128,255,1610}, +{102,245,429},{92,255,6719},{38,251,32},{215,255,2156},{205,255,1457},{199,255,1165},{186,255,221},{255,171,3299},{177,255,1398},{161,255,605},{36,251,32},{254,214,3299},{36,251,32},{162,255,6193},{162,255,6193},{162,255,6193},{149,255,3381},{147,255,5621},{128,255,1610},{128,255,1610},{104,238,266},{67,255,3419},{41,247,2},{199,255,1165},{199,255,1165},{199,255,1165},{186,255,221},{255,128,2178},{161,255,605},{161,255,605},{44,247,0},{255,193,2178}, +{44,247,0},{255,240,32},{246,255,8},{242,255,0},{228,255,0},{255,240,32},{255,248,32},{228,255,0},{0,251,32},{255,248,32},{0,251,32},{144,0,2665},{144,0,2665},{144,0,2665},{144,0,2665},{119,255,410},{119,255,410},{119,255,410},{108,216,0},{39,250,0},{39,250,0},{184,255,9067},{175,255,7162},{172,255,6450},{158,255,3686},{175,255,9268},{146,255,3399},{137,255,2054},{110,250,311},{107,255,6532},{49,255,2},{221,255,1992}, +{211,255,1521},{208,255,1322},{195,255,397},{255,186,2904},{189,255,1366},{174,255,820},{52,255,0},{255,221,2904},{52,255,0},{172,255,6450},{172,255,6450},{172,255,6450},{158,255,3686},{159,255,5949},{137,255,2054},{137,255,2054},{112,246,266},{82,255,3765},{49,255,2},{208,255,1322},{208,255,1322},{208,255,1322},{195,255,397},{255,152,2178},{174,255,820},{174,255,820},{52,255,0},{255,205,2178},{52,255,0},{255,252,0},{255,254,0},{254,255,0}, 
+{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{152,0,2665},{152,0,2665},{152,0,2665},{152,0,2665},{129,255,493},{129,255,493},{129,255,493},{116,224,0},{49,255,2},{49,255,2},{190,255,7987},{181,255,6402},{178,255,5810},{167,255,3509},{184,255,7999},{155,255,3048},{146,255,1922},{121,252,155},{119,255,5562},{64,255,29},{224,255,1476},{218,255,1120},{215,255,965},{204,255,292},{255,195,2166}, +{198,255,1009},{186,255,605},{79,255,0},{254,226,2166},{79,255,0},{178,255,5810},{178,255,5810},{178,255,5810},{167,255,3509},{165,255,5209},{146,255,1922},{146,255,1922},{121,249,133},{95,255,3258},{64,255,29},{215,255,965},{215,255,965},{215,255,965},{204,255,292},{255,164,1625},{186,255,605},{186,255,605},{79,255,0},{255,211,1625},{79,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0}, +{0,255,0},{255,254,0},{0,255,0},{161,0,2665},{161,0,2665},{161,0,2665},{161,0,2665},{138,255,610},{138,255,610},{138,255,610},{125,233,0},{64,255,29},{64,255,29},{196,255,7111},{187,255,5786},{184,255,5294},{174,255,3381},{187,255,6982},{161,255,2780},{155,255,1832},{131,253,61},{128,255,4795},{79,255,80},{230,255,1088},{224,255,824},{221,255,709},{210,255,212},{255,204,1601},{204,255,737},{195,255,442},{104,255,0},{255,230,1601}, +{104,255,0},{184,255,5294},{184,255,5294},{184,255,5294},{174,255,3381},{172,255,4582},{155,255,1832},{155,255,1832},{132,251,53},{107,255,2834},{79,255,80},{221,255,709},{221,255,709},{221,255,709},{210,255,212},{255,177,1201},{195,255,442},{195,255,442},{104,255,0},{254,217,1201},{104,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{169,0,2665}, 
+{169,0,2665},{169,0,2665},{169,0,2665},{147,255,725},{147,255,725},{147,255,725},{133,241,0},{79,255,80},{79,255,80},{202,255,6315},{193,255,5226},{190,255,4826},{180,255,3253},{193,255,6066},{167,255,2568},{164,255,1770},{141,254,11},{137,255,4122},{92,255,169},{233,255,753},{227,255,574},{227,255,493},{219,255,146},{255,210,1121},{213,255,507},{204,255,305},{128,255,0},{255,233,1121},{128,255,0},{190,255,4826},{190,255,4826},{190,255,4826}, +{180,255,3253},{181,255,4065},{164,255,1770},{164,255,1770},{140,253,9},{119,255,2474},{92,255,169},{227,255,493},{227,255,493},{227,255,493},{219,255,146},{255,189,841},{204,255,305},{204,255,305},{128,255,0},{254,223,841},{128,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{177,0,2665},{177,0,2665},{177,0,2665},{177,0,2665},{156,255,850}, +{156,255,850},{156,255,850},{141,249,0},{92,255,169},{92,255,169},{208,255,5599},{199,255,4722},{199,255,4398},{189,255,3130},{199,255,5254},{177,255,2395},{171,255,1742},{149,255,1},{149,255,3538},{107,255,274},{236,255,484},{233,255,366},{233,255,317},{225,255,90},{255,219,726},{222,255,321},{216,255,193},{152,255,0},{254,238,726},{152,255,0},{199,255,4398},{199,255,4398},{199,255,4398},{189,255,3130},{187,255,3613},{171,255,1742},{171,255,1742}, +{149,255,1},{131,255,2178},{107,255,274},{233,255,317},{233,255,317},{233,255,317},{225,255,90},{255,201,545},{216,255,193},{216,255,193},{152,255,0},{254,229,545},{152,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{185,0,2665},{185,0,2665},{185,0,2665},{185,0,2665},{165,255,985},{165,255,985},{165,255,985},{149,255,1},{107,255,274}, 
+{107,255,274},{43,255,50657},{1,255,1974},{0,182,128},{0,176,4572},{36,255,59540},{0,242,19268},{0,176,7306},{0,160,23941},{0,187,65535},{0,148,40590},{21,255,10267},{0,248,1412},{0,179,137},{0,148,3929},{115,0,18065},{0,153,12036},{0,139,6077},{0,93,14060},{234,0,18065},{0,93,14060},{0,143,0},{0,143,0},{0,143,0},{0,70,0},{0,70,1105},{0,57,405},{0,57,405},{0,34,653},{0,33,1209},{0,31,756},{0,143,0}, +{0,143,0},{0,143,0},{0,70,0},{35,0,1105},{0,57,405},{0,57,405},{0,34,653},{70,0,1105},{0,34,653},{171,0,9248},{0,248,1412},{0,179,137},{0,148,3929},{171,0,9248},{255,46,9248},{0,148,3929},{0,115,9248},{255,46,9248},{0,115,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{43,255,53600},{4,255,2885},{0,191,38}, +{0,185,3941},{43,255,62123},{0,254,18376},{0,182,6726},{0,169,23131},{0,196,65535},{0,154,40086},{27,255,10859},{0,254,1184},{0,188,45},{0,157,3656},{122,0,19334},{0,162,12449},{0,145,6089},{0,102,14754},{249,0,19334},{0,102,14754},{0,167,0},{0,167,0},{0,167,0},{0,82,0},{0,82,1513},{0,66,562},{0,66,562},{0,37,900},{0,36,1658},{0,37,1044},{0,167,0},{0,167,0},{0,167,0},{0,82,0},{41,0,1513}, +{0,66,562},{0,66,562},{0,37,900},{82,0,1513},{0,37,900},{183,0,9248},{0,254,1184},{0,188,45},{0,157,3656},{183,0,9248},{255,58,9248},{0,157,3656},{0,123,9248},{255,58,9248},{0,123,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{46,255,56765},{7,255,4404},{0,199,3},{0,194,3404},{43,255,64774},{1,255,17986},{0,191,6229}, +{0,176,22449},{0,215,65535},{0,160,39678},{33,255,11587},{1,255,1171},{0,199,4},{0,166,3393},{129,0,20689},{0,172,12834},{0,154,6125},{0,108,15490},{254,5,20689},{0,108,15490},{0,192,0},{0,192,0},{0,192,0},{0,94,0},{0,95,1985},{0,75,745},{0,75,745},{0,44,1202},{0,42,2178},{0,40,1374},{0,192,0},{0,192,0},{0,192,0},{0,94,0},{46,0,1985},{0,75,745},{0,75,745},{0,44,1202},{95,0,1985}, 
+{0,44,1202},{195,0,9248},{1,255,1170},{0,199,4},{0,166,3393},{195,0,9248},{255,70,9248},{0,166,3393},{0,131,9248},{255,70,9248},{0,131,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{49,255,58131},{7,255,6200},{0,207,6},{0,203,2858},{46,255,65535},{1,255,17772},{0,200,5638},{0,182,21148},{0,215,63337},{0,166,38166},{43,255,12449}, +{1,255,1395},{1,208,6},{0,176,3170},{138,0,22129},{0,184,13298},{0,163,6189},{0,114,16274},{254,14,22129},{0,114,16274},{0,216,0},{0,216,0},{0,216,0},{0,106,0},{0,107,2521},{0,88,928},{0,88,928},{0,50,1530},{0,48,2770},{0,47,1762},{0,216,0},{0,216,0},{0,216,0},{0,106,0},{52,0,2521},{0,88,928},{0,88,928},{0,50,1530},{107,0,2521},{0,50,1530},{207,0,9248},{7,255,1378},{5,207,0}, +{0,176,3170},{207,0,9248},{255,82,9248},{0,176,3170},{0,139,9248},{255,82,9248},{0,139,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{55,255,58853},{10,255,8410},{0,216,47},{0,212,2312},{52,255,65535},{4,255,18024},{0,209,5015},{0,191,19619},{0,224,60697},{0,176,36050},{46,255,13542},{7,255,1877},{4,217,53},{0,185,2897},{147,0,23851}, +{0,196,13856},{0,173,6281},{0,120,17216},{254,23,23851},{0,120,17216},{0,243,0},{0,243,0},{0,243,0},{0,119,0},{0,122,3200},{0,97,1186},{0,97,1186},{0,56,1945},{0,54,3521},{0,53,2243},{0,243,0},{0,243,0},{0,243,0},{0,119,0},{60,0,3200},{0,97,1186},{0,97,1186},{0,56,1945},{122,0,3200},{0,56,1945},{220,0,9248},{22,255,1693},{14,216,0},{0,185,2897},{220,0,9248},{255,95,9248},{0,185,2897}, +{0,148,9248},{255,95,9248},{0,148,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{58,255,59395},{13,255,10486},{0,225,107},{0,219,1933},{55,255,65535},{7,255,18658},{0,215,4619},{0,200,18436},{0,233,58566},{0,182,34489},{52,255,14690},{13,255,2489},{6,228,133},{0,194,2664},{155,0,25472},{0,208,14384},{0,182,6401},{0,126,18104},{255,30,25472}, 
+{0,126,18104},{0,255,16},{0,255,16},{0,255,16},{0,131,0},{0,134,3872},{0,106,1445},{0,106,1445},{0,62,2357},{0,60,4265},{0,56,2717},{2,254,13},{2,254,13},{2,254,13},{0,131,0},{66,0,3872},{0,106,1445},{0,106,1445},{0,62,2357},{134,0,3872},{0,62,2357},{232,0,9248},{37,255,2000},{22,224,0},{0,194,2664},{232,0,9248},{255,107,9248},{0,194,2664},{0,156,9248},{255,107,9248},{0,156,9248},{0,0,0}, +{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{61,255,59976},{16,255,12621},{2,233,174},{0,228,1613},{58,255,65535},{10,255,19459},{0,224,4093},{0,206,17181},{0,242,56514},{0,188,32973},{58,255,15746},{19,255,3205},{10,235,217},{0,206,2444},{164,0,26744},{0,218,14587},{0,191,6305},{0,136,18737},{255,39,26744},{0,136,18737},{3,255,110},{3,255,110},{3,255,110}, +{1,142,2},{0,149,4418},{0,118,1585},{0,118,1585},{0,68,2633},{0,66,4909},{0,62,3077},{6,255,80},{6,255,80},{6,255,80},{2,142,0},{73,0,4418},{0,118,1585},{0,118,1585},{0,68,2633},{149,0,4418},{0,68,2633},{244,0,9248},{49,255,2377},{30,232,0},{0,206,2440},{244,0,9248},{255,119,9248},{0,206,2440},{0,164,9248},{255,119,9248},{0,164,9248},{1,0,2},{1,0,2},{1,0,2},{1,0,2},{0,7,0}, +{0,7,0},{0,7,0},{0,3,0},{0,3,0},{0,3,0},{67,255,60796},{19,255,14846},{3,240,286},{0,234,1405},{64,255,65535},{13,255,20226},{0,233,3321},{0,212,15538},{0,254,54317},{0,197,31240},{67,255,16195},{31,255,3805},{18,243,217},{0,212,2328},{175,0,26744},{0,230,14051},{0,200,5525},{0,142,18273},{254,51,26744},{0,142,18273},{9,255,278},{9,255,278},{9,255,278},{5,152,50},{0,174,4418},{0,129,1313},{0,129,1313}, +{0,77,2410},{0,75,5092},{0,71,2987},{15,255,125},{15,255,125},{15,255,125},{10,150,0},{85,0,4418},{0,129,1313},{0,129,1313},{0,77,2410},{174,0,4418},{0,77,2410},{255,1,9248},{64,255,2738},{37,240,0},{0,212,2228},{255,1,9248},{254,131,9248},{0,212,2228},{0,172,9248},{254,131,9248},{0,172,9248},{5,0,50},{5,0,50},{5,0,50},{5,0,50},{0,31,0},{0,31,0},{0,31,0},{0,15,0},{0,12,13}, 
+{0,12,13},{70,255,61549},{25,255,17357},{4,250,459},{1,244,1256},{70,255,65535},{16,255,21346},{0,242,2603},{0,225,13862},{0,254,52221},{0,206,29474},{79,255,16720},{43,255,4545},{27,252,217},{7,223,2341},{189,0,26744},{0,245,13481},{0,212,4710},{0,154,17762},{255,64,26744},{0,154,17762},{15,255,602},{15,255,602},{15,255,602},{10,163,181},{0,201,4418},{0,147,1037},{0,147,1037},{0,83,2196},{0,88,5300},{0,80,2921},{27,255,185}, +{27,255,185},{27,255,185},{19,159,0},{98,0,4418},{0,147,1037},{0,147,1037},{0,83,2196},{201,0,4418},{0,83,2196},{255,28,9248},{82,255,3176},{46,249,0},{0,225,2020},{255,28,9248},{255,144,9248},{0,225,2020},{0,181,9248},{255,144,9248},{0,181,9248},{9,0,181},{9,0,181},{9,0,181},{9,0,181},{0,58,0},{0,58,0},{0,58,0},{0,29,0},{0,24,58},{0,24,58},{73,255,62082},{28,255,19099},{6,255,688}, +{2,251,1170},{73,255,65535},{19,255,22086},{0,248,2054},{0,231,12530},{0,254,51038},{0,212,28165},{86,255,16691},{52,255,4985},{36,255,250},{14,230,2238},{199,0,26259},{0,254,12692},{0,221,3965},{0,160,16946},{254,75,26259},{0,160,16946},{21,255,1006},{21,255,1006},{21,255,1006},{14,173,365},{0,225,4418},{0,162,820},{0,162,820},{0,96,1994},{0,100,5540},{0,90,2891},{36,255,250},{36,255,250},{36,255,250},{27,167,0},{110,0,4418}, +{0,162,820},{0,162,820},{0,96,1994},{225,0,4418},{0,96,1994},{255,49,8978},{95,255,3433},{55,255,0},{0,231,1732},{255,49,8978},{254,155,8978},{0,231,1732},{0,188,8978},{254,155,8978},{0,188,8978},{13,0,365},{13,0,365},{13,0,365},{13,0,365},{0,82,0},{0,82,0},{0,82,0},{0,41,0},{0,33,125},{0,33,125},{76,255,62399},{28,255,19454},{6,255,1075},{2,251,1090},{73,255,65535},{19,255,21590},{0,250,1503}, 
+{0,231,11251},{0,254,50424},{0,212,27242},{95,255,15410},{58,255,4661},{46,255,317},{24,231,1853},{208,0,24371},{0,254,11124},{0,227,2881},{0,166,15066},{254,84,24371},{0,166,15066},{27,255,1522},{27,255,1522},{27,255,1522},{17,183,613},{0,249,4418},{0,175,605},{0,175,605},{0,105,1801},{0,109,5805},{0,97,2885},{46,255,317},{46,255,317},{46,255,317},{35,175,0},{122,0,4418},{0,175,605},{0,175,605},{0,105,1801},{249,0,4418}, +{0,105,1801},{255,61,7938},{104,255,3026},{67,255,0},{0,237,1224},{255,61,7938},{254,161,7938},{0,237,1224},{0,192,7938},{254,161,7938},{0,192,7938},{17,0,613},{17,0,613},{17,0,613},{17,0,613},{0,107,0},{0,107,0},{0,107,0},{0,52,0},{0,42,218},{0,42,218},{76,255,62711},{28,255,19886},{9,255,1549},{3,252,1093},{73,255,65535},{19,255,21152},{0,251,1032},{0,234,10008},{0,254,49821},{0,212,26360},{101,255,14198}, +{67,255,4361},{55,255,400},{33,234,1502},{215,0,22568},{1,255,9795},{0,230,1973},{0,169,13320},{254,91,22568},{0,169,13320},{33,255,2150},{33,255,2150},{33,255,2150},{21,193,925},{3,255,4459},{0,190,442},{0,190,442},{0,114,1618},{0,121,6101},{0,106,2901},{55,255,400},{55,255,400},{55,255,400},{43,183,0},{134,0,4418},{0,190,442},{0,190,442},{0,114,1618},{255,9,4418},{0,114,1618},{255,73,6962},{113,255,2645},{79,255,0}, +{0,240,801},{255,73,6962},{254,167,6962},{0,240,801},{0,196,6962},{254,167,6962},{0,196,6962},{21,0,925},{21,0,925},{21,0,925},{21,0,925},{0,131,0},{0,131,0},{0,131,0},{0,64,0},{0,51,337},{0,51,337},{76,255,63078},{31,255,20439},{9,255,2192},{4,253,1202},{76,255,65535},{19,255,20732},{0,251,606},{0,234,8676},{0,254,49164},{0,212,25424},{110,255,12917},{79,255,4059},{64,255,505},{43,234,1147},{224,0,20642}, 
+{1,255,8589},{0,236,1155},{0,179,11489},{254,100,20642},{0,179,11489},{39,255,2986},{39,255,2986},{39,255,2986},{26,204,1352},{12,255,4660},{0,205,289},{0,205,289},{0,126,1424},{0,132,6467},{0,115,2955},{64,255,505},{64,255,505},{64,255,505},{52,192,0},{147,0,4418},{0,205,289},{0,205,289},{0,126,1424},{254,23,4418},{0,126,1424},{255,86,5941},{125,255,2248},{92,255,0},{0,243,433},{255,86,5941},{255,172,5941},{0,243,433}, +{0,200,5941},{255,172,5941},{0,200,5941},{26,0,1352},{26,0,1352},{26,0,1352},{26,0,1352},{0,158,0},{0,158,0},{0,158,0},{0,78,0},{0,63,500},{0,63,500},{79,255,63411},{31,255,21008},{12,255,2858},{7,253,1393},{76,255,65535},{22,255,20416},{0,253,320},{0,237,7549},{0,254,48613},{0,218,24643},{116,255,11849},{82,255,3845},{73,255,610},{53,238,867},{232,0,19021},{10,255,7741},{0,242,611},{0,182,9957},{255,107,19021}, +{0,182,9957},{43,255,3819},{43,255,3819},{43,255,3819},{30,214,1800},{15,255,4981},{0,221,169},{0,221,169},{0,133,1282},{0,141,6822},{0,124,3029},{73,255,610},{73,255,610},{73,255,610},{60,200,0},{159,0,4418},{0,221,169},{0,221,169},{0,133,1282},{254,35,4418},{0,133,1282},{255,98,5101},{134,255,1921},{104,255,0},{0,246,202},{255,98,5101},{255,178,5101},{0,246,202},{0,204,5101},{255,178,5101},{0,204,5101},{30,0,1800}, +{30,0,1800},{30,0,1800},{30,0,1800},{0,183,0},{0,183,0},{0,183,0},{0,89,0},{0,72,673},{0,72,673},{86,255,63733},{34,255,21637},{12,255,3614},{8,254,1668},{76,255,65535},{22,255,20164},{0,254,123},{0,237,6489},{0,254,48082},{0,218,23857},{122,255,10853},{95,255,3629},{86,255,724},{62,241,632},{241,0,17485},{22,255,6965},{0,248,243},{0,188,8529},{255,116,17485},{0,188,8529},{49,255,4787},{49,255,4787},{49,255,4787}, 
+{34,224,2312},{21,255,5437},{0,233,89},{0,233,89},{0,142,1129},{0,153,7206},{0,133,3131},{86,255,724},{86,255,724},{86,255,724},{68,207,0},{171,0,4418},{0,233,89},{0,233,89},{0,142,1129},{255,46,4418},{0,142,1129},{255,110,4325},{143,255,1620},{116,255,0},{0,252,58},{255,110,4325},{255,184,4325},{0,252,58},{0,208,4325},{255,184,4325},{0,208,4325},{34,0,2312},{34,0,2312},{34,0,2312},{34,0,2312},{0,207,0}, +{0,207,0},{0,207,0},{0,101,0},{0,88,865},{0,88,865},{86,255,63992},{34,255,22322},{15,255,4457},{8,255,2033},{76,255,65535},{22,255,19980},{0,254,23},{0,240,5494},{0,254,47573},{0,218,23129},{129,255,9866},{104,255,3441},{92,255,832},{71,244,435},{248,0,16034},{34,255,6253},{0,254,51},{0,194,7213},{254,124,16034},{0,194,7213},{55,255,5867},{55,255,5867},{55,255,5867},{38,234,2888},{27,255,6029},{0,248,34},{0,248,34}, +{0,154,985},{0,162,7619},{0,142,3261},{92,255,832},{92,255,832},{92,255,832},{76,215,0},{183,0,4418},{0,248,34},{0,248,34},{0,154,985},{255,58,4418},{0,154,985},{255,122,3613},{152,255,1345},{128,255,0},{0,255,1},{255,122,3613},{255,190,3613},{0,255,1},{0,212,3613},{255,190,3613},{0,212,3613},{38,0,2888},{38,0,2888},{38,0,2888},{38,0,2888},{0,231,0},{0,231,0},{0,231,0},{0,113,0},{0,91,1066}, +{0,91,1066},{86,255,64310},{37,255,23174},{15,255,5504},{10,255,2546},{79,255,65535},{25,255,19854},{1,255,23},{0,240,4466},{0,254,47048},{0,221,22366},{138,255,8897},{113,255,3261},{101,255,985},{80,247,258},{255,4,14504},{40,255,5561},{7,255,34},{0,200,5867},{255,132,14504},{0,200,5867},{61,255,7213},{61,255,7213},{61,255,7213},{42,245,3613},{33,255,6859},{1,255,51},{1,255,51},{0,163,832},{0,175,8059},{0,151,3441},{101,255,985}, 
+{101,255,985},{101,255,985},{85,224,0},{196,0,4418},{7,255,34},{7,255,34},{0,163,832},{254,72,4418},{0,163,832},{255,137,2888},{164,255,1066},{141,255,0},{22,255,0},{255,137,2888},{254,198,2888},{22,255,0},{0,217,2888},{254,198,2888},{0,217,2888},{42,0,3613},{42,0,3613},{42,0,3613},{42,0,3613},{0,255,1},{0,255,1},{0,255,1},{0,127,0},{0,103,1345},{0,103,1345},{86,255,64605},{37,255,23983},{18,255,6523}, +{10,255,3097},{86,255,65535},{25,255,19808},{1,255,124},{0,243,3633},{0,254,46617},{0,221,21751},{144,255,8113},{122,255,3131},{113,255,1129},{90,248,139},{255,19,13235},{58,255,5012},{22,255,89},{0,206,4787},{254,140,13235},{0,206,4787},{67,255,8529},{67,255,8529},{67,255,8529},{46,255,4325},{43,255,7725},{7,255,243},{7,255,243},{0,169,724},{0,184,8530},{0,160,3629},{113,255,1129},{113,255,1129},{113,255,1129},{93,232,0},{208,0,4418}, +{22,255,89},{22,255,89},{0,169,724},{254,84,4418},{0,169,724},{255,149,2312},{167,255,865},{153,255,0},{46,255,0},{255,149,2312},{254,204,2312},{46,255,0},{0,221,2312},{254,204,2312},{0,221,2312},{46,0,4325},{46,0,4325},{46,0,4325},{46,0,4325},{3,255,58},{3,255,58},{3,255,58},{0,138,0},{0,112,1620},{0,112,1620},{86,255,64960},{37,255,24878},{18,255,7621},{13,255,3738},{86,255,65535},{25,255,19851},{2,255,323}, +{0,243,2885},{0,254,46257},{0,224,21209},{153,255,7392},{131,255,3029},{122,255,1282},{99,251,56},{255,37,12051},{70,255,4500},{34,255,169},{0,212,3819},{254,149,12051},{0,212,3819},{70,255,9957},{70,255,9957},{70,255,9957},{52,255,5141},{46,255,8712},{13,255,611},{13,255,611},{0,182,610},{0,196,9026},{0,169,3845},{122,255,1282},{122,255,1282},{122,255,1282},{101,240,0},{220,0,4418},{34,255,169},{34,255,169},{0,182,610},{255,95,4418}, 
+{0,182,610},{255,161,1800},{183,255,673},{165,255,0},{70,255,0},{255,161,1800},{254,210,1800},{70,255,0},{0,225,1800},{254,210,1800},{0,225,1800},{50,0,5101},{50,0,5101},{50,0,5101},{50,0,5101},{9,255,202},{9,255,202},{9,255,202},{0,150,0},{0,121,1921},{0,121,1921},{86,255,65314},{43,255,25774},{21,255,8796},{13,255,4456},{86,255,65535},{25,255,19965},{4,255,614},{0,246,2222},{0,254,45929},{0,224,20720},{159,255,6740}, +{140,255,2955},{129,255,1424},{108,254,11},{255,52,10952},{82,255,4052},{49,255,289},{0,216,2986},{255,156,10952},{0,216,2986},{76,255,11489},{76,255,11489},{76,255,11489},{58,255,6109},{52,255,9860},{19,255,1155},{19,255,1155},{0,191,505},{0,208,9554},{0,176,4059},{129,255,1424},{129,255,1424},{129,255,1424},{109,248,0},{232,0,4418},{49,255,289},{49,255,289},{0,191,505},{255,107,4418},{0,191,505},{255,174,1352},{192,255,500},{177,255,0}, +{95,255,0},{255,174,1352},{255,215,1352},{95,255,0},{0,229,1352},{255,215,1352},{0,229,1352},{54,0,5941},{54,0,5941},{54,0,5941},{54,0,5941},{12,255,433},{12,255,433},{12,255,433},{0,162,0},{0,129,2248},{0,129,2248},{86,255,65535},{43,255,26766},{21,255,10162},{13,255,5358},{86,255,65359},{28,255,20101},{4,255,1047},{0,246,1573},{0,254,45474},{0,227,20192},{172,255,6085},{149,255,2901},{141,255,1618},{119,255,2},{255,70,9818}, +{95,255,3685},{64,255,442},{0,222,2150},{255,165,9818},{0,222,2150},{86,255,13320},{86,255,13320},{86,255,13320},{61,255,7370},{58,255,11310},{22,255,1973},{22,255,1973},{0,200,400},{0,218,10107},{0,188,4361},{141,255,1618},{141,255,1618},{141,255,1618},{119,255,2},{245,0,4418},{64,255,442},{64,255,442},{0,200,400},{254,121,4418},{0,200,400},{255,186,925},{204,255,337},{190,255,0},{122,255,0},{255,186,925},{255,221,925},{122,255,0}, 
+{0,233,925},{255,221,925},{0,233,925},{59,0,6962},{59,0,6962},{59,0,6962},{59,0,6962},{15,255,801},{15,255,801},{15,255,801},{0,175,0},{0,141,2645},{0,141,2645},{86,255,65535},{43,255,27616},{24,255,11405},{16,255,6203},{86,255,65014},{28,255,20233},{5,255,1524},{0,249,1090},{1,255,44974},{0,227,19721},{175,255,5534},{156,255,2885},{150,255,1801},{128,255,40},{255,86,8901},{107,255,3373},{79,255,605},{0,228,1522},{255,172,8901}, +{0,228,1522},{89,255,15066},{89,255,15066},{89,255,15066},{67,255,8646},{64,255,12746},{28,255,2881},{28,255,2881},{0,209,317},{0,230,10691},{0,194,4661},{150,255,1801},{150,255,1801},{150,255,1801},{128,255,40},{255,4,4418},{79,255,605},{79,255,605},{0,209,317},{255,132,4418},{0,209,317},{255,198,613},{213,255,218},{202,255,0},{146,255,0},{255,198,613},{255,227,613},{146,255,0},{0,237,613},{255,227,613},{0,237,613},{63,0,7938}, +{63,0,7938},{63,0,7938},{63,0,7938},{18,255,1224},{18,255,1224},{18,255,1224},{0,187,0},{0,150,3026},{0,150,3026},{89,255,65535},{43,255,28505},{24,255,12681},{16,255,7117},{86,255,64678},{28,255,20430},{7,255,2079},{0,249,697},{1,255,44506},{0,227,19330},{181,255,5094},{165,255,2891},{159,255,1994},{137,255,130},{255,104,8069},{119,255,3125},{92,255,820},{0,234,1006},{255,181,8069},{0,234,1006},{95,255,16946},{95,255,16946},{95,255,16946}, +{73,255,10074},{67,255,14315},{34,255,3965},{34,255,3965},{0,219,250},{0,239,11302},{0,203,4985},{159,255,1994},{159,255,1994},{159,255,1994},{137,255,130},{255,28,4418},{92,255,820},{92,255,820},{0,219,250},{255,144,4418},{0,219,250},{255,210,365},{222,255,125},{214,255,0},{171,255,0},{255,210,365},{255,233,365},{171,255,0},{0,241,365},{255,233,365},{0,241,365},{67,0,8978},{67,0,8978},{67,0,8978},{67,0,8978},{24,255,1732}, 
+{24,255,1732},{24,255,1732},{0,199,0},{0,159,3433},{0,159,3433},{92,255,65535},{49,255,29231},{30,255,13748},{22,255,7832},{89,255,64474},{34,255,20616},{13,255,2581},{5,251,455},{1,255,43254},{0,230,17214},{187,255,4726},{175,255,2921},{172,255,2196},{146,255,272},{255,119,7322},{131,255,2941},{107,255,1037},{0,240,602},{254,189,7322},{0,240,602},{101,255,17762},{101,255,17762},{101,255,17762},{79,255,10742},{76,255,15150},{43,255,4710},{43,255,4710}, +{3,228,217},{0,251,10994},{0,212,4545},{172,255,2196},{172,255,2196},{172,255,2196},{146,255,272},{255,52,4418},{107,255,1037},{107,255,1037},{0,228,185},{255,156,4418},{0,228,185},{255,222,181},{231,255,58},{226,255,0},{195,255,0},{255,222,181},{255,239,181},{195,255,0},{0,245,181},{255,239,181},{0,245,181},{74,0,9248},{74,0,9248},{74,0,9248},{74,0,9248},{30,255,2020},{30,255,2020},{30,255,2020},{6,208,0},{0,172,3176}, +{0,172,3176},{98,255,65535},{58,255,30030},{43,255,14936},{31,255,8624},{95,255,64295},{43,255,20818},{22,255,3192},{15,252,275},{1,255,41806},{0,236,14271},{196,255,4387},{184,255,2987},{178,255,2410},{158,255,490},{255,137,6584},{146,255,2811},{125,255,1313},{0,246,278},{254,198,6584},{0,246,278},{113,255,18273},{113,255,18273},{113,255,18273},{89,255,11256},{89,255,15726},{55,255,5525},{55,255,5525},{12,237,217},{0,254,10500},{0,224,3805},{178,255,2410}, +{178,255,2410},{178,255,2410},{158,255,490},{255,79,4418},{125,255,1313},{125,255,1313},{0,240,125},{253,170,4418},{0,240,125},{255,237,50},{243,255,13},{239,255,0},{222,255,0},{255,237,50},{254,247,50},{222,255,0},{0,250,50},{254,247,50},{0,250,50},{83,0,9248},{83,0,9248},{83,0,9248},{83,0,9248},{43,255,2228},{43,255,2228},{43,255,2228},{15,217,0},{0,190,2738},{0,190,2738},{104,255,65535},{67,255,30820},{49,255,16060}, 
+{40,255,9410},{101,255,64140},{52,255,21086},{31,255,3826},{22,253,162},{1,255,40863},{0,239,11797},{202,255,4163},{193,255,3077},{187,255,2633},{167,255,740},{255,152,6019},{158,255,2763},{137,255,1585},{0,252,110},{255,205,6019},{0,252,110},{119,255,18737},{119,255,18737},{119,255,18737},{98,255,11747},{98,255,16315},{64,255,6305},{64,255,6305},{20,245,217},{1,255,10451},{0,236,3205},{187,255,2633},{187,255,2633},{187,255,2633},{167,255,740},{255,104,4418}, +{137,255,1585},{137,255,1585},{0,249,80},{255,181,4418},{0,249,80},{255,249,2},{252,255,0},{251,255,0},{246,255,0},{255,249,2},{254,253,2},{246,255,0},{0,254,2},{254,253,2},{0,254,2},{91,0,9248},{91,0,9248},{91,0,9248},{91,0,9248},{49,255,2440},{49,255,2440},{49,255,2440},{23,225,0},{0,205,2377},{0,205,2377},{110,255,65535},{73,255,31223},{55,255,16690},{49,255,9985},{107,255,63957},{58,255,21217},{38,255,4182}, +{30,255,97},{1,255,40169},{0,242,9493},{208,255,3603},{199,255,2717},{193,255,2357},{174,255,725},{255,164,5163},{167,255,2409},{149,255,1445},{0,255,16},{255,211,5163},{0,255,16},{129,255,18104},{129,255,18104},{129,255,18104},{107,255,11680},{107,255,15698},{73,255,6401},{73,255,6401},{27,249,133},{1,255,9723},{0,242,2489},{193,255,2357},{193,255,2357},{193,255,2357},{174,255,725},{255,119,3872},{149,255,1445},{149,255,1445},{0,252,13},{254,189,3872}, +{0,252,13},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{99,0,9248},{99,0,9248},{99,0,9248},{99,0,9248},{61,255,2664},{61,255,2664},{61,255,2664},{31,233,0},{0,218,2000},{0,218,2000},{116,255,65535},{79,255,31523},{64,255,17156},{55,255,10522},{113,255,63756},{67,255,21315},{46,255,4385},{38,254,41},{4,255,39799},{0,245,7354},{215,255,2952}, 
+{202,255,2243},{199,255,1945},{183,255,593},{255,171,4267},{167,255,2009},{158,255,1186},{10,255,0},{254,214,4267},{10,255,0},{135,255,17216},{135,255,17216},{135,255,17216},{116,255,11435},{113,255,14726},{82,255,6281},{82,255,6281},{38,251,53},{1,255,8923},{0,248,1877},{199,255,1945},{199,255,1945},{199,255,1945},{183,255,593},{255,131,3200},{158,255,1186},{158,255,1186},{10,255,0},{254,195,3200},{10,255,0},{255,252,0},{255,254,0},{254,255,0}, +{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{107,0,9248},{107,0,9248},{107,0,9248},{107,0,9248},{70,255,2897},{70,255,2897},{70,255,2897},{39,241,0},{0,233,1693},{0,233,1693},{122,255,65535},{89,255,32024},{73,255,17745},{64,255,11181},{119,255,63505},{73,255,21450},{55,255,4731},{48,255,5},{13,255,39517},{0,248,5202},{218,255,2308},{208,255,1762},{205,255,1530},{192,255,464},{255,180,3361}, +{183,255,1587},{167,255,928},{37,255,0},{255,218,3361},{37,255,0},{141,255,16274},{141,255,16274},{141,255,16274},{122,255,11169},{122,255,13721},{92,255,6189},{92,255,6189},{47,254,6},{13,255,8241},{0,254,1395},{205,255,1530},{205,255,1530},{205,255,1530},{192,255,464},{255,143,2521},{167,255,928},{167,255,928},{37,255,0},{254,201,2521},{37,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0}, +{0,255,0},{255,254,0},{0,255,0},{116,0,9248},{116,0,9248},{116,0,9248},{116,0,9248},{79,255,3170},{79,255,3170},{79,255,3170},{48,250,0},{0,248,1378},{0,248,1378},{129,255,65535},{95,255,32533},{79,255,18407},{73,255,11877},{129,255,63435},{82,255,21690},{64,255,5107},{56,255,2},{25,255,39436},{0,248,3611},{221,255,1809},{215,255,1374},{211,255,1202},{198,255,360},{255,189,2646},{192,255,1241},{180,255,745},{61,255,0},{254,223,2646}, 
+{61,255,0},{147,255,15490},{147,255,15490},{147,255,15490},{131,255,10946},{129,255,12826},{101,255,6125},{101,255,6125},{56,255,4},{25,255,7705},{0,254,1171},{211,255,1202},{211,255,1202},{211,255,1202},{198,255,360},{255,155,1985},{180,255,745},{180,255,745},{61,255,0},{254,207,1985},{61,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{124,0,9248}, +{124,0,9248},{124,0,9248},{124,0,9248},{89,255,3393},{89,255,3393},{89,255,3393},{56,255,4},{0,254,1170},{0,254,1170},{132,255,65535},{101,255,32957},{86,255,19017},{82,255,12610},{129,255,62977},{89,255,22061},{73,255,5530},{64,255,31},{34,255,39178},{0,251,2372},{224,255,1376},{218,255,1044},{218,255,900},{204,255,272},{255,198,2017},{198,255,937},{189,255,562},{86,255,0},{255,227,2017},{86,255,0},{153,255,14754},{153,255,14754},{153,255,14754}, +{137,255,10742},{135,255,12066},{107,255,6089},{107,255,6089},{67,255,45},{37,255,7233},{1,255,1184},{218,255,900},{218,255,900},{218,255,900},{204,255,272},{255,167,1513},{189,255,562},{189,255,562},{86,255,0},{253,213,1513},{86,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{132,0,9248},{132,0,9248},{132,0,9248},{132,0,9248},{98,255,3656}, +{98,255,3656},{98,255,3656},{67,255,45},{1,255,1184},{1,255,1184},{138,255,65535},{107,255,33448},{95,255,19729},{89,255,13446},{135,255,62717},{95,255,22307},{79,255,6021},{73,255,105},{40,255,38959},{0,254,1627},{230,255,996},{224,255,756},{221,255,653},{213,255,194},{255,204,1473},{207,255,675},{198,255,405},{110,255,0},{255,230,1473},{110,255,0},{162,255,14060},{162,255,14060},{162,255,14060},{146,255,10545},{141,255,11378},{116,255,6077},{116,255,6077}, 
+{76,255,137},{40,255,6873},{7,255,1412},{221,255,653},{221,255,653},{221,255,653},{213,255,194},{255,180,1105},{198,255,405},{198,255,405},{110,255,0},{255,218,1105},{110,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{140,0,9248},{140,0,9248},{140,0,9248},{140,0,9248},{107,255,3929},{107,255,3929},{107,255,3929},{76,255,137},{7,255,1412}, +{7,255,1412}, diff --git a/thirdparty/basisu/transcoder/basisu_transcoder_tables_atc_55.inc b/thirdparty/basisu/transcoder/basisu_transcoder_tables_atc_55.inc new file mode 100644 index 000000000..7acedd6a6 --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_transcoder_tables_atc_55.inc @@ -0,0 +1,481 @@ +{0,2,20},{0,1,10},{0,1,1},{0,1,9},{0,1,35},{0,1,27},{0,1,18},{0,1,61},{0,1,52},{0,0,68},{0,2,20},{0,1,10},{0,1,1},{0,1,9},{0,1,35},{0,1,27},{0,1,18},{0,1,61},{1,0,35},{0,1,61},{0,1,1},{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,1,1}, +{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{1,0,18},{0,1,10},{0,1,1},{0,1,9},{1,0,18},{0,1,18},{0,1,9},{0,1,36},{0,1,18},{0,1,36},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,4,56},{0,3,38},{0,2,52}, +{0,2,36},{0,4,56},{0,3,35},{0,2,0},{0,2,52},{0,2,88},{0,1,78},{1,3,24},{1,2,14},{1,2,5},{1,2,13},{1,2,51},{0,3,35},{0,2,0},{0,2,52},{2,1,51},{0,2,52},{0,3,37},{0,3,37},{0,3,37},{0,2,36},{0,3,10},{0,2,0},{0,2,0},{0,1,5},{0,1,35},{0,1,14},{1,2,5},{1,2,5},{1,2,5},{1,1,8},{1,1,8}, +{0,2,0},{0,2,0},{0,1,5},{1,1,8},{0,1,5},{2,1,18},{0,3,2},{1,2,1},{0,2,0},{2,1,18},{1,2,18},{0,2,0},{0,2,36},{1,2,18},{0,2,36},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,2,0},{0,2,0},{0,2,0},{0,1,1},{0,1,10},{0,1,10},{1,5,56},{1,4,38},{1,3,52},{1,3,36},{1,5,56},{1,4,35},{1,3,0}, 
+{1,3,52},{0,4,72},{0,3,38},{2,4,24},{2,3,14},{2,3,5},{2,3,13},{2,3,51},{0,4,24},{1,3,0},{0,3,37},{3,2,51},{0,3,37},{1,4,37},{1,4,37},{1,4,37},{1,3,36},{1,4,10},{1,3,0},{1,3,0},{1,2,5},{0,3,11},{1,2,14},{2,3,5},{2,3,5},{2,3,5},{2,2,8},{2,2,8},{1,3,0},{1,3,0},{1,2,5},{2,2,8}, +{1,2,5},{3,2,18},{1,4,2},{2,3,1},{1,3,0},{3,2,18},{7,0,18},{1,3,0},{0,3,36},{7,0,18},{0,3,36},{1,0,36},{1,0,36},{1,0,36},{1,0,36},{1,3,0},{1,3,0},{1,3,0},{1,2,1},{0,3,2},{0,3,2},{2,6,56},{2,5,38},{2,4,53},{2,4,37},{2,6,56},{2,5,35},{2,4,1},{2,4,66},{0,5,60},{1,4,52},{3,5,24}, +{3,4,14},{3,4,6},{3,4,14},{3,4,51},{1,5,24},{2,4,1},{1,4,51},{9,0,51},{1,4,51},{2,5,37},{2,5,37},{2,5,37},{2,4,36},{2,5,10},{2,4,0},{2,4,0},{2,3,5},{1,4,11},{2,3,14},{3,4,5},{3,4,5},{3,4,5},{3,3,8},{3,3,8},{2,4,0},{2,4,0},{2,3,5},{8,0,8},{2,3,5},{4,3,18},{2,5,2},{3,4,2}, +{2,4,1},{4,3,18},{8,1,18},{2,4,1},{0,4,50},{8,1,18},{0,4,50},{2,0,36},{2,0,36},{2,0,36},{2,0,36},{2,4,0},{2,4,0},{2,4,0},{2,3,1},{1,4,2},{1,4,2},{3,8,70},{3,6,58},{4,5,69},{3,5,51},{3,7,52},{3,6,25},{3,5,3},{3,5,46},{2,6,60},{2,5,36},{4,6,24},{4,5,14},{4,5,5},{4,5,13},{6,2,51}, +{3,6,24},{3,5,2},{2,5,36},{7,3,51},{2,5,36},{3,7,51},{3,7,51},{3,7,51},{3,5,51},{3,6,9},{3,5,3},{3,5,3},{3,4,9},{1,6,12},{3,4,12},{4,5,5},{4,5,5},{4,5,5},{4,4,8},{6,1,8},{3,5,2},{3,5,2},{3,4,8},{11,0,8},{3,4,8},{7,1,18},{3,6,8},{4,5,1},{3,5,1},{7,1,18},{11,1,18},{3,5,1}, +{0,5,36},{11,1,18},{0,5,36},{3,0,50},{3,0,50},{3,0,50},{3,0,50},{3,5,2},{3,5,2},{3,5,2},{3,4,5},{2,5,0},{2,5,0},{4,8,56},{4,7,38},{4,6,52},{4,6,36},{4,8,56},{4,7,35},{4,6,0},{4,6,52},{3,7,60},{3,6,36},{5,7,24},{5,6,14},{5,6,5},{5,6,13},{8,1,51},{3,7,35},{4,6,0},{3,6,36},{6,5,51}, +{3,6,36},{4,7,37},{4,7,37},{4,7,37},{4,6,36},{4,7,10},{4,6,0},{4,6,0},{4,5,5},{2,7,12},{4,5,14},{5,6,5},{5,6,5},{5,6,5},{5,5,8},{8,0,8},{4,6,0},{4,6,0},{4,5,5},{5,5,8},{4,5,5},{9,0,18},{4,7,2},{5,6,1},{4,6,0},{9,0,18},{15,0,18},{4,6,0},{0,6,36},{15,0,18},{0,6,36},{4,0,36}, 
+{4,0,36},{4,0,36},{4,0,36},{4,6,0},{4,6,0},{4,6,0},{4,5,1},{3,6,0},{3,6,0},{5,9,56},{5,8,38},{5,7,52},{5,7,36},{5,9,56},{5,8,35},{5,7,0},{5,7,52},{3,8,63},{4,7,38},{6,8,24},{6,7,14},{6,7,5},{6,7,13},{9,2,51},{4,8,24},{5,7,0},{4,7,37},{17,0,51},{4,7,37},{5,8,37},{5,8,37},{5,8,37}, +{5,7,36},{5,8,10},{5,7,0},{5,7,0},{5,6,5},{4,7,11},{5,6,14},{6,7,5},{6,7,5},{6,7,5},{6,6,8},{9,1,8},{5,7,0},{5,7,0},{5,6,5},{16,0,8},{5,6,5},{10,1,18},{5,8,2},{6,7,1},{5,7,0},{10,1,18},{16,1,18},{5,7,0},{0,7,36},{16,1,18},{0,7,36},{5,0,36},{5,0,36},{5,0,36},{5,0,36},{5,7,0}, +{5,7,0},{5,7,0},{5,6,1},{4,7,2},{4,7,2},{6,10,56},{6,9,38},{6,8,53},{6,8,37},{6,10,56},{6,9,35},{6,8,1},{6,8,66},{4,9,60},{5,8,52},{7,9,24},{7,8,14},{7,8,6},{7,8,14},{10,3,51},{5,9,24},{6,8,1},{5,8,51},{18,1,51},{5,8,51},{6,9,37},{6,9,37},{6,9,37},{6,8,36},{6,9,10},{6,8,0},{6,8,0}, +{6,7,5},{5,8,11},{6,7,14},{7,8,5},{7,8,5},{7,8,5},{7,7,8},{10,2,8},{6,8,0},{6,8,0},{6,7,5},{17,1,8},{6,7,5},{11,2,18},{6,9,2},{7,8,2},{6,8,1},{11,2,18},{17,2,18},{6,8,1},{0,8,50},{17,2,18},{0,8,50},{6,0,36},{6,0,36},{6,0,36},{6,0,36},{6,8,0},{6,8,0},{6,8,0},{6,7,1},{5,8,2}, +{5,8,2},{7,12,70},{7,10,58},{8,9,69},{7,9,51},{7,11,52},{7,10,25},{7,9,3},{7,9,46},{6,10,60},{6,9,36},{8,10,24},{8,9,14},{8,9,5},{8,9,13},{13,1,51},{7,10,24},{7,9,2},{6,9,36},{21,1,51},{6,9,36},{7,11,51},{7,11,51},{7,11,51},{7,9,51},{7,10,9},{7,9,3},{7,9,3},{7,8,9},{5,10,12},{7,8,12},{8,9,5}, +{8,9,5},{8,9,5},{8,8,8},{13,0,8},{7,9,2},{7,9,2},{7,8,8},{20,1,8},{7,8,8},{14,0,18},{7,10,8},{8,9,1},{7,9,1},{14,0,18},{20,2,18},{7,9,1},{0,9,36},{20,2,18},{0,9,36},{7,0,50},{7,0,50},{7,0,50},{7,0,50},{7,9,2},{7,9,2},{7,9,2},{7,8,5},{6,9,0},{6,9,0},{8,12,56},{8,11,38},{8,10,52}, 
+{8,10,36},{8,12,56},{8,11,35},{8,10,0},{8,10,52},{7,11,60},{7,10,36},{9,11,24},{9,10,14},{9,10,5},{9,10,13},{14,2,51},{7,11,35},{8,10,0},{7,10,36},{25,0,51},{7,10,36},{8,11,37},{8,11,37},{8,11,37},{8,10,36},{8,11,10},{8,10,0},{8,10,0},{8,9,5},{6,11,12},{8,9,14},{9,10,5},{9,10,5},{9,10,5},{9,9,8},{14,1,8}, +{8,10,0},{8,10,0},{8,9,5},{24,0,8},{8,9,5},{15,1,18},{8,11,2},{9,10,1},{8,10,0},{15,1,18},{24,1,18},{8,10,0},{0,10,36},{24,1,18},{0,10,36},{8,0,36},{8,0,36},{8,0,36},{8,0,36},{8,10,0},{8,10,0},{8,10,0},{8,9,1},{7,10,0},{7,10,0},{9,13,56},{9,12,38},{9,11,52},{9,11,36},{9,13,56},{9,12,35},{9,11,0}, +{9,11,52},{7,12,63},{8,11,38},{10,12,24},{10,11,14},{10,11,5},{10,11,13},{16,1,51},{8,12,24},{9,11,0},{8,11,37},{26,1,51},{8,11,37},{9,12,37},{9,12,37},{9,12,37},{9,11,36},{9,12,10},{9,11,0},{9,11,0},{9,10,5},{8,11,11},{9,10,14},{10,11,5},{10,11,5},{10,11,5},{10,10,8},{16,0,8},{9,11,0},{9,11,0},{9,10,5},{25,1,8}, +{9,10,5},{17,0,18},{9,12,2},{10,11,1},{9,11,0},{17,0,18},{25,2,18},{9,11,0},{0,11,36},{25,2,18},{0,11,36},{9,0,36},{9,0,36},{9,0,36},{9,0,36},{9,11,0},{9,11,0},{9,11,0},{9,10,1},{8,11,2},{8,11,2},{10,14,56},{10,13,38},{10,12,53},{10,12,37},{10,14,56},{10,13,35},{10,12,1},{10,12,66},{8,13,60},{9,12,52},{11,13,24}, +{11,12,14},{11,12,6},{11,12,14},{17,2,51},{9,13,24},{10,12,1},{9,12,51},{27,2,51},{9,12,51},{10,13,37},{10,13,37},{10,13,37},{10,12,36},{10,13,10},{10,12,0},{10,12,0},{10,11,5},{9,12,11},{10,11,14},{11,12,5},{11,12,5},{11,12,5},{11,11,8},{17,1,8},{10,12,0},{10,12,0},{10,11,5},{26,2,8},{10,11,5},{18,1,18},{10,13,2},{11,12,2}, +{10,12,1},{18,1,18},{31,0,18},{10,12,1},{0,12,50},{31,0,18},{0,12,50},{10,0,36},{10,0,36},{10,0,36},{10,0,36},{10,12,0},{10,12,0},{10,12,0},{10,11,1},{9,12,2},{9,12,2},{11,16,70},{11,14,58},{12,13,69},{11,13,51},{11,15,52},{11,14,25},{11,13,3},{11,13,46},{10,14,60},{10,13,36},{12,14,24},{12,13,14},{12,13,5},{12,13,13},{17,5,51}, 
+{11,14,24},{11,13,2},{10,13,36},{30,2,51},{10,13,36},{11,15,51},{11,15,51},{11,15,51},{11,13,51},{11,14,9},{11,13,3},{11,13,3},{11,12,9},{9,14,12},{11,12,12},{12,13,5},{12,13,5},{12,13,5},{12,12,8},{17,4,8},{11,13,2},{11,13,2},{11,12,8},{29,2,8},{11,12,8},{18,4,18},{11,14,8},{12,13,1},{11,13,1},{18,4,18},{29,3,18},{11,13,1}, +{0,13,36},{29,3,18},{0,13,36},{11,0,50},{11,0,50},{11,0,50},{11,0,50},{11,13,2},{11,13,2},{11,13,2},{11,12,5},{10,13,0},{10,13,0},{12,16,56},{12,15,38},{12,14,52},{12,14,36},{12,16,56},{12,15,35},{12,14,0},{12,14,52},{11,15,60},{11,14,36},{13,15,24},{13,14,14},{13,14,5},{13,14,13},{18,6,51},{11,15,35},{12,14,0},{11,14,36},{31,3,51}, +{11,14,36},{12,15,37},{12,15,37},{12,15,37},{12,14,36},{12,15,10},{12,14,0},{12,14,0},{12,13,5},{10,15,12},{12,13,14},{13,14,5},{13,14,5},{13,14,5},{13,13,8},{18,5,8},{12,14,0},{12,14,0},{12,13,5},{30,3,8},{12,13,5},{20,3,18},{12,15,2},{13,14,1},{12,14,0},{20,3,18},{28,5,18},{12,14,0},{0,14,36},{28,5,18},{0,14,36},{12,0,36}, +{12,0,36},{12,0,36},{12,0,36},{12,14,0},{12,14,0},{12,14,0},{12,13,1},{11,14,0},{11,14,0},{13,17,56},{13,16,38},{13,15,52},{13,15,36},{13,17,56},{13,16,35},{13,15,0},{13,15,52},{11,16,63},{12,15,38},{14,16,24},{14,15,14},{14,15,5},{14,15,13},{23,0,51},{12,16,24},{13,15,0},{12,15,37},{30,5,51},{12,15,37},{13,16,37},{13,16,37},{13,16,37}, +{13,15,36},{13,16,10},{13,15,0},{13,15,0},{13,14,5},{12,15,11},{13,14,14},{14,15,5},{14,15,5},{14,15,5},{14,14,8},{20,4,8},{13,15,0},{13,15,0},{13,14,5},{29,5,8},{13,14,5},{21,4,18},{13,16,2},{14,15,1},{13,15,0},{21,4,18},{29,6,18},{13,15,0},{0,15,36},{29,6,18},{0,15,36},{13,0,36},{13,0,36},{13,0,36},{13,0,36},{13,15,0}, +{13,15,0},{13,15,0},{13,14,1},{12,15,2},{12,15,2},{14,18,56},{14,17,38},{14,16,53},{14,16,37},{14,18,56},{14,17,35},{14,16,1},{14,16,66},{12,17,60},{13,16,52},{15,17,24},{15,16,14},{15,16,6},{15,16,14},{24,1,51},{13,17,24},{14,16,1},{13,16,51},{31,6,51},{13,16,51},{14,17,37},{14,17,37},{14,17,37},{14,16,36},{14,17,10},{14,16,0},{14,16,0}, 
+{14,15,5},{13,16,11},{14,15,14},{15,16,5},{15,16,5},{15,16,5},{15,15,8},{24,0,8},{14,16,0},{14,16,0},{14,15,5},{30,6,8},{14,15,5},{25,0,18},{14,17,2},{15,16,2},{14,16,1},{25,0,18},{30,7,18},{14,16,1},{0,16,50},{30,7,18},{0,16,50},{14,0,36},{14,0,36},{14,0,36},{14,0,36},{14,16,0},{14,16,0},{14,16,0},{14,15,1},{13,16,2}, +{13,16,2},{15,20,70},{15,18,58},{16,17,69},{15,17,51},{15,19,52},{15,18,25},{15,17,3},{15,17,46},{14,18,60},{14,17,36},{16,18,24},{16,17,14},{16,17,5},{16,17,13},{21,9,51},{15,18,24},{15,17,2},{14,17,36},{29,9,51},{14,17,36},{15,19,51},{15,19,51},{15,19,51},{15,17,51},{15,18,9},{15,17,3},{15,17,3},{15,16,9},{13,18,12},{15,16,12},{16,17,5}, +{16,17,5},{16,17,5},{16,16,8},{24,3,8},{15,17,2},{15,17,2},{15,16,8},{28,9,8},{15,16,8},{25,3,18},{15,18,8},{16,17,1},{15,17,1},{25,3,18},{28,10,18},{15,17,1},{0,17,36},{28,10,18},{0,17,36},{15,0,50},{15,0,50},{15,0,50},{15,0,50},{15,17,2},{15,17,2},{15,17,2},{15,16,5},{14,17,0},{14,17,0},{16,20,56},{16,19,38},{16,18,52}, +{16,18,36},{16,20,56},{16,19,35},{16,18,0},{16,18,52},{15,19,60},{15,18,36},{17,19,24},{17,18,14},{17,18,5},{17,18,13},{22,10,51},{15,19,35},{16,18,0},{15,18,36},{30,10,51},{15,18,36},{16,19,37},{16,19,37},{16,19,37},{16,18,36},{16,19,10},{16,18,0},{16,18,0},{16,17,5},{14,19,12},{16,17,14},{17,18,5},{17,18,5},{17,18,5},{17,17,8},{22,9,8}, +{16,18,0},{16,18,0},{16,17,5},{29,10,8},{16,17,5},{24,7,18},{16,19,2},{17,18,1},{16,18,0},{24,7,18},{29,11,18},{16,18,0},{0,18,36},{29,11,18},{0,18,36},{16,0,36},{16,0,36},{16,0,36},{16,0,36},{16,18,0},{16,18,0},{16,18,0},{16,17,1},{15,18,0},{15,18,0},{17,21,56},{17,20,38},{17,19,52},{17,19,36},{17,21,56},{17,20,35},{17,19,0}, +{17,19,52},{15,20,63},{16,19,38},{18,20,24},{18,19,14},{18,19,5},{18,19,13},{27,4,51},{16,20,24},{17,19,0},{16,19,37},{31,11,51},{16,19,37},{17,20,37},{17,20,37},{17,20,37},{17,19,36},{17,20,10},{17,19,0},{17,19,0},{17,18,5},{16,19,11},{17,18,14},{18,19,5},{18,19,5},{18,19,5},{18,18,8},{24,8,8},{17,19,0},{17,19,0},{17,18,5},{30,11,8}, 
+{17,18,5},{28,3,18},{17,20,2},{18,19,1},{17,19,0},{28,3,18},{28,13,18},{17,19,0},{0,19,36},{28,13,18},{0,19,36},{17,0,36},{17,0,36},{17,0,36},{17,0,36},{17,19,0},{17,19,0},{17,19,0},{17,18,1},{16,19,2},{16,19,2},{18,22,56},{18,21,38},{18,20,53},{18,20,37},{18,22,56},{18,21,35},{18,20,1},{18,20,66},{16,21,60},{17,20,52},{19,21,24}, +{19,20,14},{19,20,6},{19,20,14},{31,0,51},{17,21,24},{18,20,1},{17,20,51},{30,13,51},{17,20,51},{18,21,37},{18,21,37},{18,21,37},{18,20,36},{18,21,10},{18,20,0},{18,20,0},{18,19,5},{17,20,11},{18,19,14},{19,20,5},{19,20,5},{19,20,5},{19,19,8},{28,4,8},{18,20,0},{18,20,0},{18,19,5},{29,13,8},{18,19,5},{29,4,18},{18,21,2},{19,20,2}, +{18,20,1},{29,4,18},{29,14,18},{18,20,1},{0,20,50},{29,14,18},{0,20,50},{18,0,36},{18,0,36},{18,0,36},{18,0,36},{18,20,0},{18,20,0},{18,20,0},{18,19,1},{17,20,2},{17,20,2},{19,24,70},{19,22,58},{20,21,69},{19,21,51},{19,23,52},{19,22,25},{19,21,3},{19,21,46},{18,22,60},{18,21,36},{20,22,24},{20,21,14},{20,21,5},{20,21,13},{31,3,51}, +{19,22,24},{19,21,2},{18,21,36},{23,19,51},{18,21,36},{19,23,51},{19,23,51},{19,23,51},{19,21,51},{19,22,9},{19,21,3},{19,21,3},{19,20,9},{17,22,12},{19,20,12},{20,21,5},{20,21,5},{20,21,5},{20,20,8},{31,2,8},{19,21,2},{19,21,2},{19,20,8},{27,16,8},{19,20,8},{29,7,18},{19,22,8},{20,21,1},{19,21,1},{29,7,18},{27,17,18},{19,21,1}, +{0,21,36},{27,17,18},{0,21,36},{19,0,50},{19,0,50},{19,0,50},{19,0,50},{19,21,2},{19,21,2},{19,21,2},{19,20,5},{18,21,0},{18,21,0},{20,24,56},{20,23,38},{20,22,52},{20,22,36},{20,24,56},{20,23,35},{20,22,0},{20,22,52},{19,23,60},{19,22,36},{21,23,24},{21,22,14},{21,22,5},{21,22,13},{26,14,51},{19,23,35},{20,22,0},{19,22,36},{22,21,51}, 
+{19,22,36},{20,23,37},{20,23,37},{20,23,37},{20,22,36},{20,23,10},{20,22,0},{20,22,0},{20,21,5},{18,23,12},{20,21,14},{21,22,5},{21,22,5},{21,22,5},{21,21,8},{26,13,8},{20,22,0},{20,22,0},{20,21,5},{21,21,8},{20,21,5},{28,11,18},{20,23,2},{21,22,1},{20,22,0},{28,11,18},{31,16,18},{20,22,0},{0,22,36},{31,16,18},{0,22,36},{20,0,36}, +{20,0,36},{20,0,36},{20,0,36},{20,22,0},{20,22,0},{20,22,0},{20,21,1},{19,22,0},{19,22,0},{21,25,56},{21,24,38},{21,23,52},{21,23,36},{21,25,56},{21,24,35},{21,23,0},{21,23,52},{19,24,63},{20,23,38},{22,24,24},{22,23,14},{22,23,5},{22,23,13},{31,8,51},{20,24,24},{21,23,0},{20,23,37},{28,19,51},{20,23,37},{21,24,37},{21,24,37},{21,24,37}, +{21,23,36},{21,24,10},{21,23,0},{21,23,0},{21,22,5},{20,23,11},{21,22,14},{22,23,5},{22,23,5},{22,23,5},{22,22,8},{28,12,8},{21,23,0},{21,23,0},{21,22,5},{22,22,8},{21,22,5},{29,12,18},{21,24,2},{22,23,1},{21,23,0},{29,12,18},{27,20,18},{21,23,0},{0,23,36},{27,20,18},{0,23,36},{21,0,36},{21,0,36},{21,0,36},{21,0,36},{21,23,0}, +{21,23,0},{21,23,0},{21,22,1},{20,23,2},{20,23,2},{22,26,56},{22,25,38},{22,24,53},{22,24,37},{22,26,56},{22,25,35},{22,24,1},{22,24,66},{20,25,60},{21,24,52},{23,25,24},{23,24,14},{23,24,6},{23,24,14},{29,14,51},{21,25,24},{22,24,1},{21,24,51},{29,20,51},{21,24,51},{22,25,37},{22,25,37},{22,25,37},{22,24,36},{22,25,10},{22,24,0},{22,24,0}, +{22,23,5},{21,24,11},{22,23,14},{23,24,5},{23,24,5},{23,24,5},{23,23,8},{29,13,8},{22,24,0},{22,24,0},{22,23,5},{28,20,8},{22,23,5},{30,13,18},{22,25,2},{23,24,2},{22,24,1},{30,13,18},{28,21,18},{22,24,1},{0,24,50},{28,21,18},{0,24,50},{22,0,36},{22,0,36},{22,0,36},{22,0,36},{22,24,0},{22,24,0},{22,24,0},{22,23,1},{21,24,2}, 
+{21,24,2},{23,28,70},{23,26,58},{24,25,69},{23,25,51},{23,27,52},{23,26,25},{23,25,3},{23,25,46},{22,26,60},{22,25,36},{24,26,24},{24,25,14},{24,25,5},{24,25,13},{29,17,51},{23,26,24},{23,25,2},{22,25,36},{27,23,51},{22,25,36},{23,27,51},{23,27,51},{23,27,51},{23,25,51},{23,26,9},{23,25,3},{23,25,3},{23,24,9},{21,26,12},{23,24,12},{24,25,5}, +{24,25,5},{24,25,5},{24,24,8},{29,16,8},{23,25,2},{23,25,2},{23,24,8},{31,20,8},{23,24,8},{30,16,18},{23,26,8},{24,25,1},{23,25,1},{30,16,18},{31,21,18},{23,25,1},{0,25,36},{31,21,18},{0,25,36},{23,0,50},{23,0,50},{23,0,50},{23,0,50},{23,25,2},{23,25,2},{23,25,2},{23,24,5},{22,25,0},{22,25,0},{24,28,56},{24,27,38},{24,26,52}, +{24,26,36},{24,28,56},{24,27,35},{24,26,0},{24,26,52},{23,27,60},{23,26,36},{25,27,24},{25,26,14},{25,26,5},{25,26,13},{30,18,51},{23,27,35},{24,26,0},{23,26,36},{26,25,51},{23,26,36},{24,27,37},{24,27,37},{24,27,37},{24,26,36},{24,27,10},{24,26,0},{24,26,0},{24,25,5},{22,27,12},{24,25,14},{25,26,5},{25,26,5},{25,26,5},{25,25,8},{30,17,8}, +{24,26,0},{24,26,0},{24,25,5},{25,25,8},{24,25,5},{31,17,18},{24,27,2},{25,26,1},{24,26,0},{31,17,18},{25,26,18},{24,26,0},{0,26,36},{25,26,18},{0,26,36},{24,0,36},{24,0,36},{24,0,36},{24,0,36},{24,26,0},{24,26,0},{24,26,0},{24,25,1},{23,26,0},{23,26,0},{25,29,56},{25,28,38},{25,27,52},{25,27,36},{25,29,56},{25,28,35},{25,27,0}, +{25,27,52},{23,28,63},{24,27,38},{26,28,24},{26,27,14},{26,27,5},{26,27,13},{31,19,51},{24,28,24},{25,27,0},{24,27,37},{27,26,51},{24,27,37},{25,28,37},{25,28,37},{25,28,37},{25,27,36},{25,28,10},{25,27,0},{25,27,0},{25,26,5},{24,27,11},{25,26,14},{26,27,5},{26,27,5},{26,27,5},{26,26,8},{31,18,8},{25,27,0},{25,27,0},{25,26,5},{26,26,8}, 
+{25,26,5},{30,21,18},{25,28,2},{26,27,1},{25,27,0},{30,21,18},{31,24,18},{25,27,0},{0,27,36},{31,24,18},{0,27,36},{25,0,36},{25,0,36},{25,0,36},{25,0,36},{25,27,0},{25,27,0},{25,27,0},{25,26,1},{24,27,2},{24,27,2},{26,30,56},{26,29,38},{26,28,53},{26,28,37},{26,30,56},{26,29,35},{26,28,1},{26,28,66},{24,29,60},{25,28,52},{27,29,24}, +{27,28,14},{27,28,6},{27,28,14},{30,23,51},{25,29,24},{26,28,1},{25,28,51},{28,27,51},{25,28,51},{26,29,37},{26,29,37},{26,29,37},{26,28,36},{26,29,10},{26,28,0},{26,28,0},{26,27,5},{25,28,11},{26,27,14},{27,28,5},{27,28,5},{27,28,5},{27,27,8},{30,22,8},{26,28,0},{26,28,0},{26,27,5},{27,27,8},{26,27,5},{31,22,18},{26,29,2},{27,28,2}, +{26,28,1},{31,22,18},{27,28,18},{26,28,1},{0,28,50},{27,28,18},{0,28,50},{26,0,36},{26,0,36},{26,0,36},{26,0,36},{26,28,0},{26,28,0},{26,28,0},{26,27,1},{25,28,2},{25,28,2},{27,31,76},{27,30,58},{28,29,69},{27,29,51},{27,31,52},{27,30,25},{27,29,3},{27,29,46},{26,30,60},{26,29,36},{28,30,24},{28,29,14},{28,29,5},{28,29,13},{30,26,51}, +{27,30,24},{27,29,2},{26,29,36},{31,27,51},{26,29,36},{27,31,51},{27,31,51},{27,31,51},{27,29,51},{27,30,9},{27,29,3},{27,29,3},{27,28,9},{25,30,12},{27,28,12},{28,29,5},{28,29,5},{28,29,5},{28,28,8},{30,25,8},{27,29,2},{27,29,2},{27,28,8},{30,27,8},{27,28,8},{31,25,18},{27,30,8},{28,29,1},{27,29,1},{31,25,18},{28,29,18},{27,29,1}, +{0,29,36},{28,29,18},{0,29,36},{27,0,50},{27,0,50},{27,0,50},{27,0,50},{27,29,2},{27,29,2},{27,29,2},{27,28,5},{26,29,0},{26,29,0},{28,31,86},{28,31,38},{28,30,52},{28,30,36},{28,31,59},{28,31,35},{28,30,0},{28,30,52},{27,31,60},{27,30,36},{29,31,24},{29,30,14},{29,30,5},{29,30,13},{31,27,51},{27,31,35},{28,30,0},{27,30,36},{30,29,51}, 
+{27,30,36},{28,31,37},{28,31,37},{28,31,37},{28,30,36},{28,31,10},{28,30,0},{28,30,0},{28,29,5},{26,31,12},{28,29,14},{29,30,5},{29,30,5},{29,30,5},{29,29,8},{31,26,8},{28,30,0},{28,30,0},{28,29,5},{29,29,8},{28,29,5},{30,29,18},{28,31,2},{29,30,1},{28,30,0},{30,29,18},{29,30,18},{28,30,0},{0,30,36},{29,30,18},{0,30,36},{28,0,36}, +{28,0,36},{28,0,36},{28,0,36},{28,30,0},{28,30,0},{28,30,0},{28,29,1},{27,30,0},{27,30,0},{30,31,94},{30,31,78},{29,31,52},{29,31,36},{30,31,115},{29,31,36},{29,31,0},{29,31,52},{29,31,88},{28,31,38},{30,31,30},{30,31,14},{30,31,5},{30,31,13},{30,31,51},{29,31,36},{29,31,0},{28,31,37},{31,30,51},{28,31,37},{29,31,52},{29,31,52},{29,31,52}, +{29,31,36},{29,31,16},{29,31,0},{29,31,0},{29,30,5},{28,31,11},{29,30,14},{30,31,5},{30,31,5},{30,31,5},{30,30,8},{30,30,8},{29,31,0},{29,31,0},{29,30,5},{30,30,8},{29,30,5},{31,30,18},{30,31,10},{30,31,1},{29,31,0},{31,30,18},{30,31,18},{29,31,0},{0,31,36},{30,31,18},{0,31,36},{29,0,36},{29,0,36},{29,0,36},{29,0,36},{29,31,0}, +{29,31,0},{29,31,0},{29,30,1},{28,31,2},{28,31,2},{31,31,68},{31,31,68},{30,31,61},{30,31,45},{30,31,59},{30,31,27},{30,31,18},{30,31,1},{30,31,28},{30,31,10},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{30,31,1},{31,31,4},{30,31,1},{30,31,61},{30,31,61},{30,31,61},{30,31,45},{30,31,34},{30,31,18},{30,31,18}, +{30,31,1},{30,31,19},{30,31,10},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{30,31,1},{31,31,4},{30,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{30,0,36},{30,0,36},{30,0,36},{30,0,36},{30,31,9},{30,31,9},{30,31,9},{30,31,1},{30,31,10}, +{30,31,10},{0,4,74},{0,3,20},{0,2,2},{0,2,26},{0,2,158},{0,2,110},{0,2,62},{0,1,115},{0,1,178},{0,1,124},{0,4,74},{0,3,20},{0,2,2},{0,2,26},{1,1,154},{0,2,110},{0,2,62},{0,1,115},{1,1,154},{0,1,115},{0,2,1},{0,2,1},{0,2,1},{0,1,0},{0,1,13},{0,1,9},{0,1,9},{0,0,25},{0,0,25},{0,0,25},{0,2,1}, 
+{0,2,1},{0,2,1},{0,1,0},{0,1,13},{0,1,9},{0,1,9},{0,0,25},{1,0,13},{0,0,25},{1,2,72},{0,3,20},{0,2,2},{0,2,26},{1,2,72},{2,1,72},{0,2,26},{0,1,90},{2,1,72},{0,1,90},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,6,83},{0,5,13},{0,3,26}, +{0,3,14},{0,5,248},{0,3,140},{0,3,41},{0,2,139},{0,2,319},{0,2,175},{0,6,83},{0,5,13},{0,3,26},{0,3,14},{1,3,243},{0,3,140},{0,3,41},{0,2,139},{3,1,243},{0,2,139},{0,4,10},{0,4,10},{0,4,10},{0,3,13},{0,3,52},{0,2,18},{0,2,18},{0,1,29},{0,1,77},{0,1,38},{0,4,10},{0,4,10},{0,4,10},{0,3,13},{1,1,50}, +{0,2,18},{0,2,18},{0,1,29},{1,1,50},{0,1,29},{2,3,72},{0,5,4},{1,3,2},{0,3,5},{2,3,72},{3,2,72},{0,3,5},{0,2,90},{3,2,72},{0,2,90},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,1,0},{0,1,0},{0,1,0},{0,1,4},{0,0,9},{0,0,9},{1,7,147},{1,6,77},{1,4,89},{1,4,77},{0,7,244},{0,5,96},{0,4,2}, +{0,3,106},{0,4,395},{0,3,187},{1,7,83},{1,6,13},{1,4,25},{1,4,13},{2,4,243},{0,5,96},{0,4,2},{0,3,106},{4,2,243},{0,3,106},{1,5,74},{1,5,74},{1,5,74},{1,4,77},{0,5,52},{0,4,2},{0,4,2},{0,3,25},{0,3,133},{0,2,62},{1,5,10},{1,5,10},{1,5,10},{1,4,13},{2,2,50},{0,4,2},{0,4,2},{0,3,25},{2,2,50}, +{0,3,25},{3,4,72},{1,6,4},{2,4,1},{0,4,1},{3,4,72},{9,0,72},{0,4,1},{0,3,90},{9,0,72},{0,3,90},{1,0,73},{1,0,73},{1,0,73},{1,0,73},{0,4,1},{0,4,1},{0,4,1},{0,2,1},{0,2,37},{0,2,37},{2,8,164},{2,7,94},{2,5,106},{2,5,94},{1,8,245},{1,6,97},{1,5,3},{1,4,97},{0,6,364},{0,4,106},{2,8,83}, +{2,7,13},{2,5,25},{2,5,13},{6,0,243},{0,7,76},{1,5,2},{0,4,81},{10,0,243},{0,4,81},{2,6,91},{2,6,91},{2,6,91},{2,5,94},{1,6,53},{1,5,3},{1,5,3},{1,3,27},{0,4,107},{0,4,42},{2,6,10},{2,6,10},{2,6,10},{2,5,13},{3,3,50},{1,5,2},{1,5,2},{0,4,17},{8,0,50},{0,4,17},{7,0,72},{2,7,4},{3,5,1}, 
+{1,5,1},{7,0,72},{10,1,72},{1,5,1},{0,4,80},{10,1,72},{0,4,80},{2,0,90},{2,0,90},{2,0,90},{2,0,90},{1,5,2},{1,5,2},{1,5,2},{1,3,2},{0,3,20},{0,3,20},{3,10,154},{3,8,81},{3,6,106},{3,6,82},{2,10,248},{2,7,99},{2,6,5},{2,5,99},{0,7,308},{0,5,100},{3,10,90},{3,8,17},{4,6,27},{3,6,18},{6,3,243}, +{1,8,73},{2,6,5},{1,5,90},{8,3,243},{1,5,90},{3,8,81},{3,8,81},{3,8,81},{3,6,81},{2,8,52},{2,6,4},{2,6,4},{2,5,18},{0,6,72},{0,5,19},{3,8,17},{3,8,17},{3,8,17},{3,6,17},{6,1,50},{2,6,4},{2,6,4},{1,5,9},{11,0,50},{1,5,9},{8,1,72},{3,8,1},{4,6,2},{3,6,2},{8,1,72},{6,5,72},{3,6,2}, +{0,5,90},{6,5,72},{0,5,90},{3,0,80},{3,0,80},{3,0,80},{3,0,80},{2,6,0},{2,6,0},{2,6,0},{2,4,4},{0,6,8},{0,6,8},{4,10,164},{4,9,94},{4,7,107},{4,7,95},{3,11,248},{3,8,89},{3,7,5},{3,6,99},{0,9,253},{1,6,100},{4,10,83},{4,9,13},{4,7,26},{4,7,14},{8,2,243},{2,9,73},{3,7,5},{2,6,90},{12,2,243}, +{2,6,90},{4,8,91},{4,8,91},{4,8,91},{4,7,94},{3,9,52},{3,7,4},{3,7,4},{3,6,18},{0,8,50},{1,6,19},{4,8,10},{4,8,10},{4,8,10},{4,7,13},{8,0,50},{3,7,4},{3,7,4},{2,6,9},{5,5,50},{2,6,9},{9,2,72},{4,9,4},{5,7,2},{3,7,5},{9,2,72},{17,0,72},{3,7,5},{0,6,90},{17,0,72},{0,6,90},{4,0,90}, +{4,0,90},{4,0,90},{4,0,90},{3,7,0},{3,7,0},{3,7,0},{3,5,4},{1,7,8},{1,7,8},{5,11,164},{5,10,94},{5,8,106},{5,8,94},{4,11,245},{4,9,97},{4,8,3},{4,7,107},{0,11,249},{2,7,100},{5,11,83},{5,10,13},{5,8,25},{5,8,13},{9,3,243},{3,10,73},{4,8,2},{3,7,90},{18,0,243},{3,7,90},{5,9,91},{5,9,91},{5,9,91}, +{5,8,94},{4,9,53},{4,8,3},{4,8,3},{4,7,26},{1,9,50},{2,7,19},{5,9,10},{5,9,10},{5,9,10},{5,8,13},{9,1,50},{4,8,2},{4,8,2},{3,7,9},{16,0,50},{3,7,9},{10,3,72},{5,10,4},{6,8,1},{4,8,1},{10,3,72},{18,1,72},{4,8,1},{0,7,90},{18,1,72},{0,7,90},{5,0,90},{5,0,90},{5,0,90},{5,0,90},{4,8,2}, 
+{4,8,2},{4,8,2},{4,6,2},{1,8,9},{1,8,9},{6,12,164},{6,11,94},{6,9,106},{6,9,94},{5,12,245},{5,10,97},{5,9,3},{5,8,97},{1,12,252},{3,8,85},{6,12,83},{6,11,13},{6,9,25},{6,9,13},{10,4,243},{4,11,76},{5,9,2},{4,8,81},{19,1,243},{4,8,81},{6,10,91},{6,10,91},{6,10,91},{6,9,94},{5,10,53},{5,9,3},{5,9,3}, +{5,7,27},{2,10,50},{3,8,21},{6,10,10},{6,10,10},{6,10,10},{6,9,13},{10,2,50},{5,9,2},{5,9,2},{4,8,17},{17,1,50},{4,8,17},{11,4,72},{6,11,4},{7,9,1},{5,9,1},{11,4,72},{19,2,72},{5,9,1},{0,8,80},{19,2,72},{0,8,80},{6,0,90},{6,0,90},{6,0,90},{6,0,90},{5,9,2},{5,9,2},{5,9,2},{5,7,2},{3,8,5}, +{3,8,5},{7,14,154},{7,12,81},{7,10,106},{7,10,82},{6,14,248},{6,11,99},{6,10,5},{6,9,99},{2,13,244},{4,9,100},{7,14,90},{7,12,17},{8,10,27},{7,10,18},{13,2,243},{5,12,73},{6,10,5},{5,9,90},{22,1,243},{5,9,90},{7,12,81},{7,12,81},{7,12,81},{7,10,81},{6,12,52},{6,10,4},{6,10,4},{6,9,18},{3,11,53},{4,9,19},{7,12,17}, +{7,12,17},{7,12,17},{7,10,17},{13,0,50},{6,10,4},{6,10,4},{5,9,9},{20,1,50},{5,9,9},{14,2,72},{7,12,1},{8,10,2},{7,10,2},{14,2,72},{25,0,72},{7,10,2},{0,9,90},{25,0,72},{0,9,90},{7,0,80},{7,0,80},{7,0,80},{7,0,80},{6,10,0},{6,10,0},{6,10,0},{6,8,4},{4,10,8},{4,10,8},{8,14,164},{8,13,94},{8,11,107}, +{8,11,95},{7,15,248},{7,12,89},{7,11,5},{7,10,99},{3,14,244},{5,10,100},{8,14,83},{8,13,13},{8,11,26},{8,11,14},{14,3,243},{6,13,73},{7,11,5},{6,10,90},{26,0,243},{6,10,90},{8,12,91},{8,12,91},{8,12,91},{8,11,94},{7,13,52},{7,11,4},{7,11,4},{7,10,18},{4,12,50},{5,10,19},{8,12,10},{8,12,10},{8,12,10},{8,11,13},{14,1,50}, +{7,11,4},{7,11,4},{6,10,9},{24,0,50},{6,10,9},{16,1,72},{8,13,4},{9,11,2},{7,11,5},{16,1,72},{26,1,72},{7,11,5},{0,10,90},{26,1,72},{0,10,90},{8,0,90},{8,0,90},{8,0,90},{8,0,90},{7,11,0},{7,11,0},{7,11,0},{7,9,4},{5,11,8},{5,11,8},{9,15,164},{9,14,94},{9,12,106},{9,12,94},{8,15,245},{8,13,97},{8,12,3}, 
+{8,11,107},{4,15,249},{6,11,100},{9,15,83},{9,14,13},{9,12,25},{9,12,13},{16,2,243},{7,14,73},{8,12,2},{7,11,90},{27,1,243},{7,11,90},{9,13,91},{9,13,91},{9,13,91},{9,12,94},{8,13,53},{8,12,3},{8,12,3},{8,11,26},{5,13,50},{6,11,19},{9,13,10},{9,13,10},{9,13,10},{9,12,13},{16,0,50},{8,12,2},{8,12,2},{7,11,9},{25,1,50}, +{7,11,9},{17,2,72},{9,14,4},{10,12,1},{8,12,1},{17,2,72},{27,2,72},{8,12,1},{0,11,90},{27,2,72},{0,11,90},{9,0,90},{9,0,90},{9,0,90},{9,0,90},{8,12,2},{8,12,2},{8,12,2},{8,10,2},{5,12,9},{5,12,9},{10,16,164},{10,15,94},{10,13,106},{10,13,94},{9,16,245},{9,14,97},{9,13,3},{9,12,97},{5,16,252},{7,12,85},{10,16,83}, +{10,15,13},{10,13,25},{10,13,13},{17,3,243},{8,15,76},{9,13,2},{8,12,81},{28,2,243},{8,12,81},{10,14,91},{10,14,91},{10,14,91},{10,13,94},{9,14,53},{9,13,3},{9,13,3},{9,11,27},{6,14,50},{7,12,21},{10,14,10},{10,14,10},{10,14,10},{10,13,13},{17,1,50},{9,13,2},{9,13,2},{8,12,17},{26,2,50},{8,12,17},{18,3,72},{10,15,4},{11,13,1}, +{9,13,1},{18,3,72},{28,3,72},{9,13,1},{0,12,80},{28,3,72},{0,12,80},{10,0,90},{10,0,90},{10,0,90},{10,0,90},{9,13,2},{9,13,2},{9,13,2},{9,11,2},{7,12,5},{7,12,5},{11,18,154},{11,16,81},{11,14,106},{11,14,82},{10,18,248},{10,15,99},{10,14,5},{10,13,99},{6,17,244},{8,13,100},{11,18,90},{11,16,17},{12,14,27},{11,14,18},{17,6,243}, +{9,16,73},{10,14,5},{9,13,90},{31,2,243},{9,13,90},{11,16,81},{11,16,81},{11,16,81},{11,14,81},{10,16,52},{10,14,4},{10,14,4},{10,13,18},{7,15,53},{8,13,19},{11,16,17},{11,16,17},{11,16,17},{11,14,17},{17,4,50},{10,14,4},{10,14,4},{9,13,9},{29,2,50},{9,13,9},{18,6,72},{11,16,1},{12,14,2},{11,14,2},{18,6,72},{31,3,72},{11,14,2}, +{0,13,90},{31,3,72},{0,13,90},{11,0,80},{11,0,80},{11,0,80},{11,0,80},{10,14,0},{10,14,0},{10,14,0},{10,12,4},{8,14,8},{8,14,8},{12,18,164},{12,17,94},{12,15,107},{12,15,95},{11,19,248},{11,16,89},{11,15,5},{11,14,99},{7,18,244},{9,14,100},{12,18,83},{12,17,13},{12,15,26},{12,15,14},{22,0,243},{10,17,73},{11,15,5},{10,14,90},{30,4,243}, 
+{10,14,90},{12,16,91},{12,16,91},{12,16,91},{12,15,94},{11,17,52},{11,15,4},{11,15,4},{11,14,18},{8,16,50},{9,14,19},{12,16,10},{12,16,10},{12,16,10},{12,15,13},{18,5,50},{11,15,4},{11,15,4},{10,14,9},{30,3,50},{10,14,9},{23,0,72},{12,17,4},{13,15,2},{11,15,5},{23,0,72},{30,5,72},{11,15,5},{0,14,90},{30,5,72},{0,14,90},{12,0,90}, +{12,0,90},{12,0,90},{12,0,90},{11,15,0},{11,15,0},{11,15,0},{11,13,4},{9,15,8},{9,15,8},{13,19,164},{13,18,94},{13,16,106},{13,16,94},{12,19,245},{12,17,97},{12,16,3},{12,15,107},{8,19,249},{10,15,100},{13,19,83},{13,18,13},{13,16,25},{13,16,13},{23,1,243},{11,18,73},{12,16,2},{11,15,90},{31,5,243},{11,15,90},{13,17,91},{13,17,91},{13,17,91}, +{13,16,94},{12,17,53},{12,16,3},{12,16,3},{12,15,26},{9,17,50},{10,15,19},{13,17,10},{13,17,10},{13,17,10},{13,16,13},{20,4,50},{12,16,2},{12,16,2},{11,15,9},{29,5,50},{11,15,9},{24,1,72},{13,18,4},{14,16,1},{12,16,1},{24,1,72},{31,6,72},{12,16,1},{0,15,90},{31,6,72},{0,15,90},{13,0,90},{13,0,90},{13,0,90},{13,0,90},{12,16,2}, +{12,16,2},{12,16,2},{12,14,2},{9,16,9},{9,16,9},{14,20,164},{14,19,94},{14,17,106},{14,17,94},{13,20,245},{13,18,97},{13,17,3},{13,16,97},{9,20,252},{11,16,85},{14,20,83},{14,19,13},{14,17,25},{14,17,13},{24,2,243},{12,19,76},{13,17,2},{12,16,81},{27,9,243},{12,16,81},{14,18,91},{14,18,91},{14,18,91},{14,17,94},{13,18,53},{13,17,3},{13,17,3}, +{13,15,27},{10,18,50},{11,16,21},{14,18,10},{14,18,10},{14,18,10},{14,17,13},{24,0,50},{13,17,2},{13,17,2},{12,16,17},{30,6,50},{12,16,17},{25,2,72},{14,19,4},{15,17,1},{13,17,1},{25,2,72},{27,10,72},{13,17,1},{0,16,80},{27,10,72},{0,16,80},{14,0,90},{14,0,90},{14,0,90},{14,0,90},{13,17,2},{13,17,2},{13,17,2},{13,15,2},{11,16,5}, 
+{11,16,5},{15,22,154},{15,20,81},{15,18,106},{15,18,82},{14,22,248},{14,19,99},{14,18,5},{14,17,99},{10,21,244},{12,17,100},{15,22,90},{15,20,17},{16,18,27},{15,18,18},{27,0,243},{13,20,73},{14,18,5},{13,17,90},{30,9,243},{13,17,90},{15,20,81},{15,20,81},{15,20,81},{15,18,81},{14,20,52},{14,18,4},{14,18,4},{14,17,18},{11,19,53},{12,17,19},{15,20,17}, +{15,20,17},{15,20,17},{15,18,17},{24,3,50},{14,18,4},{14,18,4},{13,17,9},{28,9,50},{13,17,9},{22,10,72},{15,20,1},{16,18,2},{15,18,2},{22,10,72},{30,10,72},{15,18,2},{0,17,90},{30,10,72},{0,17,90},{15,0,80},{15,0,80},{15,0,80},{15,0,80},{14,18,0},{14,18,0},{14,18,0},{14,16,4},{12,18,8},{12,18,8},{16,22,164},{16,21,94},{16,19,107}, +{16,19,95},{15,23,248},{15,20,89},{15,19,5},{15,18,99},{11,22,244},{13,18,100},{16,22,83},{16,21,13},{16,19,26},{16,19,14},{26,4,243},{14,21,73},{15,19,5},{14,18,90},{31,10,243},{14,18,90},{16,20,91},{16,20,91},{16,20,91},{16,19,94},{15,21,52},{15,19,4},{15,19,4},{15,18,18},{12,20,50},{13,18,19},{16,20,10},{16,20,10},{16,20,10},{16,19,13},{22,9,50}, +{15,19,4},{15,19,4},{14,18,9},{29,10,50},{14,18,9},{27,4,72},{16,21,4},{17,19,2},{15,19,5},{27,4,72},{31,11,72},{15,19,5},{0,18,90},{31,11,72},{0,18,90},{16,0,90},{16,0,90},{16,0,90},{16,0,90},{15,19,0},{15,19,0},{15,19,0},{15,17,4},{13,19,8},{13,19,8},{17,23,164},{17,22,94},{17,20,106},{17,20,94},{16,23,245},{16,21,97},{16,20,3}, +{16,19,107},{12,23,249},{14,19,100},{17,23,83},{17,22,13},{17,20,25},{17,20,13},{30,0,243},{15,22,73},{16,20,2},{15,19,90},{30,12,243},{15,19,90},{17,21,91},{17,21,91},{17,21,91},{17,20,94},{16,21,53},{16,20,3},{16,20,3},{16,19,26},{13,21,50},{14,19,19},{17,21,10},{17,21,10},{17,21,10},{17,20,13},{24,8,50},{16,20,2},{16,20,2},{15,19,9},{30,11,50}, 
+{15,19,9},{31,0,72},{17,22,4},{18,20,1},{16,20,1},{31,0,72},{30,13,72},{16,20,1},{0,19,90},{30,13,72},{0,19,90},{17,0,90},{17,0,90},{17,0,90},{17,0,90},{16,20,2},{16,20,2},{16,20,2},{16,18,2},{13,20,9},{13,20,9},{18,24,164},{18,23,94},{18,21,106},{18,21,94},{17,24,245},{17,22,97},{17,21,3},{17,20,97},{13,24,252},{15,20,85},{18,24,83}, +{18,23,13},{18,21,25},{18,21,13},{31,1,243},{16,23,76},{17,21,2},{16,20,81},{31,13,243},{16,20,81},{18,22,91},{18,22,91},{18,22,91},{18,21,94},{17,22,53},{17,21,3},{17,21,3},{17,19,27},{14,22,50},{15,20,21},{18,22,10},{18,22,10},{18,22,10},{18,21,13},{28,4,50},{17,21,2},{17,21,2},{16,20,17},{29,13,50},{16,20,17},{29,6,72},{18,23,4},{19,21,1}, +{17,21,1},{29,6,72},{31,14,72},{17,21,1},{0,20,80},{31,14,72},{0,20,80},{18,0,90},{18,0,90},{18,0,90},{18,0,90},{17,21,2},{17,21,2},{17,21,2},{17,19,2},{15,20,5},{15,20,5},{19,26,154},{19,24,81},{19,22,106},{19,22,82},{18,26,248},{18,23,99},{18,22,5},{18,21,99},{14,25,244},{16,21,100},{19,26,90},{19,24,17},{20,22,27},{19,22,18},{31,4,243}, +{17,24,73},{18,22,5},{17,21,90},{24,19,243},{17,21,90},{19,24,81},{19,24,81},{19,24,81},{19,22,81},{18,24,52},{18,22,4},{18,22,4},{18,21,18},{15,23,53},{16,21,19},{19,24,17},{19,24,17},{19,24,17},{19,22,17},{31,2,50},{18,22,4},{18,22,4},{17,21,9},{27,16,50},{17,21,9},{26,14,72},{19,24,1},{20,22,2},{19,22,2},{26,14,72},{22,21,72},{19,22,2}, +{0,21,90},{22,21,72},{0,21,90},{19,0,80},{19,0,80},{19,0,80},{19,0,80},{18,22,0},{18,22,0},{18,22,0},{18,20,4},{16,22,8},{16,22,8},{20,26,164},{20,25,94},{20,23,107},{20,23,95},{19,27,248},{19,24,89},{19,23,5},{19,22,99},{15,26,244},{17,22,100},{20,26,83},{20,25,13},{20,23,26},{20,23,14},{30,8,243},{18,25,73},{19,23,5},{18,22,90},{28,18,243}, 
+{18,22,90},{20,24,91},{20,24,91},{20,24,91},{20,23,94},{19,25,52},{19,23,4},{19,23,4},{19,22,18},{16,24,50},{17,22,19},{20,24,10},{20,24,10},{20,24,10},{20,23,13},{26,13,50},{19,23,4},{19,23,4},{18,22,9},{21,21,50},{18,22,9},{31,8,72},{20,25,4},{21,23,2},{19,23,5},{31,8,72},{28,19,72},{19,23,5},{0,22,90},{28,19,72},{0,22,90},{20,0,90}, +{20,0,90},{20,0,90},{20,0,90},{19,23,0},{19,23,0},{19,23,0},{19,21,4},{17,23,8},{17,23,8},{21,27,164},{21,26,94},{21,24,106},{21,24,94},{20,27,245},{20,25,97},{20,24,3},{20,23,107},{16,27,249},{18,23,100},{21,27,83},{21,26,13},{21,24,25},{21,24,13},{31,9,243},{19,26,73},{20,24,2},{19,23,90},{29,19,243},{19,23,90},{21,25,91},{21,25,91},{21,25,91}, +{21,24,94},{20,25,53},{20,24,3},{20,24,3},{20,23,26},{17,25,50},{18,23,19},{21,25,10},{21,25,10},{21,25,10},{21,24,13},{28,12,50},{20,24,2},{20,24,2},{19,23,9},{22,22,50},{19,23,9},{29,14,72},{21,26,4},{22,24,1},{20,24,1},{29,14,72},{29,20,72},{20,24,1},{0,23,90},{29,20,72},{0,23,90},{21,0,90},{21,0,90},{21,0,90},{21,0,90},{20,24,2}, +{20,24,2},{20,24,2},{20,22,2},{17,24,9},{17,24,9},{22,28,164},{22,27,94},{22,25,106},{22,25,94},{21,28,245},{21,26,97},{21,25,3},{21,24,97},{17,28,252},{19,24,85},{22,28,83},{22,27,13},{22,25,25},{22,25,13},{29,15,243},{20,27,76},{21,25,2},{20,24,81},{30,20,243},{20,24,81},{22,26,91},{22,26,91},{22,26,91},{22,25,94},{21,26,53},{21,25,3},{21,25,3}, +{21,23,27},{18,26,50},{19,24,21},{22,26,10},{22,26,10},{22,26,10},{22,25,13},{29,13,50},{21,25,2},{21,25,2},{20,24,17},{28,20,50},{20,24,17},{30,15,72},{22,27,4},{23,25,1},{21,25,1},{30,15,72},{30,21,72},{21,25,1},{0,24,80},{30,21,72},{0,24,80},{22,0,90},{22,0,90},{22,0,90},{22,0,90},{21,25,2},{21,25,2},{21,25,2},{21,23,2},{19,24,5}, 
+{19,24,5},{23,30,154},{23,28,81},{23,26,106},{23,26,82},{22,30,248},{22,27,99},{22,26,5},{22,25,99},{18,29,244},{20,25,100},{23,30,90},{23,28,17},{24,26,27},{23,26,18},{29,18,243},{21,28,73},{22,26,5},{21,25,90},{28,23,243},{21,25,90},{23,28,81},{23,28,81},{23,28,81},{23,26,81},{22,28,52},{22,26,4},{22,26,4},{22,25,18},{19,27,53},{20,25,19},{23,28,17}, +{23,28,17},{23,28,17},{23,26,17},{29,16,50},{22,26,4},{22,26,4},{21,25,9},{31,20,50},{21,25,9},{30,18,72},{23,28,1},{24,26,2},{23,26,2},{30,18,72},{26,25,72},{23,26,2},{0,25,90},{26,25,72},{0,25,90},{23,0,80},{23,0,80},{23,0,80},{23,0,80},{22,26,0},{22,26,0},{22,26,0},{22,24,4},{20,26,8},{20,26,8},{24,30,164},{24,29,94},{24,27,107}, +{24,27,95},{23,31,248},{23,28,89},{23,27,5},{23,26,99},{19,30,244},{21,26,100},{24,30,83},{24,29,13},{24,27,26},{24,27,14},{30,19,243},{22,29,73},{23,27,5},{22,26,90},{27,25,243},{22,26,90},{24,28,91},{24,28,91},{24,28,91},{24,27,94},{23,29,52},{23,27,4},{23,27,4},{23,26,18},{20,28,50},{21,26,19},{24,28,10},{24,28,10},{24,28,10},{24,27,13},{30,17,50}, +{23,27,4},{23,27,4},{22,26,9},{25,25,50},{22,26,9},{31,19,72},{24,29,4},{25,27,2},{23,27,5},{31,19,72},{27,26,72},{23,27,5},{0,26,90},{27,26,72},{0,26,90},{24,0,90},{24,0,90},{24,0,90},{24,0,90},{23,27,0},{23,27,0},{23,27,0},{23,25,4},{21,27,8},{21,27,8},{25,31,164},{25,30,94},{25,28,106},{25,28,94},{24,31,245},{24,29,97},{24,28,3}, +{24,27,107},{20,31,249},{22,27,100},{25,31,83},{25,30,13},{25,28,25},{25,28,13},{29,23,243},{23,30,73},{24,28,2},{23,27,90},{28,26,243},{23,27,90},{25,29,91},{25,29,91},{25,29,91},{25,28,94},{24,29,53},{24,28,3},{24,28,3},{24,27,26},{21,29,50},{22,27,19},{25,29,10},{25,29,10},{25,29,10},{25,28,13},{31,18,50},{24,28,2},{24,28,2},{23,27,9},{26,26,50}, 
+{23,27,9},{30,23,72},{25,30,4},{26,28,1},{24,28,1},{30,23,72},{28,27,72},{24,28,1},{0,27,90},{28,27,72},{0,27,90},{25,0,90},{25,0,90},{25,0,90},{25,0,90},{24,28,2},{24,28,2},{24,28,2},{24,26,2},{21,28,9},{21,28,9},{26,31,194},{26,31,94},{26,29,106},{26,29,94},{25,31,284},{25,30,97},{25,29,3},{25,28,97},{22,31,253},{23,28,85},{27,31,99}, +{26,31,13},{26,29,25},{26,29,13},{30,24,243},{24,31,76},{25,29,2},{24,28,81},{29,27,243},{24,28,81},{26,30,91},{26,30,91},{26,30,91},{26,29,94},{25,30,53},{25,29,3},{25,29,3},{25,27,27},{22,30,50},{23,28,21},{26,30,10},{26,30,10},{26,30,10},{26,29,13},{30,22,50},{25,29,2},{25,29,2},{24,28,17},{27,27,50},{24,28,17},{31,24,72},{26,31,4},{27,29,1}, +{25,29,1},{31,24,72},{24,31,72},{25,29,1},{0,28,80},{24,31,72},{0,28,80},{26,0,90},{26,0,90},{26,0,90},{26,0,90},{25,29,2},{25,29,2},{25,29,2},{25,27,2},{23,28,5},{23,28,5},{27,31,280},{27,31,120},{27,30,106},{27,30,82},{27,31,328},{26,31,99},{26,30,5},{26,29,99},{24,31,308},{24,29,100},{28,31,105},{28,31,45},{28,30,27},{27,30,18},{30,27,243}, +{26,31,99},{26,30,5},{25,29,90},{30,28,243},{25,29,90},{27,31,84},{27,31,84},{27,31,84},{27,30,81},{26,31,58},{26,30,4},{26,30,4},{26,29,18},{23,31,53},{24,29,19},{27,31,20},{27,31,20},{27,31,20},{27,30,17},{30,25,50},{26,30,4},{26,30,4},{25,29,9},{30,27,50},{25,29,9},{31,27,72},{28,31,20},{28,30,2},{27,30,2},{31,27,72},{30,29,72},{27,30,2}, +{0,29,90},{30,29,72},{0,29,90},{27,0,80},{27,0,80},{27,0,80},{27,0,80},{26,30,0},{26,30,0},{26,30,0},{26,28,4},{24,30,8},{24,30,8},{28,31,331},{28,31,187},{28,31,106},{28,31,94},{28,31,358},{27,31,173},{27,31,4},{27,30,82},{26,31,355},{25,30,83},{29,31,126},{29,31,62},{28,31,25},{28,31,13},{30,29,221},{28,31,121},{27,31,4},{26,30,73},{29,30,221}, 
+{26,30,73},{28,31,106},{28,31,106},{28,31,106},{28,31,94},{27,31,100},{27,31,4},{27,31,4},{27,30,18},{25,31,72},{25,30,19},{28,31,25},{28,31,25},{28,31,25},{28,31,13},{31,26,50},{27,31,4},{27,31,4},{26,30,9},{29,29,50},{26,30,9},{31,29,61},{29,31,37},{29,31,1},{27,31,4},{31,29,61},{31,30,61},{27,31,4},{0,30,73},{31,30,61},{0,30,73},{28,0,90}, +{28,0,90},{28,0,90},{28,0,90},{27,31,0},{27,31,0},{27,31,0},{27,29,4},{25,31,8},{25,31,8},{29,31,239},{29,31,175},{29,31,139},{29,31,99},{29,31,239},{28,31,122},{28,31,41},{28,31,26},{28,31,233},{26,31,19},{30,31,54},{30,31,38},{30,31,29},{29,31,18},{30,31,93},{29,31,54},{29,31,18},{27,31,9},{31,30,93},{27,31,9},{29,31,139},{29,31,139},{29,31,139}, +{29,31,99},{29,31,139},{28,31,41},{28,31,41},{28,31,26},{27,31,116},{26,31,19},{30,31,29},{30,31,29},{30,31,29},{29,31,18},{30,30,50},{29,31,18},{29,31,18},{27,31,9},{30,30,50},{27,31,9},{31,30,9},{31,31,9},{30,31,4},{30,31,0},{31,30,9},{30,31,9},{30,31,0},{0,31,9},{30,31,9},{0,31,9},{29,0,90},{29,0,90},{29,0,90},{29,0,90},{28,31,5}, +{28,31,5},{28,31,5},{28,30,2},{26,31,10},{26,31,10},{30,31,140},{30,31,124},{30,31,115},{30,31,99},{30,31,131},{29,31,98},{29,31,62},{29,31,2},{29,31,122},{28,31,20},{31,31,25},{31,31,25},{31,31,25},{30,31,18},{31,30,22},{30,31,18},{30,31,9},{29,31,1},{30,31,22},{29,31,1},{30,31,115},{30,31,115},{30,31,115},{30,31,99},{30,31,106},{29,31,62},{29,31,62}, +{29,31,2},{29,31,86},{28,31,20},{31,31,25},{31,31,25},{31,31,25},{30,31,18},{31,30,13},{30,31,9},{30,31,9},{29,31,1},{30,31,13},{29,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{30,0,90},{30,0,90},{30,0,90},{30,0,90},{29,31,26},{29,31,26},{29,31,26},{29,31,2},{28,31,20}, 
+{28,31,20},{0,6,202},{0,5,52},{0,3,25},{0,3,61},{0,4,442},{0,3,313},{0,3,142},{0,2,318},{0,2,498},{0,2,354},{0,6,202},{0,5,52},{0,3,25},{0,3,61},{2,1,441},{0,3,313},{0,3,142},{0,2,318},{1,2,441},{0,2,318},{0,3,0},{0,3,0},{0,3,0},{0,2,1},{0,1,45},{0,1,25},{0,1,25},{0,1,26},{0,1,50},{0,1,35},{0,3,0}, +{0,3,0},{0,3,0},{0,2,1},{1,0,41},{0,1,25},{0,1,25},{0,1,26},{0,1,41},{0,1,26},{2,3,200},{0,5,52},{0,3,25},{0,3,61},{2,3,200},{3,2,200},{0,3,61},{0,2,218},{3,2,200},{0,2,218},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,9,200},{0,7,20},{0,5,20}, +{0,4,25},{0,6,686},{0,5,433},{0,4,169},{0,3,443},{0,3,794},{0,3,524},{0,9,200},{0,7,20},{0,5,20},{0,4,25},{3,1,686},{0,5,433},{0,4,169},{0,3,443},{6,0,686},{0,3,443},{0,6,1},{0,6,1},{0,6,1},{0,3,4},{0,3,145},{0,2,85},{0,2,85},{0,2,101},{0,1,178},{0,1,115},{0,6,1},{0,6,1},{0,6,1},{0,3,4},{1,1,145}, +{0,2,85},{0,2,85},{0,2,101},{3,0,145},{0,2,101},{3,4,200},{0,7,20},{1,4,16},{0,4,25},{3,4,200},{9,0,200},{0,4,25},{0,3,218},{9,0,200},{0,3,218},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,11,257},{0,9,54},{1,6,85},{0,5,65},{0,9,728},{0,6,371},{0,5,80}, +{0,4,377},{0,5,949},{0,4,521},{1,10,201},{1,8,18},{1,6,21},{1,5,26},{4,2,723},{0,6,371},{0,5,80},{0,4,377},{7,1,723},{0,4,377},{0,8,50},{0,8,50},{0,8,50},{0,5,49},{0,5,164},{0,4,50},{0,4,50},{0,3,65},{0,3,245},{0,2,126},{1,7,2},{1,7,2},{1,7,2},{1,4,5},{2,2,162},{0,4,50},{0,4,50},{0,3,65},{2,2,162}, +{0,3,65},{7,0,200},{0,9,5},{2,5,16},{0,5,16},{7,0,200},{10,1,200},{0,5,16},{0,4,208},{10,1,200},{0,4,208},{0,0,49},{0,0,49},{0,0,49},{0,0,49},{0,2,1},{0,2,1},{0,2,1},{0,1,4},{0,1,13},{0,1,13},{1,12,315},{1,10,118},{1,7,178},{1,6,129},{0,11,724},{0,8,289},{0,6,34},{0,5,308},{0,6,1087},{0,5,533},{2,11,201}, 
+{2,9,18},{2,7,21},{2,6,26},{5,3,723},{0,8,289},{0,6,34},{0,5,308},{8,2,723},{0,5,308},{1,9,114},{1,9,114},{1,9,114},{1,6,113},{0,8,162},{0,6,18},{0,6,18},{0,4,25},{0,4,338},{0,3,162},{2,8,2},{2,8,2},{2,8,2},{2,5,5},{3,3,162},{0,6,18},{0,6,18},{0,4,25},{8,0,162},{0,4,25},{6,4,200},{1,10,5},{3,6,16}, +{1,6,16},{6,4,200},{14,0,200},{1,6,16},{0,5,208},{14,0,200},{0,5,208},{1,0,113},{1,0,113},{1,0,113},{1,0,113},{0,5,0},{0,5,0},{0,5,0},{0,3,0},{0,2,61},{0,2,61},{2,14,410},{2,11,209},{2,8,288},{2,7,234},{0,14,739},{0,10,254},{0,8,33},{0,6,270},{0,8,1131},{0,6,450},{3,12,200},{3,10,13},{3,8,25},{3,7,29},{6,4,723}, +{0,10,238},{0,8,17},{0,6,254},{14,0,723},{0,6,254},{2,11,209},{2,11,209},{2,11,209},{2,7,209},{0,11,178},{0,8,17},{0,8,17},{0,5,18},{0,6,376},{0,5,123},{3,9,0},{3,9,0},{3,9,0},{3,7,4},{6,1,162},{0,8,1},{0,8,1},{0,5,2},{11,0,162},{0,5,2},{9,2,200},{2,11,1},{4,7,25},{0,8,17},{9,2,200},{17,0,200},{0,8,17}, +{0,6,218},{17,0,200},{0,6,218},{2,0,208},{2,0,208},{2,0,208},{2,0,208},{0,8,16},{0,8,16},{0,8,16},{1,4,17},{0,4,80},{0,4,80},{3,15,410},{3,12,212},{3,9,288},{3,8,224},{1,15,739},{1,11,254},{1,9,33},{1,7,270},{0,9,1013},{0,7,308},{4,13,201},{4,11,21},{4,9,21},{4,8,26},{10,0,723},{0,12,227},{1,9,17},{0,7,227},{15,1,723}, +{0,7,227},{3,11,212},{3,11,212},{3,11,212},{3,8,208},{2,10,180},{1,9,17},{1,9,17},{1,6,18},{0,8,306},{0,6,41},{4,10,2},{4,10,2},{4,10,2},{4,7,5},{8,0,162},{1,9,1},{1,9,1},{1,6,2},{5,5,162},{1,6,2},{10,3,200},{3,12,4},{5,8,16},{3,8,16},{10,3,200},{18,1,200},{3,8,16},{0,7,218},{18,1,200},{0,7,218},{3,0,208}, +{3,0,208},{3,0,208},{3,0,208},{1,9,16},{1,9,16},{1,9,16},{2,5,17},{0,6,40},{0,6,40},{4,15,426},{4,13,223},{4,10,283},{4,9,234},{2,16,739},{2,12,267},{2,10,33},{2,8,273},{0,11,913},{0,8,225},{5,14,201},{5,12,18},{5,10,21},{5,9,26},{11,1,723},{0,13,208},{2,10,17},{0,8,209},{16,2,723},{0,8,209},{4,12,219},{4,12,219},{4,12,219}, 
+{4,9,218},{3,11,180},{2,10,17},{2,10,17},{2,7,18},{0,9,229},{0,7,27},{5,11,2},{5,11,2},{5,11,2},{5,8,5},{9,1,162},{2,10,1},{2,10,1},{2,7,2},{16,0,162},{2,7,2},{11,4,200},{3,14,4},{6,9,16},{4,9,16},{11,4,200},{19,2,200},{4,9,16},{0,8,208},{19,2,200},{0,8,208},{4,0,218},{4,0,218},{4,0,218},{4,0,218},{2,10,16}, +{2,10,16},{2,10,16},{3,6,17},{0,8,17},{0,8,17},{5,16,420},{5,14,223},{5,11,283},{5,10,234},{3,17,739},{3,13,267},{3,11,33},{3,9,273},{0,13,868},{0,9,213},{6,15,201},{6,13,18},{6,11,21},{6,10,26},{12,2,723},{0,15,204},{3,11,17},{1,9,209},{22,0,723},{1,9,209},{5,13,219},{5,13,219},{5,13,219},{5,10,218},{3,14,180},{3,11,17},{3,11,17}, +{3,8,20},{0,11,189},{1,8,17},{6,12,2},{6,12,2},{6,12,2},{6,9,5},{10,2,162},{3,11,1},{3,11,1},{2,8,1},{17,1,162},{2,8,1},{15,0,200},{5,14,5},{7,10,16},{5,10,16},{15,0,200},{20,3,200},{5,10,16},{0,9,208},{20,3,200},{0,9,208},{5,0,218},{5,0,218},{5,0,218},{5,0,218},{3,11,16},{3,11,16},{3,11,16},{3,8,20},{0,9,5}, +{0,9,5},{6,18,410},{6,15,209},{6,12,288},{6,11,234},{4,18,739},{4,14,254},{4,12,33},{4,10,270},{0,15,804},{2,10,227},{7,16,200},{7,14,13},{7,12,25},{7,11,29},{15,0,723},{1,16,209},{4,12,17},{2,10,218},{20,3,723},{2,10,218},{6,15,209},{6,15,209},{6,15,209},{6,11,209},{4,15,178},{4,12,17},{4,12,17},{4,9,18},{0,13,171},{2,9,26},{7,13,0}, +{7,13,0},{7,13,0},{7,11,4},{13,0,162},{4,12,1},{4,12,1},{4,9,2},{20,1,162},{4,9,2},{16,1,200},{6,15,1},{8,11,25},{4,12,17},{16,1,200},{26,1,200},{4,12,17},{0,10,218},{26,1,200},{0,10,218},{6,0,208},{6,0,208},{6,0,208},{6,0,208},{4,12,16},{4,12,16},{4,12,16},{5,8,17},{1,11,5},{1,11,5},{7,19,410},{7,16,212},{7,13,288}, +{7,12,224},{5,19,739},{5,15,254},{5,13,33},{5,11,270},{0,16,747},{3,11,227},{8,17,201},{8,15,21},{8,13,21},{8,12,26},{14,4,723},{3,16,209},{5,13,17},{3,11,218},{24,2,723},{3,11,218},{7,15,212},{7,15,212},{7,15,212},{7,12,208},{6,14,180},{5,13,17},{5,13,17},{5,10,18},{1,14,171},{3,10,26},{8,14,2},{8,14,2},{8,14,2},{8,11,5},{14,1,162}, 
+{5,13,1},{5,13,1},{5,10,2},{24,0,162},{5,10,2},{17,2,200},{7,16,4},{9,12,16},{7,12,16},{17,2,200},{27,2,200},{7,12,16},{0,11,218},{27,2,200},{0,11,218},{7,0,208},{7,0,208},{7,0,208},{7,0,208},{5,13,16},{5,13,16},{5,13,16},{6,9,17},{2,12,8},{2,12,8},{8,19,426},{8,17,223},{8,14,283},{8,13,234},{6,20,739},{6,16,267},{6,14,33}, +{6,12,273},{0,18,727},{3,12,218},{9,18,201},{9,16,18},{9,14,21},{9,13,26},{18,0,723},{4,17,208},{6,14,17},{4,12,209},{30,0,723},{4,12,209},{8,16,219},{8,16,219},{8,16,219},{8,13,218},{7,15,180},{6,14,17},{6,14,17},{6,11,18},{2,15,171},{4,11,27},{9,15,2},{9,15,2},{9,15,2},{9,12,5},{16,0,162},{6,14,1},{6,14,1},{6,11,2},{25,1,162}, +{6,11,2},{18,3,200},{7,18,4},{10,13,16},{8,13,16},{18,3,200},{28,3,200},{8,13,16},{0,12,208},{28,3,200},{0,12,208},{8,0,218},{8,0,218},{8,0,218},{8,0,218},{6,14,16},{6,14,16},{6,14,16},{7,10,17},{3,13,8},{3,13,8},{9,20,420},{9,18,223},{9,15,283},{9,14,234},{7,21,739},{7,17,267},{7,15,33},{7,13,273},{1,19,727},{4,13,213},{10,19,201}, +{10,17,18},{10,15,21},{10,14,26},{19,1,723},{4,19,204},{7,15,17},{5,13,209},{31,1,723},{5,13,209},{9,17,219},{9,17,219},{9,17,219},{9,14,218},{7,18,180},{7,15,17},{7,15,17},{7,12,20},{3,16,173},{5,12,17},{10,16,2},{10,16,2},{10,16,2},{10,13,5},{17,1,162},{7,15,1},{7,15,1},{6,12,1},{26,2,162},{6,12,1},{20,2,200},{9,18,5},{11,14,16}, +{9,14,16},{20,2,200},{24,7,200},{9,14,16},{0,13,208},{24,7,200},{0,13,208},{9,0,218},{9,0,218},{9,0,218},{9,0,218},{7,15,16},{7,15,16},{7,15,16},{7,12,20},{4,13,5},{4,13,5},{10,22,410},{10,19,209},{10,16,288},{10,15,234},{8,22,739},{8,18,254},{8,16,33},{8,14,270},{2,20,724},{6,14,227},{11,20,200},{11,18,13},{11,16,25},{11,15,29},{20,2,723}, 
+{5,20,209},{8,16,17},{6,14,218},{24,7,723},{6,14,218},{10,19,209},{10,19,209},{10,19,209},{10,15,209},{8,19,178},{8,16,17},{8,16,17},{8,13,18},{3,18,170},{6,13,26},{11,17,0},{11,17,0},{11,17,0},{11,15,4},{17,4,162},{8,16,1},{8,16,1},{8,13,2},{29,2,162},{8,13,2},{23,0,200},{10,19,1},{12,15,25},{8,16,17},{23,0,200},{30,5,200},{8,16,17}, +{0,14,218},{30,5,200},{0,14,218},{10,0,208},{10,0,208},{10,0,208},{10,0,208},{8,16,16},{8,16,16},{8,16,16},{9,12,17},{5,15,5},{5,15,5},{11,23,410},{11,20,212},{11,17,288},{11,16,224},{9,23,739},{9,19,254},{9,17,33},{9,15,270},{3,21,724},{7,15,227},{12,21,201},{12,19,21},{12,17,21},{12,16,26},{21,3,723},{7,20,209},{9,17,17},{7,15,218},{28,6,723}, +{7,15,218},{11,19,212},{11,19,212},{11,19,212},{11,16,208},{10,18,180},{9,17,17},{9,17,17},{9,14,18},{5,18,171},{7,14,26},{12,18,2},{12,18,2},{12,18,2},{12,15,5},{18,5,162},{9,17,1},{9,17,1},{9,14,2},{30,3,162},{9,14,2},{24,1,200},{11,20,4},{13,16,16},{11,16,16},{24,1,200},{31,6,200},{11,16,16},{0,15,218},{31,6,200},{0,15,218},{11,0,208}, +{11,0,208},{11,0,208},{11,0,208},{9,17,16},{9,17,16},{9,17,16},{10,13,17},{6,16,8},{6,16,8},{12,23,426},{12,21,223},{12,18,283},{12,17,234},{10,24,739},{10,20,267},{10,18,33},{10,16,273},{4,22,727},{7,16,218},{13,22,201},{13,20,18},{13,18,21},{13,17,26},{22,4,723},{8,21,208},{10,18,17},{8,16,209},{29,7,723},{8,16,209},{12,20,219},{12,20,219},{12,20,219}, +{12,17,218},{11,19,180},{10,18,17},{10,18,17},{10,15,18},{6,19,171},{8,15,27},{13,19,2},{13,19,2},{13,19,2},{13,16,5},{20,4,162},{10,18,1},{10,18,1},{10,15,2},{29,5,162},{10,15,2},{25,2,200},{11,22,4},{14,17,16},{12,17,16},{25,2,200},{27,10,200},{12,17,16},{0,16,208},{27,10,200},{0,16,208},{12,0,218},{12,0,218},{12,0,218},{12,0,218},{10,18,16}, 
+{10,18,16},{10,18,16},{11,14,17},{7,17,8},{7,17,8},{13,24,420},{13,22,223},{13,19,283},{13,18,234},{11,25,739},{11,21,267},{11,19,33},{11,17,273},{5,23,727},{8,17,213},{14,23,201},{14,21,18},{14,19,21},{14,18,26},{26,0,723},{8,23,204},{11,19,17},{9,17,209},{30,8,723},{9,17,209},{13,21,219},{13,21,219},{13,21,219},{13,18,218},{11,22,180},{11,19,17},{11,19,17}, +{11,16,20},{7,20,173},{9,16,17},{14,20,2},{14,20,2},{14,20,2},{14,17,5},{24,0,162},{11,19,1},{11,19,1},{10,16,1},{30,6,162},{10,16,1},{26,3,200},{13,22,5},{15,18,16},{13,18,16},{26,3,200},{28,11,200},{13,18,16},{0,17,208},{28,11,200},{0,17,208},{13,0,218},{13,0,218},{13,0,218},{13,0,218},{11,19,16},{11,19,16},{11,19,16},{11,16,20},{8,17,5}, +{8,17,5},{14,26,410},{14,23,209},{14,20,288},{14,19,234},{12,26,739},{12,22,254},{12,20,33},{12,18,270},{6,24,724},{10,18,227},{15,24,200},{15,22,13},{15,20,25},{15,19,29},{26,3,723},{9,24,209},{12,20,17},{10,18,218},{28,11,723},{10,18,218},{14,23,209},{14,23,209},{14,23,209},{14,19,209},{12,23,178},{12,20,17},{12,20,17},{12,17,18},{7,22,170},{10,17,26},{15,21,0}, +{15,21,0},{15,21,0},{15,19,4},{24,3,162},{12,20,1},{12,20,1},{12,17,2},{28,9,162},{12,17,2},{27,4,200},{14,23,1},{16,19,25},{12,20,17},{27,4,200},{31,11,200},{12,20,17},{0,18,218},{31,11,200},{0,18,218},{14,0,208},{14,0,208},{14,0,208},{14,0,208},{12,20,16},{12,20,16},{12,20,16},{13,16,17},{9,19,5},{9,19,5},{15,27,410},{15,24,212},{15,21,288}, +{15,20,224},{13,27,739},{13,23,254},{13,21,33},{13,19,270},{7,25,724},{11,19,227},{16,25,201},{16,23,21},{16,21,21},{16,20,26},{28,2,723},{11,24,209},{13,21,17},{11,19,218},{27,13,723},{11,19,218},{15,23,212},{15,23,212},{15,23,212},{15,20,208},{14,22,180},{13,21,17},{13,21,17},{13,18,18},{9,22,171},{11,18,26},{16,22,2},{16,22,2},{16,22,2},{16,19,5},{22,9,162}, 
+{13,21,1},{13,21,1},{13,18,2},{29,10,162},{13,18,2},{31,0,200},{15,24,4},{17,20,16},{15,20,16},{31,0,200},{30,13,200},{15,20,16},{0,19,218},{30,13,200},{0,19,218},{15,0,208},{15,0,208},{15,0,208},{15,0,208},{13,21,16},{13,21,16},{13,21,16},{14,17,17},{10,20,8},{10,20,8},{16,27,426},{16,25,223},{16,22,283},{16,21,234},{14,28,739},{14,24,267},{14,22,33}, +{14,20,273},{8,26,727},{11,20,218},{17,26,201},{17,24,18},{17,22,21},{17,21,26},{29,3,723},{12,25,208},{14,22,17},{12,20,209},{28,14,723},{12,20,209},{16,24,219},{16,24,219},{16,24,219},{16,21,218},{15,23,180},{14,22,17},{14,22,17},{14,19,18},{10,23,171},{12,19,27},{17,23,2},{17,23,2},{17,23,2},{17,20,5},{24,8,162},{14,22,1},{14,22,1},{14,19,2},{30,11,162}, +{14,19,2},{29,6,200},{15,26,4},{18,21,16},{16,21,16},{29,6,200},{31,14,200},{16,21,16},{0,20,208},{31,14,200},{0,20,208},{16,0,218},{16,0,218},{16,0,218},{16,0,218},{14,22,16},{14,22,16},{14,22,16},{15,18,17},{11,21,8},{11,21,8},{17,28,420},{17,26,223},{17,23,283},{17,22,234},{15,29,739},{15,25,267},{15,23,33},{15,21,273},{9,27,727},{12,21,213},{18,27,201}, +{18,25,18},{18,23,21},{18,22,26},{30,4,723},{12,27,204},{15,23,17},{13,21,209},{29,15,723},{13,21,209},{17,25,219},{17,25,219},{17,25,219},{17,22,218},{15,26,180},{15,23,17},{15,23,17},{15,20,20},{11,24,173},{13,20,17},{18,24,2},{18,24,2},{18,24,2},{18,21,5},{28,4,162},{15,23,1},{15,23,1},{14,20,1},{29,13,162},{14,20,1},{30,7,200},{17,26,5},{19,22,16}, +{17,22,16},{30,7,200},{30,16,200},{17,22,16},{0,21,208},{30,16,200},{0,21,208},{17,0,218},{17,0,218},{17,0,218},{17,0,218},{15,23,16},{15,23,16},{15,23,16},{15,20,20},{12,21,5},{12,21,5},{18,30,410},{18,27,209},{18,24,288},{18,23,234},{16,30,739},{16,26,254},{16,24,33},{16,22,270},{10,28,724},{14,22,227},{19,28,200},{19,26,13},{19,24,25},{19,23,29},{30,7,723}, 
+{13,28,209},{16,24,17},{14,22,218},{30,16,723},{14,22,218},{18,27,209},{18,27,209},{18,27,209},{18,23,209},{16,27,178},{16,24,17},{16,24,17},{16,21,18},{11,26,170},{14,21,26},{19,25,0},{19,25,0},{19,25,0},{19,23,4},{31,2,162},{16,24,1},{16,24,1},{16,21,2},{27,16,162},{16,21,2},{31,8,200},{18,27,1},{20,23,25},{16,24,17},{31,8,200},{28,19,200},{16,24,17}, +{0,22,218},{28,19,200},{0,22,218},{18,0,208},{18,0,208},{18,0,208},{18,0,208},{16,24,16},{16,24,16},{16,24,16},{17,20,17},{13,23,5},{13,23,5},{19,31,410},{19,28,212},{19,25,288},{19,24,224},{17,31,739},{17,27,254},{17,25,33},{17,23,270},{11,29,724},{15,23,227},{20,29,201},{20,27,21},{20,25,21},{20,24,26},{29,11,723},{15,28,209},{17,25,17},{15,23,218},{31,17,723}, +{15,23,218},{19,27,212},{19,27,212},{19,27,212},{19,24,208},{18,26,180},{17,25,17},{17,25,17},{17,22,18},{13,26,171},{15,22,26},{20,26,2},{20,26,2},{20,26,2},{20,23,5},{26,13,162},{17,25,1},{17,25,1},{17,22,2},{21,21,162},{17,22,2},{29,14,200},{19,28,4},{21,24,16},{19,24,16},{29,14,200},{29,20,200},{19,24,16},{0,23,218},{29,20,200},{0,23,218},{19,0,208}, +{19,0,208},{19,0,208},{19,0,208},{17,25,16},{17,25,16},{17,25,16},{18,21,17},{14,24,8},{14,24,8},{20,31,426},{20,29,223},{20,26,283},{20,25,234},{19,30,740},{18,28,267},{18,26,33},{18,24,273},{12,30,727},{15,24,218},{21,30,201},{21,28,18},{21,26,21},{21,25,26},{30,12,723},{16,29,208},{18,26,17},{16,24,209},{27,21,723},{16,24,209},{20,28,219},{20,28,219},{20,28,219}, +{20,25,218},{19,27,180},{18,26,17},{18,26,17},{18,23,18},{14,27,171},{16,23,27},{21,27,2},{21,27,2},{21,27,2},{21,24,5},{28,12,162},{18,26,1},{18,26,1},{18,23,2},{22,22,162},{18,23,2},{30,15,200},{19,30,4},{22,25,16},{20,25,16},{30,15,200},{30,21,200},{20,25,16},{0,24,208},{30,21,200},{0,24,208},{20,0,218},{20,0,218},{20,0,218},{20,0,218},{18,26,16}, 
+{18,26,16},{18,26,16},{19,22,17},{15,25,8},{15,25,8},{21,31,468},{21,30,223},{21,27,283},{21,26,234},{20,31,749},{19,29,267},{19,27,33},{19,25,273},{13,31,727},{16,25,213},{22,31,201},{22,29,18},{22,27,21},{22,26,26},{31,13,723},{16,31,204},{19,27,17},{17,25,209},{28,22,723},{17,25,209},{21,29,219},{21,29,219},{21,29,219},{21,26,218},{19,30,180},{19,27,17},{19,27,17}, +{19,24,20},{15,28,173},{17,24,17},{22,28,2},{22,28,2},{22,28,2},{22,25,5},{29,13,162},{19,27,1},{19,27,1},{18,24,1},{28,20,162},{18,24,1},{31,16,200},{21,30,5},{23,26,16},{21,26,16},{31,16,200},{31,22,200},{21,26,16},{0,25,208},{31,22,200},{0,25,208},{21,0,218},{21,0,218},{21,0,218},{21,0,218},{19,27,16},{19,27,16},{19,27,16},{19,24,20},{16,25,5}, +{16,25,5},{22,31,570},{22,31,209},{22,28,288},{22,27,234},{21,31,804},{20,30,254},{20,28,33},{20,26,270},{15,31,753},{18,26,227},{23,31,232},{23,30,13},{23,28,25},{23,27,29},{31,16,723},{19,31,216},{20,28,17},{18,26,218},{31,22,723},{18,26,218},{22,31,209},{22,31,209},{22,31,209},{22,27,209},{20,31,178},{20,28,17},{20,28,17},{20,25,18},{15,30,170},{18,25,26},{23,29,0}, +{23,29,0},{23,29,0},{23,27,4},{29,16,162},{20,28,1},{20,28,1},{20,25,2},{31,20,162},{20,25,2},{31,19,200},{22,31,1},{24,27,25},{20,28,17},{31,19,200},{27,26,200},{20,28,17},{0,26,218},{27,26,200},{0,26,218},{22,0,208},{22,0,208},{22,0,208},{22,0,208},{20,28,16},{20,28,16},{20,28,16},{21,24,17},{17,27,5},{17,27,5},{23,31,696},{23,31,237},{23,29,288}, +{23,28,224},{23,31,888},{21,31,254},{21,29,33},{21,27,270},{17,31,824},{19,27,227},{24,31,273},{24,31,21},{24,29,21},{24,28,26},{30,20,723},{20,31,233},{21,29,17},{19,27,218},{30,24,723},{19,27,218},{23,31,212},{23,31,212},{23,31,212},{23,28,208},{22,30,180},{21,29,17},{21,29,17},{21,26,18},{17,30,171},{19,26,26},{24,30,2},{24,30,2},{24,30,2},{24,27,5},{30,17,162}, 
+{21,29,1},{21,29,1},{21,26,2},{25,25,162},{21,26,2},{30,23,200},{24,31,20},{25,28,16},{23,28,16},{30,23,200},{28,27,200},{23,28,16},{0,27,218},{28,27,200},{0,27,218},{23,0,208},{23,0,208},{23,0,208},{23,0,208},{21,29,16},{21,29,16},{21,29,16},{22,25,17},{18,28,8},{18,28,8},{25,31,804},{24,31,334},{24,30,283},{24,29,234},{24,31,957},{22,31,297},{22,30,33}, +{22,28,273},{20,31,913},{19,28,218},{26,31,313},{25,31,51},{25,30,21},{25,29,26},{31,21,723},{22,31,281},{22,30,17},{20,28,209},{31,25,723},{20,28,209},{24,31,234},{24,31,234},{24,31,234},{24,29,218},{23,31,180},{22,30,17},{22,30,17},{22,27,18},{18,31,171},{20,27,27},{25,31,2},{25,31,2},{25,31,2},{25,28,5},{31,18,162},{22,30,1},{22,30,1},{22,27,2},{26,26,162}, +{22,27,2},{31,24,200},{25,31,50},{26,29,16},{24,29,16},{31,24,200},{24,31,200},{24,29,16},{0,28,208},{24,31,200},{0,28,208},{24,0,218},{24,0,218},{24,0,218},{24,0,218},{22,30,16},{22,30,16},{22,30,16},{23,26,17},{19,29,8},{19,29,8},{26,31,930},{25,31,492},{25,31,283},{25,30,234},{25,31,1068},{24,31,389},{23,31,33},{23,29,273},{21,31,999},{20,29,213},{27,31,379}, +{26,31,149},{26,31,21},{26,30,26},{29,27,723},{24,31,364},{23,31,17},{21,29,209},{27,29,723},{21,29,209},{25,31,267},{25,31,267},{25,31,267},{25,30,218},{24,31,205},{23,31,17},{23,31,17},{23,28,20},{20,31,189},{21,28,17},{26,31,5},{26,31,5},{26,31,5},{26,29,5},{30,22,162},{23,31,1},{23,31,1},{22,28,1},{27,27,162},{22,28,1},{30,28,200},{27,31,90},{27,30,16}, +{25,30,16},{30,28,200},{28,30,200},{25,30,16},{0,29,208},{28,30,200},{0,29,208},{25,0,218},{25,0,218},{25,0,218},{25,0,218},{23,31,16},{23,31,16},{23,31,16},{23,28,20},{20,29,5},{20,29,5},{27,31,877},{26,31,585},{26,31,329},{26,31,209},{26,31,990},{25,31,397},{25,31,36},{24,30,165},{23,31,910},{22,30,122},{28,31,306},{28,31,162},{27,31,36},{27,31,4},{31,26,546}, 
+{26,31,306},{25,31,20},{22,30,113},{29,29,546},{22,30,113},{26,31,329},{26,31,329},{26,31,329},{26,31,209},{25,31,276},{25,31,36},{25,31,36},{24,29,18},{22,31,230},{22,29,26},{27,31,36},{27,31,36},{27,31,36},{27,31,4},{30,25,162},{25,31,20},{25,31,20},{24,29,2},{30,27,162},{24,29,2},{30,30,113},{29,31,61},{28,31,0},{26,31,1},{30,30,113},{30,30,113},{26,31,1}, +{0,30,113},{30,30,113},{0,30,113},{26,0,208},{26,0,208},{26,0,208},{26,0,208},{25,30,17},{25,30,17},{25,30,17},{25,28,17},{21,31,5},{21,31,5},{28,31,731},{27,31,573},{27,31,404},{27,31,244},{27,31,797},{26,31,354},{26,31,98},{25,30,82},{25,31,737},{23,31,58},{29,31,190},{29,31,126},{28,31,65},{28,31,5},{30,29,333},{28,31,185},{27,31,52},{23,31,49},{29,30,333}, +{23,31,49},{27,31,404},{27,31,404},{27,31,404},{27,31,244},{27,31,356},{26,31,98},{26,31,98},{25,30,18},{24,31,315},{23,30,26},{28,31,65},{28,31,65},{28,31,65},{28,31,5},{31,26,162},{27,31,52},{27,31,52},{25,30,2},{29,29,162},{25,30,2},{31,30,25},{30,31,13},{30,31,4},{29,31,1},{31,30,25},{30,31,25},{29,31,1},{0,31,49},{30,31,25},{0,31,49},{27,0,208}, +{27,0,208},{27,0,208},{27,0,208},{26,31,17},{26,31,17},{26,31,17},{26,29,17},{23,31,9},{23,31,9},{29,31,642},{28,31,524},{28,31,443},{28,31,299},{28,31,623},{28,31,335},{27,31,201},{26,31,17},{27,31,610},{24,31,26},{30,31,131},{30,31,115},{29,31,101},{29,31,37},{31,29,193},{29,31,121},{28,31,85},{26,31,1},{29,31,193},{26,31,1},{28,31,443},{28,31,443},{28,31,443}, +{28,31,299},{28,31,398},{27,31,201},{27,31,201},{26,31,17},{26,31,378},{24,31,26},{29,31,101},{29,31,101},{29,31,101},{29,31,37},{31,28,145},{28,31,85},{28,31,85},{26,31,1},{30,30,145},{26,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{28,0,218},{28,0,218},{28,0,218},{28,0,218},{27,31,32}, 
+{27,31,32},{27,31,32},{27,30,17},{24,31,26},{24,31,26},{29,31,418},{29,31,354},{29,31,318},{29,31,254},{29,31,370},{28,31,223},{28,31,142},{28,31,25},{28,31,358},{26,31,58},{30,31,51},{30,31,35},{30,31,26},{30,31,10},{31,30,54},{30,31,34},{30,31,25},{28,31,0},{30,31,54},{28,31,0},{29,31,318},{29,31,318},{29,31,318},{29,31,254},{29,31,270},{28,31,142},{28,31,142}, +{28,31,25},{27,31,249},{26,31,58},{30,31,26},{30,31,26},{30,31,26},{30,31,10},{30,31,41},{30,31,25},{30,31,25},{28,31,0},{31,30,41},{28,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{29,0,218},{29,0,218},{29,0,218},{29,0,218},{28,31,61},{28,31,61},{28,31,61},{28,31,25},{26,31,58}, +{26,31,58},{0,9,421},{0,7,113},{0,5,5},{0,4,130},{0,6,925},{0,5,658},{0,4,274},{0,3,670},{0,3,1039},{0,3,751},{0,9,421},{0,7,113},{0,5,5},{0,4,130},{3,1,925},{0,5,658},{0,4,274},{0,3,670},{6,0,925},{0,3,670},{0,4,1},{0,4,1},{0,4,1},{0,3,4},{0,2,85},{0,2,45},{0,2,45},{0,1,50},{0,1,98},{0,1,59},{0,4,1}, +{0,4,1},{0,4,1},{0,3,4},{0,2,85},{0,2,45},{0,2,45},{0,1,50},{2,0,85},{0,1,50},{5,1,421},{0,7,113},{0,5,5},{0,4,130},{5,1,421},{9,0,421},{0,4,130},{0,3,445},{9,0,421},{0,3,445},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,12,425},{0,9,52},{0,6,10}, +{0,6,82},{0,8,1261},{0,6,805},{0,5,322},{0,4,833},{0,4,1445},{0,4,977},{0,12,425},{0,9,52},{0,6,10},{0,6,82},{3,3,1261},{0,6,805},{0,5,322},{0,4,833},{8,0,1261},{0,4,833},{0,7,0},{0,7,0},{0,7,0},{0,4,1},{0,3,225},{0,3,117},{0,3,117},{0,2,125},{0,2,257},{0,2,161},{0,7,0},{0,7,0},{0,7,0},{0,4,1},{2,0,221}, +{0,3,117},{0,3,117},{0,2,125},{2,1,221},{0,2,125},{7,0,421},{0,9,52},{1,6,5},{0,6,82},{7,0,421},{10,1,421},{0,6,82},{0,4,433},{10,1,421},{0,4,433},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,14,430},{0,11,29},{0,7,74},{0,7,46},{0,10,1514},{0,8,874},{0,6,307}, 
+{0,5,917},{0,5,1814},{0,4,1074},{0,14,430},{0,11,29},{1,7,35},{0,7,46},{6,0,1514},{0,8,874},{0,6,307},{0,5,917},{10,0,1514},{0,5,917},{0,10,10},{0,10,10},{0,10,10},{0,6,10},{0,5,340},{0,5,160},{0,5,160},{0,3,169},{0,3,421},{0,3,250},{0,10,10},{0,10,10},{0,10,10},{0,6,10},{2,2,338},{0,5,160},{0,5,160},{0,3,169},{2,2,338}, +{0,3,169},{8,1,421},{0,11,20},{2,7,5},{0,7,37},{8,1,421},{14,0,421},{0,7,37},{0,5,433},{14,0,421},{0,5,433},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,1,0},{0,1,0},{0,1,0},{0,1,4},{0,0,9},{0,0,9},{1,15,494},{1,12,102},{1,8,137},{1,8,122},{0,12,1517},{0,9,737},{0,7,185},{0,6,794},{0,7,1982},{0,5,1062},{1,15,430}, +{1,12,38},{2,8,34},{1,8,58},{7,1,1514},{0,9,737},{0,7,185},{0,6,794},{11,1,1514},{0,6,794},{1,11,74},{1,11,74},{1,11,74},{1,7,74},{0,8,338},{0,6,98},{0,6,98},{0,4,97},{0,4,514},{0,4,241},{1,11,10},{1,11,10},{1,11,10},{1,7,10},{3,3,338},{0,6,98},{0,6,98},{0,4,97},{8,0,338},{0,4,97},{10,0,421},{0,13,9},{3,8,4}, +{0,8,16},{10,0,421},{17,0,421},{0,8,16},{0,6,433},{17,0,421},{0,6,433},{1,0,73},{1,0,73},{1,0,73},{1,0,73},{0,4,1},{0,4,1},{0,4,1},{0,2,1},{0,2,37},{0,2,37},{1,18,629},{1,14,213},{2,9,354},{1,9,218},{0,15,1517},{0,11,630},{0,9,50},{0,7,670},{0,8,2198},{0,6,1109},{3,15,437},{2,13,41},{3,9,33},{2,9,53},{8,2,1514}, +{0,11,630},{0,9,50},{0,7,670},{12,2,1514},{0,7,670},{1,13,209},{1,13,209},{1,13,209},{1,8,212},{0,11,338},{0,8,41},{0,8,41},{0,5,50},{0,6,680},{0,5,275},{3,10,17},{3,10,17},{3,10,17},{2,8,17},{6,1,338},{0,8,41},{0,8,41},{0,5,50},{11,0,338},{0,5,50},{12,0,421},{0,15,1},{4,9,5},{0,9,1},{12,0,421},{20,0,421},{0,9,1}, +{0,7,445},{20,0,421},{0,7,445},{1,0,208},{1,0,208},{1,0,208},{1,0,208},{0,7,1},{0,7,1},{0,7,1},{0,4,0},{0,3,106},{0,3,106},{2,19,821},{2,15,405},{2,11,570},{2,10,410},{0,18,1514},{0,13,577},{0,10,14},{0,8,602},{0,10,2462},{0,7,1175},{3,17,441},{3,14,41},{4,10,35},{3,10,53},{9,3,1514},{0,13,577},{0,10,14},{0,8,602},{18,0,1514}, 
+{0,8,602},{2,14,401},{2,14,401},{2,14,401},{2,9,404},{0,13,340},{0,10,13},{0,10,13},{0,6,29},{0,7,851},{0,6,353},{3,13,17},{3,13,17},{3,13,17},{3,9,17},{8,0,338},{0,10,13},{0,10,13},{0,6,29},{5,5,338},{0,6,29},{13,1,421},{1,16,4},{5,10,5},{1,10,1},{13,1,421},{21,1,421},{1,10,1},{0,8,433},{21,1,421},{0,8,433},{2,0,400}, +{2,0,400},{2,0,400},{2,0,400},{0,9,1},{0,9,1},{0,9,1},{0,6,4},{0,4,208},{0,4,208},{3,20,854},{3,16,437},{3,12,597},{3,11,443},{1,19,1515},{0,15,570},{1,11,15},{0,9,582},{0,11,2337},{0,9,933},{4,18,430},{4,15,29},{5,11,35},{4,11,46},{10,4,1514},{0,15,521},{1,11,14},{0,9,533},{19,1,1514},{0,9,533},{3,15,434},{3,15,434},{3,15,434}, +{3,10,437},{1,14,341},{1,11,14},{1,11,14},{1,7,30},{0,8,755},{0,7,222},{4,14,10},{4,14,10},{4,14,10},{4,10,10},{9,1,338},{0,12,2},{0,12,2},{0,7,26},{16,0,338},{0,7,26},{15,0,421},{3,16,4},{6,11,5},{2,11,1},{15,0,421},{25,0,421},{2,11,1},{0,9,433},{25,0,421},{0,9,433},{3,0,433},{3,0,433},{3,0,433},{3,0,433},{1,10,2}, +{1,10,2},{1,10,2},{1,7,5},{0,6,157},{0,6,157},{4,21,866},{4,17,454},{4,13,609},{4,12,461},{2,20,1515},{1,16,570},{2,12,19},{1,10,582},{0,13,2214},{0,10,707},{5,19,430},{5,16,38},{6,12,34},{5,12,58},{14,0,1514},{0,16,458},{2,12,18},{0,10,482},{20,2,1514},{0,10,482},{4,16,445},{4,16,445},{4,16,445},{4,11,449},{2,15,341},{2,12,19},{2,12,19}, +{2,8,26},{0,10,635},{0,8,106},{5,15,10},{5,15,10},{5,15,10},{5,11,10},{10,2,338},{1,13,2},{1,13,2},{1,8,16},{17,1,338},{1,8,16},{16,1,421},{3,18,4},{7,12,4},{3,12,0},{16,1,421},{26,1,421},{3,12,0},{0,10,433},{26,1,421},{0,10,433},{4,0,445},{4,0,445},{4,0,445},{4,0,445},{2,11,2},{2,11,2},{2,11,2},{2,7,10},{0,8,90}, +{0,8,90},{5,22,854},{5,18,438},{5,14,603},{5,13,443},{3,21,1517},{3,16,554},{3,13,21},{3,11,589},{0,15,2046},{0,11,535},{7,19,437},{6,17,41},{7,13,33},{6,13,53},{14,3,1514},{0,18,429},{3,13,21},{0,11,454},{26,0,1514},{0,11,454},{5,17,434},{5,17,434},{5,17,434},{5,12,437},{3,17,340},{3,13,20},{3,13,20},{3,9,29},{0,12,557},{0,10,49},{7,14,17}, 
+{7,14,17},{7,14,17},{6,12,17},{13,0,338},{2,14,4},{2,14,4},{1,10,10},{20,1,338},{1,10,10},{17,2,421},{4,19,1},{8,13,5},{4,13,1},{17,2,421},{29,1,421},{4,13,1},{0,11,445},{29,1,421},{0,11,445},{5,0,433},{5,0,433},{5,0,433},{5,0,433},{3,12,1},{3,12,1},{3,12,1},{3,9,4},{0,10,40},{0,10,40},{6,23,854},{6,19,438},{6,15,603}, +{6,14,443},{4,22,1515},{3,18,566},{4,14,15},{3,12,578},{0,16,1911},{0,12,458},{7,21,441},{7,18,41},{8,14,35},{7,14,53},{16,2,1514},{0,20,425},{4,14,14},{0,12,433},{27,1,1514},{0,12,433},{6,18,434},{6,18,434},{6,18,434},{6,13,437},{4,17,341},{4,14,14},{4,14,14},{4,10,30},{0,14,477},{0,11,35},{7,17,17},{7,17,17},{7,17,17},{7,13,17},{14,1,338}, +{3,15,4},{3,15,4},{2,11,10},{24,0,338},{2,11,10},{18,3,421},{5,20,4},{9,14,5},{5,14,1},{18,3,421},{30,2,421},{5,14,1},{0,12,433},{30,2,421},{0,12,433},{6,0,433},{6,0,433},{6,0,433},{6,0,433},{4,13,2},{4,13,2},{4,13,2},{4,10,5},{0,12,25},{0,12,25},{7,24,854},{7,20,437},{7,16,597},{7,15,443},{5,23,1515},{4,19,570},{5,15,15}, +{4,13,582},{0,18,1787},{0,13,442},{8,22,430},{8,19,29},{9,15,35},{8,15,46},{17,3,1514},{2,20,425},{5,15,14},{1,13,433},{28,2,1514},{1,13,433},{7,19,434},{7,19,434},{7,19,434},{7,14,437},{5,18,341},{5,15,14},{5,15,14},{5,11,30},{0,16,419},{1,12,45},{8,18,10},{8,18,10},{8,18,10},{8,14,10},{16,0,338},{4,16,2},{4,16,2},{3,12,17},{25,1,338}, +{3,12,17},{20,2,421},{7,20,4},{10,15,5},{6,15,1},{20,2,421},{31,3,421},{6,15,1},{0,13,433},{31,3,421},{0,13,433},{7,0,433},{7,0,433},{7,0,433},{7,0,433},{5,14,2},{5,14,2},{5,14,2},{5,11,5},{0,13,9},{0,13,9},{8,25,866},{8,21,454},{8,17,609},{8,16,461},{6,24,1515},{5,20,570},{6,16,19},{5,14,582},{0,20,1686},{1,14,442},{9,23,430}, +{9,20,38},{10,16,34},{9,16,58},{18,4,1514},{3,21,425},{6,16,18},{2,14,433},{29,3,1514},{2,14,433},{8,20,445},{8,20,445},{8,20,445},{8,15,449},{6,19,341},{6,16,19},{6,16,19},{6,12,26},{0,17,372},{2,13,45},{9,19,10},{9,19,10},{9,19,10},{9,15,10},{17,1,338},{5,17,2},{5,17,2},{5,12,16},{26,2,338},{5,12,16},{23,0,421},{7,22,4},{11,16,4}, 
+{7,16,0},{23,0,421},{30,5,421},{7,16,0},{0,14,433},{30,5,421},{0,14,433},{8,0,445},{8,0,445},{8,0,445},{8,0,445},{6,15,2},{6,15,2},{6,15,2},{6,11,10},{0,15,5},{0,15,5},{9,26,854},{9,22,438},{9,18,603},{9,17,443},{7,25,1517},{7,20,554},{7,17,21},{7,15,589},{0,22,1614},{2,15,462},{11,23,437},{10,21,41},{11,17,33},{10,17,53},{22,0,1514}, +{3,23,422},{7,17,21},{3,15,446},{30,4,1514},{3,15,446},{9,21,434},{9,21,434},{9,21,434},{9,16,437},{7,21,340},{7,17,20},{7,17,20},{7,13,29},{0,19,347},{3,14,46},{11,18,17},{11,18,17},{11,18,17},{10,16,17},{17,4,338},{6,18,4},{6,18,4},{5,14,10},{29,2,338},{5,14,10},{24,1,421},{8,23,1},{12,17,5},{8,17,1},{24,1,421},{31,6,421},{8,17,1}, +{0,15,445},{31,6,421},{0,15,445},{9,0,433},{9,0,433},{9,0,433},{9,0,433},{7,16,1},{7,16,1},{7,16,1},{7,13,4},{2,16,8},{2,16,8},{10,27,854},{10,23,438},{10,19,603},{10,18,443},{8,26,1515},{7,22,566},{8,18,15},{7,16,578},{0,23,1566},{3,16,443},{11,25,441},{11,22,41},{12,18,35},{11,18,53},{23,1,1514},{4,24,425},{8,18,14},{4,16,433},{31,5,1514}, +{4,16,433},{10,22,434},{10,22,434},{10,22,434},{10,17,437},{8,21,341},{8,18,14},{8,18,14},{8,14,30},{1,20,341},{4,15,35},{11,21,17},{11,21,17},{11,21,17},{11,17,17},{18,5,338},{7,19,4},{7,19,4},{6,15,10},{30,3,338},{6,15,10},{25,2,421},{9,24,4},{13,18,5},{9,18,1},{25,2,421},{29,9,421},{9,18,1},{0,16,433},{29,9,421},{0,16,433},{10,0,433}, +{10,0,433},{10,0,433},{10,0,433},{8,17,2},{8,17,2},{8,17,2},{8,14,5},{3,17,8},{3,17,8},{11,28,854},{11,24,437},{11,20,597},{11,19,443},{9,27,1515},{8,23,570},{9,19,15},{8,17,582},{0,25,1533},{4,17,442},{12,26,430},{12,23,29},{13,19,35},{12,19,46},{24,2,1514},{6,24,425},{9,19,14},{5,17,433},{27,9,1514},{5,17,433},{11,23,434},{11,23,434},{11,23,434}, 
+{11,18,437},{9,22,341},{9,19,14},{9,19,14},{9,15,30},{2,21,341},{5,16,45},{12,22,10},{12,22,10},{12,22,10},{12,18,10},{20,4,338},{8,20,2},{8,20,2},{7,16,17},{29,5,338},{7,16,17},{26,3,421},{11,24,4},{14,19,5},{10,19,1},{26,3,421},{30,10,421},{10,19,1},{0,17,433},{30,10,421},{0,17,433},{11,0,433},{11,0,433},{11,0,433},{11,0,433},{9,18,2}, +{9,18,2},{9,18,2},{9,15,5},{4,17,9},{4,17,9},{12,29,866},{12,25,454},{12,21,609},{12,20,461},{10,28,1515},{9,24,570},{10,20,19},{9,18,582},{0,27,1521},{5,18,442},{13,27,430},{13,24,38},{14,20,34},{13,20,58},{25,3,1514},{7,25,425},{10,20,18},{6,18,433},{28,10,1514},{6,18,433},{12,24,445},{12,24,445},{12,24,445},{12,19,449},{10,23,341},{10,20,19},{10,20,19}, +{10,16,26},{3,22,341},{6,17,45},{13,23,10},{13,23,10},{13,23,10},{13,19,10},{24,0,338},{9,21,2},{9,21,2},{9,16,16},{30,6,338},{9,16,16},{28,2,421},{11,26,4},{15,20,4},{11,20,0},{28,2,421},{31,11,421},{11,20,0},{0,18,433},{31,11,421},{0,18,433},{12,0,445},{12,0,445},{12,0,445},{12,0,445},{10,19,2},{10,19,2},{10,19,2},{10,15,10},{4,19,5}, +{4,19,5},{13,30,854},{13,26,438},{13,22,603},{13,21,443},{11,29,1517},{11,24,554},{11,21,21},{11,19,589},{1,28,1518},{6,19,462},{15,27,437},{14,25,41},{15,21,33},{14,21,53},{26,4,1514},{7,27,422},{11,21,21},{7,19,446},{31,10,1514},{7,19,446},{13,25,434},{13,25,434},{13,25,434},{13,20,437},{11,25,340},{11,21,20},{11,21,20},{11,17,29},{3,24,339},{7,18,46},{15,22,17}, +{15,22,17},{15,22,17},{14,20,17},{24,3,338},{10,22,4},{10,22,4},{9,18,10},{28,9,338},{9,18,10},{31,0,421},{12,27,1},{16,21,5},{12,21,1},{31,0,421},{30,13,421},{12,21,1},{0,19,445},{30,13,421},{0,19,445},{13,0,433},{13,0,433},{13,0,433},{13,0,433},{11,20,1},{11,20,1},{11,20,1},{11,17,4},{6,20,8},{6,20,8},{14,31,854},{14,27,438},{14,23,603}, 
+{14,22,443},{12,30,1515},{11,26,566},{12,22,15},{11,20,578},{2,29,1518},{7,20,443},{15,29,441},{15,26,41},{16,22,35},{15,22,53},{30,0,1514},{8,28,425},{12,22,14},{8,20,433},{30,12,1514},{8,20,433},{14,26,434},{14,26,434},{14,26,434},{14,21,437},{12,25,341},{12,22,14},{12,22,14},{12,18,30},{5,24,341},{8,19,35},{15,25,17},{15,25,17},{15,25,17},{15,21,17},{22,9,338}, +{11,23,4},{11,23,4},{10,19,10},{29,10,338},{10,19,10},{31,3,421},{13,28,4},{17,22,5},{13,22,1},{31,3,421},{31,14,421},{13,22,1},{0,20,433},{31,14,421},{0,20,433},{14,0,433},{14,0,433},{14,0,433},{14,0,433},{12,21,2},{12,21,2},{12,21,2},{12,18,5},{7,21,8},{7,21,8},{15,31,878},{15,28,437},{15,24,597},{15,23,443},{13,31,1515},{12,27,570},{13,23,15}, +{12,21,582},{3,30,1518},{8,21,442},{16,30,430},{16,27,29},{17,23,35},{16,23,46},{31,1,1514},{10,28,425},{13,23,14},{9,21,433},{31,13,1514},{9,21,433},{15,27,434},{15,27,434},{15,27,434},{15,22,437},{13,26,341},{13,23,14},{13,23,14},{13,19,30},{6,25,341},{9,20,45},{16,26,10},{16,26,10},{16,26,10},{16,22,10},{24,8,338},{12,24,2},{12,24,2},{11,20,17},{30,11,338}, +{11,20,17},{30,7,421},{15,28,4},{18,23,5},{14,23,1},{30,7,421},{30,16,421},{14,23,1},{0,21,433},{30,16,421},{0,21,433},{15,0,433},{15,0,433},{15,0,433},{15,0,433},{13,22,2},{13,22,2},{13,22,2},{13,19,5},{8,21,9},{8,21,9},{16,31,926},{16,29,454},{16,25,609},{16,24,461},{14,31,1542},{13,28,570},{14,24,19},{13,22,582},{4,31,1521},{9,22,442},{17,31,430}, +{17,28,38},{18,24,34},{17,24,58},{29,7,1514},{11,29,425},{14,24,18},{10,22,433},{27,17,1514},{10,22,433},{16,28,445},{16,28,445},{16,28,445},{16,23,449},{14,27,341},{14,24,19},{14,24,19},{14,20,26},{7,26,341},{10,21,45},{17,27,10},{17,27,10},{17,27,10},{17,23,10},{28,4,338},{13,25,2},{13,25,2},{13,20,16},{29,13,338},{13,20,16},{31,8,421},{15,30,4},{19,24,4}, 
+{15,24,0},{31,8,421},{31,17,421},{15,24,0},{0,22,433},{31,17,421},{0,22,433},{16,0,445},{16,0,445},{16,0,445},{16,0,445},{14,23,2},{14,23,2},{14,23,2},{14,19,10},{8,23,5},{8,23,5},{17,31,1034},{17,30,438},{17,26,603},{17,25,443},{16,31,1598},{15,28,554},{15,25,21},{15,23,589},{6,31,1535},{10,23,462},{19,31,437},{18,29,41},{19,25,33},{18,25,53},{30,8,1514}, +{11,31,422},{15,25,21},{11,23,446},{28,18,1514},{11,23,446},{17,29,434},{17,29,434},{17,29,434},{17,24,437},{15,29,340},{15,25,20},{15,25,20},{15,21,29},{7,28,339},{11,22,46},{19,26,17},{19,26,17},{19,26,17},{18,24,17},{31,2,338},{14,26,4},{14,26,4},{13,22,10},{27,16,338},{13,22,10},{31,11,421},{16,31,1},{20,25,5},{16,25,1},{31,11,421},{31,19,421},{16,25,1}, +{0,23,445},{31,19,421},{0,23,445},{17,0,433},{17,0,433},{17,0,433},{17,0,433},{15,24,1},{15,24,1},{15,24,1},{15,21,4},{10,24,8},{10,24,8},{18,31,1166},{18,31,438},{18,27,603},{18,26,443},{17,31,1643},{15,30,566},{16,26,15},{15,24,578},{8,31,1566},{11,24,443},{20,31,458},{19,30,41},{20,26,35},{19,26,53},{31,9,1514},{13,31,429},{16,26,14},{12,24,433},{29,19,1514}, +{12,24,433},{18,30,434},{18,30,434},{18,30,434},{18,25,437},{16,29,341},{16,26,14},{16,26,14},{16,22,30},{9,28,341},{12,23,35},{19,29,17},{19,29,17},{19,29,17},{19,25,17},{26,13,338},{15,27,4},{15,27,4},{14,23,10},{21,21,338},{14,23,10},{30,15,421},{18,31,5},{21,26,5},{17,26,1},{30,15,421},{30,21,421},{17,26,1},{0,24,433},{30,21,421},{0,24,433},{18,0,433}, +{18,0,433},{18,0,433},{18,0,433},{16,25,2},{16,25,2},{16,25,2},{16,22,5},{11,25,8},{11,25,8},{20,31,1326},{19,31,470},{19,28,597},{19,27,443},{18,31,1742},{16,31,570},{17,27,15},{16,25,582},{10,31,1638},{12,25,442},{21,31,506},{20,31,29},{21,27,35},{20,27,46},{29,15,1514},{15,31,461},{17,27,14},{13,25,433},{30,20,1514},{13,25,433},{19,31,434},{19,31,434},{19,31,434}, 
+{19,26,437},{17,30,341},{17,27,14},{17,27,14},{17,23,30},{10,29,341},{13,24,45},{20,30,10},{20,30,10},{20,30,10},{20,26,10},{28,12,338},{16,28,2},{16,28,2},{15,24,17},{22,22,338},{15,24,17},{31,16,421},{20,31,20},{22,27,5},{18,27,1},{31,16,421},{31,22,421},{18,27,1},{0,25,433},{31,22,421},{0,25,433},{19,0,433},{19,0,433},{19,0,433},{19,0,433},{17,26,2}, +{17,26,2},{17,26,2},{17,23,5},{12,25,9},{12,25,9},{21,31,1470},{20,31,561},{20,29,609},{20,28,461},{19,31,1895},{18,31,578},{18,28,19},{17,26,582},{12,31,1761},{13,26,442},{22,31,590},{21,31,59},{22,28,34},{21,28,58},{30,16,1514},{17,31,530},{18,28,18},{14,26,433},{31,21,1514},{14,26,433},{20,31,461},{20,31,461},{20,31,461},{20,27,449},{18,31,341},{18,28,19},{18,28,19}, +{18,24,26},{11,30,341},{14,25,45},{21,31,10},{21,31,10},{21,31,10},{21,27,10},{29,13,338},{17,29,2},{17,29,2},{17,24,16},{28,20,338},{17,24,16},{31,19,421},{21,31,50},{23,28,4},{19,28,0},{31,19,421},{30,24,421},{19,28,0},{0,26,433},{30,24,421},{0,26,433},{20,0,445},{20,0,445},{20,0,445},{20,0,445},{18,27,2},{18,27,2},{18,27,2},{18,23,10},{12,27,5}, +{12,27,5},{22,31,1674},{21,31,753},{21,30,603},{21,29,443},{21,31,2046},{19,31,629},{19,29,21},{19,27,589},{15,31,1917},{14,27,462},{24,31,674},{23,31,120},{23,29,33},{22,29,53},{30,19,1514},{19,31,629},{19,29,21},{15,27,446},{27,25,1514},{15,27,446},{21,31,497},{21,31,497},{21,31,497},{21,28,437},{19,31,388},{19,29,20},{19,29,20},{19,25,29},{12,31,347},{15,26,46},{23,30,17}, +{23,30,17},{23,30,17},{22,28,17},{29,16,338},{18,30,4},{18,30,4},{17,26,10},{31,20,338},{17,26,10},{30,23,421},{23,31,104},{24,29,5},{20,29,1},{30,23,421},{30,26,421},{20,29,1},{0,27,445},{30,26,421},{0,27,445},{21,0,433},{21,0,433},{21,0,433},{21,0,433},{19,28,1},{19,28,1},{19,28,1},{19,25,4},{14,28,8},{14,28,8},{23,31,1902},{22,31,995},{22,31,603}, 
+{22,30,443},{22,31,2235},{20,31,759},{20,30,15},{19,28,578},{17,31,2118},{15,28,443},{25,31,770},{24,31,250},{24,30,35},{23,30,53},{29,23,1514},{21,31,701},{20,30,14},{16,28,433},{28,26,1514},{16,28,433},{22,31,554},{22,31,554},{22,31,554},{22,29,437},{21,31,437},{20,30,14},{20,30,14},{20,26,30},{14,31,379},{16,27,35},{24,31,25},{24,31,25},{24,31,25},{23,29,17},{30,17,338}, +{19,31,4},{19,31,4},{18,27,10},{25,25,338},{18,27,10},{31,24,421},{25,31,169},{25,30,5},{21,30,1},{31,24,421},{31,27,421},{21,30,1},{0,28,433},{31,27,421},{0,28,433},{22,0,433},{22,0,433},{22,0,433},{22,0,433},{20,29,2},{20,29,2},{20,29,2},{20,26,5},{15,29,8},{15,29,8},{24,31,2045},{24,31,1233},{23,31,629},{23,31,442},{24,31,2360},{22,31,914},{21,31,14}, +{20,29,549},{19,31,2241},{16,29,409},{26,31,849},{25,31,395},{25,31,34},{24,31,45},{30,24,1459},{23,31,778},{21,31,13},{17,29,400},{29,27,1459},{17,29,400},{23,31,629},{23,31,629},{23,31,629},{23,30,437},{22,31,491},{21,31,14},{21,31,14},{21,27,30},{16,31,446},{17,28,45},{25,31,34},{25,31,34},{25,31,34},{24,30,10},{31,18,338},{21,31,13},{21,31,13},{19,28,17},{26,26,338}, +{19,28,17},{30,28,392},{27,31,218},{26,31,4},{22,31,0},{30,28,392},{28,30,392},{22,31,0},{0,29,400},{28,30,392},{0,29,400},{23,0,433},{23,0,433},{23,0,433},{23,0,433},{21,30,2},{21,30,2},{21,30,2},{21,27,5},{16,29,9},{16,29,9},{25,31,1767},{25,31,1167},{24,31,701},{24,31,449},{24,31,1976},{23,31,747},{22,31,66},{22,29,337},{20,31,1820},{17,30,217},{27,31,611}, +{26,31,317},{26,31,61},{25,31,10},{31,24,1064},{25,31,587},{23,31,41},{18,30,208},{24,31,1064},{18,30,208},{24,31,701},{24,31,701},{24,31,701},{24,31,449},{23,31,581},{22,31,66},{22,31,66},{22,28,26},{18,31,530},{18,29,45},{26,31,61},{26,31,61},{26,31,61},{25,31,10},{30,22,338},{23,31,41},{23,31,41},{21,28,16},{27,27,338},{21,28,16},{29,31,200},{28,31,106},{27,31,1}, 
+{25,31,1},{29,31,200},{31,29,200},{25,31,1},{0,30,208},{31,29,200},{0,30,208},{24,0,445},{24,0,445},{24,0,445},{24,0,445},{22,31,2},{22,31,2},{22,31,2},{22,27,10},{16,31,5},{16,31,5},{26,31,1542},{26,31,1122},{25,31,833},{25,31,497},{26,31,1647},{24,31,687},{24,31,203},{23,30,122},{22,31,1515},{19,30,110},{28,31,410},{28,31,266},{27,31,116},{27,31,20},{31,26,722}, +{26,31,402},{25,31,100},{21,30,74},{29,29,722},{21,30,74},{25,31,833},{25,31,833},{25,31,833},{25,31,497},{24,31,707},{24,31,203},{24,31,203},{23,29,29},{20,31,619},{19,30,46},{27,31,116},{27,31,116},{27,31,116},{27,31,20},{30,25,338},{25,31,100},{25,31,100},{21,30,10},{30,27,338},{21,30,10},{31,29,61},{29,31,37},{29,31,1},{27,31,4},{31,29,61},{31,30,61},{27,31,4}, +{0,30,73},{31,30,61},{0,30,73},{25,0,433},{25,0,433},{25,0,433},{25,0,433},{23,31,25},{23,31,25},{23,31,25},{23,29,4},{18,31,17},{18,31,17},{27,31,1406},{27,31,1134},{26,31,962},{26,31,602},{27,31,1454},{25,31,702},{25,31,341},{24,31,59},{24,31,1378},{20,31,35},{29,31,318},{28,31,250},{28,31,169},{28,31,61},{30,29,509},{28,31,313},{27,31,164},{22,31,10},{29,30,509}, +{22,31,10},{26,31,962},{26,31,962},{26,31,962},{26,31,602},{26,31,827},{25,31,341},{25,31,341},{24,30,30},{22,31,747},{20,31,35},{28,31,169},{28,31,169},{28,31,169},{28,31,61},{31,26,338},{27,31,164},{27,31,164},{22,31,10},{29,29,338},{22,31,10},{31,30,9},{31,31,9},{30,31,4},{30,31,0},{31,30,9},{30,31,9},{30,31,0},{0,31,9},{30,31,9},{0,31,9},{26,0,433}, +{26,0,433},{26,0,433},{26,0,433},{24,31,50},{24,31,50},{24,31,50},{24,30,5},{20,31,26},{20,31,26},{28,31,1135},{28,31,991},{27,31,874},{27,31,602},{28,31,1162},{26,31,618},{26,31,362},{25,31,5},{25,31,1087},{22,31,58},{30,31,219},{29,31,161},{29,31,125},{29,31,61},{31,29,297},{29,31,193},{28,31,117},{24,31,1},{30,30,297},{24,31,1},{27,31,874},{27,31,874},{27,31,874}, 
+{27,31,602},{27,31,730},{26,31,362},{26,31,362},{25,31,5},{24,31,681},{22,31,58},{29,31,125},{29,31,125},{29,31,125},{29,31,61},{30,29,221},{28,31,117},{28,31,117},{24,31,1},{31,29,221},{24,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{27,0,433},{27,0,433},{27,0,433},{27,0,433},{25,31,101}, +{25,31,101},{25,31,101},{25,31,5},{22,31,58},{22,31,58},{29,31,885},{28,31,751},{28,31,670},{28,31,526},{28,31,778},{27,31,483},{27,31,314},{26,31,10},{26,31,777},{24,31,117},{30,31,75},{30,31,59},{30,31,50},{30,31,34},{30,31,114},{29,31,81},{29,31,45},{27,31,0},{31,30,114},{27,31,0},{28,31,670},{28,31,670},{28,31,670},{28,31,526},{28,31,553},{27,31,314},{27,31,314}, +{26,31,10},{25,31,518},{24,31,117},{30,31,50},{30,31,50},{30,31,50},{30,31,34},{31,29,85},{29,31,45},{29,31,45},{27,31,0},{29,31,85},{27,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{28,0,445},{28,0,445},{28,0,445},{28,0,445},{27,31,145},{27,31,145},{27,31,145},{26,31,10},{24,31,117}, +{24,31,117},{0,13,884},{0,10,225},{0,7,18},{0,6,265},{0,9,1899},{0,7,1355},{0,6,589},{0,4,1354},{0,5,2124},{0,4,1498},{0,13,884},{0,10,225},{0,7,18},{0,6,265},{4,2,1896},{0,7,1355},{0,6,589},{0,4,1354},{7,1,1896},{0,4,1354},{0,6,0},{0,6,0},{0,6,0},{0,4,4},{0,3,162},{0,3,90},{0,3,90},{0,2,104},{0,2,200},{0,1,134},{0,6,0}, +{0,6,0},{0,6,0},{0,4,4},{0,3,162},{0,3,90},{0,3,90},{0,2,104},{3,0,162},{0,2,104},{6,3,882},{0,10,225},{0,7,18},{0,6,265},{6,3,882},{8,3,882},{0,6,265},{0,5,890},{8,3,882},{0,5,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,15,884},{0,12,170},{0,8,8}, 
+{0,7,202},{0,10,2360},{0,8,1530},{0,7,643},{0,5,1579},{0,6,2684},{0,5,1804},{0,15,884},{0,12,170},{0,8,8},{0,7,202},{5,2,2355},{0,8,1530},{0,7,643},{0,5,1579},{7,2,2355},{0,5,1579},{0,9,1},{0,9,1},{0,9,1},{0,5,1},{0,4,340},{0,4,180},{0,4,180},{0,2,200},{0,2,392},{0,2,236},{0,9,1},{0,9,1},{0,9,1},{0,5,1},{2,1,338}, +{0,4,180},{0,4,180},{0,2,200},{1,2,338},{0,2,200},{8,2,882},{0,12,170},{0,8,8},{0,7,202},{8,2,882},{12,2,882},{0,7,202},{0,6,890},{12,2,882},{0,6,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,18,882},{0,14,106},{0,10,52},{0,9,148},{0,12,2899},{0,9,1773},{0,8,725}, +{0,6,1854},{0,7,3348},{0,5,2124},{0,18,882},{0,14,106},{0,10,52},{0,9,148},{2,9,2899},{0,9,1773},{0,8,725},{0,6,1854},{12,0,2899},{0,6,1854},{0,11,1},{0,11,1},{0,11,1},{0,7,1},{0,6,580},{0,5,306},{0,5,306},{0,3,325},{0,3,667},{0,3,406},{0,11,1},{0,11,1},{0,11,1},{0,7,1},{1,4,578},{0,5,306},{0,5,306},{0,3,325},{4,1,578}, +{0,3,325},{9,3,882},{0,14,106},{1,9,8},{0,9,148},{9,3,882},{18,0,882},{0,9,148},{0,7,890},{18,0,882},{0,7,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,21,920},{0,16,89},{1,11,120},{0,10,121},{0,14,3051},{0,11,1709},{0,9,557},{0,7,1795},{0,8,3651},{0,6,2174},{1,19,886}, +{0,16,89},{1,11,56},{0,10,121},{8,1,3048},{0,11,1709},{0,9,557},{0,7,1795},{6,5,3048},{0,7,1795},{0,14,37},{0,14,37},{0,14,37},{0,8,37},{0,8,648},{0,7,274},{0,7,274},{0,4,277},{0,4,824},{0,4,421},{1,12,4},{1,12,4},{1,12,4},{1,8,8},{3,3,648},{0,7,274},{0,7,274},{0,4,277},{8,0,648},{0,4,277},{10,4,882},{0,16,53},{2,10,8}, +{0,10,85},{10,4,882},{19,1,882},{0,10,85},{0,8,900},{19,1,882},{0,8,900},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,2,0},{0,2,0},{0,2,0},{0,1,1},{0,1,10},{0,1,10},{1,22,995},{1,17,158},{1,12,230},{1,11,186},{0,17,3051},{0,13,1579},{0,10,346},{0,8,1630},{0,9,3924},{0,7,2173},{2,20,885},{2,16,90},{2,12,53},{1,11,122},{8,4,3048}, 
+{0,13,1579},{0,10,346},{0,8,1630},{14,2,3048},{0,8,1630},{1,15,113},{1,15,113},{1,15,113},{1,9,117},{0,11,648},{0,9,169},{0,9,169},{0,5,200},{0,6,990},{0,5,425},{2,13,2},{2,13,2},{2,13,2},{2,9,2},{6,1,648},{0,9,169},{0,9,169},{0,5,200},{11,0,648},{0,5,200},{13,2,882},{0,18,17},{3,11,18},{0,11,34},{13,2,882},{22,1,882},{0,11,34}, +{0,9,890},{22,1,882},{0,9,890},{1,0,113},{1,0,113},{1,0,113},{1,0,113},{0,5,0},{0,5,0},{0,5,0},{0,3,0},{0,2,61},{0,2,61},{1,24,1173},{1,19,306},{2,13,422},{1,12,318},{0,20,3048},{0,15,1443},{0,12,204},{0,9,1483},{0,11,4212},{0,8,2174},{3,21,885},{3,17,90},{3,13,53},{2,12,117},{12,0,3048},{0,15,1443},{0,12,204},{0,9,1483},{20,0,3048}, +{0,9,1483},{1,18,290},{1,18,290},{1,18,290},{1,11,289},{0,13,650},{0,11,109},{0,11,109},{0,7,148},{0,7,1161},{0,6,473},{3,14,2},{3,14,2},{3,14,2},{3,10,2},{8,0,648},{0,11,109},{0,11,109},{0,7,148},{5,5,648},{0,7,148},{14,3,882},{0,20,8},{4,12,8},{0,12,8},{14,3,882},{26,0,882},{0,12,8},{0,10,890},{26,0,882},{0,10,890},{1,0,289}, +{1,0,289},{1,0,289},{1,0,289},{0,8,1},{0,8,1},{0,8,1},{0,5,4},{0,3,145},{0,3,145},{2,25,1365},{2,20,497},{2,14,713},{2,13,510},{0,23,3051},{0,16,1278},{0,13,86},{0,10,1354},{0,12,4609},{0,9,2228},{4,22,886},{3,19,94},{4,14,56},{3,13,117},{13,1,3048},{0,16,1278},{0,13,86},{0,10,1354},{21,1,3048},{0,10,1354},{2,19,482},{2,19,482},{2,19,482}, +{2,12,481},{0,16,648},{0,12,72},{0,12,72},{0,8,101},{0,8,1352},{0,7,557},{4,15,5},{4,15,5},{4,15,5},{4,11,5},{9,1,648},{0,12,72},{0,12,72},{0,8,101},{16,0,648},{0,8,101},{16,2,882},{1,21,8},{5,13,8},{0,13,5},{16,2,882},{27,1,882},{0,13,5},{0,11,890},{27,1,882},{0,11,890},{2,0,481},{2,0,481},{2,0,481},{2,0,481},{0,10,1}, 
+{0,10,1},{0,10,1},{0,6,1},{0,5,261},{0,5,261},{2,28,1667},{2,22,793},{3,15,1033},{2,14,793},{0,25,3048},{0,18,1170},{0,14,36},{0,11,1243},{0,14,5005},{0,10,2318},{5,23,886},{4,20,89},{5,15,56},{4,14,121},{14,2,3048},{0,18,1170},{0,14,36},{0,11,1243},{25,0,3048},{0,11,1243},{2,21,786},{2,21,786},{2,21,786},{2,14,789},{0,19,650},{0,14,32},{0,14,32}, +{0,9,50},{0,9,1619},{0,8,661},{5,16,4},{5,16,4},{5,16,4},{5,12,8},{10,2,648},{0,14,32},{0,14,32},{0,9,50},{17,1,648},{0,9,50},{17,3,882},{2,22,8},{6,14,8},{1,14,5},{17,3,882},{28,2,882},{1,14,5},{0,12,900},{28,2,882},{0,12,900},{2,0,785},{2,0,785},{2,0,785},{2,0,785},{0,13,1},{0,13,1},{0,13,1},{0,8,4},{0,6,405}, +{0,6,405},{3,29,1784},{3,23,902},{4,16,1186},{3,15,910},{1,26,3055},{0,20,1095},{1,15,47},{0,12,1159},{0,16,4945},{0,12,2084},{6,24,885},{6,20,90},{6,16,53},{5,15,122},{12,8,3048},{0,20,1059},{0,16,41},{0,12,1123},{28,0,3048},{0,12,1123},{3,23,901},{3,23,901},{3,23,901},{3,15,901},{1,20,652},{1,15,38},{1,15,38},{1,10,44},{0,11,1577},{0,9,545},{6,17,2}, +{6,17,2},{6,17,2},{6,13,2},{13,0,648},{0,16,5},{0,16,5},{0,10,13},{20,1,648},{0,10,13},{17,6,882},{3,23,2},{7,15,18},{3,15,10},{17,6,882},{31,2,882},{3,15,10},{0,13,890},{31,2,882},{0,13,890},{3,0,900},{3,0,900},{3,0,900},{3,0,900},{1,14,4},{1,14,4},{1,14,4},{1,9,5},{0,8,373},{0,8,373},{4,30,1772},{4,24,898},{5,17,1186}, +{4,16,898},{2,27,3055},{1,21,1095},{2,16,33},{1,13,1159},{0,17,4639},{0,13,1730},{7,25,885},{7,21,90},{7,17,53},{6,16,117},{16,4,3048},{0,21,996},{2,16,29},{0,13,1054},{29,1,3048},{0,13,1054},{4,23,891},{4,23,891},{4,23,891},{4,16,894},{2,21,652},{2,16,29},{2,16,29},{2,11,44},{0,13,1452},{0,11,365},{7,18,2},{7,18,2},{7,18,2},{7,14,2},{14,1,648}, 
+{0,18,1},{0,18,1},{0,11,4},{24,0,648},{0,11,4},{22,0,882},{4,24,8},{8,16,8},{3,16,5},{22,0,882},{30,4,882},{3,16,5},{0,14,890},{30,4,882},{0,14,890},{4,0,890},{4,0,890},{4,0,890},{4,0,890},{2,15,4},{2,15,4},{2,15,4},{2,10,5},{0,9,269},{0,9,269},{5,31,1772},{5,25,898},{6,18,1186},{5,17,898},{3,28,3055},{2,22,1095},{3,17,33}, +{2,14,1159},{0,19,4419},{0,14,1444},{8,26,886},{7,23,94},{8,18,56},{7,17,117},{17,5,3048},{0,23,936},{3,17,29},{0,14,1003},{30,2,3048},{0,14,1003},{5,24,891},{5,24,891},{5,24,891},{5,17,894},{3,22,652},{3,17,29},{3,17,29},{3,12,41},{0,15,1296},{0,12,235},{8,19,5},{8,19,5},{8,19,5},{8,15,5},{16,0,648},{1,19,1},{1,19,1},{1,12,1},{25,1,648}, +{1,12,1},{23,1,882},{5,25,8},{9,17,8},{4,17,5},{23,1,882},{31,5,882},{4,17,5},{0,15,890},{31,5,882},{0,15,890},{5,0,890},{5,0,890},{5,0,890},{5,0,890},{3,16,4},{3,16,4},{3,16,4},{3,11,5},{0,11,185},{0,11,185},{6,31,1790},{6,26,898},{7,19,1186},{6,18,898},{4,29,3057},{3,23,1095},{4,18,45},{3,15,1159},{0,20,4156},{0,15,1226},{9,27,886}, +{8,24,89},{9,19,56},{8,18,121},{18,6,3048},{0,25,909},{4,18,36},{0,15,970},{31,3,3048},{0,15,970},{6,25,891},{6,25,891},{6,25,891},{6,18,894},{4,23,659},{4,18,41},{4,18,41},{3,13,46},{0,16,1137},{0,13,137},{9,20,4},{9,20,4},{9,20,4},{9,16,8},{17,1,648},{2,20,2},{2,20,2},{2,13,1},{26,2,648},{2,13,1},{24,2,882},{6,26,8},{10,18,8}, +{5,18,5},{24,2,882},{27,9,882},{5,18,5},{0,16,900},{27,9,882},{0,16,900},{6,0,890},{6,0,890},{6,0,890},{6,0,890},{4,17,10},{4,17,10},{4,17,10},{4,12,13},{0,13,136},{0,13,136},{8,31,1844},{7,27,902},{8,20,1186},{7,19,910},{5,30,3055},{4,24,1095},{5,19,47},{4,16,1159},{0,22,3940},{0,16,1055},{10,28,885},{10,24,90},{10,20,53},{9,19,122},{22,2,3048}, 
+{0,27,886},{4,20,41},{0,17,926},{22,10,3048},{0,17,926},{7,27,901},{7,27,901},{7,27,901},{7,19,901},{5,24,652},{5,19,38},{5,19,38},{5,14,44},{0,18,1002},{0,15,110},{10,21,2},{10,21,2},{10,21,2},{10,17,2},{17,4,648},{3,21,4},{3,21,4},{3,14,5},{29,2,648},{3,14,5},{27,0,882},{7,27,2},{11,19,18},{7,19,10},{27,0,882},{30,9,882},{7,19,10}, +{0,17,890},{30,9,882},{0,17,890},{7,0,900},{7,0,900},{7,0,900},{7,0,900},{5,18,4},{5,18,4},{5,18,4},{5,13,5},{0,15,74},{0,15,74},{9,31,1886},{8,28,898},{9,21,1186},{8,20,898},{6,31,3055},{5,25,1095},{6,20,33},{5,17,1159},{0,23,3820},{0,18,963},{11,29,885},{11,25,90},{11,21,53},{10,20,117},{23,3,3048},{1,28,888},{6,20,29},{0,18,899},{28,8,3048}, +{0,18,899},{8,27,891},{8,27,891},{8,27,891},{8,20,894},{6,25,652},{6,20,29},{6,20,29},{6,15,44},{0,20,876},{0,16,102},{11,22,2},{11,22,2},{11,22,2},{11,18,2},{18,5,648},{4,22,1},{4,22,1},{4,15,4},{30,3,648},{4,15,4},{26,4,882},{8,28,8},{12,20,8},{7,20,5},{26,4,882},{31,10,882},{7,20,5},{0,18,890},{31,10,882},{0,18,890},{8,0,890}, +{8,0,890},{8,0,890},{8,0,890},{6,19,4},{6,19,4},{6,19,4},{6,14,5},{0,17,29},{0,17,29},{10,31,1964},{9,29,898},{10,22,1186},{9,21,898},{7,31,3100},{6,26,1095},{7,21,33},{6,18,1159},{0,25,3679},{0,19,899},{12,30,886},{11,27,94},{12,22,56},{11,21,117},{21,9,3048},{2,29,888},{7,21,29},{0,19,890},{29,9,3048},{0,19,890},{9,28,891},{9,28,891},{9,28,891}, +{9,21,894},{7,26,652},{7,21,29},{7,21,29},{7,16,41},{0,22,800},{2,16,98},{12,23,5},{12,23,5},{12,23,5},{12,19,5},{20,4,648},{5,23,1},{5,23,1},{5,16,1},{29,5,648},{5,16,1},{30,0,882},{9,29,8},{13,21,8},{8,21,5},{30,0,882},{30,12,882},{8,21,5},{0,19,890},{30,12,882},{0,19,890},{9,0,890},{9,0,890},{9,0,890},{9,0,890},{7,20,4}, 
+{7,20,4},{7,20,4},{7,15,5},{0,19,9},{0,19,9},{11,31,2078},{10,30,898},{11,23,1186},{10,22,898},{9,31,3181},{7,27,1095},{8,22,45},{7,19,1159},{0,27,3523},{0,20,908},{13,31,886},{12,28,89},{13,23,56},{12,22,121},{22,10,3048},{3,30,888},{8,22,36},{1,20,901},{30,10,3048},{1,20,901},{10,29,891},{10,29,891},{10,29,891},{10,22,894},{8,27,659},{8,22,41},{8,22,41}, +{7,17,46},{0,23,747},{3,17,98},{13,24,4},{13,24,4},{13,24,4},{13,20,8},{24,0,648},{6,24,2},{6,24,2},{6,17,1},{30,6,648},{6,17,1},{31,1,882},{10,30,8},{14,22,8},{9,22,5},{31,1,882},{31,13,882},{9,22,5},{0,20,900},{31,13,882},{0,20,900},{10,0,890},{10,0,890},{10,0,890},{10,0,890},{8,21,10},{8,21,10},{8,21,10},{8,16,13},{0,20,8}, +{0,20,8},{12,31,2228},{11,31,902},{12,24,1186},{11,23,910},{10,31,3256},{8,28,1095},{9,23,47},{8,20,1159},{0,29,3364},{2,21,894},{14,31,915},{14,28,90},{14,24,53},{13,23,122},{29,1,3048},{4,31,886},{8,24,41},{2,21,890},{26,14,3048},{2,21,890},{11,31,901},{11,31,901},{11,31,901},{11,23,901},{9,28,652},{9,23,38},{9,23,38},{9,18,44},{0,25,705},{3,19,101},{14,25,2}, +{14,25,2},{14,25,2},{14,21,2},{24,3,648},{7,25,4},{7,25,4},{7,18,5},{28,9,648},{7,18,5},{31,4,882},{11,31,2},{15,23,18},{11,23,10},{31,4,882},{24,19,882},{11,23,10},{0,21,890},{24,19,882},{0,21,890},{11,0,900},{11,0,900},{11,0,900},{11,0,900},{9,22,4},{9,22,4},{9,22,4},{9,17,5},{2,21,4},{2,21,4},{13,31,2414},{12,31,907},{13,25,1186}, +{12,24,898},{11,31,3391},{9,29,1095},{10,24,33},{9,21,1159},{0,31,3276},{3,22,894},{15,31,981},{15,29,90},{15,25,53},{14,24,117},{30,2,3048},{6,31,906},{10,24,29},{3,22,890},{27,15,3048},{3,22,890},{12,31,891},{12,31,891},{12,31,891},{12,24,894},{10,29,652},{10,24,29},{10,24,29},{10,19,44},{0,27,665},{4,20,102},{15,26,2},{15,26,2},{15,26,2},{15,22,2},{22,9,648}, 
+{8,26,1},{8,26,1},{8,19,4},{29,10,648},{8,19,4},{30,8,882},{12,31,17},{16,24,8},{11,24,5},{30,8,882},{28,18,882},{11,24,5},{0,22,890},{28,18,882},{0,22,890},{12,0,890},{12,0,890},{12,0,890},{12,0,890},{10,23,4},{10,23,4},{10,23,4},{10,18,5},{3,22,4},{3,22,4},{15,31,2606},{13,31,987},{14,26,1186},{13,25,898},{13,31,3517},{10,30,1095},{11,25,33}, +{10,22,1159},{1,31,3300},{4,23,899},{17,31,1014},{15,31,94},{16,26,56},{15,25,117},{31,3,3048},{8,31,936},{11,25,29},{4,23,890},{23,19,3048},{4,23,890},{13,31,906},{13,31,906},{13,31,906},{13,25,894},{11,30,652},{11,25,29},{11,25,29},{11,20,41},{0,29,651},{6,20,98},{16,27,5},{16,27,5},{16,27,5},{16,23,5},{24,8,648},{9,27,1},{9,27,1},{9,20,1},{30,11,648}, +{9,20,1},{31,9,882},{14,31,37},{17,25,8},{12,25,5},{31,9,882},{29,19,882},{12,25,5},{0,23,890},{29,19,882},{0,23,890},{13,0,890},{13,0,890},{13,0,890},{13,0,890},{11,24,4},{11,24,4},{11,24,4},{11,19,5},{3,24,5},{3,24,5},{16,31,2792},{15,31,1079},{15,27,1186},{14,26,898},{14,31,3652},{11,31,1095},{12,26,45},{11,23,1159},{3,31,3436},{4,24,908},{18,31,1080}, +{17,31,110},{17,27,56},{16,26,121},{26,14,3048},{10,31,996},{12,26,36},{5,24,901},{22,21,3048},{5,24,901},{14,31,939},{14,31,939},{14,31,939},{14,26,894},{12,31,659},{12,26,41},{12,26,41},{11,21,46},{1,30,651},{7,21,98},{17,28,4},{17,28,4},{17,28,4},{17,24,8},{28,4,648},{10,28,2},{10,28,2},{10,21,1},{29,13,648},{10,21,1},{29,15,882},{16,31,80},{18,26,8}, +{13,26,5},{29,15,882},{30,20,882},{13,26,5},{0,24,900},{30,20,882},{0,24,900},{14,0,890},{14,0,890},{14,0,890},{14,0,890},{12,25,10},{12,25,10},{12,25,10},{12,20,13},{4,24,8},{4,24,8},{17,31,3038},{16,31,1268},{16,28,1186},{15,27,910},{15,31,3879},{12,31,1146},{13,27,47},{12,24,1159},{5,31,3667},{6,25,894},{19,31,1205},{18,31,147},{18,28,53},{17,27,122},{30,10,3048}, 
+{12,31,1110},{12,28,41},{6,25,890},{30,18,3048},{6,25,890},{16,31,979},{16,31,979},{16,31,979},{15,27,901},{13,31,670},{13,27,38},{13,27,38},{13,22,44},{2,31,648},{7,23,101},{18,29,2},{18,29,2},{18,29,2},{18,25,2},{31,2,648},{11,29,4},{11,29,4},{11,22,5},{27,16,648},{11,22,5},{29,18,882},{18,31,146},{19,27,18},{15,27,10},{29,18,882},{28,23,882},{15,27,10}, +{0,25,890},{28,23,882},{0,25,890},{15,0,900},{15,0,900},{15,0,900},{15,0,900},{13,26,4},{13,26,4},{13,26,4},{13,21,5},{6,25,4},{6,25,4},{18,31,3308},{17,31,1502},{17,29,1186},{16,28,898},{17,31,4077},{14,31,1230},{14,28,33},{13,25,1159},{8,31,3820},{7,26,894},{21,31,1368},{19,31,261},{19,29,53},{18,28,117},{31,11,3048},{14,31,1226},{14,28,29},{7,26,890},{31,19,3048}, +{7,26,890},{17,31,1018},{17,31,1018},{17,31,1018},{16,28,894},{14,31,724},{14,28,29},{14,28,29},{14,23,44},{4,31,665},{8,24,102},{19,30,2},{19,30,2},{19,30,2},{19,26,2},{26,13,648},{12,30,1},{12,30,1},{12,23,4},{21,21,648},{12,23,4},{30,19,882},{20,31,193},{20,28,8},{15,28,5},{30,19,882},{27,25,882},{15,28,5},{0,26,890},{27,25,882},{0,26,890},{16,0,890}, +{16,0,890},{16,0,890},{16,0,890},{14,27,4},{14,27,4},{14,27,4},{14,22,5},{7,26,4},{7,26,4},{19,31,3614},{18,31,1804},{18,30,1186},{17,29,898},{18,31,4284},{15,31,1417},{15,29,33},{14,26,1159},{9,31,4036},{8,27,899},{22,31,1494},{20,31,405},{20,30,56},{19,29,117},{29,17,3048},{16,31,1395},{15,29,29},{8,27,890},{27,23,3048},{8,27,890},{18,31,1075},{18,31,1075},{18,31,1075}, +{17,29,894},{16,31,787},{15,29,29},{15,29,29},{15,24,41},{6,31,705},{10,24,98},{20,31,5},{20,31,5},{20,31,5},{20,27,5},{28,12,648},{13,31,1},{13,31,1},{13,24,1},{22,22,648},{13,24,1},{29,23,882},{22,31,277},{21,29,8},{16,29,5},{29,23,882},{28,26,882},{16,29,5},{0,27,890},{28,26,882},{0,27,890},{17,0,890},{17,0,890},{17,0,890},{17,0,890},{15,28,4}, 
+{15,28,4},{15,28,4},{15,23,5},{7,28,5},{7,28,5},{20,31,4014},{19,31,2174},{19,31,1186},{18,30,898},{19,31,4545},{17,31,1725},{16,30,45},{15,27,1159},{11,31,4300},{8,28,908},{23,31,1656},{22,31,585},{21,31,56},{20,30,121},{30,18,3048},{18,31,1563},{16,30,36},{9,28,901},{26,25,3048},{9,28,901},{19,31,1150},{19,31,1150},{19,31,1150},{18,30,894},{17,31,841},{16,30,41},{16,30,41}, +{15,25,46},{8,31,747},{11,25,98},{21,31,20},{21,31,20},{21,31,20},{21,28,8},{29,13,648},{15,31,5},{15,31,5},{14,25,1},{28,20,648},{14,25,1},{30,24,882},{23,31,397},{22,30,8},{17,30,5},{30,24,882},{29,27,882},{17,30,5},{0,28,900},{29,27,882},{0,28,900},{18,0,890},{18,0,890},{18,0,890},{18,0,890},{16,29,10},{16,29,10},{16,29,10},{16,24,13},{8,28,8}, +{8,28,8},{22,31,4123},{21,31,2404},{20,31,1278},{19,31,901},{20,31,4626},{18,31,1849},{17,31,38},{16,28,1006},{14,31,4330},{10,29,789},{24,31,1629},{23,31,715},{22,31,65},{22,30,101},{31,19,2814},{20,31,1505},{17,31,34},{10,29,785},{27,26,2814},{10,29,785},{20,31,1278},{20,31,1278},{20,31,1278},{19,31,901},{18,31,948},{17,31,38},{17,31,38},{17,26,44},{10,31,840},{11,27,101},{22,31,65}, +{22,31,65},{22,31,65},{22,29,2},{29,16,648},{17,31,34},{17,31,34},{15,26,5},{31,20,648},{15,26,5},{31,25,761},{25,31,425},{23,31,9},{19,31,1},{31,25,761},{28,29,761},{19,31,1},{0,29,785},{28,29,761},{0,29,785},{19,0,900},{19,0,900},{19,0,900},{19,0,900},{17,30,4},{17,30,4},{17,30,4},{17,25,5},{10,29,4},{10,29,4},{23,31,3735},{22,31,2314},{21,31,1395}, +{20,31,899},{22,31,4090},{19,31,1618},{18,31,104},{17,29,686},{15,31,3826},{11,29,507},{25,31,1285},{24,31,609},{23,31,122},{23,31,37},{30,22,2249},{21,31,1186},{19,31,74},{13,29,482},{27,27,2249},{13,29,482},{21,31,1395},{21,31,1395},{21,31,1395},{20,31,899},{19,31,1086},{18,31,104},{18,31,104},{18,27,44},{12,31,969},{12,28,102},{23,31,122},{23,31,122},{23,31,122},{23,30,2},{30,17,648}, 
+{19,31,74},{19,31,74},{16,27,4},{25,25,648},{16,27,4},{31,26,481},{27,31,269},{25,31,0},{21,31,0},{31,26,481},{31,28,481},{21,31,0},{0,29,481},{31,28,481},{0,29,481},{20,0,890},{20,0,890},{20,0,890},{20,0,890},{18,31,4},{18,31,4},{18,31,4},{18,26,5},{11,30,4},{11,30,4},{23,31,3399},{23,31,2260},{22,31,1530},{21,31,954},{23,31,3639},{20,31,1402},{19,31,238}, +{18,29,405},{17,31,3443},{12,30,314},{26,31,1009},{25,31,525},{25,31,164},{24,31,5},{30,24,1769},{23,31,918},{21,31,113},{14,30,290},{29,27,1769},{14,30,290},{22,31,1530},{22,31,1530},{22,31,1530},{21,31,954},{21,31,1251},{19,31,238},{19,31,238},{19,28,41},{14,31,1105},{14,28,98},{25,31,164},{25,31,164},{25,31,164},{24,31,5},{31,18,648},{21,31,113},{21,31,113},{17,28,1},{26,26,648}, +{17,28,1},{30,29,265},{28,31,145},{27,31,4},{24,31,1},{30,29,265},{29,30,265},{24,31,1},{0,30,289},{29,30,265},{0,30,289},{21,0,890},{21,0,890},{21,0,890},{21,0,890},{19,31,13},{19,31,13},{19,31,13},{19,27,5},{12,31,9},{12,31,9},{24,31,3069},{24,31,2257},{23,31,1683},{23,31,1054},{24,31,3258},{22,31,1330},{21,31,378},{19,30,213},{20,31,3102},{14,30,166},{27,31,801}, +{26,31,477},{26,31,221},{25,31,20},{31,24,1374},{24,31,758},{23,31,181},{16,30,114},{24,31,1374},{16,30,114},{23,31,1683},{23,31,1683},{23,31,1683},{23,31,1054},{22,31,1401},{21,31,378},{21,31,378},{19,29,46},{16,31,1296},{15,29,98},{26,31,221},{26,31,221},{26,31,221},{25,31,20},{30,22,648},{23,31,181},{23,31,181},{18,29,1},{27,27,648},{18,29,1},{30,30,113},{29,31,61},{28,31,0}, +{26,31,1},{30,30,113},{30,30,113},{26,31,1},{0,30,113},{30,30,113},{0,30,113},{22,0,890},{22,0,890},{22,0,890},{22,0,890},{20,31,45},{20,31,45},{20,31,45},{20,28,13},{13,31,25},{13,31,25},{25,31,2860},{25,31,2260},{24,31,1854},{24,31,1210},{25,31,2932},{23,31,1310},{22,31,609},{21,30,108},{21,31,2731},{15,31,101},{28,31,630},{27,31,475},{27,31,306},{26,31,101},{31,26,1032}, 
+{26,31,612},{25,31,290},{18,31,37},{29,29,1032},{18,31,37},{24,31,1854},{24,31,1854},{24,31,1854},{24,31,1210},{23,31,1620},{22,31,609},{22,31,609},{21,30,44},{18,31,1515},{15,31,101},{27,31,306},{27,31,306},{27,31,306},{26,31,101},{30,25,648},{25,31,290},{25,31,290},{19,30,5},{30,27,648},{19,30,5},{31,30,18},{30,31,10},{30,31,1},{29,31,0},{31,30,18},{30,31,18},{29,31,0}, +{0,31,36},{30,31,18},{0,31,36},{23,0,900},{23,0,900},{23,0,900},{23,0,900},{21,31,104},{21,31,104},{21,31,104},{21,29,5},{15,31,65},{15,31,65},{26,31,2626},{26,31,2206},{25,31,1915},{25,31,1315},{26,31,2641},{24,31,1333},{23,31,789},{22,31,40},{22,31,2445},{17,31,116},{29,31,524},{28,31,406},{28,31,325},{27,31,170},{30,29,771},{27,31,507},{26,31,320},{20,31,0},{29,30,771}, +{20,31,0},{25,31,1915},{25,31,1915},{25,31,1915},{25,31,1315},{24,31,1661},{23,31,789},{23,31,789},{22,31,40},{20,31,1517},{17,31,116},{28,31,325},{28,31,325},{28,31,325},{27,31,170},{31,26,580},{26,31,320},{26,31,320},{20,31,0},{30,28,580},{20,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{24,0,890}, +{24,0,890},{24,0,890},{24,0,890},{23,31,164},{23,31,164},{23,31,164},{22,30,5},{17,31,116},{17,31,116},{27,31,2156},{27,31,1884},{26,31,1630},{26,31,1210},{26,31,2081},{25,31,1108},{24,31,705},{23,31,5},{23,31,1927},{19,31,180},{29,31,300},{29,31,236},{29,31,200},{28,31,85},{31,28,451},{28,31,283},{27,31,194},{23,31,1},{28,31,451},{23,31,1},{26,31,1630},{26,31,1630},{26,31,1630}, +{26,31,1210},{25,31,1347},{24,31,705},{24,31,705},{23,31,5},{22,31,1229},{19,31,180},{29,31,200},{29,31,200},{29,31,200},{28,31,85},{31,27,338},{27,31,194},{27,31,194},{23,31,1},{30,29,338},{23,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{25,0,890},{25,0,890},{25,0,890},{25,0,890},{24,31,221}, 
+{24,31,221},{24,31,221},{23,31,5},{19,31,180},{19,31,180},{28,31,1782},{27,31,1564},{27,31,1395},{27,31,1123},{27,31,1620},{26,31,937},{25,31,651},{24,31,25},{24,31,1560},{21,31,233},{30,31,150},{30,31,134},{29,31,104},{29,31,40},{31,29,216},{29,31,136},{28,31,90},{25,31,1},{29,31,216},{25,31,1},{27,31,1395},{27,31,1395},{27,31,1395},{27,31,1123},{26,31,1101},{25,31,651},{25,31,651}, +{24,31,25},{23,31,998},{21,31,233},{29,31,104},{29,31,104},{29,31,104},{29,31,40},{31,28,162},{28,31,90},{28,31,90},{25,31,1},{28,31,162},{25,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{26,0,890},{26,0,890},{26,0,890},{26,0,890},{25,31,290},{25,31,290},{25,31,290},{24,31,25},{21,31,233}, +{21,31,233},{0,17,1568},{0,14,442},{0,10,40},{0,8,485},{0,11,3379},{0,9,2369},{0,8,1061},{0,5,2435},{0,6,3760},{0,5,2660},{0,17,1568},{0,14,442},{0,10,40},{0,8,485},{7,0,3371},{0,9,2369},{0,8,1061},{0,5,2435},{10,1,3371},{0,5,2435},{0,8,0},{0,8,0},{0,8,0},{0,5,1},{0,4,288},{0,4,160},{0,4,160},{0,2,164},{0,2,332},{0,2,200},{0,8,0}, +{0,8,0},{0,8,0},{0,5,1},{0,4,288},{0,4,160},{0,4,160},{0,2,164},{4,0,288},{0,2,164},{9,2,1568},{0,14,442},{0,10,40},{0,8,485},{9,2,1568},{17,0,1568},{0,8,485},{0,6,1586},{17,0,1568},{0,6,1586},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,20,1570},{0,16,325},{0,11,5}, +{0,9,392},{0,13,3968},{0,10,2630},{0,9,1121},{0,6,2710},{0,7,4484},{0,6,3034},{0,20,1570},{0,16,325},{0,11,5},{0,9,392},{2,10,3968},{0,10,2630},{0,9,1121},{0,6,2710},{13,0,3968},{0,6,2710},{0,11,1},{0,11,1},{0,11,1},{0,6,4},{0,5,514},{0,5,274},{0,5,274},{0,3,289},{0,3,595},{0,3,370},{0,11,1},{0,11,1},{0,11,1},{0,6,4},{2,2,512}, 
+{0,5,274},{0,5,274},{0,3,289},{2,2,512},{0,3,289},{10,3,1568},{0,16,325},{0,11,5},{0,9,392},{10,3,1568},{18,1,1568},{0,9,392},{0,7,1586},{18,1,1568},{0,7,1586},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,22,1570},{0,17,225},{0,12,18},{0,11,292},{0,15,4652},{0,11,2945},{0,10,1217}, +{0,7,3035},{0,8,5283},{0,7,3476},{0,22,1570},{0,17,225},{0,12,18},{0,11,292},{4,8,4651},{0,11,2945},{0,10,1217},{0,7,3035},{13,1,4651},{0,7,3035},{0,13,0},{0,13,0},{0,13,0},{0,8,1},{0,7,802},{0,6,424},{0,6,424},{0,4,449},{0,3,931},{0,3,562},{0,13,0},{0,13,0},{0,13,0},{0,8,1},{4,0,800},{0,6,424},{0,6,424},{0,4,449},{5,1,800}, +{0,4,449},{11,4,1568},{0,17,225},{1,12,13},{0,11,292},{11,4,1568},{19,2,1568},{0,11,292},{0,8,1576},{19,2,1568},{0,8,1576},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,25,1570},{0,19,149},{0,13,73},{0,12,194},{0,17,5424},{0,13,3368},{0,11,1349},{0,8,3449},{0,9,6213},{0,7,3956},{0,25,1570}, +{0,19,149},{0,13,73},{0,12,194},{10,0,5419},{0,13,3368},{0,11,1349},{0,8,3449},{15,1,5419},{0,8,3449},{0,16,1},{0,16,1},{0,16,1},{0,9,4},{0,8,1152},{0,7,610},{0,7,610},{0,4,625},{0,4,1328},{0,4,769},{0,16,1},{0,16,1},{0,16,1},{0,9,4},{3,3,1152},{0,7,610},{0,7,610},{0,4,625},{8,0,1152},{0,4,625},{15,0,1568},{0,19,149},{2,13,13}, +{0,12,194},{15,0,1568},{20,3,1568},{0,12,194},{0,9,1576},{20,3,1568},{0,9,1576},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,26,1633},{0,21,155},{1,14,150},{0,13,198},{0,20,5424},{0,15,3099},{0,12,996},{0,9,3179},{0,10,6544},{0,8,3890},{1,26,1569},{1,20,131},{2,14,69},{1,13,181},{10,3,5419}, +{0,15,3099},{0,12,996},{0,9,3179},{18,1,5419},{0,9,3179},{1,17,65},{1,17,65},{1,17,65},{1,11,69},{0,11,1152},{0,9,445},{0,9,445},{0,6,505},{0,6,1494},{0,5,737},{1,17,1},{1,17,1},{1,17,1},{1,11,5},{6,1,1152},{0,9,445},{0,9,445},{0,6,505},{11,0,1152},{0,6,505},{16,1,1568},{0,21,74},{3,14,5},{0,13,117},{16,1,1568},{26,1,1568},{0,13,117}, 
+{0,10,1586},{26,1,1568},{0,10,1586},{1,0,65},{1,0,65},{1,0,65},{1,0,65},{0,3,0},{0,3,0},{0,3,0},{0,2,1},{0,1,25},{0,1,25},{1,29,1715},{1,22,219},{2,15,342},{1,14,262},{0,22,5420},{0,16,2834},{0,13,726},{0,10,2966},{0,11,6916},{0,9,3860},{2,27,1569},{2,21,131},{3,15,69},{2,14,181},{11,4,5419},{0,16,2834},{0,13,726},{0,10,2966},{19,2,5419}, +{0,10,2966},{1,20,146},{1,20,146},{1,20,146},{1,12,146},{0,13,1154},{0,11,337},{0,11,337},{0,7,388},{0,7,1665},{0,6,749},{2,18,1},{2,18,1},{2,18,1},{2,12,5},{8,0,1152},{0,11,337},{0,11,337},{0,7,388},{5,5,1152},{0,7,388},{17,2,1568},{0,23,34},{4,15,5},{0,14,72},{17,2,1568},{27,2,1568},{0,14,72},{0,11,1586},{27,2,1568},{0,11,1586},{1,0,145}, +{1,0,145},{1,0,145},{1,0,145},{0,6,1},{0,6,1},{0,6,1},{0,3,4},{0,2,85},{0,2,85},{2,30,1907},{1,24,398},{2,16,542},{1,15,425},{0,25,5424},{0,18,2630},{0,15,486},{0,11,2771},{0,13,7299},{0,11,3860},{3,28,1569},{3,22,131},{4,16,82},{3,15,181},{15,0,5419},{0,18,2630},{0,15,486},{0,11,2771},{20,3,5419},{0,11,2771},{2,21,338},{2,21,338},{2,21,338}, +{2,13,338},{0,16,1152},{0,13,274},{0,13,274},{0,8,305},{0,8,1856},{0,7,797},{3,19,1},{3,19,1},{3,19,1},{3,13,5},{9,1,1152},{0,13,274},{0,13,274},{0,8,305},{16,0,1152},{0,8,305},{18,3,1568},{0,25,17},{5,16,13},{0,15,45},{18,3,1568},{28,3,1568},{0,15,45},{0,12,1576},{28,3,1568},{0,12,1576},{2,0,337},{2,0,337},{2,0,337},{2,0,337},{0,8,1}, +{0,8,1},{0,8,1},{0,5,0},{0,4,169},{0,4,169},{2,31,2145},{2,25,590},{3,17,862},{2,16,619},{0,27,5420},{0,20,2424},{0,16,282},{0,12,2552},{0,15,7711},{0,11,3908},{4,29,1570},{4,23,149},{4,17,73},{4,16,194},{14,4,5419},{0,20,2424},{0,16,282},{0,12,2552},{24,2,5419},{0,12,2552},{2,23,546},{2,23,546},{2,23,546},{2,15,546},{0,19,1154},{0,15,194},{0,15,194}, 
+{0,9,218},{0,9,2123},{0,8,865},{4,20,1},{4,20,1},{4,20,1},{4,13,4},{10,2,1152},{0,15,194},{0,15,194},{0,9,218},{17,1,1152},{0,9,218},{20,2,1568},{0,27,5},{6,17,13},{0,16,26},{20,2,1568},{24,7,1568},{0,16,26},{0,13,1576},{24,7,1568},{0,13,1576},{2,0,545},{2,0,545},{2,0,545},{2,0,545},{0,11,0},{0,11,0},{0,11,0},{0,7,4},{0,5,289}, +{0,5,289},{3,31,2596},{3,26,941},{3,19,1289},{3,17,972},{0,30,5420},{0,22,2243},{0,17,145},{0,13,2386},{0,16,8161},{0,13,3986},{5,30,1569},{5,24,131},{6,18,69},{5,17,181},{17,2,5419},{0,22,2243},{0,17,145},{0,13,2386},{27,2,5419},{0,13,2386},{3,24,901},{3,24,901},{3,24,901},{3,16,901},{0,22,1154},{0,17,109},{0,17,109},{0,10,145},{0,11,2441},{0,9,1001},{5,21,1}, +{5,21,1},{5,21,1},{5,15,5},{13,0,1152},{0,17,109},{0,17,109},{0,10,145},{20,1,1152},{0,10,145},{23,0,1568},{1,28,2},{7,18,5},{0,18,8},{23,0,1568},{30,5,1568},{0,18,8},{0,14,1586},{30,5,1568},{0,14,1586},{3,0,900},{3,0,900},{3,0,900},{3,0,900},{0,14,1},{0,14,1},{0,14,1},{0,8,1},{0,6,468},{0,6,468},{4,31,3146},{3,28,1262},{4,19,1743}, +{3,18,1297},{1,31,5484},{0,23,2096},{0,19,69},{0,14,2251},{0,18,8669},{0,14,4100},{6,31,1569},{6,25,131},{7,19,69},{6,18,181},{18,3,5419},{0,23,2096},{0,19,69},{0,14,2251},{28,3,5419},{0,14,2251},{3,27,1252},{3,27,1252},{3,27,1252},{3,17,1256},{0,24,1152},{0,18,61},{0,18,61},{0,11,100},{0,12,2859},{0,10,1157},{6,22,1},{6,22,1},{6,22,1},{6,16,5},{14,1,1152}, +{0,18,61},{0,18,61},{0,11,100},{24,0,1152},{0,11,100},{24,1,1568},{2,29,2},{8,19,5},{0,19,5},{24,1,1568},{31,6,1568},{0,19,5},{0,15,1586},{31,6,1568},{0,15,1586},{3,0,1252},{3,0,1252},{3,0,1252},{3,0,1252},{0,16,1},{0,16,1},{0,16,1},{0,10,1},{0,8,657},{0,8,657},{5,31,3716},{4,29,1603},{4,21,2148},{4,19,1631},{2,31,5655},{0,25,2005},{0,20,31}, 
+{0,15,2138},{0,19,8963},{0,15,4070},{7,31,1587},{7,26,131},{8,20,82},{7,19,181},{20,2,5419},{0,25,2001},{0,20,27},{0,15,2134},{24,7,5419},{0,15,2134},{4,28,1587},{4,28,1587},{4,28,1587},{4,18,1590},{0,27,1158},{0,20,22},{0,20,22},{0,12,62},{0,14,3075},{0,11,1221},{7,23,1},{7,23,1},{7,23,1},{7,17,5},{16,0,1152},{0,20,18},{0,20,18},{0,12,58},{25,1,1152}, +{0,12,58},{25,2,1568},{3,30,2},{9,20,13},{1,20,9},{25,2,1568},{27,10,1568},{1,20,9},{0,16,1576},{27,10,1568},{0,16,1576},{4,0,1586},{4,0,1586},{4,0,1586},{4,0,1586},{0,19,4},{0,19,4},{0,19,4},{0,11,8},{0,9,769},{0,9,769},{6,31,3890},{5,30,1603},{5,22,2148},{5,20,1627},{3,31,5748},{0,27,1989},{1,21,31},{0,17,2117},{0,21,8560},{0,16,3545},{9,31,1634}, +{8,27,149},{8,21,73},{8,20,194},{21,3,5419},{0,27,1889},{1,21,27},{0,17,2017},{28,6,5419},{0,17,2017},{5,29,1587},{5,29,1587},{5,29,1587},{5,19,1590},{1,28,1158},{1,21,22},{1,21,22},{1,13,62},{0,16,2801},{0,13,949},{8,24,1},{8,24,1},{8,24,1},{8,17,4},{17,1,1152},{0,22,2},{0,22,2},{0,14,26},{26,2,1152},{0,14,26},{26,3,1568},{4,31,5},{10,21,13}, +{2,21,9},{26,3,1568},{28,11,1568},{2,21,9},{0,17,1576},{28,11,1568},{0,17,1576},{5,0,1586},{5,0,1586},{5,0,1586},{5,0,1586},{1,20,4},{1,20,4},{1,20,4},{1,12,8},{0,10,625},{0,10,625},{7,31,4136},{6,31,1589},{7,23,2157},{6,21,1621},{4,31,5895},{1,28,1977},{2,22,33},{1,18,2107},{0,23,8196},{0,17,3043},{10,31,1667},{9,28,131},{10,22,69},{9,21,181},{24,1,5419}, +{0,29,1772},{2,22,24},{0,18,1875},{31,6,5419},{0,18,1875},{6,30,1576},{6,30,1576},{6,30,1576},{6,20,1580},{2,29,1161},{2,22,29},{2,22,29},{2,15,58},{0,17,2529},{0,14,656},{9,25,1},{9,25,1},{9,25,1},{9,19,5},{17,4,1152},{0,24,1},{0,24,1},{0,15,1},{29,2,1152},{0,15,1},{27,4,1568},{6,31,13},{11,22,5},{3,22,5},{27,4,1568},{31,11,1568},{3,22,5}, 
+{0,18,1586},{31,11,1568},{0,18,1586},{6,0,1576},{6,0,1576},{6,0,1576},{6,0,1576},{2,21,10},{2,21,10},{2,21,10},{2,14,13},{0,12,520},{0,12,520},{8,31,4436},{7,31,1625},{8,23,2175},{7,22,1621},{6,31,6079},{3,28,1973},{3,23,33},{2,19,2107},{0,24,7969},{0,18,2675},{11,31,1745},{10,29,131},{11,23,69},{10,22,181},{25,2,5419},{0,30,1699},{3,23,24},{0,19,1782},{27,10,5419}, +{0,19,1782},{7,31,1576},{7,31,1576},{7,31,1576},{7,21,1580},{3,30,1161},{3,23,29},{3,23,29},{3,16,74},{0,19,2313},{0,15,474},{10,26,1},{10,26,1},{10,26,1},{10,20,5},{18,5,1152},{1,25,1},{1,25,1},{0,16,2},{30,3,1152},{0,16,2},{31,0,1568},{8,31,34},{12,23,5},{4,23,5},{31,0,1568},{30,13,1568},{4,23,5},{0,19,1586},{30,13,1568},{0,19,1586},{7,0,1576}, +{7,0,1576},{7,0,1576},{7,0,1576},{3,22,10},{3,22,10},{3,22,10},{3,15,13},{0,14,400},{0,14,400},{9,31,4730},{8,31,1716},{8,25,2148},{8,23,1631},{7,31,6244},{3,30,1977},{4,24,31},{4,19,2138},{0,26,7669},{0,19,2375},{12,31,1832},{11,30,131},{12,24,82},{11,23,181},{26,3,5419},{1,31,1699},{4,24,27},{0,20,1720},{28,11,5419},{0,20,1720},{8,31,1595},{8,31,1595},{8,31,1595}, +{8,22,1590},{4,31,1158},{4,24,22},{4,24,22},{4,16,62},{0,21,2091},{0,17,306},{11,27,1},{11,27,1},{11,27,1},{11,21,5},{20,4,1152},{2,26,1},{2,26,1},{1,17,2},{29,5,1152},{1,17,2},{29,6,1568},{9,31,68},{13,24,13},{5,24,9},{29,6,1568},{31,14,1568},{5,24,9},{0,20,1576},{31,14,1568},{0,20,1576},{8,0,1586},{8,0,1586},{8,0,1586},{8,0,1586},{4,23,4}, +{4,23,4},{4,23,4},{4,15,8},{0,16,277},{0,16,277},{11,31,5010},{9,31,1878},{9,26,2148},{9,24,1627},{8,31,6508},{4,31,1989},{5,25,31},{4,21,2117},{0,28,7364},{0,21,2098},{14,31,1952},{12,31,149},{12,25,73},{12,24,194},{28,2,5419},{3,31,1787},{5,25,27},{0,21,1657},{27,13,5419},{0,21,1657},{9,31,1622},{9,31,1622},{9,31,1622},{9,23,1590},{5,31,1164},{5,25,22},{5,25,22}, 
+{5,17,62},{0,22,1928},{0,18,194},{12,28,1},{12,28,1},{12,28,1},{12,21,4},{24,0,1152},{3,27,1},{3,27,1},{2,18,2},{30,6,1152},{2,18,2},{30,7,1568},{11,31,116},{14,25,13},{6,25,9},{30,7,1568},{30,16,1568},{6,25,9},{0,21,1576},{30,16,1568},{0,21,1576},{9,0,1586},{9,0,1586},{9,0,1586},{9,0,1586},{5,24,4},{5,24,4},{5,24,4},{5,16,8},{0,18,193}, +{0,18,193},{12,31,5316},{11,31,2154},{11,27,2157},{10,25,1621},{10,31,6800},{6,31,1999},{6,26,33},{5,22,2107},{0,29,7068},{0,22,1836},{15,31,2081},{13,31,206},{14,26,69},{13,25,181},{31,0,5419},{5,31,1937},{6,26,24},{0,22,1611},{30,13,5419},{0,22,1611},{10,31,1676},{10,31,1676},{10,31,1676},{10,24,1580},{7,31,1179},{6,26,29},{6,26,29},{6,19,58},{0,24,1798},{0,19,157},{13,29,1}, +{13,29,1},{13,29,1},{13,23,5},{24,3,1152},{4,28,1},{4,28,1},{4,19,1},{28,9,1152},{4,19,1},{31,8,1568},{13,31,205},{15,26,5},{7,26,5},{31,8,1568},{28,19,1568},{7,26,5},{0,22,1586},{28,19,1568},{0,22,1586},{10,0,1576},{10,0,1576},{10,0,1576},{10,0,1576},{6,25,10},{6,25,10},{6,25,10},{6,18,13},{0,20,106},{0,20,106},{13,31,5658},{12,31,2435},{12,27,2175}, +{11,26,1621},{11,31,7055},{7,31,2090},{7,27,33},{6,23,2107},{0,31,6820},{0,23,1690},{16,31,2216},{14,31,334},{15,27,69},{14,26,181},{29,6,5419},{7,31,2081},{7,27,24},{0,23,1590},{31,14,5419},{0,23,1590},{11,31,1745},{11,31,1745},{11,31,1745},{11,25,1580},{8,31,1220},{7,27,29},{7,27,29},{7,20,74},{0,26,1650},{1,20,137},{14,30,1},{14,30,1},{14,30,1},{14,24,5},{22,9,1152}, +{5,29,1},{5,29,1},{4,20,2},{29,10,1152},{4,20,2},{29,14,1568},{15,31,289},{16,27,5},{8,27,5},{29,14,1568},{29,20,1568},{8,27,5},{0,23,1586},{29,20,1568},{0,23,1586},{11,0,1576},{11,0,1576},{11,0,1576},{11,0,1576},{7,26,10},{7,26,10},{7,26,10},{7,19,13},{0,22,58},{0,22,58},{14,31,6036},{13,31,2751},{12,29,2148},{12,27,1631},{12,31,7316},{8,31,2228},{8,28,31}, 
+{8,23,2138},{0,31,6884},{0,24,1613},{17,31,2402},{16,31,500},{16,28,82},{15,27,181},{30,7,5419},{9,31,2195},{8,28,27},{0,24,1577},{30,16,5419},{0,24,1577},{12,31,1811},{12,31,1811},{12,31,1811},{12,26,1590},{9,31,1286},{8,28,22},{8,28,22},{8,20,62},{0,28,1508},{2,21,137},{15,31,1},{15,31,1},{15,31,1},{15,25,5},{24,8,1152},{6,30,1},{6,30,1},{5,21,2},{30,11,1152}, +{5,21,2},{30,15,1568},{17,31,410},{17,28,13},{9,28,9},{30,15,1568},{30,21,1568},{9,28,9},{0,24,1576},{30,21,1568},{0,24,1576},{12,0,1586},{12,0,1586},{12,0,1586},{12,0,1586},{8,27,4},{8,27,4},{8,27,4},{8,19,8},{0,24,37},{0,24,37},{15,31,6450},{14,31,3135},{13,30,2148},{13,28,1627},{13,31,7661},{10,31,2448},{9,29,31},{8,25,2117},{2,31,7196},{0,25,1593},{19,31,2594}, +{17,31,698},{16,29,73},{16,28,194},{29,11,5419},{11,31,2379},{9,29,27},{1,25,1577},{31,17,5419},{1,25,1577},{13,31,1910},{13,31,1910},{13,31,1910},{13,27,1590},{10,31,1388},{9,29,22},{9,29,22},{9,21,62},{0,30,1416},{3,22,137},{16,31,4},{16,31,4},{16,31,4},{16,25,4},{28,4,1152},{7,31,1},{7,31,1},{6,22,2},{29,13,1152},{6,22,2},{31,16,1568},{19,31,530},{18,29,13}, +{10,29,9},{31,16,1568},{31,22,1568},{10,29,9},{0,25,1576},{31,22,1568},{0,25,1576},{13,0,1586},{13,0,1586},{13,0,1586},{13,0,1586},{9,28,4},{9,28,4},{9,28,4},{9,20,8},{0,25,17},{0,25,17},{16,31,6900},{15,31,3657},{15,31,2157},{14,29,1621},{15,31,8023},{11,31,2845},{10,30,33},{9,26,2107},{5,31,7651},{1,26,1611},{20,31,2866},{18,31,1011},{18,30,69},{17,29,181},{29,14,5419}, +{13,31,2657},{10,30,24},{3,26,1587},{29,20,5419},{3,26,1587},{15,31,2057},{15,31,2057},{15,31,2057},{14,28,1580},{12,31,1476},{10,30,29},{10,30,29},{10,23,58},{0,31,1324},{4,23,157},{17,31,37},{17,31,37},{17,31,37},{17,27,5},{31,2,1152},{9,31,4},{9,31,4},{8,23,1},{27,16,1152},{8,23,1},{31,19,1568},{21,31,637},{19,30,5},{11,30,5},{31,19,1568},{27,26,1568},{11,30,5}, 
+{0,26,1586},{27,26,1568},{0,26,1586},{14,0,1576},{14,0,1576},{14,0,1576},{14,0,1576},{10,29,10},{10,29,10},{10,29,10},{10,22,13},{1,27,9},{1,27,9},{18,31,7332},{16,31,4196},{16,31,2175},{15,30,1621},{16,31,8348},{13,31,3285},{11,31,33},{10,27,2107},{8,31,8004},{2,27,1611},{21,31,3112},{20,31,1281},{19,31,69},{18,30,181},{30,15,5419},{15,31,2897},{11,31,24},{3,27,1590},{30,21,5419}, +{3,27,1590},{16,31,2171},{16,31,2171},{16,31,2171},{15,29,1580},{13,31,1590},{11,31,29},{11,31,29},{11,24,74},{2,31,1424},{5,24,137},{19,31,65},{19,31,65},{19,31,65},{18,28,5},{26,13,1152},{11,31,20},{11,31,20},{8,24,2},{21,21,1152},{8,24,2},{30,23,1568},{23,31,785},{20,31,5},{12,31,5},{30,23,1568},{28,27,1568},{12,31,5},{0,27,1586},{28,27,1568},{0,27,1586},{15,0,1576}, +{15,0,1576},{15,0,1576},{15,0,1576},{11,30,10},{11,30,10},{11,30,10},{11,23,13},{1,28,8},{1,28,8},{19,31,7014},{17,31,4230},{17,31,2294},{16,31,1595},{17,31,7865},{14,31,3114},{12,31,85},{12,27,1706},{8,31,7436},{3,28,1268},{22,31,2794},{21,31,1221},{20,31,113},{19,30,114},{30,17,4803},{17,31,2648},{13,31,61},{4,28,1253},{25,25,4803},{4,28,1253},{17,31,2294},{17,31,2294},{17,31,2294}, +{16,30,1590},{14,31,1740},{12,31,85},{12,31,85},{12,24,62},{3,31,1571},{6,25,137},{20,31,113},{20,31,113},{20,31,113},{19,29,5},{28,12,1152},{13,31,61},{13,31,61},{9,25,2},{22,22,1152},{9,25,2},{31,23,1250},{24,31,680},{21,31,4},{15,31,0},{31,23,1250},{23,31,1250},{15,31,0},{0,28,1252},{23,31,1250},{0,28,1252},{16,0,1586},{16,0,1586},{16,0,1586},{16,0,1586},{12,31,4}, +{12,31,4},{12,31,4},{12,23,8},{2,29,8},{2,29,8},{19,31,6534},{18,31,4116},{18,31,2435},{17,31,1590},{18,31,7164},{15,31,2809},{14,31,161},{12,28,1256},{10,31,6748},{5,28,945},{23,31,2340},{22,31,1065},{21,31,164},{20,31,64},{31,17,4056},{18,31,2211},{15,31,113},{7,28,900},{25,26,4056},{7,28,900},{18,31,2435},{18,31,2435},{18,31,2435},{17,31,1590},{16,31,1923},{14,31,161},{14,31,161}, 
+{13,25,62},{6,31,1729},{7,26,137},{21,31,164},{21,31,164},{21,31,164},{20,29,4},{29,13,1152},{15,31,113},{15,31,113},{10,26,2},{28,20,1152},{10,26,2},{30,26,882},{25,31,482},{23,31,0},{18,31,1},{30,26,882},{31,27,882},{18,31,1},{0,28,900},{31,27,882},{0,28,900},{17,0,1586},{17,0,1586},{17,0,1586},{17,0,1586},{13,31,13},{13,31,13},{13,31,13},{13,24,8},{3,30,8}, +{3,30,8},{21,31,6091},{20,31,4022},{19,31,2609},{18,31,1640},{19,31,6490},{16,31,2617},{15,31,318},{14,28,835},{11,31,6135},{6,29,598},{24,31,1881},{23,31,931},{22,31,245},{21,31,5},{31,19,3318},{20,31,1733},{17,31,202},{8,29,545},{27,26,3318},{8,29,545},{19,31,2609},{19,31,2609},{19,31,2609},{18,31,1640},{17,31,2086},{15,31,318},{15,31,318},{14,27,58},{8,31,1868},{8,27,157},{22,31,245}, +{22,31,245},{22,31,245},{21,31,5},{29,16,1152},{17,31,202},{17,31,202},{12,27,1},{31,20,1152},{12,27,1},{31,26,545},{26,31,305},{25,31,4},{20,31,1},{31,26,545},{29,29,545},{20,31,1},{0,29,545},{29,29,545},{0,29,545},{18,0,1576},{18,0,1576},{18,0,1576},{18,0,1576},{15,31,29},{15,31,29},{15,31,29},{14,26,13},{5,31,9},{5,31,9},{22,31,5719},{21,31,3980},{20,31,2834}, +{19,31,1745},{20,31,6050},{18,31,2457},{16,31,536},{15,29,515},{14,31,5674},{7,30,406},{25,31,1573},{24,31,861},{23,31,338},{22,31,10},{30,22,2753},{21,31,1438},{19,31,290},{11,29,338},{27,27,2753},{11,29,338},{20,31,2834},{20,31,2834},{20,31,2834},{19,31,1745},{18,31,2284},{16,31,536},{16,31,536},{15,28,74},{10,31,2064},{9,28,137},{23,31,338},{23,31,338},{23,31,338},{22,31,10},{30,17,1152}, +{19,31,290},{19,31,290},{12,28,2},{25,25,1152},{12,28,2},{31,27,313},{28,31,181},{26,31,1},{23,31,0},{31,27,313},{30,29,313},{23,31,0},{0,29,337},{30,29,313},{0,29,337},{19,0,1576},{19,0,1576},{19,0,1576},{19,0,1576},{16,31,52},{16,31,52},{16,31,52},{15,27,13},{6,31,25},{6,31,25},{22,31,5399},{22,31,3974},{21,31,3035},{20,31,1875},{21,31,5619},{19,31,2378},{18,31,776}, 
+{16,30,318},{15,31,5258},{9,30,225},{26,31,1333},{25,31,813},{24,31,425},{23,31,65},{30,24,2273},{23,31,1218},{20,31,353},{12,30,146},{29,27,2273},{12,30,146},{21,31,3035},{21,31,3035},{21,31,3035},{20,31,1875},{19,31,2518},{18,31,776},{18,31,776},{16,28,62},{11,31,2323},{10,29,137},{24,31,425},{24,31,425},{24,31,425},{23,31,65},{31,18,1152},{20,31,353},{20,31,353},{13,29,2},{26,26,1152}, +{13,29,2},{31,28,145},{28,31,85},{28,31,4},{26,31,1},{31,28,145},{30,30,145},{26,31,1},{0,30,145},{30,30,145},{0,30,145},{20,0,1586},{20,0,1586},{20,0,1586},{20,0,1586},{17,31,85},{17,31,85},{17,31,85},{16,27,8},{8,31,40},{8,31,40},{23,31,5143},{23,31,4004},{22,31,3254},{21,31,2070},{22,31,5274},{20,31,2310},{19,31,1062},{17,30,133},{17,31,5011},{10,31,161},{27,31,1161}, +{26,31,801},{26,31,545},{25,31,164},{31,24,1878},{24,31,1094},{22,31,461},{14,30,66},{24,31,1878},{14,30,66},{22,31,3254},{22,31,3254},{22,31,3254},{21,31,2070},{20,31,2833},{19,31,1062},{19,31,1062},{17,29,62},{14,31,2577},{11,30,137},{26,31,545},{26,31,545},{26,31,545},{25,31,164},{30,22,1152},{22,31,461},{22,31,461},{14,30,2},{27,27,1152},{14,30,2},{30,31,41},{30,31,25},{29,31,1}, +{28,31,0},{30,31,41},{31,30,41},{28,31,0},{0,30,65},{31,30,41},{0,30,65},{21,0,1586},{21,0,1586},{21,0,1586},{21,0,1586},{18,31,136},{18,31,136},{18,31,136},{17,28,8},{10,31,80},{10,31,80},{24,31,4882},{24,31,4070},{23,31,3532},{22,31,2360},{24,31,4945},{21,31,2422},{20,31,1433},{18,31,58},{20,31,4717},{12,31,157},{28,31,1026},{27,31,835},{27,31,666},{26,31,305},{31,26,1536}, +{26,31,996},{24,31,628},{16,31,1},{29,29,1536},{16,31,1},{23,31,3532},{23,31,3532},{23,31,3532},{22,31,2360},{22,31,3110},{20,31,1433},{20,31,1433},{18,31,58},{16,31,2939},{12,31,157},{27,31,666},{27,31,666},{27,31,666},{26,31,305},{30,25,1152},{24,31,628},{24,31,628},{16,31,1},{30,27,1152},{16,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0}, 
+{0,31,0},{31,31,0},{0,31,0},{22,0,1576},{22,0,1576},{22,0,1576},{22,0,1576},{19,31,221},{19,31,221},{19,31,221},{18,30,13},{12,31,157},{12,31,157},{25,31,4212},{24,31,3590},{24,31,3106},{23,31,2201},{24,31,4129},{22,31,2101},{21,31,1301},{19,31,13},{20,31,3869},{14,31,233},{28,31,706},{28,31,562},{28,31,481},{27,31,218},{31,27,1067},{27,31,699},{25,31,442},{18,31,1},{30,29,1067}, +{18,31,1},{24,31,3106},{24,31,3106},{24,31,3106},{23,31,2201},{23,31,2668},{21,31,1301},{21,31,1301},{19,31,13},{18,31,2523},{14,31,233},{28,31,481},{28,31,481},{28,31,481},{27,31,218},{31,25,802},{25,31,442},{25,31,442},{18,31,1},{31,27,802},{18,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{23,0,1576}, +{23,0,1576},{23,0,1576},{23,0,1576},{20,31,325},{20,31,325},{20,31,325},{19,31,13},{14,31,233},{14,31,233},{26,31,3642},{25,31,3132},{25,31,2771},{24,31,2070},{25,31,3444},{23,31,1834},{22,31,1205},{20,31,8},{21,31,3219},{16,31,346},{29,31,456},{28,31,370},{28,31,289},{28,31,145},{30,29,683},{28,31,451},{27,31,290},{21,31,1},{29,30,683},{21,31,1},{25,31,2771},{25,31,2771},{25,31,2771}, +{24,31,2070},{24,31,2273},{22,31,1205},{22,31,1205},{20,31,8},{20,31,2121},{16,31,346},{28,31,289},{28,31,289},{28,31,289},{28,31,145},{31,26,512},{27,31,290},{27,31,290},{21,31,1},{29,29,512},{21,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{24,0,1586},{24,0,1586},{24,0,1586},{24,0,1586},{22,31,421}, +{22,31,421},{22,31,421},{20,31,8},{16,31,346},{16,31,346},{26,31,3162},{26,31,2742},{26,31,2486},{25,31,1947},{26,31,2877},{24,31,1641},{23,31,1145},{22,31,52},{22,31,2673},{18,31,458},{29,31,264},{29,31,200},{29,31,164},{28,31,81},{30,30,384},{28,31,243},{28,31,162},{23,31,1},{30,30,384},{23,31,1},{26,31,2486},{26,31,2486},{26,31,2486},{25,31,1947},{24,31,1969},{23,31,1145},{23,31,1145}, 
+{22,31,52},{20,31,1785},{18,31,458},{29,31,164},{29,31,164},{29,31,164},{28,31,81},{31,27,290},{28,31,162},{28,31,162},{23,31,1},{30,29,290},{23,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{25,0,1586},{25,0,1586},{25,0,1586},{25,0,1586},{23,31,520},{23,31,520},{23,31,520},{22,31,52},{18,31,458}, +{18,31,458},{0,23,2665},{0,18,680},{0,13,50},{0,11,785},{0,15,5885},{0,11,4118},{0,10,1800},{0,7,4202},{0,8,6546},{0,7,4643},{0,23,2665},{0,18,680},{0,13,50},{0,11,785},{9,0,5885},{0,11,4118},{0,10,1800},{0,7,4202},{15,0,5885},{0,7,4202},{0,11,0},{0,11,0},{0,11,0},{0,7,4},{0,5,549},{0,5,289},{0,5,289},{0,3,306},{0,3,630},{0,3,387},{0,11,0}, +{0,11,0},{0,11,0},{0,7,4},{2,2,545},{0,5,289},{0,5,289},{0,3,306},{4,1,545},{0,3,306},{13,1,2665},{0,18,680},{0,13,50},{0,11,785},{13,1,2665},{23,0,2665},{0,11,785},{0,8,2689},{23,0,2665},{0,8,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,25,2665},{0,20,521},{0,14,5}, +{0,12,625},{0,17,6669},{0,13,4529},{0,11,1890},{0,8,4610},{0,9,7494},{0,7,5171},{0,25,2665},{0,20,521},{0,14,5},{0,12,625},{9,2,6669},{0,13,4529},{0,11,1890},{0,8,4610},{17,0,6669},{0,8,4610},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{0,7,841},{0,6,445},{0,6,445},{0,4,464},{0,3,982},{0,3,595},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{4,0,841}, +{0,6,445},{0,6,445},{0,4,464},{7,0,841},{0,4,464},{14,2,2665},{0,20,521},{0,14,5},{0,12,625},{14,2,2665},{25,0,2665},{0,12,625},{0,9,2689},{25,0,2665},{0,9,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,28,2665},{0,22,405},{0,15,10},{0,13,514},{0,19,7541},{0,14,4934},{0,12,2042}, 
+{0,9,5045},{0,10,8546},{0,8,5682},{0,28,2665},{0,22,405},{0,15,10},{0,13,514},{10,2,7538},{0,14,4934},{0,12,2042},{0,9,5045},{17,1,7538},{0,9,5045},{0,16,0},{0,16,0},{0,16,0},{0,10,4},{0,8,1201},{0,7,637},{0,7,637},{0,4,656},{0,4,1385},{0,4,800},{0,16,0},{0,16,0},{0,16,0},{0,10,4},{5,0,1201},{0,7,637},{0,7,637},{0,4,656},{8,0,1201}, +{0,4,656},{16,1,2665},{0,22,405},{1,15,5},{0,13,514},{16,1,2665},{28,0,2665},{0,13,514},{0,10,2689},{28,0,2665},{0,10,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,30,2669},{0,23,313},{0,16,68},{0,15,410},{0,20,8498},{0,16,5330},{0,13,2210},{0,10,5530},{0,11,9702},{0,9,6270},{0,30,2669}, +{0,23,313},{0,16,68},{0,15,410},{11,2,8493},{0,16,5330},{0,13,2210},{0,10,5530},{17,2,8493},{0,10,5530},{0,19,1},{0,19,1},{0,19,1},{0,11,1},{0,9,1629},{0,8,832},{0,8,832},{0,5,881},{0,5,1874},{0,5,1106},{0,19,1},{0,19,1},{0,19,1},{0,11,1},{5,1,1625},{0,8,832},{0,8,832},{0,5,881},{8,1,1625},{0,5,881},{17,2,2665},{0,23,313},{2,16,8}, +{0,15,410},{17,2,2665},{29,1,2665},{0,15,410},{0,11,2689},{29,1,2665},{0,11,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,31,2777},{0,26,232},{0,17,197},{0,16,305},{0,22,9674},{0,17,5849},{0,14,2450},{0,10,6106},{0,12,11199},{0,10,7006},{1,31,2741},{0,26,232},{1,17,146},{0,16,305},{11,4,9669}, +{0,17,5849},{0,14,2450},{0,10,6106},{19,2,9669},{0,10,6106},{0,22,1},{0,22,1},{0,22,1},{0,13,0},{0,11,2178},{0,10,1125},{0,10,1125},{0,6,1189},{0,6,2520},{0,5,1475},{0,22,1},{0,22,1},{0,22,1},{0,13,0},{6,1,2178},{0,10,1125},{0,10,1125},{0,6,1189},{11,0,2178},{0,6,1189},{20,0,2665},{0,26,232},{3,17,2},{0,16,305},{20,0,2665},{30,2,2665},{0,16,305}, 
+{0,12,2689},{30,2,2665},{0,12,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,31,2949},{0,28,217},{1,18,261},{0,17,282},{0,25,9670},{0,19,5529},{0,16,1970},{0,12,5738},{0,13,11589},{0,11,6898},{2,31,2789},{0,28,217},{2,18,146},{0,17,282},{15,0,9669},{0,19,5529},{0,16,1970},{0,12,5738},{20,3,9669}, +{0,12,5738},{0,24,64},{0,24,64},{0,24,64},{1,14,64},{0,13,2180},{0,11,949},{0,11,949},{0,7,1018},{0,7,2691},{0,6,1433},{1,23,1},{1,23,1},{1,23,1},{1,14,0},{8,0,2178},{0,11,949},{0,11,949},{0,7,1018},{5,5,2178},{0,7,1018},{21,1,2665},{0,28,153},{4,18,5},{0,17,218},{21,1,2665},{31,3,2665},{0,17,218},{0,13,2689},{31,3,2665},{0,13,2689},{0,0,64}, +{0,0,64},{0,0,64},{0,0,64},{0,3,1},{0,3,1},{0,3,1},{0,2,4},{0,1,18},{0,1,18},{2,31,3285},{1,28,273},{2,19,453},{1,18,346},{0,27,9674},{0,20,5170},{0,17,1546},{0,13,5429},{0,15,11993},{0,12,6819},{3,31,2873},{1,28,209},{3,19,146},{1,18,282},{14,4,9669},{0,20,5170},{0,17,1546},{0,13,5429},{24,2,9669},{0,13,5429},{1,25,128},{1,25,128},{1,25,128}, +{1,15,137},{0,16,2178},{0,13,832},{0,13,832},{0,8,881},{0,8,2882},{0,7,1427},{2,23,4},{2,23,4},{2,23,4},{2,15,0},{9,1,2178},{0,13,832},{0,13,832},{0,8,881},{16,0,2178},{0,8,881},{23,0,2665},{0,29,85},{5,19,5},{0,18,149},{23,0,2665},{30,5,2665},{0,18,149},{0,14,2689},{30,5,2665},{0,14,2689},{1,0,128},{1,0,128},{1,0,128},{1,0,128},{0,5,1}, +{0,5,1},{0,5,1},{0,3,1},{0,2,72},{0,2,72},{3,31,3785},{1,30,405},{2,21,676},{1,19,469},{0,30,9669},{0,22,4878},{0,18,1190},{0,14,5138},{0,16,12390},{0,13,6789},{4,31,2966},{2,29,209},{4,20,149},{2,19,282},{18,0,9669},{0,22,4878},{0,18,1190},{0,14,5138},{30,0,9669},{0,14,5138},{2,26,320},{2,26,320},{2,26,320},{1,17,320},{0,19,2180},{0,15,680},{0,15,680}, 
+{0,9,740},{0,9,3149},{0,8,1441},{3,24,1},{3,24,1},{3,24,1},{3,16,1},{10,2,2178},{0,15,680},{0,15,680},{0,9,740},{17,1,2178},{0,9,740},{24,1,2665},{0,31,41},{6,20,8},{0,19,98},{24,1,2665},{31,6,2665},{0,19,98},{0,15,2689},{31,6,2665},{0,15,2689},{1,0,320},{1,0,320},{1,0,320},{1,0,320},{0,8,0},{0,8,0},{0,8,0},{0,5,1},{0,4,160}, +{0,4,160},{4,31,4514},{2,31,630},{3,22,1027},{2,20,694},{1,31,9738},{0,23,4646},{0,20,849},{0,15,4826},{0,18,12955},{0,14,6798},{6,31,3101},{3,31,218},{5,21,146},{3,20,299},{18,3,9669},{0,23,4646},{0,20,849},{0,15,4826},{28,3,9669},{0,15,4826},{2,29,545},{2,29,545},{2,29,545},{2,18,546},{0,22,2180},{0,17,505},{0,17,505},{0,11,610},{0,11,3467},{0,10,1513},{4,26,1}, +{4,26,1},{4,26,1},{4,17,0},{13,0,2178},{0,17,505},{0,17,505},{0,11,610},{20,1,2178},{0,11,610},{26,1,2665},{2,31,85},{7,21,2},{0,20,65},{26,1,2665},{31,8,2665},{0,20,65},{0,16,2689},{31,8,2665},{0,16,2689},{2,0,545},{2,0,545},{2,0,545},{2,0,545},{0,11,0},{0,11,0},{0,11,0},{0,7,4},{0,5,289},{0,5,289},{4,31,5330},{3,31,1018},{3,23,1430}, +{2,21,979},{2,31,9981},{0,26,4406},{0,21,579},{0,16,4610},{0,19,13489},{0,15,6846},{7,31,3233},{4,31,226},{6,22,146},{4,21,282},{20,2,9669},{0,26,4406},{0,21,579},{0,16,4610},{24,7,9669},{0,16,4610},{3,30,865},{3,30,865},{3,30,865},{3,19,866},{0,24,2178},{0,19,389},{0,19,389},{0,12,464},{0,12,3885},{0,11,1603},{5,27,1},{5,27,1},{5,27,1},{5,18,0},{14,1,2178}, +{0,19,389},{0,19,389},{0,12,464},{24,0,2178},{0,12,464},{28,0,2665},{3,31,153},{8,22,5},{0,22,37},{28,0,2665},{30,10,2665},{0,22,37},{0,17,2689},{30,10,2665},{0,17,2689},{3,0,865},{3,0,865},{3,0,865},{3,0,865},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{0,6,445},{0,6,445},{5,31,6270},{3,31,1626},{4,24,1886},{3,22,1299},{2,31,10381},{0,28,4146},{0,22,377}, 
+{0,17,4373},{0,20,14006},{0,16,6915},{8,31,3434},{6,31,242},{7,23,146},{5,22,282},{21,3,9669},{0,28,4146},{0,22,377},{0,17,4373},{28,6,9669},{0,17,4373},{3,31,1226},{3,31,1226},{3,31,1226},{3,21,1205},{0,27,2180},{0,21,274},{0,21,274},{0,13,353},{0,14,4269},{0,11,1763},{6,27,4},{6,27,4},{6,27,4},{6,19,0},{16,0,2178},{0,21,274},{0,21,274},{0,13,353},{25,1,2178}, +{0,13,353},{29,1,2665},{5,31,232},{9,23,5},{0,23,10},{29,1,2665},{31,11,2665},{0,23,10},{0,18,2689},{31,11,2665},{0,18,2689},{3,0,1201},{3,0,1201},{3,0,1201},{3,0,1201},{0,16,0},{0,16,0},{0,16,0},{0,10,4},{0,7,637},{0,7,637},{6,31,7374},{4,31,2339},{4,25,2441},{3,23,1730},{3,31,10950},{0,29,3909},{0,23,243},{0,18,4154},{0,22,14614},{0,17,7029},{9,31,3638}, +{7,31,320},{8,24,149},{6,23,282},{22,4,9669},{0,29,3909},{0,23,243},{0,18,4154},{29,7,9669},{0,18,4154},{4,31,1714},{4,31,1714},{4,31,1714},{3,22,1666},{0,29,2180},{0,23,194},{0,23,194},{0,14,260},{0,15,4686},{0,13,1937},{7,28,1},{7,28,1},{7,28,1},{7,20,1},{17,1,2178},{0,23,194},{0,23,194},{0,14,260},{26,2,2178},{0,14,260},{31,0,2665},{8,31,313},{10,24,8}, +{0,24,4},{31,0,2665},{30,13,2665},{0,24,4},{0,19,2689},{30,13,2665},{0,19,2689},{3,0,1665},{3,0,1665},{3,0,1665},{3,0,1665},{0,19,1},{0,19,1},{0,19,1},{0,11,1},{0,8,832},{0,8,832},{7,31,8807},{5,31,3388},{5,26,3116},{4,24,2243},{4,31,11766},{0,31,3686},{0,25,138},{0,19,3938},{0,23,15369},{0,18,7206},{11,31,3853},{8,31,457},{9,25,146},{7,24,299},{25,2,9669}, +{0,31,3686},{0,25,138},{0,19,3938},{27,10,9669},{0,19,3938},{5,31,2427},{5,31,2427},{5,31,2427},{4,23,2182},{0,31,2210},{0,25,137},{0,25,137},{0,15,181},{0,16,5157},{0,14,2163},{8,30,1},{8,30,1},{8,30,1},{8,21,0},{17,4,2178},{0,25,137},{0,25,137},{0,15,181},{29,2,2178},{0,15,181},{31,3,2665},{9,31,405},{11,25,2},{1,25,2},{31,3,2665},{30,15,2665},{1,25,2}, 
+{0,20,2689},{30,15,2665},{0,20,2689},{4,0,2178},{4,0,2178},{4,0,2178},{4,0,2178},{0,22,1},{0,22,1},{0,22,1},{0,13,0},{0,10,1125},{0,10,1125},{7,31,10230},{6,31,4421},{6,27,3739},{4,26,2742},{5,31,12634},{0,31,3719},{0,26,87},{0,20,3771},{0,25,16061},{0,19,7283},{12,31,4050},{10,31,629},{10,26,146},{8,25,282},{26,3,9669},{0,31,3718},{0,26,86},{0,20,3770},{28,11,9669}, +{0,20,3770},{5,31,3050},{5,31,3050},{5,31,3050},{5,24,2690},{1,31,2325},{0,26,86},{0,26,86},{0,16,129},{0,18,5544},{0,15,2318},{9,31,1},{9,31,1},{9,31,1},{9,22,0},{18,5,2178},{0,26,85},{0,26,85},{0,16,128},{30,3,2178},{0,16,128},{31,6,2665},{11,31,521},{12,26,5},{2,26,2},{31,6,2665},{29,17,2665},{2,26,2},{0,21,2689},{29,17,2665},{0,21,2689},{5,0,2689}, +{5,0,2689},{5,0,2689},{5,0,2689},{0,24,1},{0,24,1},{0,24,1},{0,15,5},{0,11,1348},{0,11,1348},{9,31,10738},{7,31,4899},{7,28,3705},{5,27,2742},{6,31,13045},{1,31,4002},{1,27,87},{0,21,3686},{0,27,15601},{0,20,6570},{13,31,4302},{11,31,857},{11,27,146},{9,26,282},{28,2,9669},{2,31,3954},{1,27,86},{0,21,3605},{27,13,9669},{0,21,3605},{6,31,3173},{6,31,3173},{6,31,3173}, +{6,25,2690},{2,31,2427},{1,28,83},{1,28,83},{1,17,129},{0,20,5170},{0,16,1856},{10,31,4},{10,31,4},{10,31,4},{10,23,0},{20,4,2178},{0,28,32},{0,28,32},{0,17,89},{29,5,2178},{0,17,89},{31,8,2665},{13,31,680},{13,27,5},{3,27,2},{31,8,2665},{30,18,2665},{3,27,2},{0,22,2689},{30,18,2665},{0,22,2689},{6,0,2689},{6,0,2689},{6,0,2689},{6,0,2689},{1,25,1}, +{1,25,1},{1,25,1},{1,15,10},{0,13,1217},{0,13,1217},{10,31,11278},{8,31,5402},{7,29,3750},{6,28,2745},{7,31,13510},{3,31,4314},{2,28,77},{1,22,3686},{0,28,15046},{0,21,5958},{14,31,4590},{12,31,1171},{12,28,149},{10,27,282},{29,3,9669},{3,31,4265},{2,28,76},{0,22,3458},{28,14,9669},{0,22,3458},{7,31,3314},{7,31,3314},{7,31,3314},{7,26,2690},{4,31,2532},{2,28,73},{2,28,73}, 
+{2,18,129},{0,21,4837},{0,17,1490},{11,31,25},{11,31,25},{11,31,25},{11,24,1},{24,0,2178},{0,30,8},{0,30,8},{0,19,49},{30,6,2178},{0,19,49},{31,11,2665},{15,31,832},{14,28,8},{4,28,4},{31,11,2665},{31,19,2665},{4,28,4},{0,23,2689},{31,19,2665},{0,23,2689},{7,0,2689},{7,0,2689},{7,0,2689},{7,0,2689},{2,26,1},{2,26,1},{2,26,1},{2,16,5},{0,14,1037}, +{0,14,1037},{11,31,11942},{10,31,6090},{9,30,3739},{7,29,2751},{9,31,14053},{4,31,4863},{3,29,79},{2,24,3689},{0,30,14558},{0,23,5274},{16,31,4858},{14,31,1556},{13,29,146},{11,28,299},{29,6,9669},{6,31,4594},{3,29,75},{0,24,3265},{31,14,9669},{0,24,3265},{9,31,3505},{9,31,3505},{9,31,3505},{8,27,2693},{5,31,2645},{3,30,72},{3,30,72},{3,19,134},{0,23,4506},{0,19,1109},{12,31,64}, +{12,31,64},{12,31,64},{12,25,0},{24,3,2178},{1,31,10},{1,31,10},{0,20,16},{28,9,2178},{0,20,16},{31,14,2665},{17,31,1053},{15,29,2},{5,29,2},{31,14,2665},{29,22,2665},{5,29,2},{0,24,2689},{29,22,2665},{0,24,2689},{8,0,2689},{8,0,2689},{8,0,2689},{8,0,2689},{3,27,5},{3,27,5},{3,27,5},{3,18,8},{0,16,818},{0,16,818},{12,31,12466},{11,31,6718},{10,31,3739}, +{8,30,2742},{10,31,14554},{6,31,5363},{4,30,87},{3,24,3654},{0,31,14190},{0,24,4785},{17,31,5158},{15,31,1938},{14,30,146},{12,29,282},{30,7,9669},{8,31,4806},{4,30,86},{0,25,3130},{30,16,9669},{0,25,3130},{10,31,3658},{10,31,3658},{10,31,3658},{9,28,2690},{6,31,2795},{4,30,86},{4,30,86},{4,20,129},{0,25,4315},{0,20,809},{14,31,100},{14,31,100},{14,31,100},{13,26,0},{22,9,2178}, +{3,31,34},{3,31,34},{0,21,1},{29,10,2178},{0,21,1},{30,18,2665},{19,31,1241},{16,30,5},{6,30,2},{30,18,2665},{30,23,2665},{6,30,2},{0,25,2689},{30,23,2665},{0,25,2689},{9,0,2689},{9,0,2689},{9,0,2689},{9,0,2689},{4,28,1},{4,28,1},{4,28,1},{4,19,5},{0,18,666},{0,18,666},{14,31,13094},{12,31,7445},{11,31,3830},{9,31,2742},{12,31,14998},{8,31,5926},{5,31,87}, 
+{4,25,3686},{0,31,14254},{0,25,4323},{18,31,5494},{16,31,2414},{15,31,146},{13,30,282},{29,11,9669},{10,31,5138},{5,31,86},{0,26,3013},{31,17,9669},{0,26,3013},{11,31,3829},{11,31,3829},{11,31,3829},{10,29,2690},{7,31,2981},{5,31,86},{5,31,86},{5,21,129},{0,27,4059},{0,21,597},{15,31,145},{15,31,145},{15,31,145},{14,27,0},{24,8,2178},{5,31,85},{5,31,85},{1,22,1},{30,11,2178}, +{1,22,1},{31,19,2665},{20,31,1378},{17,31,5},{7,31,2},{31,19,2665},{29,25,2665},{7,31,2},{0,26,2689},{29,25,2665},{0,26,2689},{10,0,2689},{10,0,2689},{10,0,2689},{10,0,2689},{5,29,1},{5,29,1},{5,29,1},{5,19,10},{0,20,505},{0,20,505},{15,31,12507},{13,31,7370},{12,31,4001},{11,31,2705},{13,31,14148},{8,31,5491},{6,31,154},{5,26,3063},{1,31,13399},{0,26,3306},{19,31,4949}, +{17,31,2261},{16,31,202},{15,30,185},{29,13,8712},{11,31,4644},{7,31,145},{0,27,2403},{28,20,8712},{0,27,2403},{12,31,4001},{12,31,4001},{12,31,4001},{11,30,2690},{9,31,3204},{6,31,154},{6,31,154},{6,22,129},{0,28,3762},{0,23,425},{16,31,202},{16,31,202},{16,31,202},{15,28,1},{28,4,2178},{7,31,145},{7,31,145},{2,23,1},{29,13,2178},{2,23,1},{30,22,2178},{22,31,1145},{18,31,1}, +{10,31,1},{30,22,2178},{27,27,2178},{10,31,1},{0,27,2178},{27,27,2178},{0,27,2178},{11,0,2689},{11,0,2689},{11,0,2689},{11,0,2689},{6,30,1},{6,30,1},{6,30,1},{6,20,5},{0,22,389},{0,22,389},{16,31,11658},{14,31,7195},{13,31,4225},{12,31,2693},{14,31,13066},{10,31,5014},{8,31,261},{6,27,2390},{3,31,12366},{0,27,2277},{20,31,4338},{18,31,2037},{17,31,289},{16,30,89},{29,15,7578}, +{13,31,4037},{9,31,202},{0,27,1701},{30,20,7578},{0,27,1701},{13,31,4225},{13,31,4225},{13,31,4225},{12,31,2693},{10,31,3429},{8,31,261},{8,31,261},{7,23,134},{0,30,3509},{0,24,306},{17,31,289},{17,31,289},{17,31,289},{16,29,0},{31,2,2178},{9,31,202},{9,31,202},{3,24,0},{27,16,2178},{3,24,0},{31,22,1625},{23,31,850},{20,31,0},{13,31,1},{31,22,1625},{30,26,1625},{13,31,1}, 
+{0,27,1665},{30,26,1625},{0,27,1665},{12,0,2689},{12,0,2689},{12,0,2689},{12,0,2689},{7,31,5},{7,31,5},{7,31,5},{7,22,8},{0,24,306},{0,24,306},{16,31,11002},{15,31,7081},{14,31,4450},{13,31,2738},{15,31,12205},{11,31,4663},{9,31,411},{7,27,1845},{4,31,11643},{0,28,1578},{21,31,3802},{20,31,1845},{18,31,388},{17,31,25},{30,15,6661},{15,31,3525},{11,31,290},{0,28,1217},{30,21,6661}, +{0,28,1217},{14,31,4450},{14,31,4450},{14,31,4450},{13,31,2738},{11,31,3675},{9,31,411},{9,31,411},{8,24,129},{0,31,3354},{0,25,244},{18,31,388},{18,31,388},{18,31,388},{17,30,0},{26,13,2178},{11,31,290},{11,31,290},{4,25,1},{21,21,2178},{4,25,1},{31,23,1201},{24,31,653},{22,31,4},{15,31,1},{31,23,1201},{30,27,1201},{15,31,1},{0,28,1201},{30,27,1201},{0,28,1201},{13,0,2689}, +{13,0,2689},{13,0,2689},{13,0,2689},{8,31,17},{8,31,17},{8,31,17},{8,23,5},{0,26,218},{0,26,218},{17,31,10434},{16,31,7010},{15,31,4693},{14,31,2833},{16,31,11374},{12,31,4462},{10,31,629},{8,28,1387},{6,31,10895},{0,28,1002},{22,31,3334},{20,31,1701},{19,31,505},{18,31,0},{30,17,5829},{16,31,3145},{12,31,405},{1,28,866},{25,25,5829},{1,28,866},{15,31,4693},{15,31,4693},{15,31,4693}, +{14,31,2833},{12,31,3906},{10,31,629},{10,31,629},{9,25,129},{1,31,3525},{0,27,228},{19,31,505},{19,31,505},{19,31,505},{18,31,0},{28,12,2178},{12,31,405},{12,31,405},{5,26,1},{22,22,2178},{5,26,1},{30,26,841},{25,31,461},{23,31,1},{18,31,0},{30,26,841},{31,27,841},{18,31,0},{0,28,865},{31,27,841},{0,28,865},{14,0,2689},{14,0,2689},{14,0,2689},{14,0,2689},{9,31,50}, +{9,31,50},{9,31,50},{9,23,10},{0,28,137},{0,28,137},{18,31,9934},{17,31,6962},{16,31,4913},{15,31,2978},{17,31,10683},{13,31,4277},{11,31,915},{9,28,994},{8,31,10078},{0,29,630},{23,31,2934},{22,31,1605},{21,31,650},{19,31,25},{31,17,5082},{18,31,2769},{14,31,521},{2,29,546},{25,26,5082},{2,29,546},{16,31,4913},{16,31,4913},{16,31,4913},{15,31,2978},{14,31,4170},{11,31,915},{11,31,915}, 
+{10,26,129},{3,31,3789},{1,28,226},{21,31,650},{21,31,650},{21,31,650},{19,31,25},{29,13,2178},{14,31,521},{14,31,521},{6,27,1},{28,20,2178},{6,27,1},{31,26,545},{26,31,305},{25,31,4},{20,31,1},{31,26,545},{29,29,545},{20,31,1},{0,29,545},{29,29,545},{0,29,545},{15,0,2689},{15,0,2689},{15,0,2689},{15,0,2689},{11,31,74},{11,31,74},{11,31,74},{10,24,5},{0,29,85}, +{0,29,85},{19,31,9465},{18,31,6955},{17,31,5233},{16,31,3218},{18,31,10003},{14,31,4183},{13,31,1258},{10,29,645},{9,31,9445},{1,30,409},{24,31,2529},{23,31,1525},{22,31,785},{20,31,100},{31,19,4344},{20,31,2345},{16,31,698},{5,29,321},{27,26,4344},{5,29,321},{17,31,5233},{17,31,5233},{17,31,5233},{16,31,3218},{15,31,4491},{13,31,1258},{13,31,1258},{11,27,134},{5,31,4171},{2,29,213},{22,31,785}, +{22,31,785},{22,31,785},{20,31,100},{29,16,2178},{16,31,698},{16,31,698},{7,28,0},{31,20,2178},{7,28,0},{31,27,290},{28,31,162},{26,31,4},{23,31,1},{31,27,290},{30,29,290},{23,31,1},{0,29,320},{30,29,290},{0,29,320},{16,0,2689},{16,0,2689},{16,0,2689},{16,0,2689},{12,31,113},{12,31,113},{12,31,113},{11,26,8},{0,31,45},{0,31,45},{20,31,9219},{19,31,6985},{18,31,5530}, +{17,31,3473},{19,31,9496},{15,31,4186},{14,31,1630},{12,29,426},{11,31,8961},{3,30,277},{25,31,2275},{24,31,1509},{23,31,932},{22,31,208},{30,22,3779},{21,31,2086},{18,31,850},{6,30,129},{27,27,3779},{6,30,129},{18,31,5530},{18,31,5530},{18,31,5530},{17,31,3473},{16,31,4770},{14,31,1630},{14,31,1630},{12,28,129},{8,31,4442},{3,30,213},{23,31,932},{23,31,932},{23,31,932},{22,31,208},{30,17,2178}, +{18,31,850},{18,31,850},{8,29,1},{25,25,2178},{8,29,1},{30,30,128},{29,31,72},{28,31,1},{26,31,0},{30,30,128},{30,30,128},{26,31,0},{0,30,128},{30,30,128},{0,30,128},{17,0,2689},{17,0,2689},{17,0,2689},{17,0,2689},{13,31,170},{13,31,170},{13,31,170},{12,27,5},{2,31,89},{2,31,89},{21,31,8929},{20,31,7062},{19,31,5845},{18,31,3778},{20,31,9188},{17,31,4260},{15,31,2070}, 
+{13,30,234},{13,31,8680},{4,31,228},{26,31,2089},{25,31,1515},{24,31,1073},{23,31,353},{30,24,3299},{22,31,1913},{20,31,965},{9,30,65},{29,27,3299},{9,30,65},{19,31,5845},{19,31,5845},{19,31,5845},{18,31,3778},{17,31,5124},{15,31,2070},{15,31,2070},{13,29,129},{9,31,4725},{4,31,228},{24,31,1073},{24,31,1073},{24,31,1073},{23,31,353},{31,18,2178},{20,31,965},{20,31,965},{9,30,1},{26,26,2178}, +{9,30,1},{31,30,34},{30,31,18},{29,31,4},{28,31,1},{31,30,34},{31,30,34},{28,31,1},{0,30,64},{31,30,34},{0,30,64},{18,0,2689},{18,0,2689},{18,0,2689},{18,0,2689},{14,31,245},{14,31,245},{14,31,245},{13,27,10},{4,31,164},{4,31,164},{22,31,8707},{21,31,7170},{21,31,6209},{19,31,4133},{21,31,8853},{18,31,4387},{17,31,2548},{14,31,170},{14,31,8388},{6,31,244},{27,31,1971}, +{26,31,1557},{25,31,1250},{24,31,565},{31,24,2904},{23,31,1826},{22,31,1145},{10,31,1},{24,31,2904},{10,31,1},{21,31,6209},{21,31,6209},{21,31,6209},{19,31,4133},{19,31,5460},{17,31,2548},{17,31,2548},{14,30,129},{11,31,5085},{6,31,244},{25,31,1250},{25,31,1250},{25,31,1250},{24,31,565},{30,22,2178},{22,31,1145},{22,31,1145},{10,31,1},{27,27,2178},{10,31,1},{31,31,0},{31,31,0},{31,31,0}, +{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{19,0,2689},{19,0,2689},{19,0,2689},{19,0,2689},{15,31,338},{15,31,338},{15,31,338},{14,28,5},{6,31,244},{6,31,244},{23,31,7705},{22,31,6418},{21,31,5633},{20,31,3845},{22,31,7654},{19,31,3874},{18,31,2310},{15,31,53},{15,31,7258},{8,31,317},{27,31,1458},{27,31,1186},{26,31,932},{25,31,425},{31,25,2166}, +{25,31,1398},{23,31,850},{13,31,1},{28,29,2166},{13,31,1},{21,31,5633},{21,31,5633},{21,31,5633},{20,31,3845},{19,31,4830},{18,31,2310},{18,31,2310},{15,31,53},{13,31,4506},{8,31,317},{26,31,932},{26,31,932},{26,31,932},{25,31,425},{31,22,1625},{23,31,850},{23,31,850},{13,31,1},{30,26,1625},{13,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0}, 
+{0,31,0},{31,31,0},{0,31,0},{20,0,2689},{20,0,2689},{20,0,2689},{20,0,2689},{16,31,449},{16,31,449},{16,31,449},{15,30,8},{8,31,317},{8,31,317},{24,31,6881},{23,31,5814},{22,31,5138},{21,31,3650},{23,31,6713},{20,31,3400},{19,31,2142},{16,31,5},{17,31,6397},{9,31,425},{28,31,1075},{27,31,866},{27,31,697},{26,31,320},{31,26,1601},{26,31,1041},{24,31,653},{15,31,1},{29,29,1601}, +{15,31,1},{22,31,5138},{22,31,5138},{22,31,5138},{21,31,3650},{21,31,4313},{19,31,2142},{19,31,2142},{16,31,5},{14,31,3981},{9,31,425},{27,31,697},{27,31,697},{27,31,697},{26,31,320},{31,23,1201},{24,31,653},{24,31,653},{15,31,1},{30,27,1201},{15,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{21,0,2689}, +{21,0,2689},{21,0,2689},{21,0,2689},{18,31,549},{18,31,549},{18,31,549},{16,31,5},{9,31,425},{9,31,425},{24,31,6097},{24,31,5285},{23,31,4693},{22,31,3473},{23,31,5833},{21,31,3067},{20,31,1988},{17,31,10},{18,31,5571},{11,31,541},{28,31,739},{28,31,595},{27,31,505},{27,31,233},{30,28,1121},{26,31,737},{25,31,461},{18,31,0},{28,30,1121},{18,31,0},{23,31,4693},{23,31,4693},{23,31,4693}, +{22,31,3473},{22,31,3845},{20,31,1988},{20,31,1988},{17,31,10},{15,31,3542},{11,31,541},{27,31,505},{27,31,505},{27,31,505},{27,31,233},{30,26,841},{25,31,461},{25,31,461},{18,31,0},{31,27,841},{18,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{22,0,2689},{22,0,2689},{22,0,2689},{22,0,2689},{19,31,666}, +{19,31,666},{19,31,666},{17,31,10},{11,31,541},{11,31,541},{25,31,5427},{24,31,4757},{24,31,4273},{23,31,3314},{24,31,5002},{22,31,2788},{21,31,1898},{18,31,65},{20,31,4714},{13,31,698},{29,31,489},{28,31,387},{28,31,306},{28,31,162},{30,29,726},{27,31,482},{26,31,305},{20,31,1},{29,30,726},{20,31,1},{24,31,4273},{24,31,4273},{24,31,4273},{23,31,3314},{22,31,3429},{21,31,1898},{21,31,1898}, 
+{18,31,65},{17,31,3213},{13,31,698},{28,31,306},{28,31,306},{28,31,306},{28,31,162},{31,26,545},{26,31,305},{26,31,305},{20,31,1},{29,29,545},{20,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{23,0,2689},{23,0,2689},{23,0,2689},{23,0,2689},{20,31,832},{20,31,832},{20,31,832},{18,31,65},{13,31,698}, +{13,31,698},{4,31,33740},{0,31,5184},{0,22,420},{0,21,4221},{3,31,45594},{0,29,24105},{0,21,8317},{0,18,24790},{0,21,63990},{0,16,38959},{2,31,9704},{0,30,2866},{0,21,389},{0,19,3229},{14,2,18065},{0,20,13257},{0,17,6153},{0,12,13481},{25,0,18065},{0,12,13481},{0,15,1},{0,15,1},{0,15,1},{0,9,1},{0,8,1105},{0,7,585},{0,7,585},{0,4,596},{0,4,1273},{0,4,740},{0,15,1}, +{0,15,1},{0,15,1},{0,9,1},{4,1,1105},{0,7,585},{0,7,585},{0,4,596},{8,0,1105},{0,4,596},{20,4,9248},{0,30,2866},{0,21,389},{0,19,3229},{20,4,9248},{29,5,9248},{0,19,3229},{0,14,9248},{29,5,9248},{0,14,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{4,31,38380},{1,31,6614},{0,23,245}, +{0,22,3864},{4,31,50747},{0,31,24961},{0,22,8353},{0,19,25735},{0,22,65535},{0,17,41319},{2,31,10152},{0,31,2624},{0,23,229},{0,20,2980},{16,0,19334},{0,20,13769},{0,18,6243},{0,13,14116},{25,1,19334},{0,13,14116},{0,18,0},{0,18,0},{0,18,0},{0,11,1},{0,9,1513},{0,8,772},{0,8,772},{0,5,821},{0,5,1750},{0,4,1028},{0,18,0},{0,18,0},{0,18,0},{0,11,1},{5,1,1513}, +{0,8,772},{0,8,772},{0,5,821},{9,0,1513},{0,5,821},{24,0,9248},{0,31,2624},{0,23,229},{0,20,2980},{24,0,9248},{30,6,9248},{0,20,2980},{0,15,9248},{30,6,9248},{0,15,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{4,31,43788},{1,31,8598},{0,24,126},{0,23,3525},{4,31,56155},{0,31,26241},{0,23,8425}, 
+{0,20,26793},{0,23,65535},{0,18,43819},{3,31,10706},{0,31,2624},{0,24,122},{0,21,2701},{17,0,20689},{0,22,14385},{0,19,6369},{0,13,14756},{25,2,20689},{0,13,14756},{0,21,1},{0,21,1},{0,21,1},{0,12,4},{0,10,1989},{0,9,1018},{0,9,1018},{0,6,1096},{0,5,2294},{0,5,1334},{0,21,1},{0,21,1},{0,21,1},{0,12,4},{5,2,1985},{0,9,1018},{0,9,1018},{0,6,1096},{9,1,1985}, +{0,6,1096},{25,1,9248},{0,31,2624},{0,24,122},{0,21,2701},{25,1,9248},{31,7,9248},{0,21,2701},{0,16,9250},{31,7,9248},{0,16,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{5,31,49566},{1,31,11350},{0,25,41},{0,24,3109},{4,31,62331},{0,31,28289},{0,24,8585},{0,21,27848},{0,23,65535},{0,19,46459},{4,31,11395}, +{0,31,2880},{0,25,37},{0,22,2440},{17,2,22129},{0,23,15030},{0,20,6509},{0,14,15441},{27,2,22129},{0,14,15441},{0,23,1},{0,23,1},{0,23,1},{0,14,0},{0,12,2525},{0,10,1300},{0,10,1300},{0,6,1384},{0,6,2905},{0,6,1708},{0,23,1},{0,23,1},{0,23,1},{0,14,0},{7,0,2521},{0,10,1300},{0,10,1300},{0,6,1384},{10,1,2521},{0,6,1384},{26,2,9248},{1,31,2866},{0,25,37}, +{0,22,2440},{26,2,9248},{27,11,9248},{0,22,2440},{0,17,9250},{27,11,9248},{0,17,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{5,31,56892},{2,31,15166},{0,26,20},{0,25,2804},{5,31,65535},{0,31,31511},{0,25,8733},{0,22,29095},{0,26,65535},{0,20,49444},{4,31,12385},{1,31,3380},{0,26,4},{0,23,2173},{17,4,23851}, +{0,23,15948},{0,21,6729},{0,15,16274},{29,2,23851},{0,15,16274},{0,26,0},{0,26,0},{0,26,0},{0,16,4},{0,13,3200},{0,11,1665},{0,11,1665},{0,7,1754},{0,7,3691},{0,6,2185},{0,26,0},{0,26,0},{0,26,0},{0,16,4},{2,10,3200},{0,11,1665},{0,11,1665},{0,7,1754},{13,0,3200},{0,7,1754},{24,8,9248},{3,31,3204},{0,26,4},{0,23,2173},{24,8,9248},{30,11,9248},{0,23,2173}, 
+{0,18,9248},{30,11,9248},{0,18,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{6,31,63870},{2,31,19230},{0,27,45},{0,27,2520},{5,31,65535},{1,31,35016},{0,26,8925},{0,23,30250},{0,28,65535},{0,21,52374},{5,31,13379},{2,31,4026},{0,27,29},{0,24,1901},{18,4,25472},{0,26,16706},{0,22,6963},{0,16,17124},{29,3,25472}, +{0,16,17124},{0,29,1},{0,29,1},{0,29,1},{0,17,1},{0,14,3874},{0,13,2084},{0,13,2084},{0,8,2165},{0,8,4466},{0,7,2627},{0,29,1},{0,29,1},{0,29,1},{0,17,1},{8,1,3872},{0,13,2084},{0,13,2084},{0,8,2165},{6,5,3872},{0,8,2165},{28,4,9248},{5,31,3589},{1,27,4},{0,24,1901},{28,4,9248},{29,13,9248},{0,24,1901},{0,19,9248},{29,13,9248},{0,19,9248},{0,0,0}, +{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{6,31,65535},{2,31,24002},{0,28,109},{0,27,2268},{6,31,65535},{1,31,38780},{0,27,8825},{0,24,30825},{0,28,65535},{0,22,54996},{6,31,14345},{2,31,4766},{1,28,62},{0,26,1697},{18,6,26744},{0,28,17104},{0,23,6957},{0,17,17625},{31,3,26744},{0,17,17625},{0,31,5},{0,31,5},{0,31,5}, +{0,19,5},{0,16,4418},{0,14,2306},{0,14,2306},{0,9,2420},{0,8,5122},{0,8,2997},{0,31,5},{0,31,5},{0,31,5},{0,19,5},{9,1,4418},{0,14,2306},{0,14,2306},{0,9,2420},{16,0,4418},{0,9,2420},{29,5,9248},{8,31,3904},{2,28,1},{0,26,1693},{29,5,9248},{30,14,9248},{0,26,1693},{0,20,9250},{30,14,9248},{0,20,9250},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,1,1}, +{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{7,31,65535},{3,31,29032},{0,29,330},{0,28,2105},{6,31,65535},{2,31,42151},{0,28,7781},{0,25,30108},{0,29,65535},{0,22,56388},{7,31,14819},{3,31,5416},{2,29,62},{0,27,1580},{23,0,26744},{0,29,16547},{0,24,6221},{0,18,17124},{30,5,26744},{0,18,17124},{1,31,84},{1,31,84},{1,31,84},{1,20,72},{0,19,4420},{0,16,2005},{0,16,2005}, 
+{0,10,2165},{0,9,5389},{0,9,2925},{1,31,20},{1,31,20},{1,31,20},{1,20,8},{10,2,4418},{0,16,2005},{0,16,2005},{0,10,2165},{17,1,4418},{0,10,2165},{30,6,9248},{8,31,4160},{3,29,1},{0,27,1480},{30,6,9248},{31,15,9248},{0,27,1480},{0,21,9250},{31,15,9248},{0,21,9250},{1,0,68},{1,0,68},{1,0,68},{1,0,68},{0,3,1},{0,3,1},{0,3,1},{0,2,0},{0,1,34}, +{0,1,34},{7,31,65535},{3,31,35719},{1,30,717},{0,30,2062},{7,31,65535},{2,31,46660},{0,29,6696},{0,26,29322},{0,31,65535},{0,23,58077},{9,31,15473},{5,31,6173},{3,30,65},{1,28,1601},{23,3,26744},{0,31,15992},{0,26,5346},{0,19,16582},{28,8,26744},{0,19,16582},{2,31,329},{2,31,329},{2,31,329},{1,21,189},{0,22,4420},{0,18,1737},{0,18,1737},{0,11,1898},{0,11,5707},{0,10,2885},{3,31,34}, +{3,31,34},{3,31,34},{2,21,10},{13,0,4418},{0,18,1737},{0,18,1737},{0,11,1898},{20,1,4418},{0,11,1898},{28,12,9248},{10,31,4570},{4,30,4},{0,28,1285},{28,12,9248},{22,22,9248},{0,28,1285},{0,22,9248},{22,22,9248},{0,22,9248},{1,0,185},{1,0,185},{1,0,185},{1,0,185},{0,6,1},{0,6,1},{0,6,1},{0,4,1},{0,3,97},{0,3,97},{7,31,65535},{4,31,40786},{1,31,1122}, +{0,30,2138},{7,31,65535},{2,31,49800},{0,30,5634},{0,27,27967},{0,31,65535},{0,24,58770},{10,31,15531},{6,31,6593},{4,31,61},{2,29,1533},{25,2,26259},{0,31,15284},{0,27,4514},{0,20,15812},{27,10,26259},{0,20,15812},{2,31,633},{2,31,633},{2,31,633},{2,22,381},{0,24,4418},{0,20,1480},{0,20,1480},{0,12,1640},{0,12,6125},{0,11,2891},{4,31,61},{4,31,61},{4,31,61},{3,22,10},{14,1,4418}, +{0,20,1480},{0,20,1480},{0,12,1640},{24,0,4418},{0,12,1640},{30,11,8978},{11,31,4744},{5,31,0},{0,29,1040},{30,11,8978},{31,18,8978},{0,29,1040},{0,23,8980},{31,18,8978},{0,23,8980},{2,0,377},{2,0,377},{2,0,377},{2,0,377},{0,9,0},{0,9,0},{0,9,0},{0,5,4},{0,4,193},{0,4,193},{8,31,65535},{4,31,40898},{1,31,1890},{0,31,2125},{7,31,65535},{3,31,47871},{0,30,4194}, 
+{0,27,24703},{0,31,65535},{0,24,56130},{11,31,14325},{8,31,6051},{5,31,100},{3,29,1218},{22,9,24371},{0,31,13716},{0,28,3402},{0,21,13989},{29,10,24371},{0,21,13989},{3,31,1058},{3,31,1058},{3,31,1058},{2,24,617},{0,27,4420},{0,22,1280},{0,22,1280},{0,13,1445},{0,14,6509},{0,12,2945},{5,31,100},{5,31,100},{5,31,100},{4,23,5},{16,0,4418},{0,22,1280},{0,22,1280},{0,13,1445},{25,1,4418}, +{0,13,1445},{29,14,7938},{13,31,4225},{7,31,4},{0,29,656},{29,14,7938},{29,20,7938},{0,29,656},{0,23,7956},{29,20,7938},{0,23,7956},{2,0,617},{2,0,617},{2,0,617},{2,0,617},{0,11,4},{0,11,4},{0,11,4},{0,7,0},{0,5,325},{0,5,325},{8,31,65535},{4,31,41266},{1,31,2914},{1,31,2170},{7,31,65535},{3,31,46175},{0,30,3010},{0,27,21695},{0,31,65535},{0,25,53636},{12,31,13140}, +{8,31,5571},{6,31,157},{4,29,932},{24,7,22568},{0,31,12404},{0,28,2474},{0,21,12245},{29,11,22568},{0,21,12245},{4,31,1630},{4,31,1630},{4,31,1630},{3,25,937},{0,29,4420},{0,23,1090},{0,23,1090},{0,14,1268},{0,15,6926},{0,13,3029},{6,31,157},{6,31,157},{6,31,157},{5,24,8},{17,1,4418},{0,23,1090},{0,23,1090},{0,14,1268},{26,2,4418},{0,14,1268},{31,12,6962},{14,31,3709},{8,31,1}, +{0,30,353},{31,12,6962},{27,22,6962},{0,30,353},{0,24,6970},{27,22,6962},{0,24,6970},{3,0,937},{3,0,937},{3,0,937},{3,0,937},{0,14,0},{0,14,0},{0,14,0},{0,8,4},{0,6,493},{0,6,493},{9,31,65535},{5,31,41956},{2,31,4257},{1,31,2512},{7,31,65535},{3,31,44573},{0,30,1984},{0,28,18569},{0,31,65535},{0,25,51026},{13,31,11930},{10,31,5125},{7,31,250},{5,29,701},{27,4,20642}, +{1,31,11209},{0,29,1634},{0,22,10422},{31,11,20642},{0,22,10422},{4,31,2350},{4,31,2350},{4,31,2350},{3,27,1361},{0,31,4450},{0,25,949},{0,25,949},{0,16,1096},{0,16,7397},{0,14,3171},{7,31,250},{7,31,250},{7,31,250},{6,25,10},{17,4,4418},{0,25,949},{0,25,949},{0,16,1096},{29,2,4418},{0,16,1096},{30,15,5941},{15,31,3176},{10,31,0},{0,30,128},{30,15,5941},{30,21,5941},{0,30,128}, 
+{0,24,5953},{30,21,5941},{0,24,5953},{3,0,1360},{3,0,1360},{3,0,1360},{3,0,1360},{0,17,0},{0,17,0},{0,17,0},{0,10,1},{0,8,697},{0,8,697},{9,31,65535},{5,31,42660},{2,31,5617},{1,31,3088},{8,31,65535},{3,31,43421},{0,31,1250},{0,28,15865},{0,31,65535},{0,25,48978},{13,31,10922},{11,31,4753},{8,31,360},{6,30,509},{28,4,19021},{2,31,10246},{0,30,1088},{0,23,8945},{29,13,19021}, +{0,23,8945},{5,31,3131},{5,31,3131},{5,31,3131},{4,28,1822},{1,31,4580},{0,28,776},{0,28,776},{0,17,925},{0,18,7893},{0,15,3333},{8,31,360},{8,31,360},{8,31,360},{7,26,10},{18,5,4418},{0,28,776},{0,28,776},{0,17,925},{30,3,4418},{0,17,925},{31,15,5101},{17,31,2777},{11,31,9},{0,31,25},{31,15,5101},{30,22,5101},{0,31,25},{0,25,5105},{30,22,5101},{0,25,5105},{4,0,1818}, +{4,0,1818},{4,0,1818},{4,0,1818},{0,20,1},{0,20,1},{0,20,1},{0,12,1},{0,9,925},{0,9,925},{9,31,65535},{5,31,43620},{2,31,7233},{1,31,3920},{8,31,65535},{3,31,42525},{0,31,738},{0,28,13417},{0,31,65535},{0,25,47186},{14,31,9978},{11,31,4449},{10,31,452},{7,30,354},{31,1,17485},{3,31,9369},{0,30,704},{0,24,7570},{31,13,17485},{0,24,7570},{6,31,4058},{6,31,4058},{6,31,4058}, +{4,29,2315},{2,31,4874},{0,29,610},{0,29,610},{0,18,772},{0,20,8427},{0,16,3497},{10,31,452},{10,31,452},{10,31,452},{8,27,5},{20,4,4418},{0,29,610},{0,29,610},{0,18,772},{29,5,4418},{0,18,772},{31,16,4325},{18,31,2357},{13,31,0},{1,31,0},{31,16,4325},{31,22,4325},{1,31,0},{0,25,4337},{31,22,4325},{0,25,4337},{4,0,2314},{4,0,2314},{4,0,2314},{4,0,2314},{0,22,1}, +{0,22,1},{0,22,1},{0,13,4},{0,10,1189},{0,10,1189},{9,31,65535},{5,31,44836},{2,31,9105},{2,31,4905},{8,31,65535},{3,31,41885},{0,31,482},{0,28,11225},{0,31,65535},{0,26,45590},{15,31,9102},{13,31,4161},{11,31,557},{9,30,212},{29,6,16034},{5,31,8602},{0,31,482},{0,24,6242},{31,14,16034},{0,24,6242},{6,31,5066},{6,31,5066},{6,31,5066},{5,30,2907},{2,31,5322},{0,31,482},{0,31,482}, 
+{0,19,637},{0,20,8939},{0,17,3725},{11,31,557},{11,31,557},{11,31,557},{9,28,8},{24,0,4418},{0,31,482},{0,31,482},{0,19,637},{30,6,4418},{0,19,637},{30,19,3613},{20,31,1940},{15,31,4},{4,31,1},{30,19,3613},{31,23,3613},{4,31,1},{0,26,3617},{31,23,3613},{0,26,3617},{5,0,2906},{5,0,2906},{5,0,2906},{5,0,2906},{0,25,1},{0,25,1},{0,25,1},{0,15,0},{0,11,1489}, +{0,11,1489},{9,31,65535},{5,31,46510},{3,31,11362},{2,31,6237},{9,31,65535},{3,31,41471},{0,31,500},{0,29,8976},{0,31,65535},{0,26,43934},{16,31,8139},{14,31,3853},{12,31,680},{10,30,109},{26,13,14504},{8,31,7667},{0,31,500},{0,25,4979},{21,21,14504},{0,25,4979},{7,31,6337},{7,31,6337},{7,31,6337},{5,31,3642},{3,31,5962},{0,31,500},{0,31,500},{0,20,520},{0,22,9629},{0,18,4035},{12,31,680}, +{12,31,680},{12,31,680},{10,29,10},{24,3,4418},{0,31,500},{0,31,500},{0,20,520},{28,9,4418},{0,20,520},{31,19,2888},{20,31,1517},{16,31,1},{7,31,1},{31,19,2888},{27,26,2888},{7,31,1},{0,26,2906},{27,26,2888},{0,26,2906},{5,0,3617},{5,0,3617},{5,0,3617},{5,0,3617},{0,28,1},{0,28,1},{0,28,1},{0,17,4},{0,11,1930},{0,11,1930},{10,31,65535},{6,31,48082},{3,31,13570}, +{2,31,7693},{9,31,65535},{3,31,41375},{0,31,788},{0,29,7120},{0,31,65535},{0,26,42734},{17,31,7409},{15,31,3625},{13,31,821},{11,31,45},{28,11,13235},{8,31,6899},{2,31,628},{0,25,3987},{31,16,13235},{0,25,3987},{7,31,7681},{7,31,7681},{7,31,7681},{6,31,4437},{4,31,6659},{1,31,738},{1,31,738},{0,21,421},{0,23,10286},{0,20,4305},{13,31,821},{13,31,821},{13,31,821},{11,30,10},{22,9,4418}, +{2,31,628},{2,31,628},{0,21,421},{29,10,4418},{0,21,421},{31,20,2312},{21,31,1217},{18,31,1},{9,31,0},{31,20,2312},{30,25,2312},{9,31,0},{0,27,2314},{30,25,2312},{0,27,2314},{6,0,4337},{6,0,4337},{6,0,4337},{6,0,4337},{0,30,1},{0,30,1},{0,30,1},{0,18,1},{0,13,2329},{0,13,2329},{10,31,65535},{6,31,49890},{3,31,16034},{2,31,9405},{9,31,65535},{4,31,41526},{0,31,1332}, 
+{0,29,5520},{0,31,65535},{0,26,41790},{18,31,6747},{16,31,3459},{14,31,980},{12,31,5},{31,8,12051},{10,31,6275},{4,31,801},{0,26,3066},{28,19,12051},{0,26,3066},{8,31,9062},{8,31,9062},{8,31,9062},{7,31,5410},{4,31,7555},{1,31,1154},{1,31,1154},{0,23,325},{0,23,11118},{0,20,4625},{14,31,980},{14,31,980},{14,31,980},{12,31,5},{24,8,4418},{4,31,801},{4,31,801},{0,23,325},{30,11,4418}, +{0,23,325},{30,23,1800},{23,31,949},{19,31,4},{12,31,1},{30,23,1800},{28,27,1800},{12,31,1},{0,27,1818},{28,27,1800},{0,27,1818},{6,0,5105},{6,0,5105},{6,0,5105},{6,0,5105},{0,31,36},{0,31,36},{0,31,36},{0,20,4},{0,15,2741},{0,15,2741},{10,31,65535},{6,31,51954},{3,31,18754},{3,31,11330},{9,31,65535},{4,31,41798},{1,31,2082},{0,29,4176},{0,31,65535},{0,27,41092},{19,31,6153}, +{17,31,3297},{16,31,1154},{13,31,20},{29,13,10952},{11,31,5708},{6,31,965},{0,27,2291},{28,20,10952},{0,27,2291},{9,31,10545},{9,31,10545},{9,31,10545},{7,31,6482},{5,31,8549},{2,31,1716},{2,31,1716},{0,24,221},{0,26,11876},{0,22,4989},{16,31,1154},{16,31,1154},{16,31,1154},{13,31,20},{28,4,4418},{6,31,965},{6,31,965},{0,24,221},{29,13,4418},{0,24,221},{29,26,1352},{23,31,725},{21,31,0}, +{14,31,1},{29,26,1352},{31,26,1352},{14,31,1},{0,28,1360},{31,26,1352},{0,28,1360},{7,0,5953},{7,0,5953},{7,0,5953},{7,0,5953},{1,31,145},{1,31,145},{1,31,145},{0,21,1},{0,16,3130},{0,16,3130},{10,31,65535},{6,31,54582},{4,31,21886},{3,31,13652},{9,31,65535},{4,31,42410},{1,31,3144},{0,30,2841},{0,31,65535},{0,27,40390},{19,31,5649},{18,31,3157},{17,31,1325},{15,31,74},{29,15,9818}, +{13,31,5241},{8,31,1108},{0,27,1589},{30,20,9818},{0,27,1589},{10,31,12376},{10,31,12376},{10,31,12376},{8,31,7844},{6,31,9861},{3,31,2576},{3,31,2576},{0,25,136},{0,28,12696},{0,23,5429},{17,31,1325},{17,31,1325},{17,31,1325},{15,31,74},{31,2,4418},{8,31,1108},{8,31,1108},{0,25,136},{27,16,4418},{0,25,136},{31,24,925},{25,31,505},{23,31,1},{17,31,1},{31,24,925},{31,27,925},{17,31,1}, 
+{0,28,937},{31,27,925},{0,28,937},{7,0,6970},{7,0,6970},{7,0,6970},{7,0,6970},{1,31,388},{1,31,388},{1,31,388},{0,23,0},{0,17,3665},{0,17,3665},{10,31,65535},{7,31,57052},{4,31,24910},{3,31,15988},{9,31,65535},{4,31,43226},{1,31,4360},{0,30,1833},{0,31,65535},{0,27,40038},{21,31,5202},{19,31,3073},{18,31,1508},{16,31,180},{30,15,8901},{14,31,4814},{10,31,1300},{0,28,1021},{30,21,8901}, +{0,28,1021},{10,31,14136},{10,31,14136},{10,31,14136},{8,31,9252},{7,31,11195},{3,31,3536},{3,31,3536},{0,26,85},{0,29,13491},{0,23,5925},{18,31,1508},{18,31,1508},{18,31,1508},{16,31,180},{26,13,4418},{10,31,1300},{10,31,1300},{0,26,85},{21,21,4418},{0,26,85},{30,27,613},{26,31,337},{24,31,1},{20,31,1},{30,27,613},{30,28,613},{20,31,1},{0,29,617},{30,28,613},{0,29,617},{8,0,7956}, +{8,0,7956},{8,0,7956},{8,0,7956},{2,31,697},{2,31,697},{2,31,697},{0,25,4},{0,18,4181},{0,18,4181},{11,31,65535},{7,31,59708},{4,31,28190},{3,31,18580},{10,31,65535},{5,31,44295},{1,31,5832},{0,30,1081},{0,31,65535},{0,27,39942},{22,31,4818},{20,31,3017},{19,31,1709},{17,31,325},{30,17,8069},{15,31,4473},{11,31,1514},{0,29,602},{25,25,8069},{0,29,602},{11,31,15965},{11,31,15965},{11,31,15965}, +{9,31,10757},{7,31,12667},{4,31,4662},{4,31,4662},{0,27,52},{0,30,14340},{0,25,6449},{19,31,1709},{19,31,1709},{19,31,1709},{17,31,325},{28,12,4418},{11,31,1514},{11,31,1514},{0,27,52},{22,22,4418},{0,27,52},{31,27,365},{27,31,205},{26,31,1},{22,31,1},{31,27,365},{30,29,365},{22,31,1},{0,29,377},{30,29,365},{0,29,377},{8,0,8980},{8,0,8980},{8,0,8980},{8,0,8980},{2,31,1097}, +{2,31,1097},{2,31,1097},{0,26,1},{0,20,4682},{0,20,4682},{11,31,65535},{8,31,58981},{5,31,29551},{4,31,19751},{10,31,65535},{5,31,43215},{2,31,6910},{1,30,686},{0,31,65535},{0,28,34909},{23,31,4502},{21,31,3011},{20,31,1973},{18,31,520},{31,17,7322},{17,31,4242},{13,31,1769},{0,29,314},{25,26,7322},{0,29,314},{12,31,16739},{12,31,16739},{12,31,16739},{10,31,11492},{8,31,13636},{5,31,5510},{5,31,5510}, 
+{1,28,54},{0,31,14139},{0,26,6041},{20,31,1973},{20,31,1973},{20,31,1973},{18,31,520},{29,13,4418},{13,31,1769},{13,31,1769},{0,28,29},{28,20,4418},{0,28,29},{31,28,181},{28,31,97},{27,31,4},{25,31,0},{31,28,181},{31,29,181},{25,31,0},{0,30,185},{31,29,181},{0,30,185},{9,0,9248},{9,0,9248},{9,0,9248},{9,0,9248},{3,31,1348},{3,31,1348},{3,31,1348},{1,27,5},{0,21,4520}, +{0,21,4520},{12,31,65535},{9,31,57270},{6,31,30345},{5,31,20521},{11,31,65535},{6,31,41449},{3,31,8015},{2,31,301},{0,31,65535},{0,28,28330},{24,31,4181},{22,31,3053},{21,31,2248},{20,31,772},{31,19,6584},{20,31,3941},{15,31,2041},{0,30,77},{27,26,6584},{0,30,77},{13,31,17289},{13,31,17289},{13,31,17289},{11,31,12050},{10,31,14315},{7,31,6389},{7,31,6389},{2,29,53},{0,31,13860},{0,28,5286},{21,31,2248}, +{21,31,2248},{21,31,2248},{20,31,772},{29,16,4418},{15,31,2041},{15,31,2041},{0,30,13},{31,20,4418},{0,30,13},{30,31,50},{30,31,34},{29,31,0},{28,31,1},{30,31,50},{31,30,50},{28,31,1},{0,30,68},{31,30,50},{0,30,68},{10,0,9250},{10,0,9250},{10,0,9250},{10,0,9250},{4,31,1549},{4,31,1549},{4,31,1549},{2,28,2},{0,23,4114},{0,23,4114},{13,31,65535},{9,31,55894},{7,31,31068}, +{6,31,21256},{12,31,65535},{8,31,39740},{4,31,9073},{3,31,90},{0,31,65535},{0,29,23356},{24,31,3973},{23,31,3125},{23,31,2500},{21,31,1037},{30,22,6019},{20,31,3701},{17,31,2340},{0,31,4},{27,27,6019},{0,31,4},{14,31,17796},{14,31,17796},{14,31,17796},{12,31,12625},{11,31,14957},{8,31,7139},{8,31,7139},{3,30,53},{0,31,14020},{0,29,4652},{23,31,2500},{23,31,2500},{23,31,2500},{21,31,1037},{30,17,4418}, +{17,31,2340},{17,31,2340},{0,31,4},{25,25,4418},{0,31,4},{31,31,4},{31,31,4},{31,31,4},{30,31,1},{31,31,4},{31,31,4},{30,31,1},{0,31,4},{31,31,4},{0,31,4},{11,0,9250},{11,0,9250},{11,0,9250},{11,0,9250},{6,31,1765},{6,31,1765},{6,31,1765},{3,29,2},{0,25,3877},{0,25,3877},{13,31,65535},{10,31,53236},{8,31,30487},{7,31,21105},{13,31,65535},{8,31,37332},{5,31,9177}, 
+{4,31,36},{1,31,65535},{0,29,18680},{25,31,3443},{24,31,2741},{23,31,2248},{22,31,980},{29,25,5163},{21,31,3218},{20,31,2117},{3,31,1},{30,26,5163},{3,31,1},{15,31,17289},{15,31,17289},{15,31,17289},{13,31,12512},{12,31,14328},{9,31,7149},{9,31,7149},{4,31,20},{0,31,13376},{0,29,3944},{23,31,2248},{23,31,2248},{23,31,2248},{22,31,980},{31,17,3872},{20,31,2117},{20,31,2117},{3,31,1},{25,26,3872}, +{3,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{12,0,9248},{12,0,9248},{12,0,9248},{12,0,9248},{7,31,1972},{7,31,1972},{7,31,1972},{4,30,5},{0,27,3545},{0,27,3545},{14,31,65535},{11,31,50266},{9,31,29322},{8,31,20567},{13,31,65535},{9,31,35025},{6,31,8985},{5,31,21},{2,31,65535},{0,29,14712},{26,31,2873}, +{25,31,2283},{24,31,1825},{22,31,820},{30,24,4267},{22,31,2657},{20,31,1685},{5,31,1},{29,27,4267},{5,31,1},{16,31,16427},{16,31,16427},{16,31,16427},{14,31,12185},{13,31,13442},{10,31,6915},{10,31,6915},{5,31,5},{1,31,12539},{0,30,3314},{24,31,1825},{24,31,1825},{24,31,1825},{22,31,820},{28,23,3200},{20,31,1685},{20,31,1685},{5,31,1},{28,25,3200},{5,31,1},{31,31,0},{31,31,0},{31,31,0}, +{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{13,0,9248},{13,0,9248},{13,0,9248},{13,0,9248},{8,31,2250},{8,31,2250},{8,31,2250},{5,31,5},{0,28,3170},{0,28,3170},{15,31,65535},{12,31,47239},{10,31,28065},{9,31,20104},{14,31,65535},{10,31,32574},{7,31,8839},{6,31,54},{3,31,64890},{0,30,10964},{26,31,2252},{25,31,1806},{25,31,1445},{23,31,650},{30,25,3361}, +{23,31,2091},{21,31,1322},{8,31,0},{30,27,3361},{8,31,0},{17,31,15584},{17,31,15584},{17,31,15584},{15,31,11846},{14,31,12522},{11,31,6697},{11,31,6697},{6,31,50},{3,31,11669},{0,31,2834},{25,31,1445},{25,31,1445},{25,31,1445},{23,31,650},{30,21,2521},{21,31,1322},{21,31,1322},{8,31,0},{31,24,2521},{8,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0}, 
+{0,31,0},{31,31,0},{0,31,0},{14,0,9250},{14,0,9250},{14,0,9250},{14,0,9250},{9,31,2525},{9,31,2525},{9,31,2525},{6,31,50},{0,31,2834},{0,31,2834},{16,31,65535},{13,31,44559},{11,31,27000},{10,31,19705},{15,31,64179},{11,31,30525},{8,31,8677},{7,31,149},{3,31,60570},{0,30,8308},{27,31,1782},{26,31,1416},{25,31,1157},{24,31,520},{30,26,2646},{23,31,1691},{22,31,1040},{11,31,1},{31,27,2646}, +{11,31,1},{18,31,14889},{18,31,14889},{18,31,14889},{16,31,11585},{15,31,11778},{12,31,6555},{12,31,6555},{7,31,145},{3,31,11061},{0,31,2610},{25,31,1157},{25,31,1157},{25,31,1157},{24,31,520},{31,21,1985},{22,31,1040},{22,31,1040},{11,31,1},{31,25,1985},{11,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{15,0,9250}, +{15,0,9250},{15,0,9250},{15,0,9250},{10,31,2792},{10,31,2792},{10,31,2792},{7,31,145},{0,31,2610},{0,31,2610},{16,31,63318},{14,31,42019},{12,31,25930},{11,31,19324},{16,31,59178},{11,31,28845},{9,31,8605},{8,31,276},{6,31,56253},{0,30,6420},{27,31,1366},{27,31,1094},{26,31,872},{25,31,397},{30,27,2017},{25,31,1298},{23,31,794},{13,31,1},{30,28,2017},{13,31,1},{19,31,14244},{19,31,14244},{19,31,14244}, +{17,31,11312},{16,31,11037},{13,31,6429},{13,31,6429},{8,31,260},{6,31,10457},{0,31,2642},{26,31,872},{26,31,872},{26,31,872},{25,31,397},{31,22,1513},{23,31,794},{23,31,794},{13,31,1},{29,27,1513},{13,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{16,0,9248},{16,0,9248},{16,0,9248},{16,0,9248},{12,31,3074}, +{12,31,3074},{12,31,3074},{8,31,260},{0,31,2642},{0,31,2642},{17,31,58848},{15,31,39619},{13,31,24975},{12,31,19007},{16,31,54474},{13,31,27057},{10,31,8569},{9,31,461},{8,31,51302},{0,31,5046},{28,31,979},{27,31,806},{27,31,637},{26,31,292},{31,26,1473},{26,31,953},{24,31,605},{16,31,0},{29,29,1473},{16,31,0},{19,31,13604},{19,31,13604},{19,31,13604},{18,31,11057},{16,31,10429},{14,31,6339},{14,31,6339}, 
+{10,31,424},{8,31,9713},{1,31,2900},{27,31,637},{27,31,637},{27,31,637},{26,31,292},{30,25,1105},{24,31,605},{24,31,605},{16,31,0},{30,27,1105},{16,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{17,0,9248},{17,0,9248},{17,0,9248},{17,0,9248},{12,31,3330},{12,31,3330},{12,31,3330},{10,31,424},{1,31,2900}, +{1,31,2900}, diff --git a/thirdparty/basisu/transcoder/basisu_transcoder_tables_atc_56.inc b/thirdparty/basisu/transcoder/basisu_transcoder_tables_atc_56.inc new file mode 100644 index 000000000..2b56c0944 --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_transcoder_tables_atc_56.inc @@ -0,0 +1,481 @@ +{0,3,20},{0,3,5},{0,2,1},{0,2,9},{0,2,35},{0,2,27},{0,1,17},{0,1,24},{0,1,41},{0,1,25},{0,3,20},{0,3,5},{0,2,1},{0,2,9},{0,2,35},{0,2,27},{0,1,17},{0,1,24},{1,0,35},{0,1,24},{0,1,1},{0,1,1},{0,1,1},{0,1,0},{0,1,2},{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{0,1,1}, +{0,1,1},{0,1,1},{0,1,0},{0,1,2},{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{1,0,18},{0,3,5},{0,2,1},{0,2,9},{1,0,18},{1,1,18},{0,2,9},{0,1,20},{1,1,18},{0,1,20},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,9,54},{0,7,37},{0,4,52}, +{0,4,36},{0,7,52},{0,5,21},{0,4,0},{0,3,21},{0,4,88},{0,3,37},{1,5,24},{1,5,9},{1,4,5},{1,4,13},{2,1,51},{0,5,21},{0,4,0},{0,3,21},{3,1,51},{0,3,21},{0,7,36},{0,7,36},{0,7,36},{0,4,36},{0,5,10},{0,4,0},{0,4,0},{0,2,5},{0,3,26},{0,2,14},{1,3,5},{1,3,5},{1,3,5},{1,3,4},{1,2,8}, +{0,4,0},{0,4,0},{0,2,5},{2,1,8},{0,2,5},{2,2,18},{0,7,1},{1,4,1},{0,4,0},{2,2,18},{2,3,18},{0,4,0},{0,3,20},{2,3,18},{0,3,20},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,4,0},{0,4,0},{0,4,0},{0,2,1},{0,2,10},{0,2,10},{1,11,54},{1,9,37},{1,6,52},{1,6,36},{1,9,52},{1,7,21},{1,6,0}, 
+{1,5,21},{0,7,63},{0,5,25},{2,7,24},{2,7,9},{2,6,5},{2,6,13},{3,3,51},{1,7,21},{1,6,0},{1,5,21},{4,3,51},{1,5,21},{1,9,36},{1,9,36},{1,9,36},{1,6,36},{1,7,10},{1,6,0},{1,6,0},{1,4,5},{0,6,11},{0,5,9},{2,5,5},{2,5,5},{2,5,5},{2,5,4},{3,1,8},{1,6,0},{1,6,0},{1,4,5},{3,3,8}, +{1,4,5},{3,4,18},{1,9,1},{2,6,1},{1,6,0},{3,4,18},{7,0,18},{1,6,0},{0,5,20},{7,0,18},{0,5,20},{1,0,36},{1,0,36},{1,0,36},{1,0,36},{1,6,0},{1,6,0},{1,6,0},{1,4,1},{0,6,2},{0,6,2},{2,13,54},{2,11,37},{2,8,52},{2,8,36},{2,11,52},{2,9,21},{2,8,0},{2,7,21},{0,11,51},{1,7,25},{3,9,24}, +{3,9,9},{3,8,5},{3,8,13},{5,1,51},{2,9,21},{2,8,0},{2,7,21},{9,0,51},{2,7,21},{2,11,36},{2,11,36},{2,11,36},{2,8,36},{2,9,10},{2,8,0},{2,8,0},{2,6,5},{1,8,11},{1,7,9},{3,7,5},{3,7,5},{3,7,5},{3,7,4},{3,6,8},{2,8,0},{2,8,0},{2,6,5},{8,0,8},{2,6,5},{4,6,18},{2,11,1},{3,8,1}, +{2,8,0},{4,6,18},{8,2,18},{2,8,0},{0,7,20},{8,2,18},{0,7,20},{2,0,36},{2,0,36},{2,0,36},{2,0,36},{2,8,0},{2,8,0},{2,8,0},{2,6,1},{1,8,2},{1,8,2},{3,15,70},{3,13,51},{4,10,69},{3,10,52},{3,14,52},{3,12,25},{3,10,4},{3,9,27},{1,13,53},{2,10,26},{4,12,22},{4,11,12},{4,10,5},{4,10,9},{6,4,51}, +{2,13,22},{3,10,3},{3,9,26},{11,1,51},{3,9,26},{3,13,51},{3,13,51},{3,13,51},{3,10,51},{3,12,9},{3,11,2},{3,11,2},{3,9,2},{2,11,9},{3,9,10},{4,10,4},{4,10,4},{4,10,4},{4,9,5},{6,2,8},{3,11,1},{3,11,1},{3,9,1},{11,0,8},{3,9,1},{7,2,18},{3,13,1},{4,10,1},{3,10,2},{7,2,18},{11,2,18},{3,10,2}, +{0,9,26},{11,2,18},{0,9,26},{3,0,50},{3,0,50},{3,0,50},{3,0,50},{3,11,1},{3,11,1},{3,11,1},{3,9,1},{2,10,0},{2,10,0},{4,17,54},{4,15,36},{4,13,54},{4,12,38},{4,15,52},{4,14,24},{4,12,3},{4,11,26},{2,15,53},{3,12,26},{5,14,22},{5,13,12},{5,12,5},{5,12,9},{8,2,51},{3,15,22},{4,12,3},{4,11,26},{12,3,51}, 
+{4,11,26},{4,15,36},{4,15,36},{4,15,36},{4,12,37},{4,14,8},{4,12,2},{4,12,2},{4,11,1},{3,13,9},{3,11,14},{5,12,4},{5,12,4},{5,12,4},{5,11,5},{8,0,8},{4,12,2},{4,12,2},{4,11,1},{11,3,8},{4,11,1},{9,0,18},{4,15,0},{5,12,1},{4,12,2},{9,0,18},{15,0,18},{4,12,2},{0,11,26},{15,0,18},{0,11,26},{4,0,36}, +{4,0,36},{4,0,36},{4,0,36},{4,12,1},{4,12,1},{4,12,1},{4,11,0},{3,12,0},{3,12,0},{5,19,54},{5,17,37},{5,15,54},{5,14,38},{5,17,52},{5,15,27},{5,14,3},{5,13,26},{3,17,52},{4,14,26},{6,15,24},{6,15,12},{6,14,5},{6,14,9},{10,1,51},{4,17,21},{5,14,3},{5,13,26},{17,0,51},{5,13,26},{5,17,36},{5,17,36},{5,17,36}, +{5,14,37},{5,15,10},{5,14,2},{5,14,2},{5,13,1},{4,15,12},{4,13,11},{6,14,4},{6,14,4},{6,14,4},{6,13,5},{9,2,8},{5,14,2},{5,14,2},{5,13,1},{16,0,8},{5,13,1},{10,2,18},{5,17,1},{6,14,1},{5,14,2},{10,2,18},{16,2,18},{5,14,2},{0,13,26},{16,2,18},{0,13,26},{5,0,36},{5,0,36},{5,0,36},{5,0,36},{5,14,1}, +{5,14,1},{5,14,1},{5,13,0},{4,14,0},{4,14,0},{6,21,54},{6,19,37},{6,16,52},{6,16,36},{6,19,52},{6,17,21},{6,16,0},{6,15,26},{4,19,51},{5,15,36},{7,17,24},{7,17,9},{7,16,5},{7,16,13},{11,3,51},{6,17,21},{6,16,0},{6,15,26},{18,2,51},{6,15,26},{6,19,36},{6,19,36},{6,19,36},{6,16,36},{6,17,10},{6,16,0},{6,16,0}, +{6,15,1},{5,16,11},{5,15,11},{7,15,5},{7,15,5},{7,15,5},{7,15,5},{11,1,8},{6,16,0},{6,16,0},{6,15,1},{17,2,8},{6,15,1},{11,4,18},{6,19,1},{7,16,1},{6,16,0},{11,4,18},{17,4,18},{6,16,0},{0,15,26},{17,4,18},{0,15,26},{6,0,36},{6,0,36},{6,0,36},{6,0,36},{6,16,0},{6,16,0},{6,16,0},{6,15,0},{5,16,2}, +{5,16,2},{7,23,70},{7,21,51},{8,18,69},{7,18,52},{7,22,52},{7,20,25},{7,18,4},{7,17,27},{5,21,53},{6,18,26},{8,20,22},{8,19,12},{8,18,5},{8,18,9},{13,2,51},{6,21,22},{7,18,3},{7,17,26},{21,2,51},{7,17,26},{7,21,51},{7,21,51},{7,21,51},{7,18,51},{7,20,9},{7,19,2},{7,19,2},{7,17,2},{6,19,9},{7,17,10},{8,18,4}, 
+{8,18,4},{8,18,4},{8,17,5},{13,0,8},{7,19,1},{7,19,1},{7,17,1},{20,2,8},{7,17,1},{14,0,18},{7,21,1},{8,18,1},{7,18,2},{14,0,18},{20,4,18},{7,18,2},{0,17,26},{20,4,18},{0,17,26},{7,0,50},{7,0,50},{7,0,50},{7,0,50},{7,19,1},{7,19,1},{7,19,1},{7,17,1},{6,18,0},{6,18,0},{8,25,54},{8,23,36},{8,21,54}, +{8,20,38},{8,24,51},{8,22,24},{8,20,3},{8,19,26},{6,23,53},{7,20,26},{9,22,22},{9,21,12},{9,20,5},{9,20,9},{14,4,51},{7,23,22},{8,20,3},{8,19,26},{25,0,51},{8,19,26},{8,23,36},{8,23,36},{8,23,36},{8,20,37},{8,22,8},{8,20,2},{8,20,2},{8,19,1},{7,21,9},{7,19,14},{9,20,4},{9,20,4},{9,20,4},{9,19,5},{14,2,8}, +{8,20,2},{8,20,2},{8,19,1},{24,0,8},{8,19,1},{15,2,18},{8,23,0},{9,20,1},{8,20,2},{15,2,18},{25,1,18},{8,20,2},{0,19,26},{25,1,18},{0,19,26},{8,0,36},{8,0,36},{8,0,36},{8,0,36},{8,20,1},{8,20,1},{8,20,1},{8,19,0},{7,20,0},{7,20,0},{9,27,54},{9,25,36},{9,23,54},{9,22,38},{9,26,51},{9,24,24},{9,22,3}, +{9,21,26},{7,25,53},{8,22,26},{10,24,22},{10,23,12},{10,22,5},{10,22,9},{16,2,51},{8,25,19},{9,22,3},{9,21,26},{27,1,51},{9,21,26},{9,25,36},{9,25,36},{9,25,36},{9,22,37},{9,24,8},{9,22,2},{9,22,2},{9,21,1},{8,23,12},{8,21,11},{10,22,4},{10,22,4},{10,22,4},{10,21,5},{16,0,8},{9,22,2},{9,22,2},{9,21,1},{26,1,8}, +{9,21,1},{17,0,18},{9,25,0},{10,22,1},{9,22,2},{17,0,18},{26,3,18},{9,22,2},{0,21,26},{26,3,18},{0,21,26},{9,0,36},{9,0,36},{9,0,36},{9,0,36},{9,22,1},{9,22,1},{9,22,1},{9,21,0},{8,22,0},{8,22,0},{10,29,54},{10,27,36},{10,25,54},{10,24,38},{10,28,51},{10,26,24},{10,24,3},{10,23,26},{8,27,52},{9,24,26},{11,26,22}, +{11,25,12},{11,24,5},{11,24,9},{17,4,51},{9,27,19},{10,24,3},{10,23,26},{27,4,51},{10,23,26},{10,27,36},{10,27,36},{10,27,36},{10,24,37},{10,26,8},{10,24,2},{10,24,2},{10,23,1},{9,25,12},{9,23,11},{11,24,4},{11,24,4},{11,24,4},{11,23,5},{17,2,8},{10,24,2},{10,24,2},{10,23,1},{27,3,8},{10,23,1},{18,2,18},{10,27,0},{11,24,1}, 
+{10,24,2},{18,2,18},{31,0,18},{10,24,2},{0,23,26},{31,0,18},{0,23,26},{10,0,36},{10,0,36},{10,0,36},{10,0,36},{10,24,1},{10,24,1},{10,24,1},{10,23,0},{9,24,0},{9,24,0},{11,31,70},{11,29,52},{11,27,70},{11,27,54},{11,30,53},{11,28,20},{11,27,5},{11,26,25},{10,28,56},{10,26,22},{12,28,22},{12,27,9},{12,26,8},{12,26,9},{19,3,51}, +{11,28,19},{11,27,4},{10,26,21},{30,4,51},{10,26,21},{11,30,50},{11,30,50},{11,30,50},{11,27,50},{11,28,11},{11,27,1},{11,27,1},{11,25,2},{10,27,11},{11,25,10},{12,26,4},{12,26,4},{12,26,4},{12,25,5},{19,1,8},{11,27,0},{11,27,0},{11,25,1},{29,4,8},{11,25,1},{20,1,18},{11,29,2},{12,26,4},{11,27,4},{20,1,18},{29,6,18},{11,27,4}, +{0,26,20},{29,6,18},{0,26,20},{11,0,50},{11,0,50},{11,0,50},{11,0,50},{11,27,1},{11,27,1},{11,27,1},{11,25,2},{10,26,2},{10,26,2},{12,33,54},{12,31,37},{12,29,56},{12,29,41},{12,32,51},{12,30,22},{12,29,5},{12,28,24},{11,30,56},{11,28,22},{13,30,22},{13,29,9},{13,28,8},{13,28,9},{20,5,51},{12,30,22},{12,29,5},{11,28,21},{31,6,51}, +{11,28,21},{12,31,37},{12,31,37},{12,31,37},{12,29,37},{12,30,8},{12,29,1},{12,29,1},{12,27,0},{11,29,11},{12,27,9},{13,28,4},{13,28,4},{13,28,4},{13,27,5},{20,3,8},{12,29,1},{12,29,1},{12,27,0},{30,6,8},{12,27,0},{21,3,18},{12,31,1},{13,28,4},{13,28,5},{21,3,18},{30,8,18},{13,28,5},{0,28,20},{30,8,18},{0,28,20},{12,0,36}, +{12,0,36},{12,0,36},{12,0,36},{12,29,0},{12,29,0},{12,29,0},{12,27,0},{11,28,2},{11,28,2},{13,35,54},{13,33,36},{13,31,56},{13,31,41},{13,34,51},{13,32,24},{13,31,5},{13,30,24},{11,33,53},{12,30,21},{14,32,22},{14,31,9},{14,30,8},{14,30,9},{23,0,51},{12,33,19},{13,31,5},{12,30,21},{31,9,51},{12,30,21},{13,33,36},{13,33,36},{13,33,36}, +{13,31,37},{13,32,8},{13,31,1},{13,31,1},{13,29,0},{12,31,9},{13,29,9},{14,30,4},{14,30,4},{14,30,4},{14,29,5},{21,5,8},{13,31,1},{13,31,1},{13,29,0},{31,8,8},{13,29,0},{22,5,18},{13,33,0},{14,30,4},{14,30,5},{22,5,18},{31,10,18},{14,30,5},{0,30,20},{31,10,18},{0,30,20},{13,0,36},{13,0,36},{13,0,36},{13,0,36},{13,31,0}, 
+{13,31,0},{13,31,0},{13,29,0},{12,30,1},{12,30,1},{14,37,54},{14,35,36},{14,33,54},{14,32,38},{14,36,51},{14,34,24},{14,32,3},{14,32,35},{12,35,52},{13,32,26},{15,34,22},{15,33,12},{15,32,5},{15,32,9},{24,2,51},{13,35,19},{14,32,3},{13,32,26},{27,17,51},{13,32,26},{14,35,36},{14,35,36},{14,35,36},{14,32,37},{14,34,8},{14,32,2},{14,32,2}, +{14,31,0},{13,33,12},{14,31,9},{15,32,4},{15,32,4},{15,32,4},{15,31,5},{24,0,8},{14,32,2},{14,32,2},{14,31,0},{31,11,8},{14,31,0},{25,0,18},{14,35,0},{15,32,1},{14,32,2},{25,0,18},{31,13,18},{14,32,2},{0,32,26},{31,13,18},{0,32,26},{14,0,36},{14,0,36},{14,0,36},{14,0,36},{14,32,1},{14,32,1},{14,32,1},{14,31,0},{13,32,0}, +{13,32,0},{15,40,68},{15,37,52},{15,35,70},{15,35,54},{15,38,53},{15,36,20},{15,35,5},{15,34,25},{14,36,56},{14,34,22},{16,36,22},{16,35,9},{16,34,8},{16,34,9},{26,1,51},{15,36,19},{15,35,4},{14,34,21},{30,17,51},{14,34,21},{15,38,50},{15,38,50},{15,38,50},{15,35,50},{15,36,11},{15,35,1},{15,35,1},{15,33,2},{14,35,11},{15,33,10},{16,34,4}, +{16,34,4},{16,34,4},{16,33,5},{24,6,8},{15,35,0},{15,35,0},{15,33,1},{29,17,8},{15,33,1},{25,6,18},{15,37,2},{16,34,4},{15,35,4},{25,6,18},{29,19,18},{15,35,4},{0,34,20},{29,19,18},{0,34,20},{15,0,50},{15,0,50},{15,0,50},{15,0,50},{15,35,1},{15,35,1},{15,35,1},{15,33,2},{14,34,2},{14,34,2},{16,41,56},{16,39,37},{16,37,56}, +{16,37,41},{16,40,51},{16,38,22},{16,37,5},{16,36,24},{15,38,56},{15,36,22},{17,38,22},{17,37,9},{17,36,8},{17,36,9},{27,3,51},{16,38,22},{16,37,5},{15,36,21},{31,19,51},{15,36,21},{16,39,37},{16,39,37},{16,39,37},{16,37,37},{16,38,8},{16,37,1},{16,37,1},{16,35,0},{15,37,11},{16,35,9},{17,36,4},{17,36,4},{17,36,4},{17,35,5},{27,1,8}, 
+{16,37,1},{16,37,1},{16,35,0},{30,19,8},{16,35,0},{28,1,18},{16,39,1},{17,36,4},{17,36,5},{28,1,18},{30,21,18},{17,36,5},{0,36,20},{30,21,18},{0,36,20},{16,0,36},{16,0,36},{16,0,36},{16,0,36},{16,37,0},{16,37,0},{16,37,0},{16,35,0},{15,36,2},{15,36,2},{17,43,56},{17,41,37},{17,39,56},{17,39,41},{17,42,51},{17,40,22},{17,39,5}, +{17,38,24},{15,42,56},{16,38,21},{18,40,22},{18,39,9},{18,38,8},{18,38,9},{28,5,51},{17,40,22},{17,39,5},{16,38,21},{31,22,51},{16,38,21},{17,41,37},{17,41,37},{17,41,37},{17,39,37},{17,40,8},{17,39,1},{17,39,1},{17,37,0},{16,39,9},{17,37,9},{18,38,4},{18,38,4},{18,38,4},{18,37,5},{28,3,8},{17,39,1},{17,39,1},{17,37,0},{31,21,8}, +{17,37,0},{29,3,18},{17,41,1},{18,38,4},{18,38,5},{29,3,18},{31,23,18},{18,38,5},{0,38,20},{31,23,18},{0,38,20},{17,0,36},{17,0,36},{17,0,36},{17,0,36},{17,39,0},{17,39,0},{17,39,0},{17,37,0},{16,38,1},{16,38,1},{18,45,56},{18,43,37},{18,41,56},{18,41,41},{18,44,51},{18,42,22},{18,41,5},{18,40,24},{16,43,53},{17,40,21},{19,42,22}, +{19,41,9},{19,40,8},{19,40,9},{31,0,51},{18,42,22},{18,41,5},{17,40,21},{28,29,51},{17,40,21},{18,43,37},{18,43,37},{18,43,37},{18,41,37},{18,42,8},{18,41,1},{18,41,1},{18,39,0},{17,41,9},{18,39,9},{19,40,4},{19,40,4},{19,40,4},{19,39,5},{29,5,8},{18,41,1},{18,41,1},{18,39,0},{31,24,8},{18,39,0},{30,5,18},{18,43,1},{19,40,4}, +{19,40,5},{30,5,18},{31,26,18},{19,40,5},{0,40,20},{31,26,18},{0,40,20},{18,0,36},{18,0,36},{18,0,36},{18,0,36},{18,41,0},{18,41,0},{18,41,0},{18,39,0},{17,40,1},{17,40,1},{19,48,68},{19,46,51},{20,43,70},{19,43,51},{19,47,52},{19,44,22},{19,43,3},{19,42,20},{17,46,51},{18,42,23},{20,44,24},{20,44,8},{20,43,6},{20,42,14},{31,6,51}, +{19,44,21},{19,43,2},{19,42,19},{31,29,51},{19,42,19},{19,46,50},{19,46,50},{19,46,50},{19,43,50},{19,45,9},{19,43,2},{19,43,2},{19,41,3},{18,43,10},{19,41,6},{20,42,5},{20,42,5},{20,42,5},{20,42,5},{31,4,8},{19,43,1},{19,43,1},{19,41,2},{30,29,8},{19,41,2},{30,11,18},{19,46,1},{20,43,2},{19,43,1},{30,11,18},{30,31,18},{19,43,1}, 
+{0,42,18},{30,31,18},{0,42,18},{19,0,50},{19,0,50},{19,0,50},{19,0,50},{19,43,2},{19,43,2},{19,43,2},{19,41,2},{18,43,1},{18,43,1},{20,49,56},{20,47,38},{20,45,53},{20,45,37},{20,48,51},{20,46,19},{20,45,1},{20,44,22},{18,48,56},{19,44,23},{21,46,24},{21,46,8},{21,45,6},{21,44,14},{30,15,51},{20,46,19},{20,45,1},{19,44,22},{31,32,51}, +{19,44,22},{20,47,37},{20,47,37},{20,47,37},{20,45,36},{20,46,10},{20,45,0},{20,45,0},{20,43,2},{19,45,10},{20,43,11},{21,44,5},{21,44,5},{21,44,5},{21,44,5},{30,13,8},{20,45,0},{20,45,0},{20,43,2},{31,31,8},{20,43,2},{31,13,18},{20,47,2},{21,45,2},{20,45,1},{31,13,18},{30,34,18},{20,45,1},{0,44,18},{30,34,18},{0,44,18},{20,0,36}, +{20,0,36},{20,0,36},{20,0,36},{20,45,0},{20,45,0},{20,45,0},{20,43,1},{19,45,1},{19,45,1},{21,51,56},{21,49,37},{21,47,53},{21,47,37},{21,50,51},{21,48,22},{21,47,1},{21,46,22},{19,50,56},{20,46,20},{22,48,22},{22,48,13},{22,47,6},{22,46,14},{30,20,51},{21,48,22},{21,47,1},{20,46,19},{31,35,51},{20,46,19},{21,49,37},{21,49,37},{21,49,37}, +{21,47,36},{21,48,8},{21,47,0},{21,47,0},{21,45,2},{20,47,11},{21,45,11},{22,46,5},{22,46,5},{22,46,5},{22,46,5},{31,15,8},{21,47,0},{21,47,0},{21,45,2},{31,34,8},{21,45,2},{31,18,18},{21,49,1},{22,47,2},{21,47,1},{31,18,18},{31,36,18},{21,47,1},{0,46,18},{31,36,18},{0,46,18},{21,0,36},{21,0,36},{21,0,36},{21,0,36},{21,47,0}, +{21,47,0},{21,47,0},{21,45,1},{20,46,2},{20,46,2},{22,53,56},{22,51,37},{22,49,56},{22,49,41},{22,52,51},{22,50,22},{22,49,5},{22,48,24},{20,51,53},{21,48,21},{23,50,22},{23,49,9},{23,48,8},{23,48,9},{31,22,51},{22,50,22},{22,49,5},{21,48,21},{28,42,51},{21,48,21},{22,51,37},{22,51,37},{22,51,37},{22,49,37},{22,50,8},{22,49,1},{22,49,1}, 
+{22,47,2},{21,49,9},{22,47,11},{23,48,4},{23,48,4},{23,48,4},{23,48,5},{31,20,8},{22,49,1},{22,49,1},{22,47,2},{31,37,8},{22,47,2},{31,23,18},{22,51,1},{23,48,4},{23,48,5},{31,23,18},{31,39,18},{23,48,5},{0,48,20},{31,39,18},{0,48,20},{22,0,36},{22,0,36},{22,0,36},{22,0,36},{22,49,0},{22,49,0},{22,49,0},{22,47,1},{21,48,1}, +{21,48,1},{23,56,68},{23,54,51},{24,51,70},{23,51,51},{23,55,52},{23,52,22},{23,51,3},{23,50,20},{21,54,51},{22,50,23},{24,52,24},{24,52,8},{24,51,6},{24,50,14},{31,28,51},{23,52,21},{23,51,2},{23,50,19},{31,42,51},{23,50,19},{23,54,50},{23,54,50},{23,54,50},{23,51,50},{23,53,9},{23,51,2},{23,51,2},{23,49,3},{22,51,10},{23,49,6},{24,50,5}, +{24,50,5},{24,50,5},{24,50,5},{31,26,8},{23,51,1},{23,51,1},{23,49,2},{30,42,8},{23,49,2},{31,29,18},{23,54,1},{24,51,2},{23,51,1},{31,29,18},{30,44,18},{23,51,1},{0,50,18},{30,44,18},{0,50,18},{23,0,50},{23,0,50},{23,0,50},{23,0,50},{23,51,2},{23,51,2},{23,51,2},{23,49,2},{22,51,1},{22,51,1},{24,58,54},{24,55,38},{24,53,53}, +{24,53,37},{24,56,52},{24,54,19},{24,53,1},{24,52,22},{22,56,51},{23,52,23},{25,54,24},{25,54,8},{25,53,6},{25,52,14},{31,33,51},{24,54,19},{24,53,1},{23,52,22},{31,45,51},{23,52,22},{24,56,36},{24,56,36},{24,56,36},{24,53,36},{24,54,10},{24,53,0},{24,53,0},{24,51,2},{23,53,10},{24,51,11},{25,52,5},{25,52,5},{25,52,5},{25,52,5},{31,31,8}, +{24,53,0},{24,53,0},{24,51,2},{31,44,8},{24,51,2},{31,34,18},{24,55,2},{25,53,2},{24,53,1},{31,34,18},{31,46,18},{24,53,1},{0,52,18},{31,46,18},{0,52,18},{24,0,36},{24,0,36},{24,0,36},{24,0,36},{24,53,0},{24,53,0},{24,53,0},{24,51,1},{23,53,1},{23,53,1},{25,60,54},{25,57,38},{25,55,53},{25,55,37},{25,58,52},{25,56,19},{25,55,1}, 
+{25,54,22},{23,58,51},{24,54,20},{26,56,24},{26,56,8},{26,55,6},{26,54,14},{31,38,51},{25,56,19},{25,55,1},{24,54,19},{31,48,51},{24,54,19},{25,58,36},{25,58,36},{25,58,36},{25,55,36},{25,56,10},{25,55,0},{25,55,0},{25,53,2},{24,55,11},{25,53,11},{26,54,5},{26,54,5},{26,54,5},{26,54,5},{31,36,8},{25,55,0},{25,55,0},{25,53,2},{31,47,8}, +{25,53,2},{30,43,18},{25,57,2},{26,55,2},{25,55,1},{30,43,18},{30,50,18},{25,55,1},{0,54,18},{30,50,18},{0,54,18},{25,0,36},{25,0,36},{25,0,36},{25,0,36},{25,55,0},{25,55,0},{25,55,0},{25,53,1},{24,54,2},{24,54,2},{26,62,54},{26,59,38},{26,57,53},{26,57,37},{26,60,52},{26,58,19},{26,57,1},{26,56,22},{25,58,56},{25,56,20},{27,58,24}, +{27,58,8},{27,57,6},{27,56,14},{30,47,51},{26,58,19},{26,57,1},{25,56,19},{31,51,51},{25,56,19},{26,60,36},{26,60,36},{26,60,36},{26,57,36},{26,58,10},{26,57,0},{26,57,0},{26,55,2},{25,57,11},{26,55,11},{27,56,5},{27,56,5},{27,56,5},{27,56,5},{30,45,8},{26,57,0},{26,57,0},{26,55,2},{31,50,8},{26,55,2},{31,45,18},{26,59,2},{27,57,2}, +{26,57,1},{31,45,18},{31,52,18},{26,57,1},{0,56,18},{31,52,18},{0,56,18},{26,0,36},{26,0,36},{26,0,36},{26,0,36},{26,57,0},{26,57,0},{26,57,0},{26,55,1},{25,56,2},{25,56,2},{27,63,76},{27,62,52},{28,59,69},{27,59,51},{27,63,52},{27,60,21},{27,59,3},{27,58,22},{25,62,52},{26,58,28},{28,60,24},{28,60,9},{28,59,5},{28,59,13},{31,49,51}, +{27,60,20},{27,59,2},{27,58,21},{30,56,51},{27,58,21},{27,62,51},{27,62,51},{27,62,51},{27,59,51},{27,61,9},{27,59,3},{27,59,3},{27,58,6},{26,60,11},{27,57,12},{28,58,5},{28,58,5},{28,58,5},{28,58,4},{29,54,8},{27,59,2},{27,59,2},{27,58,5},{29,56,8},{27,58,5},{30,54,18},{27,62,2},{28,59,1},{27,59,1},{30,54,18},{29,58,18},{27,59,1}, 
+{0,58,20},{29,58,18},{0,58,20},{27,0,50},{27,0,50},{27,0,50},{27,0,50},{27,60,1},{27,60,1},{27,60,1},{27,58,2},{26,59,0},{26,59,0},{28,63,86},{28,63,38},{28,61,52},{28,61,36},{28,63,59},{28,62,21},{28,61,0},{28,60,21},{27,62,60},{27,60,28},{29,62,24},{29,62,9},{29,61,5},{29,61,13},{30,58,51},{28,62,21},{28,61,0},{28,60,21},{31,58,51}, +{28,60,21},{28,63,37},{28,63,37},{28,63,37},{28,61,36},{28,62,10},{28,61,0},{28,61,0},{28,59,5},{27,62,11},{27,60,12},{29,60,5},{29,60,5},{29,60,5},{29,60,4},{30,56,8},{28,61,0},{28,61,0},{28,59,5},{30,58,8},{28,59,5},{31,56,18},{28,63,2},{29,61,1},{28,61,0},{31,56,18},{30,60,18},{28,61,0},{0,60,20},{30,60,18},{0,60,20},{28,0,36}, +{28,0,36},{28,0,36},{28,0,36},{28,61,0},{28,61,0},{28,61,0},{28,59,1},{27,61,0},{27,61,0},{30,63,94},{30,63,78},{29,63,52},{29,63,36},{30,63,115},{29,63,36},{29,63,0},{29,62,21},{29,63,88},{28,62,25},{30,63,30},{30,63,14},{30,63,5},{30,63,13},{31,60,51},{29,63,36},{29,63,0},{29,62,21},{31,61,51},{29,62,21},{29,63,52},{29,63,52},{29,63,52}, +{29,63,36},{29,63,16},{29,63,0},{29,63,0},{29,61,5},{28,63,11},{28,62,9},{30,62,5},{30,62,5},{30,62,5},{30,62,4},{31,58,8},{29,63,0},{29,63,0},{29,61,5},{31,60,8},{29,61,5},{31,61,18},{30,63,10},{30,63,1},{29,63,0},{31,61,18},{31,62,18},{29,63,0},{0,62,20},{31,62,18},{0,62,20},{29,0,36},{29,0,36},{29,0,36},{29,0,36},{29,63,0}, +{29,63,0},{29,63,0},{29,61,1},{28,63,2},{28,63,2},{31,63,68},{31,63,68},{30,63,61},{30,63,45},{30,63,59},{30,63,27},{30,63,18},{30,63,1},{30,63,28},{30,63,10},{31,63,4},{31,63,4},{31,63,4},{31,63,4},{31,63,4},{31,63,4},{31,63,4},{30,63,1},{31,63,4},{30,63,1},{30,63,61},{30,63,61},{30,63,61},{30,63,45},{30,63,34},{30,63,18},{30,63,18}, 
+{30,63,1},{30,63,19},{30,63,10},{31,63,4},{31,63,4},{31,63,4},{31,63,4},{31,62,4},{31,63,4},{31,63,4},{30,63,1},{31,63,4},{30,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{30,0,36},{30,0,36},{30,0,36},{30,0,36},{30,63,9},{30,63,9},{30,63,9},{30,63,1},{30,63,10}, +{30,63,10},{0,7,74},{0,6,20},{0,4,2},{0,4,26},{0,5,153},{0,4,110},{0,3,45},{0,2,115},{0,3,169},{0,2,124},{0,7,74},{0,6,20},{0,4,2},{0,4,26},{0,5,153},{0,4,110},{0,3,45},{0,2,115},{0,3,153},{0,2,115},{0,3,1},{0,3,1},{0,3,1},{0,2,0},{0,2,13},{0,2,9},{0,2,9},{0,1,5},{0,1,14},{0,1,6},{0,3,1}, +{0,3,1},{0,3,1},{0,2,0},{0,2,13},{0,2,9},{0,2,9},{0,1,5},{1,0,13},{0,1,5},{2,1,72},{0,6,20},{0,4,2},{0,4,26},{2,1,72},{3,1,72},{0,4,26},{0,3,74},{3,1,72},{0,3,74},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,13,81},{0,10,13},{0,6,26}, +{0,6,14},{0,9,244},{0,7,129},{0,6,41},{0,4,139},{0,5,300},{0,4,175},{0,13,81},{0,10,13},{0,6,26},{0,6,14},{2,3,243},{0,7,129},{0,6,41},{0,4,139},{4,1,243},{0,4,139},{0,9,9},{0,9,9},{0,9,9},{0,5,10},{0,5,52},{0,5,17},{0,5,17},{0,3,17},{0,3,68},{0,3,33},{0,9,9},{0,9,9},{0,9,9},{0,5,10},{1,2,50}, +{0,5,17},{0,5,17},{0,3,17},{2,1,50},{0,3,17},{3,3,72},{0,10,4},{1,6,2},{0,6,5},{3,3,72},{4,3,72},{0,6,5},{0,5,74},{4,3,72},{0,5,74},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,2,0},{0,2,0},{0,2,0},{0,1,1},{0,1,2},{0,1,2},{1,15,145},{1,12,77},{1,8,90},{1,8,78},{0,15,243},{0,10,96},{0,8,3}, +{0,6,106},{0,8,395},{0,6,187},{1,15,81},{1,12,13},{1,8,26},{1,8,14},{4,1,243},{0,10,96},{0,8,3},{0,6,106},{5,3,243},{0,6,106},{1,11,73},{1,11,73},{1,11,73},{1,7,74},{0,11,50},{0,8,2},{0,8,2},{0,5,2},{0,5,131},{0,5,51},{1,11,9},{1,11,9},{1,11,9},{1,7,10},{3,1,50},{0,8,2},{0,8,2},{0,5,2},{3,3,50}, 
+{0,5,2},{5,1,72},{1,12,4},{2,8,2},{0,8,2},{5,1,72},{9,0,72},{0,8,2},{0,7,74},{9,0,72},{0,7,74},{1,0,73},{1,0,73},{1,0,73},{1,0,73},{0,7,1},{0,7,1},{0,7,1},{0,4,1},{0,3,32},{0,3,32},{2,17,162},{2,14,94},{2,10,107},{2,10,95},{1,17,244},{1,12,97},{1,10,4},{1,8,107},{0,11,345},{0,8,116},{2,17,81}, +{2,14,13},{2,10,26},{2,10,14},{6,0,243},{0,14,76},{1,10,3},{0,9,83},{10,0,243},{0,9,83},{2,13,90},{2,13,90},{2,13,90},{2,9,91},{1,13,51},{1,10,3},{1,10,3},{1,7,3},{0,9,94},{0,7,14},{2,13,9},{2,13,9},{2,13,9},{2,9,10},{3,6,50},{1,10,2},{1,10,2},{1,7,2},{8,0,50},{1,7,2},{7,0,72},{2,14,4},{3,10,2}, +{1,10,2},{7,0,72},{10,2,72},{1,10,2},{0,9,74},{10,2,72},{0,9,74},{2,0,90},{2,0,90},{2,0,90},{2,0,90},{1,9,2},{1,9,2},{1,9,2},{1,6,2},{0,7,13},{0,7,13},{3,19,154},{3,16,82},{3,13,100},{3,12,85},{2,19,244},{2,15,90},{2,13,5},{2,11,97},{0,15,287},{0,11,73},{3,19,90},{3,16,18},{4,12,29},{3,12,21},{6,6,243}, +{1,16,75},{2,13,5},{0,11,73},{12,1,243},{0,11,73},{3,15,81},{3,15,81},{3,15,81},{3,12,81},{2,15,52},{2,13,1},{2,13,1},{2,9,4},{0,13,61},{0,10,14},{3,15,17},{3,15,17},{3,15,17},{3,12,17},{6,2,50},{2,13,1},{2,13,1},{2,9,4},{11,0,50},{2,9,4},{8,2,72},{3,16,2},{4,12,4},{2,13,4},{8,2,72},{12,3,72},{2,13,4}, +{0,11,72},{12,3,72},{0,11,72},{3,0,80},{3,0,80},{3,0,80},{3,0,80},{2,12,0},{2,12,0},{2,12,0},{2,9,0},{0,11,1},{0,11,1},{4,21,162},{4,18,94},{4,15,103},{4,14,95},{3,21,244},{3,17,88},{3,15,5},{3,13,97},{0,18,260},{1,13,73},{4,21,81},{4,18,13},{4,15,22},{4,14,14},{9,1,243},{2,18,75},{3,15,5},{1,13,73},{13,3,243}, +{1,13,73},{4,17,90},{4,17,90},{4,17,90},{4,14,91},{3,17,52},{3,15,1},{3,15,1},{3,11,4},{0,16,52},{1,12,14},{4,17,9},{4,17,9},{4,17,9},{4,14,10},{8,0,50},{3,15,1},{3,15,1},{3,11,4},{11,3,50},{3,11,4},{10,1,72},{3,20,2},{5,14,4},{3,15,4},{10,1,72},{17,0,72},{3,15,4},{0,13,72},{17,0,72},{0,13,72},{4,0,90}, 
+{4,0,90},{4,0,90},{4,0,90},{3,14,0},{3,14,0},{3,14,0},{3,11,0},{1,13,1},{1,13,1},{5,23,162},{5,20,94},{5,16,107},{5,16,95},{4,23,244},{4,18,97},{4,16,4},{4,15,98},{0,22,244},{2,15,73},{5,23,81},{5,20,13},{5,16,26},{5,16,14},{10,3,243},{3,20,75},{4,16,3},{2,15,73},{18,0,243},{2,15,73},{5,19,90},{5,19,90},{5,19,90}, +{5,15,94},{4,19,51},{4,16,3},{4,16,3},{4,13,5},{1,18,52},{2,14,14},{5,19,9},{5,19,9},{5,19,9},{5,15,13},{9,2,50},{4,16,2},{4,16,2},{4,13,4},{16,0,50},{4,13,4},{11,3,72},{5,20,4},{6,16,2},{4,16,2},{11,3,72},{18,2,72},{4,16,2},{0,15,72},{18,2,72},{0,15,72},{5,0,90},{5,0,90},{5,0,90},{5,0,90},{4,15,2}, +{4,15,2},{4,15,2},{4,13,1},{2,15,1},{2,15,1},{6,25,162},{6,22,94},{6,18,107},{6,18,95},{5,25,244},{5,20,97},{5,18,4},{5,16,107},{1,24,244},{3,17,74},{6,25,81},{6,22,13},{6,18,26},{6,18,14},{12,1,243},{3,24,75},{5,18,3},{3,17,74},{19,2,243},{3,17,74},{6,21,90},{6,21,90},{6,21,90},{6,17,91},{5,21,51},{5,18,3},{5,18,3}, +{5,15,5},{2,20,52},{3,16,19},{6,21,9},{6,21,9},{6,21,9},{6,17,10},{11,1,50},{5,18,2},{5,18,2},{5,15,4},{17,2,50},{5,15,4},{13,1,72},{6,22,4},{7,18,2},{5,18,2},{13,1,72},{19,4,72},{5,18,2},{0,17,74},{19,4,72},{0,17,74},{6,0,90},{6,0,90},{6,0,90},{6,0,90},{5,17,2},{5,17,2},{5,17,2},{5,15,1},{3,17,0}, +{3,17,0},{7,27,154},{7,24,82},{7,21,100},{7,20,85},{6,27,244},{6,23,90},{6,21,5},{6,19,97},{2,26,244},{4,19,73},{7,27,90},{7,24,18},{8,20,29},{7,20,21},{13,4,243},{5,24,75},{6,21,5},{4,19,73},{22,2,243},{4,19,73},{7,23,81},{7,23,81},{7,23,81},{7,20,81},{6,23,52},{6,21,1},{6,21,1},{6,17,4},{3,23,52},{4,18,14},{7,23,17}, +{7,23,17},{7,23,17},{7,20,17},{13,0,50},{6,21,1},{6,21,1},{6,17,4},{20,2,50},{6,17,4},{14,4,72},{7,24,2},{8,20,4},{6,21,4},{14,4,72},{25,0,72},{6,21,4},{0,19,72},{25,0,72},{0,19,72},{7,0,80},{7,0,80},{7,0,80},{7,0,80},{6,20,0},{6,20,0},{6,20,0},{6,17,0},{4,19,1},{4,19,1},{8,29,162},{8,26,92},{8,23,103}, 
+{8,22,95},{7,29,244},{7,25,90},{7,23,5},{7,21,97},{3,28,244},{5,21,73},{8,29,81},{8,26,11},{8,23,22},{8,22,14},{14,6,243},{6,26,75},{7,23,5},{5,21,73},{26,0,243},{5,21,73},{8,25,90},{8,25,90},{8,25,90},{8,22,91},{7,25,52},{7,23,1},{7,23,1},{7,19,4},{4,24,52},{5,20,14},{8,25,9},{8,25,9},{8,25,9},{8,22,10},{14,2,50}, +{7,23,1},{7,23,1},{7,19,4},{24,0,50},{7,19,4},{16,2,72},{8,26,2},{9,22,4},{7,23,4},{16,2,72},{27,1,72},{7,23,4},{0,21,72},{27,1,72},{0,21,72},{8,0,90},{8,0,90},{8,0,90},{8,0,90},{7,22,0},{7,22,0},{7,22,0},{7,19,0},{5,21,1},{5,21,1},{9,31,162},{9,28,92},{9,25,103},{9,24,95},{8,31,244},{8,26,100},{8,24,9}, +{8,23,98},{4,30,244},{6,23,73},{9,31,81},{9,28,11},{9,25,22},{9,24,14},{16,4,243},{7,28,75},{8,24,8},{6,23,73},{27,2,243},{6,23,73},{9,27,90},{9,27,90},{9,27,90},{9,24,91},{8,27,51},{8,24,5},{8,24,5},{8,21,5},{5,26,52},{6,22,14},{9,27,9},{9,27,9},{9,27,9},{9,24,10},{16,0,50},{8,24,4},{8,24,4},{8,21,4},{26,1,50}, +{8,21,4},{17,4,72},{9,28,2},{10,24,4},{8,25,4},{17,4,72},{27,4,72},{8,25,4},{0,23,72},{27,4,72},{0,23,72},{9,0,90},{9,0,90},{9,0,90},{9,0,90},{8,24,1},{8,24,1},{8,24,1},{8,21,1},{6,23,1},{6,23,1},{10,33,162},{10,30,92},{10,27,103},{10,26,95},{9,33,244},{9,28,100},{9,26,9},{9,25,98},{5,32,244},{7,25,73},{10,33,81}, +{10,30,11},{10,27,22},{10,26,14},{17,6,243},{8,30,75},{9,26,8},{7,25,73},{28,4,243},{7,25,73},{10,29,90},{10,29,90},{10,29,90},{10,26,91},{9,29,51},{9,26,5},{9,26,5},{9,23,5},{6,28,52},{7,24,14},{10,29,9},{10,29,9},{10,29,9},{10,26,10},{17,2,50},{9,26,4},{9,26,4},{9,23,4},{27,3,50},{9,23,4},{18,6,72},{10,30,2},{11,26,4}, +{9,27,4},{18,6,72},{28,6,72},{9,27,4},{0,25,72},{28,6,72},{0,25,72},{10,0,90},{10,0,90},{10,0,90},{10,0,90},{9,26,1},{9,26,1},{9,26,1},{9,23,1},{7,25,1},{7,25,1},{11,35,154},{11,32,82},{11,29,97},{11,29,85},{10,35,244},{10,31,96},{10,29,3},{10,27,90},{6,34,244},{8,27,78},{11,35,90},{11,32,18},{12,29,27},{11,29,21},{19,5,243}, 
+{9,32,75},{10,29,3},{9,27,75},{31,4,243},{9,27,75},{11,31,81},{11,31,81},{11,31,81},{11,28,80},{10,31,52},{10,29,2},{10,29,2},{10,26,5},{7,31,50},{9,26,11},{11,31,17},{11,31,17},{11,31,17},{11,28,16},{19,1,50},{10,29,2},{10,29,2},{9,26,2},{29,4,50},{9,26,2},{20,5,72},{11,32,2},{12,29,2},{10,29,2},{20,5,72},{31,6,72},{10,29,2}, +{0,27,74},{31,6,72},{0,27,74},{11,0,80},{11,0,80},{11,0,80},{11,0,80},{10,28,1},{10,28,1},{10,28,1},{10,25,1},{8,28,2},{8,28,2},{12,37,162},{12,34,92},{12,31,107},{12,30,99},{11,37,244},{11,33,90},{11,31,3},{11,29,90},{7,36,244},{9,29,78},{12,37,81},{12,34,11},{12,31,26},{12,30,18},{22,0,243},{10,34,75},{11,31,3},{10,29,75},{31,7,243}, +{10,29,75},{12,33,90},{12,33,90},{12,33,90},{12,30,90},{11,33,52},{11,31,2},{11,31,2},{11,28,5},{8,32,52},{10,28,11},{12,33,9},{12,33,9},{12,33,9},{12,30,9},{20,3,50},{11,31,2},{11,31,2},{10,28,2},{30,6,50},{10,28,2},{23,0,72},{12,34,2},{13,31,2},{11,31,2},{23,0,72},{31,9,72},{11,31,2},{0,29,74},{31,9,72},{0,29,74},{12,0,90}, +{12,0,90},{12,0,90},{12,0,90},{11,30,1},{11,30,1},{11,30,1},{11,27,1},{9,30,2},{9,30,2},{13,39,162},{13,36,92},{13,33,103},{13,32,95},{12,39,244},{12,34,100},{12,32,9},{12,31,100},{8,38,244},{10,31,78},{13,39,81},{13,36,11},{13,33,22},{13,32,14},{23,2,243},{11,36,75},{12,32,8},{11,31,75},{28,14,243},{11,31,75},{13,35,90},{13,35,90},{13,35,90}, +{13,32,91},{12,35,51},{12,32,5},{12,32,5},{12,30,6},{9,34,52},{11,30,11},{13,35,9},{13,35,9},{13,35,9},{13,32,10},{21,5,50},{12,32,4},{12,32,4},{11,30,2},{31,8,50},{11,30,2},{24,2,72},{13,36,2},{14,32,4},{12,33,4},{24,2,72},{27,17,72},{12,33,4},{0,31,74},{27,17,72},{0,31,74},{13,0,90},{13,0,90},{13,0,90},{13,0,90},{12,32,1}, 
+{12,32,1},{12,32,1},{12,29,1},{10,31,4},{10,31,4},{14,41,162},{14,38,92},{14,35,103},{14,34,95},{13,41,244},{13,36,100},{13,34,9},{13,33,98},{9,40,244},{11,33,73},{14,41,81},{14,38,11},{14,35,22},{14,34,14},{24,4,243},{12,38,75},{13,34,8},{11,33,73},{28,17,243},{11,33,73},{14,37,90},{14,37,90},{14,37,90},{14,34,91},{13,37,51},{13,34,5},{13,34,5}, +{13,31,10},{10,36,52},{11,32,14},{14,37,9},{14,37,9},{14,37,9},{14,34,10},{24,0,50},{13,34,4},{13,34,4},{12,32,4},{31,11,50},{12,32,4},{25,4,72},{14,38,2},{15,34,4},{13,35,4},{25,4,72},{28,19,72},{13,35,4},{0,33,72},{28,19,72},{0,33,72},{14,0,90},{14,0,90},{14,0,90},{14,0,90},{13,34,1},{13,34,1},{13,34,1},{13,31,1},{11,33,1}, +{11,33,1},{15,44,152},{15,40,84},{15,37,97},{15,37,85},{14,44,243},{14,39,96},{14,37,3},{14,35,90},{10,42,244},{12,35,78},{15,44,88},{15,40,20},{16,37,27},{15,37,21},{27,0,243},{13,41,76},{14,37,3},{13,35,75},{31,17,243},{13,35,75},{15,40,80},{15,40,80},{15,40,80},{15,36,80},{14,40,50},{14,37,2},{14,37,2},{14,34,5},{11,39,50},{13,34,11},{15,40,16}, +{15,40,16},{15,40,16},{15,36,16},{24,6,50},{14,37,2},{14,37,2},{13,34,2},{29,17,50},{13,34,2},{27,3,72},{15,40,4},{16,37,2},{14,37,2},{27,3,72},{31,19,72},{14,37,2},{0,35,74},{31,19,72},{0,35,74},{15,0,80},{15,0,80},{15,0,80},{15,0,80},{14,36,1},{14,36,1},{14,36,1},{14,33,1},{12,36,2},{12,36,2},{16,45,164},{16,42,95},{16,39,107}, +{16,38,99},{15,46,243},{15,41,96},{15,39,3},{15,37,90},{11,44,244},{13,37,78},{16,45,83},{16,42,14},{16,39,26},{16,38,18},{27,5,243},{14,43,76},{15,39,3},{14,37,75},{31,20,243},{14,37,75},{16,41,91},{16,41,91},{16,41,91},{16,38,90},{15,42,50},{15,39,2},{15,39,2},{15,36,5},{12,41,51},{14,36,11},{16,41,10},{16,41,10},{16,41,10},{16,38,9},{27,1,50}, 
+{15,39,2},{15,39,2},{14,36,2},{30,19,50},{14,36,2},{28,5,72},{15,44,4},{17,39,2},{15,39,2},{28,5,72},{31,22,72},{15,39,2},{0,37,74},{31,22,72},{0,37,74},{16,0,90},{16,0,90},{16,0,90},{16,0,90},{15,38,1},{15,38,1},{15,38,1},{15,35,1},{13,38,2},{13,38,2},{17,47,164},{17,44,95},{17,41,107},{17,40,99},{16,47,245},{16,43,91},{16,41,3}, +{16,39,100},{12,46,244},{14,39,78},{17,47,83},{17,44,14},{17,41,26},{17,40,18},{30,0,243},{15,45,76},{16,41,2},{15,39,75},{28,27,243},{15,39,75},{17,43,91},{17,43,91},{17,43,91},{17,40,90},{16,43,53},{16,41,2},{16,41,2},{16,38,6},{13,43,51},{15,38,11},{17,43,10},{17,43,10},{17,43,10},{17,40,9},{28,3,50},{16,41,1},{16,41,1},{15,38,2},{31,21,50}, +{15,38,2},{31,0,72},{16,46,1},{18,41,2},{16,41,1},{31,0,72},{28,29,72},{16,41,1},{0,39,74},{28,29,72},{0,39,74},{17,0,90},{17,0,90},{17,0,90},{17,0,90},{16,40,1},{16,40,1},{16,40,1},{16,37,1},{14,40,2},{14,40,2},{18,49,162},{18,46,95},{18,43,107},{18,42,99},{17,49,244},{17,45,91},{17,43,3},{17,41,100},{13,48,244},{15,41,78},{18,49,81}, +{18,46,14},{18,43,26},{18,42,18},{31,2,243},{15,48,75},{17,43,2},{15,41,78},{29,29,243},{15,41,78},{18,45,91},{18,45,91},{18,45,91},{18,42,90},{17,45,53},{17,43,2},{17,43,2},{17,40,6},{14,45,51},{16,40,18},{18,45,10},{18,45,10},{18,45,10},{18,42,9},{29,5,50},{17,43,1},{17,43,1},{16,40,2},{31,24,50},{16,40,2},{30,9,72},{17,48,2},{19,43,2}, +{17,43,1},{30,9,72},{29,31,72},{17,43,1},{0,41,74},{29,31,72},{0,41,74},{18,0,90},{18,0,90},{18,0,90},{18,0,90},{17,42,1},{17,42,1},{17,42,1},{17,39,1},{15,42,2},{15,42,2},{19,52,152},{19,48,84},{19,45,105},{19,45,84},{18,52,243},{18,47,89},{18,45,1},{18,43,96},{14,50,244},{16,44,81},{19,52,88},{19,48,20},{20,45,26},{19,45,20},{31,8,243}, 
+{17,49,76},{18,45,1},{16,44,80},{31,30,243},{16,44,80},{19,48,80},{19,48,80},{19,48,80},{19,44,81},{18,48,50},{18,45,1},{18,45,1},{18,42,1},{15,47,52},{17,42,13},{19,48,16},{19,48,16},{19,48,16},{19,44,17},{31,4,50},{18,45,1},{18,45,1},{18,42,1},{30,29,50},{18,42,1},{30,15,72},{19,48,4},{20,45,1},{18,45,1},{30,15,72},{31,32,72},{18,45,1}, +{0,43,80},{31,32,72},{0,43,80},{19,0,80},{19,0,80},{19,0,80},{19,0,80},{18,45,0},{18,45,0},{18,45,0},{18,41,1},{16,44,1},{16,44,1},{20,53,164},{20,50,95},{20,47,106},{20,47,94},{19,54,243},{19,49,96},{19,47,1},{19,45,96},{15,52,244},{17,46,81},{20,53,83},{20,50,14},{20,47,25},{20,47,13},{29,20,243},{18,51,76},{19,47,1},{17,46,80},{31,33,243}, +{17,46,80},{20,49,91},{20,49,91},{20,49,91},{20,46,90},{19,50,50},{19,47,1},{19,47,1},{19,44,1},{16,49,51},{18,44,13},{20,49,10},{20,49,10},{20,49,10},{20,46,9},{30,13,50},{19,47,1},{19,47,1},{19,44,1},{31,31,50},{19,44,1},{30,20,72},{19,52,4},{21,47,1},{19,47,1},{30,20,72},{31,35,72},{19,47,1},{0,45,80},{31,35,72},{0,45,80},{20,0,90}, +{20,0,90},{20,0,90},{20,0,90},{19,47,0},{19,47,0},{19,47,0},{19,43,1},{17,46,1},{17,46,1},{21,55,164},{21,52,95},{21,49,107},{21,48,99},{20,55,245},{20,51,91},{20,49,3},{20,47,97},{16,54,244},{18,47,85},{21,55,83},{21,52,14},{21,49,26},{21,48,18},{30,22,243},{19,53,76},{20,49,2},{19,47,81},{28,40,243},{19,47,81},{21,51,91},{21,51,91},{21,51,91}, +{21,48,90},{20,51,53},{20,49,2},{20,49,2},{20,46,5},{17,51,51},{19,46,13},{21,51,10},{21,51,10},{21,51,10},{21,48,9},{31,15,50},{20,49,1},{20,49,1},{19,46,4},{31,34,50},{19,46,4},{31,22,72},{20,54,1},{22,49,2},{20,49,1},{31,22,72},{28,42,72},{20,49,1},{0,47,80},{28,42,72},{0,47,80},{21,0,90},{21,0,90},{21,0,90},{21,0,90},{20,48,1}, 
+{20,48,1},{20,48,1},{20,45,2},{18,48,2},{18,48,2},{22,57,164},{22,54,95},{22,51,107},{22,50,99},{21,57,245},{21,53,91},{21,51,3},{21,49,100},{17,56,244},{19,49,78},{22,57,83},{22,54,14},{22,51,26},{22,50,18},{31,24,243},{19,56,76},{21,51,2},{19,49,78},{29,42,243},{19,49,78},{22,53,91},{22,53,91},{22,53,91},{22,50,90},{21,53,53},{21,51,2},{21,51,2}, +{21,48,6},{18,53,51},{20,48,18},{22,53,10},{22,53,10},{22,53,10},{22,50,9},{31,20,50},{21,51,1},{21,51,1},{20,48,2},{31,37,50},{20,48,2},{31,27,72},{21,56,1},{23,51,2},{21,51,1},{31,27,72},{29,44,72},{21,51,1},{0,49,74},{29,44,72},{0,49,74},{22,0,90},{22,0,90},{22,0,90},{22,0,90},{21,50,1},{21,50,1},{21,50,1},{21,47,2},{19,50,2}, +{19,50,2},{23,60,152},{23,57,81},{23,53,105},{23,53,84},{22,60,243},{22,55,89},{22,53,1},{22,51,96},{18,59,244},{20,52,81},{23,60,88},{23,57,17},{24,53,26},{23,53,20},{31,30,243},{21,57,73},{22,53,1},{20,52,80},{31,43,243},{20,52,80},{23,56,80},{23,56,80},{23,56,80},{23,52,81},{22,56,50},{22,53,1},{22,53,1},{22,50,1},{19,55,52},{21,50,13},{23,56,16}, +{23,56,16},{23,56,16},{23,52,17},{31,26,50},{22,53,1},{22,53,1},{22,50,1},{30,42,50},{22,50,1},{31,33,72},{23,57,1},{24,53,1},{22,53,1},{31,33,72},{31,45,72},{22,53,1},{0,51,80},{31,45,72},{0,51,80},{23,0,80},{23,0,80},{23,0,80},{23,0,80},{22,53,0},{22,53,0},{22,53,0},{22,49,1},{20,52,1},{20,52,1},{24,62,162},{24,58,94},{24,55,106}, +{24,55,94},{23,62,243},{23,57,89},{23,55,1},{23,53,96},{19,61,244},{21,54,81},{24,62,81},{24,58,13},{24,55,25},{24,55,13},{31,35,243},{22,59,73},{23,55,1},{21,54,80},{27,51,243},{21,54,80},{24,58,90},{24,58,90},{24,58,90},{24,54,90},{23,58,50},{23,55,1},{23,55,1},{23,52,1},{20,57,50},{22,52,13},{24,58,9},{24,58,9},{24,58,9},{24,54,9},{31,31,50}, 
+{23,55,1},{23,55,1},{23,52,1},{31,44,50},{23,52,1},{31,38,72},{24,58,4},{25,55,1},{23,55,1},{31,38,72},{31,48,72},{23,55,1},{0,53,80},{31,48,72},{0,53,80},{24,0,90},{24,0,90},{24,0,90},{24,0,90},{23,55,0},{23,55,0},{23,55,0},{23,51,1},{21,54,1},{21,54,1},{25,63,164},{25,60,94},{25,57,106},{25,57,94},{24,63,245},{24,59,97},{24,57,3}, +{24,55,97},{20,63,249},{22,56,81},{25,63,83},{25,60,13},{25,57,25},{25,57,13},{31,40,243},{23,61,73},{24,57,2},{22,56,80},{31,49,243},{22,56,80},{25,60,90},{25,60,90},{25,60,90},{25,56,90},{24,60,51},{24,57,3},{24,57,3},{24,54,5},{21,59,50},{23,54,13},{25,60,9},{25,60,9},{25,60,9},{25,56,9},{31,36,50},{24,57,2},{24,57,2},{23,54,4},{31,47,50}, +{23,54,4},{30,47,72},{25,60,4},{26,57,1},{24,57,1},{30,47,72},{31,51,72},{24,57,1},{0,55,80},{31,51,72},{0,55,80},{25,0,90},{25,0,90},{25,0,90},{25,0,90},{24,56,2},{24,56,2},{24,56,2},{24,53,2},{22,56,1},{22,56,1},{26,63,194},{26,62,94},{26,59,106},{26,59,94},{25,63,284},{25,61,97},{25,59,3},{25,57,97},{22,63,253},{23,58,81},{27,62,99}, +{26,62,13},{26,59,25},{26,59,13},{29,52,243},{24,63,76},{25,59,2},{23,58,80},{28,56,243},{23,58,80},{26,62,90},{26,62,90},{26,62,90},{26,58,90},{25,62,51},{25,59,3},{25,59,3},{25,56,5},{22,61,50},{24,56,10},{26,62,9},{26,62,9},{26,62,9},{26,58,9},{30,45,50},{25,59,2},{25,59,2},{24,56,1},{31,50,50},{24,56,1},{30,52,72},{26,62,4},{27,59,1}, +{25,59,1},{30,52,72},{28,58,72},{25,59,1},{0,57,80},{28,58,72},{0,57,80},{26,0,90},{26,0,90},{26,0,90},{26,0,90},{25,58,2},{25,58,2},{25,58,2},{25,55,2},{23,58,1},{23,58,1},{27,63,280},{27,63,120},{27,62,105},{27,61,82},{27,63,328},{26,63,99},{26,61,5},{26,59,99},{24,63,308},{24,60,74},{28,63,105},{28,63,45},{28,61,27},{27,61,18},{31,51,243}, 
+{26,63,99},{26,61,5},{24,60,74},{31,56,243},{24,60,74},{27,63,84},{27,63,84},{27,63,84},{27,60,81},{26,63,58},{26,62,2},{26,62,2},{26,58,2},{23,63,53},{25,58,9},{27,63,20},{27,63,20},{27,63,20},{27,60,17},{29,54,50},{26,62,2},{26,62,2},{26,58,2},{29,56,50},{26,58,2},{30,58,72},{28,63,20},{28,61,2},{27,61,2},{30,58,72},{31,58,72},{27,61,2}, +{0,60,74},{31,58,72},{0,60,74},{27,0,80},{27,0,80},{27,0,80},{27,0,80},{26,61,0},{26,61,0},{26,61,0},{26,58,1},{24,60,0},{24,60,0},{28,63,331},{28,63,187},{28,63,106},{28,63,94},{28,63,358},{27,63,173},{27,63,4},{27,61,82},{26,63,355},{25,62,65},{29,63,126},{29,63,62},{28,63,25},{28,63,13},{31,56,221},{28,63,121},{27,63,4},{25,62,65},{30,60,221}, +{25,62,65},{28,63,106},{28,63,106},{28,63,106},{28,62,91},{27,63,100},{27,63,4},{27,63,4},{27,60,2},{25,63,72},{26,60,9},{28,63,25},{28,63,25},{28,63,25},{28,62,10},{30,56,50},{27,63,4},{27,63,4},{27,60,2},{30,58,50},{27,60,2},{31,59,61},{29,63,37},{29,63,1},{27,63,4},{31,59,61},{31,61,61},{27,63,4},{0,62,65},{31,61,61},{0,62,65},{28,0,90}, +{28,0,90},{28,0,90},{28,0,90},{27,63,0},{27,63,0},{27,63,0},{27,60,1},{25,62,0},{25,62,0},{29,63,239},{29,63,175},{29,63,139},{29,63,99},{29,63,239},{28,63,122},{28,63,41},{28,62,19},{28,63,233},{26,63,19},{30,63,54},{30,63,38},{30,63,29},{29,63,18},{31,60,93},{29,63,54},{29,63,18},{27,63,9},{31,61,93},{27,63,9},{29,63,139},{29,63,139},{29,63,139}, +{29,63,99},{29,63,139},{28,63,41},{28,63,41},{28,62,3},{27,63,116},{27,62,9},{30,63,29},{30,63,29},{30,63,29},{29,63,18},{31,58,50},{29,63,18},{29,63,18},{28,62,2},{31,60,50},{28,62,2},{31,62,5},{31,63,9},{30,63,4},{30,63,0},{31,62,5},{31,62,9},{30,63,0},{0,63,9},{31,62,9},{0,63,9},{29,0,90},{29,0,90},{29,0,90},{29,0,90},{28,63,5}, 
+{28,63,5},{28,63,5},{28,61,2},{27,62,8},{27,62,8},{30,63,140},{30,63,124},{30,63,115},{30,63,99},{30,63,131},{29,63,98},{29,63,62},{29,63,2},{29,63,122},{28,63,20},{31,63,25},{31,63,25},{31,63,25},{30,63,18},{31,62,17},{30,63,18},{30,63,9},{29,63,1},{31,62,22},{29,63,1},{30,63,115},{30,63,115},{30,63,115},{30,63,99},{30,63,106},{29,63,62},{29,63,62}, +{29,63,2},{29,63,86},{28,63,20},{31,63,25},{31,63,25},{31,63,25},{30,63,18},{31,61,13},{30,63,9},{30,63,9},{29,63,1},{31,62,13},{29,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{30,0,90},{30,0,90},{30,0,90},{30,0,90},{29,63,26},{29,63,26},{29,63,26},{29,63,2},{28,63,20}, +{28,63,20},{0,13,200},{0,10,52},{0,7,2},{0,6,61},{0,9,441},{0,7,308},{0,5,139},{0,4,318},{0,5,491},{0,4,354},{0,13,200},{0,10,52},{0,7,2},{0,6,61},{2,2,441},{0,7,308},{0,5,139},{0,4,318},{2,3,441},{0,4,318},{0,6,0},{0,6,0},{0,6,0},{0,4,1},{0,3,41},{0,3,20},{0,3,20},{0,2,26},{0,2,50},{0,1,30},{0,6,0}, +{0,6,0},{0,6,0},{0,4,1},{1,0,41},{0,3,20},{0,3,20},{0,2,26},{1,1,41},{0,2,26},{3,3,200},{0,10,52},{0,7,2},{0,6,61},{3,3,200},{4,3,200},{0,6,61},{0,5,202},{4,3,200},{0,5,202},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,18,200},{0,14,20},{0,10,13}, +{0,9,26},{0,12,686},{0,9,419},{0,8,178},{0,5,442},{0,7,789},{0,5,491},{0,18,200},{0,14,20},{0,10,13},{0,9,26},{3,2,686},{0,9,419},{0,8,178},{0,5,442},{6,0,686},{0,5,442},{0,11,1},{0,11,1},{0,11,1},{0,7,0},{0,6,145},{0,5,74},{0,5,74},{0,3,74},{0,3,165},{0,3,90},{0,11,1},{0,11,1},{0,11,1},{0,7,0},{1,2,145}, +{0,5,74},{0,5,74},{0,3,74},{3,0,145},{0,3,74},{5,1,200},{0,14,20},{1,9,2},{0,9,26},{5,1,200},{9,0,200},{0,9,26},{0,7,202},{9,0,200},{0,7,202},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,23,251},{0,17,53},{1,12,78},{0,11,54},{0,17,724},{0,13,362},{0,11,86}, 
+{0,8,387},{0,9,932},{0,7,498},{1,20,201},{1,16,21},{1,12,14},{1,11,27},{4,4,723},{0,13,362},{0,11,86},{0,8,387},{7,2,723},{0,8,387},{0,17,49},{0,17,49},{0,17,49},{0,10,49},{0,11,162},{0,9,45},{0,9,45},{0,5,50},{0,5,243},{0,5,99},{1,13,2},{1,13,2},{1,13,2},{1,9,1},{3,1,162},{0,9,45},{0,9,45},{0,5,50},{3,3,162}, +{0,5,50},{7,0,200},{0,17,4},{2,11,2},{0,11,5},{7,0,200},{10,2,200},{0,11,5},{0,9,202},{10,2,200},{0,9,202},{0,0,49},{0,0,49},{0,0,49},{0,0,49},{0,5,0},{0,5,0},{0,5,0},{0,3,0},{0,2,13},{0,2,13},{1,25,315},{1,19,117},{1,14,171},{1,13,118},{0,23,723},{0,16,299},{0,13,18},{0,10,318},{0,12,1087},{0,9,516},{2,22,201}, +{2,17,21},{2,14,14},{2,13,27},{5,6,723},{0,16,299},{0,13,18},{0,10,318},{8,4,723},{0,10,318},{1,19,113},{1,19,113},{1,19,113},{1,12,113},{0,16,162},{0,13,17},{0,13,17},{0,8,26},{0,8,338},{0,7,129},{2,15,2},{2,15,2},{2,15,2},{2,11,1},{3,6,162},{0,13,17},{0,13,17},{0,8,26},{8,0,162},{0,8,26},{7,5,200},{1,19,4},{3,13,2}, +{0,13,2},{7,5,200},{14,0,200},{0,13,2},{0,11,202},{14,0,200},{0,11,202},{1,0,113},{1,0,113},{1,0,113},{1,0,113},{0,10,0},{0,10,0},{0,10,0},{0,6,0},{0,5,58},{0,5,58},{2,28,408},{2,22,210},{2,16,281},{2,15,213},{0,29,739},{0,20,260},{0,16,29},{0,13,280},{0,15,1143},{0,12,464},{3,25,200},{3,21,16},{3,16,18},{3,15,20},{7,5,723}, +{0,20,244},{0,16,13},{0,13,264},{14,0,723},{0,13,264},{2,21,209},{2,21,209},{2,21,209},{2,14,209},{1,19,178},{0,17,18},{0,17,18},{0,10,21},{0,12,376},{0,9,121},{3,18,0},{3,18,0},{3,18,0},{3,13,1},{6,2,162},{0,17,2},{0,17,2},{0,10,5},{11,0,162},{0,10,5},{10,1,200},{2,22,2},{4,15,5},{2,15,5},{10,1,200},{17,0,200},{2,15,5}, +{0,13,200},{17,0,200},{0,13,200},{2,0,208},{2,0,208},{2,0,208},{2,0,208},{1,13,16},{1,13,16},{1,13,16},{1,8,17},{0,8,80},{0,8,80},{3,30,408},{3,24,210},{3,18,281},{3,17,213},{1,31,739},{1,22,260},{2,17,27},{1,15,280},{0,19,1000},{0,14,322},{4,26,201},{4,22,21},{4,18,14},{4,17,27},{10,0,723},{0,23,212},{2,17,11},{0,15,225},{15,2,723}, 
+{0,15,225},{3,23,209},{3,23,209},{3,23,209},{3,16,209},{2,21,178},{1,19,18},{1,19,18},{1,12,21},{0,15,294},{0,12,44},{4,19,2},{4,19,2},{4,19,2},{4,15,2},{8,0,162},{1,19,2},{1,19,2},{0,13,4},{11,3,162},{0,13,4},{11,3,200},{3,24,2},{5,17,2},{2,17,2},{11,3,200},{18,2,200},{2,17,2},{0,15,200},{18,2,200},{0,15,200},{3,0,208}, +{3,0,208},{3,0,208},{3,0,208},{2,15,16},{2,15,16},{2,15,16},{2,10,17},{0,12,40},{0,12,40},{4,31,420},{4,26,222},{4,20,276},{4,19,223},{3,29,740},{2,24,260},{3,19,27},{2,16,270},{0,23,920},{0,16,234},{5,28,201},{5,24,21},{5,20,14},{5,19,27},{11,2,723},{0,27,200},{3,19,11},{0,17,211},{16,4,723},{0,17,211},{4,25,218},{4,25,218},{4,25,218}, +{4,18,218},{3,23,178},{2,21,18},{2,21,18},{2,14,21},{0,19,228},{0,15,17},{5,21,2},{5,21,2},{5,21,2},{5,17,1},{9,2,162},{2,21,2},{2,21,2},{1,15,4},{16,0,162},{1,15,4},{13,1,200},{3,28,2},{6,19,2},{3,19,2},{13,1,200},{19,4,200},{3,19,2},{0,17,202},{19,4,200},{0,17,202},{4,0,218},{4,0,218},{4,0,218},{4,0,218},{3,17,16}, +{3,17,16},{3,17,16},{3,12,17},{0,15,13},{0,15,13},{5,33,420},{5,28,222},{5,22,276},{5,21,223},{3,34,740},{3,26,260},{3,22,29},{3,18,270},{0,25,844},{0,19,202},{6,30,201},{6,26,21},{6,22,14},{6,21,27},{12,4,723},{1,29,200},{3,22,13},{0,19,202},{22,0,723},{0,19,202},{5,27,218},{5,27,218},{5,27,218},{5,20,218},{3,28,178},{3,23,18},{3,23,18}, +{3,16,18},{0,22,195},{1,17,17},{6,23,2},{6,23,2},{6,23,2},{6,19,1},{11,1,162},{3,23,2},{3,23,2},{3,16,2},{17,2,162},{3,16,2},{15,0,200},{5,28,4},{7,21,2},{4,21,2},{15,0,200},{24,1,200},{4,21,2},{0,19,202},{24,1,200},{0,19,202},{5,0,218},{5,0,218},{5,0,218},{5,0,218},{3,22,16},{3,22,16},{3,22,16},{3,16,17},{0,19,0}, +{0,19,0},{6,36,408},{6,30,210},{6,24,276},{6,23,213},{4,37,739},{4,28,260},{4,24,24},{4,21,280},{0,29,780},{1,21,202},{7,33,200},{7,29,17},{7,24,13},{7,23,20},{15,0,723},{2,31,203},{4,24,8},{1,21,201},{24,1,723},{1,21,201},{6,29,209},{6,29,209},{6,29,209},{6,22,209},{5,27,178},{4,25,17},{4,25,17},{4,18,21},{0,26,168},{2,19,14},{7,26,0}, 
+{7,26,0},{7,26,0},{7,21,1},{13,0,162},{4,25,1},{4,25,1},{3,19,4},{20,2,162},{3,19,4},{16,2,200},{6,30,2},{8,23,5},{6,23,5},{16,2,200},{27,1,200},{6,23,5},{0,21,200},{27,1,200},{0,21,200},{6,0,208},{6,0,208},{6,0,208},{6,0,208},{5,21,16},{5,21,16},{5,21,16},{5,16,17},{1,21,2},{1,21,2},{7,38,408},{7,32,210},{7,26,276}, +{7,25,213},{5,39,739},{5,30,260},{5,26,24},{5,23,280},{0,33,749},{2,23,202},{8,34,201},{8,30,19},{8,26,14},{8,25,21},{16,1,723},{3,33,202},{5,26,8},{2,23,201},{25,3,723},{2,23,201},{7,31,209},{7,31,209},{7,31,209},{7,24,209},{6,29,178},{5,27,17},{5,27,17},{5,20,21},{0,30,164},{3,21,14},{8,28,1},{8,28,1},{8,28,1},{8,23,2},{14,2,162}, +{5,27,1},{5,27,1},{4,21,4},{24,0,162},{4,21,4},{17,4,200},{7,32,2},{9,25,5},{7,25,5},{17,4,200},{27,4,200},{7,25,5},{0,23,200},{27,4,200},{0,23,200},{7,0,208},{7,0,208},{7,0,208},{7,0,208},{6,23,16},{6,23,16},{6,23,16},{6,18,17},{2,23,2},{2,23,2},{8,39,420},{8,34,222},{8,28,286},{8,27,223},{6,41,739},{6,32,260},{6,28,24}, +{6,25,280},{0,36,725},{3,25,202},{9,36,201},{9,32,21},{9,28,14},{9,27,21},{18,0,723},{4,35,200},{6,28,8},{3,25,201},{30,0,723},{3,25,201},{8,33,218},{8,33,218},{8,33,218},{8,26,219},{7,31,178},{6,29,17},{6,29,17},{6,22,21},{2,30,168},{4,23,17},{9,30,1},{9,30,1},{9,30,1},{9,25,2},{16,0,162},{6,29,1},{6,29,1},{5,23,4},{26,1,162}, +{5,23,4},{18,6,200},{7,36,2},{10,27,5},{8,27,5},{18,6,200},{28,6,200},{8,27,5},{0,25,200},{28,6,200},{0,25,200},{8,0,218},{8,0,218},{8,0,218},{8,0,218},{7,25,16},{7,25,16},{7,25,16},{7,20,17},{3,25,2},{3,25,2},{9,41,420},{9,36,222},{9,30,286},{9,29,223},{7,43,739},{7,34,260},{7,30,24},{7,27,280},{1,38,725},{4,27,201},{10,38,201}, +{10,34,21},{10,30,14},{10,29,21},{19,2,723},{5,37,200},{7,30,8},{4,27,201},{31,2,723},{4,27,201},{9,35,218},{9,35,218},{9,35,218},{9,28,219},{7,36,178},{7,31,17},{7,31,17},{7,24,21},{2,33,165},{5,25,17},{10,31,2},{10,31,2},{10,31,2},{10,27,2},{17,2,162},{7,31,1},{7,31,1},{6,25,4},{27,3,162},{6,25,4},{21,1,200},{9,36,4},{11,29,5}, 
+{9,29,5},{21,1,200},{29,8,200},{9,29,5},{0,27,200},{29,8,200},{0,27,200},{9,0,218},{9,0,218},{9,0,218},{9,0,218},{7,30,16},{7,30,16},{7,30,16},{7,24,17},{4,27,1},{4,27,1},{10,44,408},{10,38,210},{10,32,276},{10,31,217},{8,45,739},{8,36,260},{8,32,24},{8,29,267},{2,41,727},{5,29,207},{11,41,200},{11,37,17},{11,32,13},{11,31,18},{21,1,723}, +{6,39,203},{8,32,8},{6,29,203},{29,8,723},{6,29,203},{10,37,209},{10,37,209},{10,37,209},{10,31,208},{9,35,178},{8,33,17},{8,33,17},{8,27,21},{3,36,165},{6,27,18},{11,34,0},{11,34,0},{11,34,0},{11,30,1},{19,1,162},{8,33,1},{8,33,1},{7,27,2},{29,4,162},{7,27,2},{23,0,200},{10,38,2},{12,32,8},{9,32,5},{23,0,200},{31,9,200},{9,32,5}, +{0,29,202},{31,9,200},{0,29,202},{10,0,208},{10,0,208},{10,0,208},{10,0,208},{9,29,16},{9,29,16},{9,29,16},{9,25,16},{5,30,2},{5,30,2},{11,46,408},{11,40,210},{11,34,276},{11,33,213},{9,47,739},{9,38,260},{9,34,24},{9,31,267},{3,43,727},{6,31,207},{12,42,203},{12,38,19},{12,34,14},{12,33,21},{22,3,723},{7,41,203},{9,34,8},{7,31,203},{30,10,723}, +{7,31,203},{11,39,209},{11,39,209},{11,39,209},{11,32,209},{10,37,178},{9,35,17},{9,35,17},{9,29,21},{4,38,164},{7,29,18},{12,36,1},{12,36,1},{12,36,1},{12,31,2},{20,3,162},{9,35,1},{9,35,1},{8,29,2},{30,6,162},{8,29,2},{24,2,200},{11,40,2},{13,33,5},{11,33,5},{24,2,200},{27,17,200},{11,33,5},{0,31,202},{27,17,200},{0,31,202},{11,0,208}, +{11,0,208},{11,0,208},{11,0,208},{10,31,16},{10,31,16},{10,31,16},{10,27,16},{6,32,2},{6,32,2},{12,47,420},{12,42,220},{12,36,286},{12,35,223},{10,49,739},{10,40,260},{10,36,24},{10,33,280},{3,46,727},{7,33,202},{13,44,203},{13,40,19},{13,36,14},{13,35,21},{24,1,723},{8,43,202},{10,36,8},{7,33,201},{31,12,723},{7,33,201},{12,41,218},{12,41,218},{12,41,218}, 
+{12,34,219},{11,39,178},{10,37,17},{10,37,17},{10,31,21},{5,40,164},{8,31,21},{13,38,1},{13,38,1},{13,38,1},{13,33,2},{21,5,162},{10,37,1},{10,37,1},{9,31,2},{31,8,162},{9,31,2},{25,4,200},{12,42,2},{14,35,5},{12,35,5},{25,4,200},{28,19,200},{12,35,5},{0,33,200},{28,19,200},{0,33,200},{12,0,218},{12,0,218},{12,0,218},{12,0,218},{11,33,16}, +{11,33,16},{11,33,16},{11,29,16},{7,33,2},{7,33,2},{13,49,420},{13,44,220},{13,38,286},{13,37,223},{11,51,739},{11,42,260},{11,38,24},{11,35,280},{4,48,729},{8,35,201},{14,46,203},{14,42,19},{14,38,14},{14,37,21},{26,0,723},{9,45,202},{11,38,8},{8,35,201},{31,15,723},{8,35,201},{13,43,218},{13,43,218},{13,43,218},{13,36,219},{11,44,178},{11,39,17},{11,39,17}, +{11,32,21},{6,42,164},{9,33,17},{14,40,1},{14,40,1},{14,40,1},{14,35,2},{24,0,162},{11,39,1},{11,39,1},{10,33,4},{31,11,162},{10,33,4},{26,6,200},{13,44,2},{15,37,5},{13,37,5},{26,6,200},{29,21,200},{13,37,5},{0,35,200},{29,21,200},{0,35,200},{13,0,218},{13,0,218},{13,0,218},{13,0,218},{11,38,16},{11,38,16},{11,38,16},{11,32,17},{8,35,1}, +{8,35,1},{14,52,408},{14,46,212},{14,41,282},{14,39,217},{12,53,739},{12,44,259},{12,40,27},{12,37,267},{6,49,727},{9,37,207},{15,49,200},{15,44,20},{15,40,17},{15,39,18},{26,6,723},{10,48,203},{12,40,11},{10,37,203},{29,21,723},{10,37,203},{14,46,208},{14,46,208},{14,46,208},{14,39,208},{13,43,178},{12,41,18},{12,41,18},{12,35,21},{7,44,163},{10,35,18},{15,42,1}, +{15,42,1},{15,42,1},{15,38,1},{24,6,162},{12,41,2},{12,41,2},{11,35,2},{29,17,162},{11,35,2},{28,5,200},{13,48,2},{16,40,5},{13,40,1},{28,5,200},{31,22,200},{13,40,1},{0,37,202},{31,22,200},{0,37,202},{14,0,208},{14,0,208},{14,0,208},{14,0,208},{13,37,16},{13,37,16},{13,37,16},{13,33,16},{9,38,2},{9,38,2},{15,54,408},{15,48,210},{15,43,282}, 
+{15,41,217},{13,55,739},{13,46,259},{13,42,27},{13,39,267},{7,51,727},{10,39,207},{16,50,203},{16,47,18},{16,42,11},{16,41,26},{29,1,723},{11,49,203},{13,42,11},{11,39,203},{30,23,723},{11,39,203},{15,47,209},{15,47,209},{15,47,209},{15,41,208},{14,45,178},{13,43,18},{13,43,18},{13,37,21},{8,46,164},{11,37,18},{16,44,1},{16,44,1},{16,44,1},{16,39,2},{27,1,162}, +{13,43,2},{13,43,2},{12,37,2},{30,19,162},{12,37,2},{31,0,200},{15,48,2},{17,42,5},{14,42,1},{31,0,200},{28,29,200},{14,42,1},{0,39,202},{28,29,200},{0,39,202},{15,0,208},{15,0,208},{15,0,208},{15,0,208},{14,39,16},{14,39,16},{14,39,16},{14,35,16},{10,40,2},{10,40,2},{16,56,418},{16,50,220},{16,44,283},{16,43,228},{14,57,739},{14,48,260},{14,44,27}, +{14,41,267},{7,54,727},{11,41,207},{17,52,203},{17,48,19},{17,44,11},{17,43,26},{30,3,723},{12,51,202},{14,44,11},{12,41,203},{31,25,723},{12,41,203},{16,49,218},{16,49,218},{16,49,218},{16,43,219},{15,47,178},{14,45,18},{14,45,18},{14,39,21},{9,48,164},{12,39,21},{17,46,1},{17,46,1},{17,46,1},{17,41,2},{28,3,162},{14,45,2},{14,45,2},{13,39,2},{31,21,162}, +{13,39,2},{30,9,200},{16,50,2},{18,44,5},{15,44,1},{30,9,200},{29,31,200},{15,44,1},{0,41,202},{29,31,200},{0,41,202},{16,0,218},{16,0,218},{16,0,218},{16,0,218},{15,41,16},{15,41,16},{15,41,16},{15,37,16},{11,42,2},{11,42,2},{17,58,418},{17,52,220},{17,46,283},{17,45,228},{15,59,739},{15,50,260},{15,46,27},{15,43,267},{8,56,724},{12,43,206},{18,54,203}, +{18,50,19},{18,46,11},{18,45,26},{31,5,723},{13,53,202},{15,46,11},{13,43,203},{31,28,723},{13,43,203},{17,51,218},{17,51,218},{17,51,218},{17,45,219},{15,52,178},{15,47,18},{15,47,18},{15,41,21},{10,50,164},{13,41,21},{18,48,1},{18,48,1},{18,48,1},{18,43,2},{29,5,162},{15,47,2},{15,47,2},{14,41,2},{31,24,162},{14,41,2},{31,11,200},{17,52,2},{19,46,5}, 
+{16,46,1},{31,11,200},{29,34,200},{16,46,1},{0,43,202},{29,34,200},{0,43,202},{17,0,218},{17,0,218},{17,0,218},{17,0,218},{15,46,17},{15,46,17},{15,46,17},{15,40,16},{12,44,2},{12,44,2},{18,60,410},{18,54,212},{18,49,282},{18,48,218},{17,58,739},{16,52,259},{16,48,27},{16,45,273},{10,57,724},{13,46,208},{19,57,200},{19,52,20},{19,48,17},{19,47,25},{31,11,723}, +{14,56,203},{16,48,11},{14,45,208},{29,34,723},{14,45,208},{18,54,208},{18,54,208},{18,54,208},{18,47,208},{17,51,178},{16,49,18},{16,49,18},{16,43,20},{11,52,163},{14,44,24},{19,50,1},{19,50,1},{19,50,1},{19,46,0},{31,4,162},{16,49,2},{16,49,2},{15,43,4},{30,29,162},{15,43,4},{30,20,200},{18,54,4},{20,48,5},{17,48,1},{30,20,200},{31,35,200},{17,48,1}, +{0,45,208},{31,35,200},{0,45,208},{18,0,208},{18,0,208},{18,0,208},{18,0,208},{17,45,17},{17,45,17},{17,45,17},{17,41,16},{13,46,0},{13,46,0},{19,62,410},{19,56,212},{19,51,282},{19,49,217},{18,60,739},{17,54,259},{17,50,27},{17,47,273},{11,59,724},{14,47,218},{20,59,201},{20,55,18},{20,50,11},{20,49,26},{31,16,723},{15,58,203},{17,50,11},{15,47,208},{30,36,723}, +{15,47,208},{19,56,208},{19,56,208},{19,56,208},{19,49,208},{18,53,178},{17,51,18},{17,51,18},{17,45,20},{12,54,164},{15,46,24},{20,52,1},{20,52,1},{20,52,1},{20,48,2},{30,13,162},{17,51,2},{17,51,2},{16,45,1},{31,31,162},{16,45,1},{31,22,200},{19,56,4},{21,50,5},{18,50,1},{31,22,200},{28,42,200},{18,50,1},{0,47,208},{28,42,200},{0,47,208},{19,0,208}, +{19,0,208},{19,0,208},{19,0,208},{18,47,17},{18,47,17},{18,47,17},{18,43,16},{14,48,2},{14,48,2},{20,63,426},{20,58,223},{20,52,283},{20,51,228},{19,62,739},{18,56,259},{18,52,27},{18,49,267},{12,61,727},{15,49,207},{21,61,201},{21,57,18},{21,52,11},{21,51,26},{31,21,723},{16,60,203},{18,52,11},{16,49,203},{31,38,723},{16,49,203},{20,57,219},{20,57,219},{20,57,219}, 
+{20,51,219},{19,55,178},{18,53,18},{18,53,18},{18,47,20},{13,56,164},{16,47,17},{21,54,1},{21,54,1},{21,54,1},{21,49,2},{31,15,162},{18,53,2},{18,53,2},{17,47,1},{31,34,162},{17,47,1},{31,27,200},{19,60,4},{22,52,5},{19,52,1},{31,27,200},{29,44,200},{19,52,1},{0,49,202},{29,44,200},{0,49,202},{20,0,218},{20,0,218},{20,0,218},{20,0,218},{19,49,16}, +{19,49,16},{19,49,16},{19,45,16},{15,50,2},{15,50,2},{21,63,468},{21,60,223},{21,54,283},{21,53,228},{20,63,749},{19,58,259},{19,54,27},{19,51,267},{13,63,727},{16,51,206},{22,63,201},{22,59,18},{22,54,11},{22,53,26},{30,30,723},{17,62,203},{19,54,11},{17,51,203},{31,41,723},{17,51,203},{21,59,219},{21,59,219},{21,59,219},{21,53,219},{19,60,180},{19,55,18},{19,55,18}, +{19,49,21},{14,58,164},{17,49,21},{22,56,1},{22,56,1},{22,56,1},{22,51,2},{31,20,162},{19,55,2},{19,55,2},{18,49,2},{31,37,162},{18,49,2},{31,32,200},{20,62,1},{23,54,5},{20,54,1},{31,32,200},{30,46,200},{20,54,1},{0,51,202},{30,46,200},{0,51,202},{21,0,218},{21,0,218},{21,0,218},{21,0,218},{19,54,17},{19,54,17},{19,54,17},{19,48,16},{16,52,2}, +{16,52,2},{22,63,570},{22,63,209},{22,57,288},{22,56,212},{21,63,804},{20,61,254},{20,56,33},{20,53,273},{15,63,753},{17,54,208},{23,63,232},{23,61,13},{23,56,16},{23,55,25},{31,32,723},{19,62,212},{20,56,17},{18,53,208},{30,46,723},{18,53,208},{22,62,208},{22,62,208},{22,62,208},{22,55,208},{20,63,178},{20,57,17},{20,57,17},{20,51,20},{15,60,163},{18,52,24},{23,59,0}, +{23,59,0},{23,59,0},{23,54,0},{31,26,162},{20,57,1},{20,57,1},{19,51,4},{30,42,162},{19,51,4},{31,38,200},{22,63,1},{24,56,1},{21,56,1},{31,38,200},{31,48,200},{21,56,1},{0,53,208},{31,48,200},{0,53,208},{22,0,208},{22,0,208},{22,0,208},{22,0,208},{20,57,16},{20,57,16},{20,57,16},{21,49,16},{17,54,0},{17,54,0},{23,63,696},{23,63,237},{23,59,288}, 
+{23,58,212},{23,63,888},{21,63,254},{21,58,33},{21,55,273},{17,63,824},{18,56,208},{24,63,273},{24,62,21},{24,58,17},{24,57,26},{30,41,723},{20,63,233},{21,58,17},{19,55,208},{29,50,723},{19,55,208},{23,63,212},{23,63,212},{23,63,212},{23,57,208},{22,61,180},{21,59,17},{21,59,17},{21,53,20},{16,62,163},{19,54,24},{24,60,2},{24,60,2},{24,60,2},{24,56,2},{31,31,162}, +{21,59,1},{21,59,1},{20,53,1},{31,44,162},{20,53,1},{30,47,200},{24,62,20},{25,58,1},{22,58,1},{30,47,200},{31,51,200},{22,58,1},{0,55,208},{31,51,200},{0,55,208},{23,0,208},{23,0,208},{23,0,208},{23,0,208},{21,59,16},{21,59,16},{21,59,16},{22,51,16},{18,56,0},{18,56,0},{25,63,804},{24,63,334},{24,61,283},{24,60,227},{24,63,957},{22,63,297},{22,60,33}, +{22,57,273},{20,63,913},{19,58,208},{26,63,313},{25,63,51},{25,60,17},{25,59,26},{31,43,723},{22,63,281},{22,60,17},{19,58,208},{30,52,723},{19,58,208},{24,63,234},{24,63,234},{24,63,234},{24,59,218},{23,63,180},{22,61,17},{22,61,17},{22,55,20},{18,62,171},{20,55,17},{25,62,2},{25,62,2},{25,62,2},{25,58,2},{31,36,162},{22,61,1},{22,61,1},{21,55,1},{31,47,162}, +{21,55,1},{30,52,200},{25,63,50},{26,60,1},{23,60,1},{30,52,200},{28,58,200},{23,60,1},{0,57,208},{28,58,200},{0,57,208},{24,0,218},{24,0,218},{24,0,218},{24,0,218},{22,61,16},{22,61,16},{22,61,16},{23,53,16},{19,58,0},{19,58,0},{26,63,930},{25,63,492},{25,63,283},{25,62,227},{25,63,1068},{24,63,389},{23,62,33},{23,59,273},{21,63,999},{20,60,209},{27,63,379}, +{26,63,149},{26,62,17},{26,61,26},{31,48,723},{24,63,364},{24,62,16},{20,60,208},{31,54,723},{20,60,208},{25,63,267},{25,63,267},{25,63,267},{25,61,218},{24,63,205},{23,63,17},{23,63,17},{23,57,20},{20,63,189},{21,57,17},{26,63,5},{26,63,5},{26,63,5},{26,60,2},{30,45,162},{23,63,1},{23,63,1},{22,57,1},{31,50,162},{22,57,1},{31,54,200},{27,63,90},{27,62,1}, 
+{24,62,0},{31,54,200},{29,60,200},{24,62,0},{0,59,208},{29,60,200},{0,59,208},{25,0,218},{25,0,218},{25,0,218},{25,0,218},{23,63,16},{23,63,16},{23,63,16},{23,56,17},{20,60,1},{20,60,1},{27,63,877},{26,63,585},{26,63,329},{26,63,209},{26,63,990},{25,63,397},{25,63,36},{24,61,165},{23,63,910},{22,61,122},{28,63,306},{28,63,162},{27,63,36},{27,63,4},{30,56,546}, +{26,63,306},{25,63,20},{22,61,113},{30,58,546},{22,61,113},{26,63,329},{26,63,329},{26,63,329},{26,63,209},{25,63,276},{25,63,36},{25,63,36},{24,59,18},{22,63,230},{22,60,22},{27,63,36},{27,63,36},{27,63,36},{27,62,0},{29,54,162},{25,63,20},{25,63,20},{24,59,2},{29,56,162},{24,59,2},{31,58,113},{29,63,61},{28,63,0},{26,63,1},{31,58,113},{31,60,113},{26,63,1}, +{0,61,113},{31,60,113},{0,61,113},{26,0,208},{26,0,208},{26,0,208},{26,0,208},{25,62,16},{25,62,16},{25,62,16},{25,57,17},{21,62,1},{21,62,1},{28,63,731},{27,63,573},{27,63,404},{27,63,244},{27,63,797},{26,63,354},{26,63,98},{25,62,57},{25,63,737},{23,62,38},{29,63,190},{29,63,126},{28,63,65},{28,63,5},{31,56,333},{28,63,185},{27,63,52},{24,62,26},{30,60,333}, +{24,62,26},{27,63,404},{27,63,404},{27,63,404},{27,63,244},{27,63,356},{26,63,98},{26,63,98},{25,61,18},{24,63,315},{23,62,22},{28,63,65},{28,63,65},{28,63,65},{28,63,5},{30,56,162},{27,63,52},{27,63,52},{25,61,2},{30,58,162},{25,61,2},{31,61,25},{30,63,13},{30,63,4},{29,63,1},{31,61,25},{31,62,25},{29,63,1},{0,62,25},{31,62,25},{0,62,25},{27,0,208}, +{27,0,208},{27,0,208},{27,0,208},{26,63,17},{26,63,17},{26,63,17},{26,59,17},{23,63,9},{23,63,9},{29,63,642},{28,63,524},{28,63,443},{28,63,299},{28,63,623},{28,63,335},{27,63,201},{26,63,17},{27,63,610},{24,63,26},{30,63,131},{30,63,115},{29,63,101},{29,63,37},{31,59,193},{29,63,121},{28,63,85},{26,63,1},{30,62,193},{26,63,1},{28,63,443},{28,63,443},{28,63,443}, 
+{28,63,299},{28,63,398},{27,63,201},{27,63,201},{26,63,17},{26,63,378},{24,63,26},{29,63,101},{29,63,101},{29,63,101},{29,63,37},{31,57,145},{28,63,85},{28,63,85},{26,63,1},{31,60,145},{26,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{28,0,218},{28,0,218},{28,0,218},{28,0,218},{27,63,32}, +{27,63,32},{27,63,32},{27,61,17},{24,63,26},{24,63,26},{29,63,418},{29,63,354},{29,63,318},{29,63,254},{29,63,370},{28,63,223},{28,63,142},{28,63,25},{28,63,358},{26,63,58},{30,63,51},{30,63,35},{30,63,26},{30,63,10},{31,61,54},{30,63,34},{30,63,25},{28,63,0},{31,62,54},{28,63,0},{29,63,318},{29,63,318},{29,63,318},{29,63,254},{29,63,270},{28,63,142},{28,63,142}, +{28,63,25},{27,63,249},{26,63,58},{30,63,26},{30,63,26},{30,63,26},{30,63,10},{31,60,41},{30,63,25},{30,63,25},{28,63,0},{31,61,41},{28,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{29,0,218},{29,0,218},{29,0,218},{29,0,218},{28,63,61},{28,63,61},{28,63,61},{28,63,25},{26,63,58}, +{26,63,58},{0,18,421},{0,15,106},{0,10,8},{0,9,117},{0,12,925},{0,9,650},{0,9,286},{0,6,670},{0,7,1030},{0,5,726},{0,18,421},{0,15,106},{0,10,8},{0,9,117},{3,2,925},{0,9,650},{0,9,286},{0,6,670},{6,0,925},{0,6,670},{0,9,0},{0,9,0},{0,9,0},{0,5,1},{0,4,85},{0,4,45},{0,4,45},{0,2,50},{0,2,98},{0,2,59},{0,9,0}, +{0,9,0},{0,9,0},{0,5,1},{1,1,85},{0,4,45},{0,4,45},{0,2,50},{2,0,85},{0,2,50},{5,1,421},{0,15,106},{0,10,8},{0,9,117},{5,1,421},{9,0,421},{0,9,117},{0,7,421},{9,0,421},{0,7,421},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,24,421},{0,19,53},{0,13,8}, +{0,11,72},{0,16,1261},{0,12,805},{0,11,328},{0,7,822},{0,9,1438},{0,7,922},{0,24,421},{0,19,53},{0,13,8},{0,11,72},{3,6,1261},{0,12,805},{0,11,328},{0,7,822},{8,0,1261},{0,7,822},{0,14,0},{0,14,0},{0,14,0},{0,8,1},{0,7,221},{0,7,116},{0,7,116},{0,4,125},{0,4,257},{0,3,146},{0,14,0},{0,14,0},{0,14,0},{0,8,1},{2,0,221}, 
+{0,7,116},{0,7,116},{0,4,125},{3,1,221},{0,4,125},{7,0,421},{0,19,53},{1,12,8},{0,11,72},{7,0,421},{11,1,421},{0,11,72},{0,9,421},{11,1,421},{0,9,421},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,29,430},{0,22,34},{0,15,62},{0,14,49},{0,20,1514},{0,15,866},{0,13,301}, +{0,9,894},{0,11,1797},{0,9,1063},{0,29,430},{0,22,34},{1,15,33},{0,14,49},{6,0,1514},{0,15,866},{0,13,301},{0,9,894},{10,0,1514},{0,9,894},{0,19,10},{0,19,10},{0,19,10},{0,12,10},{0,11,338},{0,9,149},{0,9,149},{0,5,162},{0,5,419},{0,5,211},{0,19,10},{0,19,10},{0,19,10},{0,12,10},{3,1,338},{0,9,149},{0,9,149},{0,5,162},{3,3,338}, +{0,5,162},{8,2,421},{0,22,25},{2,14,8},{0,14,40},{8,2,421},{14,0,421},{0,14,40},{0,11,421},{14,0,421},{0,11,421},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,2,0},{0,2,0},{0,2,0},{0,1,1},{0,1,2},{0,1,2},{1,31,494},{1,24,98},{1,17,131},{1,16,110},{0,25,1517},{0,19,734},{0,15,157},{0,11,789},{0,14,1982},{0,11,1045},{1,31,430}, +{1,24,34},{2,16,35},{1,16,46},{7,2,1514},{0,19,734},{0,15,157},{0,11,789},{11,2,1514},{0,11,789},{1,21,74},{1,21,74},{1,21,74},{1,14,74},{0,16,338},{0,13,89},{0,13,89},{0,8,106},{0,8,514},{0,7,217},{1,21,10},{1,21,10},{1,21,10},{1,14,10},{3,6,338},{0,13,89},{0,13,89},{0,8,106},{8,0,338},{0,8,106},{10,0,421},{0,26,5},{3,16,5}, +{0,16,10},{10,0,421},{17,0,421},{0,16,10},{0,13,421},{17,0,421},{0,13,421},{1,0,73},{1,0,73},{1,0,73},{1,0,73},{0,7,1},{0,7,1},{0,7,1},{0,4,1},{0,3,32},{0,3,32},{1,37,629},{1,28,213},{2,19,340},{1,18,216},{0,31,1517},{0,23,629},{0,18,54},{0,14,686},{0,17,2187},{0,13,1070},{3,30,437},{2,27,33},{3,19,29},{2,18,45},{9,1,1514}, +{0,23,629},{0,18,54},{0,14,686},{13,3,1514},{0,14,686},{1,27,209},{1,27,209},{1,27,209},{1,17,208},{0,22,338},{0,17,34},{0,17,34},{0,11,53},{0,11,666},{0,9,273},{3,21,16},{3,21,16},{3,21,16},{3,15,16},{6,2,338},{0,17,34},{0,17,34},{0,11,53},{11,0,338},{0,11,53},{12,0,421},{0,30,1},{4,18,8},{0,18,5},{12,0,421},{20,0,421},{0,18,5}, 
+{0,15,421},{20,0,421},{0,15,421},{1,0,208},{1,0,208},{1,0,208},{1,0,208},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{0,6,106},{0,6,106},{2,39,821},{2,30,405},{2,22,557},{2,20,408},{0,36,1517},{0,25,562},{0,21,14},{0,16,589},{0,19,2445},{0,15,1130},{3,35,437},{3,29,33},{4,21,33},{3,20,45},{10,3,1514},{0,25,562},{0,21,14},{0,16,589},{18,0,1514}, +{0,16,589},{2,29,401},{2,29,401},{2,29,401},{2,19,400},{0,27,340},{0,21,10},{0,21,10},{0,13,20},{0,14,851},{0,11,357},{3,26,16},{3,26,16},{3,26,16},{3,18,16},{8,0,338},{0,21,10},{0,21,10},{0,13,20},{11,3,338},{0,13,20},{13,1,421},{1,32,2},{5,20,8},{1,20,5},{13,1,421},{21,2,421},{1,20,5},{0,17,421},{21,2,421},{0,17,421},{2,0,400}, +{2,0,400},{2,0,400},{2,0,400},{0,19,0},{0,19,0},{0,19,0},{0,11,1},{0,9,205},{0,9,205},{3,41,854},{3,32,435},{3,24,590},{3,22,441},{1,38,1518},{0,29,543},{1,23,15},{0,18,575},{0,23,2318},{0,17,906},{4,37,430},{4,30,34},{5,23,33},{4,22,49},{12,1,1514},{0,29,494},{1,23,14},{0,18,526},{19,2,1514},{0,18,526},{3,31,434},{3,31,434},{3,31,434}, +{3,21,433},{1,29,341},{1,23,11},{1,23,11},{1,15,21},{0,17,734},{0,15,230},{4,27,10},{4,27,10},{4,27,10},{4,20,10},{9,2,338},{0,25,2},{0,25,2},{0,15,5},{16,0,338},{0,15,5},{15,0,421},{3,32,2},{6,22,8},{2,22,5},{15,0,421},{25,0,421},{2,22,5},{0,19,421},{25,0,421},{0,19,421},{3,0,433},{3,0,433},{3,0,433},{3,0,433},{1,21,1}, +{1,21,1},{1,21,1},{1,13,2},{0,12,157},{0,12,157},{4,42,866},{4,34,450},{4,26,581},{4,24,458},{2,40,1518},{1,31,543},{2,25,15},{1,20,575},{0,27,2166},{0,20,706},{5,39,430},{5,32,34},{6,25,33},{5,24,49},{14,0,1514},{0,32,461},{2,25,14},{0,20,481},{20,4,1514},{0,20,481},{4,33,445},{4,33,445},{4,33,445},{4,23,445},{2,31,341},{2,25,11},{2,25,11}, 
+{2,17,27},{0,21,626},{0,17,102},{5,29,10},{5,29,10},{5,29,10},{5,22,10},{11,1,338},{1,27,2},{1,27,2},{0,17,2},{17,2,338},{0,17,2},{16,1,421},{3,36,2},{7,24,8},{3,24,5},{16,1,421},{27,1,421},{3,24,5},{0,21,421},{27,1,421},{0,21,421},{4,0,445},{4,0,445},{4,0,445},{4,0,445},{2,23,1},{2,23,1},{2,23,1},{2,15,2},{0,16,97}, +{0,16,97},{5,45,854},{5,36,438},{5,28,579},{5,26,446},{3,43,1514},{3,32,545},{3,27,9},{3,22,582},{0,31,2010},{0,22,546},{7,38,437},{6,35,33},{7,27,26},{6,26,50},{14,6,1514},{0,37,430},{3,27,9},{0,23,446},{26,0,1514},{0,23,446},{5,35,434},{5,35,434},{5,35,434},{5,25,434},{3,34,338},{3,27,8},{3,27,8},{3,19,20},{0,25,525},{0,19,45},{7,29,16}, +{7,29,16},{7,29,16},{7,23,16},{13,0,338},{2,29,0},{2,29,0},{1,20,4},{20,2,338},{1,20,4},{18,1,421},{4,38,1},{8,26,10},{4,27,2},{18,1,421},{29,2,421},{4,27,2},{0,23,421},{29,2,421},{0,23,421},{5,0,433},{5,0,433},{5,0,433},{5,0,433},{3,25,1},{3,25,1},{3,25,1},{3,17,1},{0,19,41},{0,19,41},{6,47,854},{6,38,438},{6,30,579}, +{6,28,446},{4,45,1515},{3,36,553},{4,29,15},{3,24,589},{0,34,1887},{0,25,450},{7,44,437},{7,37,33},{8,29,30},{7,28,50},{16,4,1514},{0,40,422},{4,29,14},{0,25,425},{27,2,1514},{0,25,425},{6,37,434},{6,37,434},{6,37,434},{6,27,434},{4,35,341},{4,29,14},{4,29,14},{4,21,21},{0,29,461},{0,22,41},{7,34,16},{7,34,16},{7,34,16},{7,26,16},{14,2,338}, +{3,31,0},{3,31,0},{2,22,4},{24,0,338},{2,22,4},{19,3,421},{5,40,1},{8,29,5},{5,29,2},{19,3,421},{30,4,421},{5,29,2},{0,25,421},{30,4,421},{0,25,421},{6,0,433},{6,0,433},{6,0,433},{6,0,433},{4,27,1},{4,27,1},{4,27,1},{4,19,2},{0,23,13},{0,23,13},{7,49,854},{7,40,438},{7,32,590},{7,30,446},{5,47,1515},{4,37,543},{5,31,15}, +{4,26,591},{0,36,1785},{0,27,422},{8,45,430},{8,38,34},{9,31,30},{8,30,43},{17,6,1514},{1,42,422},{5,31,14},{0,27,422},{28,4,1514},{0,27,422},{7,39,434},{7,39,434},{7,39,434},{7,29,434},{5,37,341},{5,31,14},{5,31,14},{5,23,21},{0,32,404},{1,24,41},{8,35,10},{8,35,10},{8,35,10},{8,28,9},{16,0,338},{4,33,2},{4,33,2},{3,24,4},{26,1,338}, 
+{3,24,4},{21,1,421},{6,42,1},{9,31,5},{6,31,2},{21,1,421},{31,6,421},{6,31,2},{0,27,421},{31,6,421},{0,27,421},{7,0,433},{7,0,433},{7,0,433},{7,0,433},{5,29,1},{5,29,1},{5,29,1},{5,21,2},{0,27,1},{0,27,1},{8,50,866},{8,42,450},{8,34,581},{8,32,458},{6,48,1518},{5,39,543},{6,33,15},{5,28,591},{0,40,1685},{1,29,422},{9,47,430}, +{9,40,34},{10,33,33},{9,32,49},{20,1,1514},{2,44,422},{6,33,14},{1,29,422},{29,6,1514},{1,29,422},{8,41,445},{8,41,445},{8,41,445},{8,31,446},{6,39,341},{6,33,11},{6,33,11},{6,25,21},{0,35,371},{2,26,41},{9,37,10},{9,37,10},{9,37,10},{9,30,9},{17,2,338},{5,35,2},{5,35,2},{5,25,5},{27,3,338},{5,25,5},{23,0,421},{7,44,1},{11,32,8}, +{7,32,5},{23,0,421},{31,9,421},{7,32,5},{0,29,421},{31,9,421},{0,29,421},{8,0,445},{8,0,445},{8,0,445},{8,0,445},{6,31,1},{6,31,1},{6,31,1},{6,23,2},{1,29,1},{1,29,1},{9,53,854},{9,45,437},{9,36,579},{9,34,446},{7,51,1514},{7,41,546},{7,35,9},{6,31,589},{0,44,1607},{2,31,430},{11,46,437},{10,43,36},{11,35,26},{10,34,50},{22,0,1514}, +{3,47,421},{7,35,9},{3,31,426},{31,7,1514},{3,31,426},{9,43,434},{9,43,434},{9,43,434},{9,33,434},{7,42,338},{7,35,8},{7,35,8},{7,27,18},{0,39,344},{3,29,42},{11,37,16},{11,37,16},{11,37,16},{10,32,16},{19,1,338},{6,37,0},{6,37,0},{5,28,1},{29,4,338},{5,28,1},{24,2,421},{8,46,1},{12,34,10},{8,35,2},{24,2,421},{30,14,421},{8,35,2}, +{0,31,425},{30,14,421},{0,31,425},{9,0,433},{9,0,433},{9,0,433},{9,0,433},{7,33,1},{7,33,1},{7,33,1},{7,26,1},{2,32,2},{2,32,2},{10,55,854},{10,47,437},{10,38,579},{10,36,446},{8,53,1515},{7,43,561},{8,37,15},{7,32,589},{0,47,1577},{3,33,425},{11,52,437},{11,45,36},{12,37,30},{11,36,50},{23,2,1514},{4,48,422},{8,37,14},{3,33,425},{28,14,1514}, 
+{3,33,425},{10,45,434},{10,45,434},{10,45,434},{10,35,434},{8,43,341},{8,37,14},{8,37,14},{8,29,19},{0,43,340},{4,30,38},{11,42,16},{11,42,16},{11,42,16},{11,34,16},{20,3,338},{7,39,0},{7,39,0},{6,30,1},{30,6,338},{6,30,1},{26,1,421},{9,48,1},{12,37,5},{9,37,2},{26,1,421},{30,17,421},{9,37,2},{0,33,421},{30,17,421},{0,33,421},{10,0,433}, +{10,0,433},{10,0,433},{10,0,433},{8,35,1},{8,35,1},{8,35,1},{8,28,2},{3,34,2},{3,34,2},{11,57,854},{11,48,438},{11,40,579},{11,38,446},{9,55,1515},{8,46,555},{9,39,15},{8,34,591},{0,51,1530},{4,35,422},{12,53,430},{12,46,34},{13,39,30},{12,38,43},{24,4,1514},{5,50,422},{9,39,14},{4,35,422},{28,17,1514},{4,35,422},{11,47,434},{11,47,434},{11,47,434}, +{11,37,434},{9,45,341},{9,39,14},{9,39,14},{9,31,19},{1,45,340},{5,32,41},{12,44,9},{12,44,9},{12,44,9},{12,36,9},{21,5,338},{8,41,1},{8,41,1},{7,32,4},{31,8,338},{7,32,4},{27,3,421},{10,50,1},{13,39,5},{10,39,2},{27,3,421},{31,19,421},{10,39,2},{0,35,421},{31,19,421},{0,35,421},{11,0,433},{11,0,433},{11,0,433},{11,0,433},{9,37,1}, +{9,37,1},{9,37,1},{9,30,2},{4,35,1},{4,35,1},{12,58,866},{12,50,450},{12,41,590},{12,40,458},{10,57,1515},{9,48,555},{10,41,15},{9,36,591},{0,54,1518},{5,37,422},{13,55,430},{13,48,34},{14,41,30},{13,40,43},{25,6,1514},{6,52,422},{10,41,14},{5,37,422},{29,19,1514},{5,37,422},{12,49,445},{12,49,445},{12,49,445},{12,39,446},{10,47,341},{10,41,14},{10,41,14}, +{10,33,21},{2,47,340},{6,34,41},{13,46,9},{13,46,9},{13,46,9},{13,38,9},{24,0,338},{9,43,1},{9,43,1},{9,33,5},{31,11,338},{9,33,5},{29,1,421},{11,52,1},{14,41,5},{11,41,2},{29,1,421},{31,22,421},{11,41,2},{0,37,421},{31,22,421},{0,37,421},{12,0,445},{12,0,445},{12,0,445},{12,0,445},{10,39,1},{10,39,1},{10,39,1},{10,31,5},{5,37,1}, 
+{5,37,1},{13,61,854},{13,53,437},{13,44,593},{13,43,442},{11,59,1517},{11,49,546},{11,43,13},{10,39,589},{1,57,1518},{6,39,430},{15,54,437},{14,51,36},{15,43,25},{14,42,48},{27,5,1514},{7,55,421},{11,43,13},{7,39,426},{31,20,1514},{7,39,426},{13,51,434},{13,51,434},{13,51,434},{13,42,434},{11,50,338},{11,43,13},{11,43,13},{11,35,18},{4,47,341},{7,37,42},{15,45,16}, +{15,45,16},{15,45,16},{15,39,17},{24,6,338},{10,45,1},{10,45,1},{9,36,1},{29,17,338},{9,36,1},{31,0,421},{12,54,1},{16,43,4},{12,43,0},{31,0,421},{30,27,421},{12,43,0},{0,39,425},{30,27,421},{0,39,425},{13,0,433},{13,0,433},{13,0,433},{13,0,433},{11,41,1},{11,41,1},{11,41,1},{11,34,1},{6,40,1},{6,40,1},{14,63,854},{14,55,437},{14,46,593}, +{14,45,442},{12,61,1515},{11,51,561},{12,45,19},{11,41,589},{2,59,1518},{7,41,430},{15,60,437},{15,53,36},{16,45,34},{15,44,48},{30,0,1514},{9,55,425},{12,45,18},{8,41,426},{28,27,1514},{8,41,426},{14,53,434},{14,53,434},{14,53,434},{14,44,434},{12,51,341},{12,46,17},{12,46,17},{12,37,19},{4,51,340},{8,38,38},{15,50,16},{15,50,16},{15,50,16},{15,42,17},{27,1,338}, +{11,47,1},{11,47,1},{10,38,1},{30,19,338},{10,38,1},{31,6,421},{13,56,1},{17,45,4},{13,45,0},{31,6,421},{31,29,421},{13,45,0},{0,41,425},{31,29,421},{0,41,425},{14,0,433},{14,0,433},{14,0,433},{14,0,433},{12,43,1},{12,43,1},{12,43,1},{12,36,2},{7,42,1},{7,42,1},{15,63,878},{15,57,437},{15,48,579},{15,47,442},{13,63,1515},{12,54,555},{13,47,19}, +{12,43,574},{3,61,1518},{8,43,429},{16,61,430},{16,54,34},{17,47,34},{16,46,41},{31,2,1514},{10,57,425},{13,47,18},{9,43,426},{29,29,1514},{9,43,426},{15,55,434},{15,55,434},{15,55,434},{15,46,434},{13,53,341},{13,47,19},{13,47,19},{13,39,19},{5,53,340},{9,40,38},{16,52,9},{16,52,9},{16,52,9},{16,44,10},{28,3,338},{12,49,1},{12,49,1},{11,40,1},{31,21,338}, 
+{11,40,1},{31,11,421},{14,58,1},{18,47,4},{14,47,0},{31,11,421},{31,32,421},{14,47,0},{0,43,425},{31,32,421},{0,43,425},{15,0,433},{15,0,433},{15,0,433},{15,0,433},{13,45,1},{13,45,1},{13,45,1},{13,38,2},{8,43,4},{8,43,4},{16,63,926},{16,58,450},{16,49,590},{16,48,458},{14,63,1542},{13,56,555},{14,49,15},{13,45,574},{4,62,1517},{9,45,429},{17,63,430}, +{17,56,34},{18,49,30},{17,48,43},{30,11,1514},{11,59,425},{14,49,14},{10,45,426},{30,31,1514},{10,45,426},{16,57,445},{16,57,445},{16,57,445},{16,47,446},{14,55,341},{14,49,14},{14,49,14},{14,41,19},{6,55,340},{10,42,38},{17,54,9},{17,54,9},{17,54,9},{17,46,10},{29,5,338},{13,51,1},{13,51,1},{12,42,1},{31,24,338},{12,42,1},{31,16,421},{15,60,1},{18,49,5}, +{15,49,2},{31,16,421},{31,35,421},{15,49,2},{0,45,425},{31,35,421},{0,45,425},{16,0,445},{16,0,445},{16,0,445},{16,0,445},{14,47,1},{14,47,1},{14,47,1},{14,40,2},{9,45,4},{9,45,4},{17,63,1034},{17,61,438},{17,52,593},{17,51,442},{16,63,1598},{15,57,554},{15,51,13},{15,47,577},{6,63,1535},{10,48,434},{19,63,437},{18,59,41},{19,51,25},{18,50,48},{29,20,1514}, +{11,63,422},{15,51,13},{11,47,433},{31,33,1514},{11,47,433},{17,60,433},{17,60,433},{17,60,433},{17,50,434},{15,58,338},{15,51,13},{15,51,13},{15,43,20},{7,57,339},{12,44,41},{19,53,16},{19,53,16},{19,53,16},{19,47,17},{31,4,338},{14,53,1},{14,53,1},{13,44,1},{30,29,338},{13,44,1},{31,22,421},{16,63,1},{20,51,4},{16,51,0},{31,22,421},{30,40,421},{16,51,0}, +{0,47,433},{30,40,421},{0,47,433},{17,0,433},{17,0,433},{17,0,433},{17,0,433},{15,49,1},{15,49,1},{15,49,1},{15,42,0},{10,48,1},{10,48,1},{18,63,1166},{18,63,438},{18,54,593},{18,53,442},{17,63,1643},{15,60,561},{16,53,19},{15,49,589},{8,63,1566},{11,49,430},{20,63,458},{19,61,41},{20,53,34},{19,52,48},{30,22,1514},{13,63,429},{16,53,18},{12,49,426},{28,40,1514}, 
+{12,49,426},{18,62,433},{18,62,433},{18,62,433},{18,52,434},{16,60,339},{16,54,17},{16,54,17},{16,45,26},{9,57,341},{13,46,41},{19,58,17},{19,58,17},{19,58,17},{19,50,17},{30,13,338},{15,55,1},{15,55,1},{14,46,1},{31,31,338},{14,46,1},{31,27,421},{18,63,5},{21,53,4},{17,53,0},{31,27,421},{31,42,421},{17,53,0},{0,49,425},{31,42,421},{0,49,425},{18,0,433}, +{18,0,433},{18,0,433},{18,0,433},{16,51,1},{16,51,1},{16,51,1},{16,44,1},{11,50,1},{11,50,1},{20,63,1326},{19,63,470},{19,56,593},{19,55,442},{18,63,1742},{16,62,546},{17,55,19},{16,51,574},{10,63,1638},{12,51,429},{21,63,506},{20,63,29},{21,55,34},{20,54,41},{31,24,1514},{15,63,461},{17,55,18},{13,51,426},{29,42,1514},{13,51,426},{19,63,434},{19,63,434},{19,63,434}, +{19,54,434},{17,62,339},{17,56,17},{17,56,17},{17,47,26},{10,59,341},{13,48,38},{20,60,9},{20,60,9},{20,60,9},{20,52,10},{31,15,338},{16,57,2},{16,57,2},{15,48,1},{31,34,338},{15,48,1},{31,32,421},{20,63,20},{22,55,4},{18,55,0},{31,32,421},{31,45,421},{18,55,0},{0,51,425},{31,45,421},{0,51,425},{19,0,433},{19,0,433},{19,0,433},{19,0,433},{17,53,1}, +{17,53,1},{17,53,1},{17,46,1},{12,51,4},{12,51,4},{21,63,1470},{20,63,561},{20,58,582},{20,57,461},{19,63,1895},{18,62,562},{18,57,19},{17,53,574},{12,63,1761},{13,53,429},{22,63,590},{21,63,59},{22,57,34},{21,56,41},{31,29,1514},{17,63,530},{18,57,18},{14,53,426},{30,44,1514},{14,53,426},{20,63,461},{20,63,461},{20,63,461},{20,55,446},{18,63,341},{18,58,17},{18,58,17}, +{18,49,19},{11,61,341},{14,50,38},{21,62,9},{21,62,9},{21,62,9},{21,54,10},{31,20,338},{17,59,2},{17,59,2},{16,50,1},{31,37,338},{16,50,1},{31,38,421},{21,63,50},{23,57,4},{19,57,0},{31,38,421},{31,48,421},{19,57,0},{0,53,425},{31,48,421},{0,53,425},{20,0,445},{20,0,445},{20,0,445},{20,0,445},{18,55,1},{18,55,1},{18,55,1},{18,48,2},{13,53,4}, 
+{13,53,4},{22,63,1674},{21,63,753},{21,60,586},{21,59,443},{21,63,2046},{19,63,629},{19,60,19},{19,55,577},{15,63,1917},{14,56,426},{24,63,674},{23,63,120},{23,60,29},{22,58,50},{31,35,1514},{19,63,629},{19,60,19},{14,56,425},{27,51,1514},{14,56,425},{21,63,497},{21,63,497},{21,63,497},{21,58,433},{19,63,388},{19,60,10},{19,60,10},{19,51,20},{12,63,347},{16,52,41},{23,61,17}, +{23,61,17},{23,61,17},{23,55,17},{31,26,338},{18,62,2},{18,62,2},{17,52,1},{30,42,338},{17,52,1},{31,44,421},{23,63,104},{24,59,5},{20,59,1},{31,44,421},{31,51,421},{20,59,1},{0,56,425},{31,51,421},{0,56,425},{21,0,433},{21,0,433},{21,0,433},{21,0,433},{19,58,0},{19,58,0},{19,58,0},{19,50,0},{14,56,1},{14,56,1},{23,63,1902},{22,63,995},{22,62,586}, +{22,61,443},{22,63,2235},{20,63,759},{20,61,15},{19,57,578},{17,63,2118},{15,58,426},{25,63,770},{24,63,250},{24,61,35},{23,60,50},{31,40,1514},{21,63,701},{20,61,14},{15,58,425},{31,49,1514},{15,58,425},{22,63,554},{22,63,554},{22,63,554},{22,60,433},{21,63,437},{20,61,14},{20,61,14},{20,53,26},{14,63,379},{17,54,41},{24,63,25},{24,63,25},{24,63,25},{23,59,17},{31,31,338}, +{19,63,4},{19,63,4},{18,54,1},{31,44,338},{18,54,1},{31,49,421},{25,63,169},{25,61,5},{21,61,1},{31,49,421},{30,56,421},{21,61,1},{0,58,425},{30,56,421},{0,58,425},{22,0,433},{22,0,433},{22,0,433},{22,0,433},{20,59,2},{20,59,2},{20,59,2},{20,52,1},{15,58,1},{15,58,1},{24,63,2045},{24,63,1233},{23,63,629},{23,63,442},{24,63,2360},{22,63,914},{21,63,14}, +{20,59,549},{19,63,2241},{16,60,401},{26,63,849},{25,63,395},{25,63,34},{24,62,41},{29,52,1459},{23,63,778},{21,63,13},{17,59,400},{28,56,1459},{17,59,400},{23,63,629},{23,63,629},{23,63,629},{23,62,433},{22,63,491},{21,63,14},{21,63,14},{21,55,26},{16,63,446},{18,56,41},{25,63,34},{25,63,34},{25,63,34},{24,60,10},{31,36,338},{21,63,13},{21,63,13},{19,56,1},{31,47,338}, 
+{19,56,1},{31,54,392},{27,63,218},{26,63,4},{22,63,0},{31,54,392},{29,60,392},{22,63,0},{0,59,400},{29,60,392},{0,59,400},{23,0,433},{23,0,433},{23,0,433},{23,0,433},{21,61,2},{21,61,2},{21,61,2},{21,54,1},{16,60,1},{16,60,1},{25,63,1767},{25,63,1167},{24,63,701},{24,63,449},{24,63,1976},{23,63,747},{22,63,66},{21,60,306},{20,63,1820},{17,61,217},{27,63,611}, +{26,63,317},{26,63,61},{25,63,10},{30,52,1064},{25,63,587},{23,63,41},{18,61,208},{28,58,1064},{18,61,208},{24,63,701},{24,63,701},{24,63,701},{24,63,449},{23,63,581},{22,63,66},{22,63,66},{22,57,26},{18,63,530},{19,58,41},{26,63,61},{26,63,61},{26,63,61},{25,62,10},{30,45,338},{23,63,41},{23,63,41},{20,58,1},{31,50,338},{20,58,1},{30,60,200},{28,63,106},{27,63,1}, +{25,63,1},{30,60,200},{31,59,200},{25,63,1},{0,60,208},{31,59,200},{0,60,208},{24,0,445},{24,0,445},{24,0,445},{24,0,445},{22,63,2},{22,63,2},{22,63,2},{22,56,1},{17,62,1},{17,62,1},{26,63,1542},{26,63,1122},{25,63,833},{25,63,497},{26,63,1647},{24,63,687},{24,63,203},{23,61,122},{22,63,1515},{19,62,78},{28,63,410},{28,63,266},{27,63,116},{27,63,20},{30,56,722}, +{26,63,402},{25,63,100},{20,62,65},{30,58,722},{20,62,65},{25,63,833},{25,63,833},{25,63,833},{25,63,497},{24,63,707},{24,63,203},{24,63,203},{23,60,17},{20,63,619},{19,61,46},{27,63,116},{27,63,116},{27,63,116},{27,63,20},{29,54,338},{25,63,100},{25,63,100},{22,60,2},{29,56,338},{22,60,2},{31,59,61},{29,63,37},{29,63,1},{27,63,4},{31,59,61},{31,61,61},{27,63,4}, +{0,62,65},{31,61,61},{0,62,65},{25,0,433},{25,0,433},{25,0,433},{25,0,433},{23,63,25},{23,63,25},{23,63,25},{23,58,1},{19,62,13},{19,62,13},{27,63,1406},{27,63,1134},{26,63,962},{26,63,602},{27,63,1454},{25,63,702},{25,63,341},{24,62,43},{24,63,1378},{20,63,35},{29,63,318},{28,63,250},{28,63,169},{28,63,61},{31,56,509},{28,63,313},{27,63,164},{22,63,10},{30,60,509}, 
+{22,63,10},{26,63,962},{26,63,962},{26,63,962},{26,63,602},{26,63,827},{25,63,341},{25,63,341},{24,62,27},{22,63,747},{20,63,35},{28,63,169},{28,63,169},{28,63,169},{28,63,61},{30,56,338},{27,63,164},{27,63,164},{23,62,2},{30,58,338},{23,62,2},{31,62,5},{31,63,9},{30,63,4},{30,63,0},{31,62,5},{31,62,9},{30,63,0},{0,63,9},{31,62,9},{0,63,9},{26,0,433}, +{26,0,433},{26,0,433},{26,0,433},{24,63,50},{24,63,50},{24,63,50},{24,60,1},{20,63,26},{20,63,26},{28,63,1135},{28,63,991},{27,63,874},{27,63,602},{28,63,1162},{26,63,618},{26,63,362},{25,63,5},{25,63,1087},{22,63,58},{30,63,219},{29,63,161},{29,63,125},{29,63,61},{30,62,294},{29,63,193},{28,63,117},{24,63,1},{31,60,297},{24,63,1},{27,63,874},{27,63,874},{27,63,874}, +{27,63,602},{27,63,730},{26,63,362},{26,63,362},{25,63,5},{24,63,681},{22,63,58},{29,63,125},{29,63,125},{29,63,125},{29,63,61},{31,56,221},{28,63,117},{28,63,117},{24,63,1},{31,59,221},{24,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{27,0,433},{27,0,433},{27,0,433},{27,0,433},{25,63,101}, +{25,63,101},{25,63,101},{25,62,1},{22,63,58},{22,63,58},{29,63,885},{28,63,751},{28,63,670},{28,63,526},{28,63,778},{27,63,483},{27,63,314},{26,63,10},{26,63,777},{24,63,117},{30,63,75},{30,63,59},{30,63,50},{30,63,34},{31,60,114},{29,63,81},{29,63,45},{27,63,0},{31,61,114},{27,63,0},{28,63,670},{28,63,670},{28,63,670},{28,63,526},{28,63,553},{27,63,314},{27,63,314}, +{26,63,10},{25,63,518},{24,63,117},{30,63,50},{30,63,50},{30,63,50},{30,63,34},{31,59,85},{29,63,45},{29,63,45},{27,63,0},{30,62,85},{27,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{28,0,445},{28,0,445},{28,0,445},{28,0,445},{27,63,145},{27,63,145},{27,63,145},{26,63,10},{24,63,117}, 
+{24,63,117},{0,26,882},{0,21,218},{0,15,16},{0,13,260},{0,17,1899},{0,13,1341},{0,11,593},{0,8,1380},{0,9,2113},{0,7,1513},{0,26,882},{0,21,218},{0,15,16},{0,13,260},{4,4,1896},{0,13,1341},{0,11,593},{0,8,1380},{7,2,1896},{0,8,1380},{0,12,0},{0,12,0},{0,12,0},{0,7,1},{0,6,162},{0,5,85},{0,5,85},{0,3,85},{0,3,186},{0,3,101},{0,12,0}, +{0,12,0},{0,12,0},{0,7,1},{1,3,162},{0,5,85},{0,5,85},{0,3,85},{3,0,162},{0,3,85},{6,6,882},{0,21,218},{0,15,16},{0,13,260},{6,6,882},{12,1,882},{0,13,260},{0,10,884},{12,1,882},{0,10,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,31,884},{0,25,146},{0,17,2}, +{0,15,185},{0,21,2355},{0,17,1539},{0,13,653},{0,10,1605},{0,11,2667},{0,9,1777},{0,31,884},{0,25,146},{0,17,2},{0,15,185},{5,4,2355},{0,17,1539},{0,13,653},{0,10,1605},{7,4,2355},{0,10,1605},{0,17,1},{0,17,1},{0,17,1},{0,10,1},{0,9,338},{0,8,180},{0,8,180},{0,5,180},{0,5,389},{0,5,229},{0,17,1},{0,17,1},{0,17,1},{0,10,1},{2,2,338}, +{0,8,180},{0,8,180},{0,5,180},{2,3,338},{0,5,180},{9,1,882},{0,25,146},{0,17,2},{0,15,185},{9,1,882},{13,3,882},{0,15,185},{0,12,884},{13,3,882},{0,12,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,36,884},{0,29,90},{0,19,37},{0,17,130},{0,25,2899},{0,19,1764},{0,16,733}, +{0,11,1853},{0,13,3325},{0,11,2109},{0,36,884},{0,29,90},{0,19,37},{0,17,130},{7,1,2899},{0,19,1764},{0,16,733},{0,11,1853},{12,0,2899},{0,11,1853},{0,23,0},{0,23,0},{0,23,0},{0,14,1},{0,11,580},{0,11,305},{0,11,305},{0,6,325},{0,6,667},{0,5,389},{0,23,0},{0,23,0},{0,23,0},{0,14,1},{2,5,578},{0,11,305},{0,11,305},{0,6,325},{5,1,578}, +{0,6,325},{10,3,882},{0,29,90},{1,19,2},{0,17,130},{10,3,882},{18,0,882},{0,17,130},{0,14,884},{18,0,882},{0,14,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,42,918},{0,32,81},{1,21,105},{0,20,109},{0,29,3051},{0,21,1707},{0,19,569},{0,13,1800},{0,16,3672},{0,13,2161},{1,38,888}, 
+{0,32,81},{1,21,41},{0,20,109},{8,2,3048},{0,21,1707},{0,19,569},{0,13,1800},{12,3,3048},{0,13,1800},{0,28,36},{0,28,36},{0,28,36},{0,17,37},{0,16,648},{0,13,269},{0,13,269},{0,9,292},{0,8,824},{0,7,417},{1,25,4},{1,25,4},{1,25,4},{1,16,5},{3,6,648},{0,13,269},{0,13,269},{0,9,292},{8,0,648},{0,9,292},{12,1,882},{0,32,45},{2,21,2}, +{0,20,73},{12,1,882},{19,2,882},{0,20,73},{0,16,890},{19,2,882},{0,16,890},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,4,0},{0,4,0},{0,4,0},{0,2,1},{0,2,10},{0,2,10},{1,44,997},{1,34,154},{1,24,229},{1,22,178},{0,35,3048},{0,25,1528},{0,21,324},{0,16,1605},{0,19,3907},{0,15,2138},{2,41,883},{2,32,86},{2,24,42},{1,22,114},{9,5,3048}, +{0,25,1528},{0,21,324},{0,16,1605},{15,3,3048},{0,16,1605},{1,31,113},{1,31,113},{1,31,113},{1,19,113},{0,22,648},{0,17,164},{0,17,164},{0,11,193},{0,11,976},{0,9,443},{2,27,2},{2,27,2},{2,27,2},{2,18,1},{6,2,648},{0,17,164},{0,17,164},{0,11,193},{11,0,648},{0,11,193},{13,4,882},{0,36,13},{3,23,4},{0,22,32},{13,4,882},{22,2,882},{0,22,32}, +{0,18,884},{22,2,882},{0,18,884},{1,0,113},{1,0,113},{1,0,113},{1,0,113},{0,10,0},{0,10,0},{0,10,0},{0,6,0},{0,5,58},{0,5,58},{1,49,1173},{1,38,302},{2,26,421},{1,24,321},{0,40,3051},{0,29,1380},{0,23,186},{0,18,1464},{0,21,4201},{0,17,2149},{3,43,883},{3,34,86},{3,26,42},{2,24,114},{12,0,3048},{0,29,1380},{0,23,186},{0,18,1464},{20,0,3048}, +{0,18,1464},{1,36,289},{1,36,289},{1,36,289},{1,22,290},{0,27,650},{0,21,100},{0,21,100},{0,13,130},{0,14,1161},{0,13,491},{3,29,2},{3,29,2},{3,29,2},{3,20,1},{8,0,648},{0,21,100},{0,21,100},{0,13,130},{11,3,648},{0,13,130},{14,6,882},{0,40,1},{4,25,1},{0,25,16},{14,6,882},{26,0,882},{0,25,16},{0,20,884},{26,0,882},{0,20,884},{1,0,289}, 
+{1,0,289},{1,0,289},{1,0,289},{0,15,1},{0,15,1},{0,15,1},{0,9,1},{0,7,136},{0,7,136},{2,51,1365},{2,40,494},{2,28,722},{2,26,513},{0,46,3048},{0,32,1269},{0,27,82},{0,20,1341},{0,25,4525},{0,19,2197},{4,45,886},{3,38,90},{4,28,36},{3,26,114},{13,2,3048},{0,32,1269},{0,27,82},{0,20,1341},{21,2,3048},{0,20,1341},{2,38,481},{2,38,481},{2,38,481}, +{2,24,482},{0,32,650},{0,25,52},{0,25,52},{0,15,85},{0,17,1345},{0,15,569},{4,31,4},{4,31,4},{4,31,4},{4,22,5},{9,2,648},{0,25,52},{0,25,52},{0,15,85},{16,0,648},{0,15,85},{16,4,882},{1,42,1},{5,27,1},{0,27,1},{16,4,882},{27,2,882},{0,27,1},{0,22,884},{27,2,882},{0,22,884},{2,0,481},{2,0,481},{2,0,481},{2,0,481},{0,21,0}, +{0,21,0},{0,21,0},{0,12,1},{0,9,250},{0,9,250},{2,57,1667},{2,44,786},{3,30,1042},{2,29,801},{0,51,3048},{0,36,1157},{0,29,20},{0,22,1236},{0,27,4891},{0,21,2281},{5,47,886},{4,41,86},{5,30,36},{4,28,116},{14,4,3048},{0,36,1157},{0,29,20},{0,22,1236},{25,0,3048},{0,22,1236},{2,43,785},{2,43,785},{2,43,785},{2,27,786},{0,38,648},{0,29,20},{0,29,20}, +{0,18,40},{0,19,1594},{0,16,677},{5,33,4},{5,33,4},{5,33,4},{5,24,5},{11,1,648},{0,29,20},{0,29,20},{0,18,40},{17,2,648},{0,18,40},{17,6,882},{2,44,1},{6,29,1},{1,29,1},{17,6,882},{28,4,882},{1,29,1},{0,24,884},{28,4,882},{0,24,884},{2,0,785},{2,0,785},{2,0,785},{2,0,785},{0,26,0},{0,26,0},{0,26,0},{0,16,1},{0,11,400}, +{0,11,400},{3,59,1784},{3,46,901},{4,32,1195},{3,31,910},{1,53,3055},{0,40,1094},{1,31,23},{0,25,1175},{0,31,4840},{0,23,2054},{6,49,883},{6,41,81},{6,32,42},{5,30,123},{14,10,3048},{0,40,1058},{1,31,19},{0,25,1139},{28,0,3048},{0,25,1139},{3,46,900},{3,46,900},{3,46,900},{3,30,900},{1,40,654},{1,31,22},{1,31,22},{1,20,38},{0,23,1560},{0,19,533},{6,35,2}, 
+{6,35,2},{6,35,2},{6,26,1},{13,0,648},{0,33,5},{0,33,5},{0,20,13},{20,2,648},{0,20,13},{19,5,882},{3,46,1},{7,31,5},{2,31,1},{19,5,882},{31,4,882},{2,31,1},{0,26,882},{31,4,882},{0,26,882},{3,0,900},{3,0,900},{3,0,900},{3,0,900},{1,29,4},{1,29,4},{1,29,4},{1,18,4},{0,15,377},{0,15,377},{4,61,1772},{4,48,891},{5,34,1195}, +{4,33,906},{2,55,3055},{1,42,1094},{2,33,29},{1,27,1175},{0,34,4609},{0,26,1716},{7,51,883},{7,43,81},{7,34,42},{6,32,114},{18,1,3048},{0,44,990},{2,33,25},{0,27,1058},{29,2,3048},{0,27,1058},{4,47,891},{4,47,891},{4,47,891},{4,32,891},{2,42,654},{2,33,29},{2,33,29},{2,22,38},{0,27,1396},{0,21,347},{7,37,2},{7,37,2},{7,37,2},{7,28,1},{14,2,648}, +{0,37,1},{0,37,1},{0,23,5},{24,0,648},{0,23,5},{22,0,882},{4,48,1},{8,33,1},{3,33,0},{22,0,882},{31,7,882},{3,33,0},{0,28,882},{31,7,882},{0,28,882},{4,0,890},{4,0,890},{4,0,890},{4,0,890},{2,31,4},{2,31,4},{2,31,4},{2,20,4},{0,19,260},{0,19,260},{5,63,1772},{5,50,891},{6,36,1195},{5,35,906},{3,57,3055},{2,44,1094},{3,35,29}, +{2,29,1175},{0,38,4381},{0,29,1436},{8,53,886},{7,47,89},{8,36,36},{7,34,114},{19,3,3048},{0,47,949},{3,35,25},{0,29,995},{30,4,3048},{0,29,995},{5,49,890},{5,49,890},{5,49,890},{5,33,891},{3,44,654},{3,35,29},{3,35,29},{3,24,38},{0,29,1251},{0,24,221},{8,39,4},{8,39,4},{8,39,4},{8,30,4},{16,0,648},{1,39,1},{1,39,1},{0,25,2},{26,1,648}, +{0,25,2},{23,2,882},{5,50,1},{9,35,1},{4,35,1},{23,2,882},{28,14,882},{4,35,1},{0,30,882},{28,14,882},{0,30,882},{5,0,890},{5,0,890},{5,0,890},{5,0,890},{3,32,5},{3,32,5},{3,32,5},{3,22,4},{0,23,180},{0,23,180},{6,63,1790},{6,52,891},{7,38,1195},{6,37,906},{4,59,3057},{3,46,1094},{4,37,29},{3,31,1175},{0,42,4185},{0,31,1206},{9,55,886}, 
+{8,49,86},{9,38,36},{8,36,116},{20,5,3048},{0,51,907},{4,37,20},{0,31,950},{31,6,3048},{0,31,950},{6,51,890},{6,51,890},{6,51,890},{6,35,891},{4,46,657},{4,37,29},{4,37,29},{3,26,49},{0,34,1121},{0,27,117},{9,41,4},{9,41,4},{9,41,4},{9,32,5},{17,2,648},{2,41,1},{2,41,1},{1,27,2},{27,3,648},{1,27,2},{24,4,882},{6,52,1},{10,37,1}, +{5,37,1},{24,4,882},{28,17,882},{5,37,1},{0,32,884},{28,17,882},{0,32,884},{6,0,890},{6,0,890},{6,0,890},{6,0,890},{4,34,9},{4,34,9},{4,34,9},{4,24,10},{0,27,116},{0,27,116},{8,63,1844},{7,54,901},{8,40,1188},{7,39,910},{5,62,3052},{4,48,1094},{5,39,23},{4,33,1175},{0,44,3969},{0,33,1039},{10,57,885},{10,49,81},{10,40,35},{9,38,123},{23,1,3048}, +{0,55,888},{5,39,19},{0,34,907},{29,12,3048},{0,34,907},{7,54,900},{7,54,900},{7,54,900},{7,38,900},{5,48,654},{5,39,22},{5,39,22},{5,28,45},{0,38,990},{0,29,80},{10,43,2},{10,43,2},{10,43,2},{10,34,1},{19,1,648},{3,43,1},{3,43,1},{3,29,1},{29,4,648},{3,29,1},{27,0,882},{7,54,1},{11,39,5},{6,39,1},{27,0,882},{31,17,882},{6,39,1}, +{0,34,882},{31,17,882},{0,34,882},{7,0,900},{7,0,900},{7,0,900},{7,0,900},{5,37,4},{5,37,4},{5,37,4},{5,26,4},{0,31,58},{0,31,58},{9,63,1886},{8,56,892},{9,42,1188},{8,41,900},{6,63,3055},{5,50,1094},{6,41,23},{5,35,1175},{0,49,3804},{0,36,935},{11,59,885},{11,51,81},{11,42,35},{10,40,123},{23,6,3048},{0,58,883},{6,41,19},{0,36,886},{30,14,3048}, +{0,36,886},{8,55,891},{8,55,891},{8,55,891},{8,40,891},{6,50,654},{6,41,22},{6,41,22},{6,30,45},{0,40,889},{1,31,80},{11,45,2},{11,45,2},{11,45,2},{11,36,1},{20,3,648},{5,43,2},{5,43,2},{4,31,1},{30,6,648},{4,31,1},{27,5,882},{8,56,2},{12,41,2},{7,41,1},{27,5,882},{31,20,882},{7,41,1},{0,36,882},{31,20,882},{0,36,882},{8,0,890}, 
+{8,0,890},{8,0,890},{8,0,890},{6,39,4},{6,39,4},{6,39,4},{6,28,4},{0,34,25},{0,34,25},{10,63,1964},{9,58,892},{10,44,1188},{9,43,900},{7,63,3100},{6,52,1094},{7,43,23},{6,37,1175},{0,51,3640},{0,38,887},{12,61,886},{11,55,89},{12,44,38},{11,42,123},{26,1,3048},{1,60,883},{7,43,19},{0,38,883},{30,17,3048},{0,38,883},{9,57,891},{9,57,891},{9,57,891}, +{9,42,891},{7,52,654},{7,43,22},{7,43,22},{7,32,38},{0,44,801},{2,34,86},{12,47,4},{12,47,4},{12,47,4},{12,38,4},{21,5,648},{6,45,2},{6,45,2},{4,33,2},{31,8,648},{4,33,2},{30,0,882},{9,58,2},{13,43,2},{8,43,1},{30,0,882},{28,27,882},{8,43,1},{0,38,882},{28,27,882},{0,38,882},{9,0,890},{9,0,890},{9,0,890},{9,0,890},{7,41,4}, +{7,41,4},{7,41,4},{7,30,4},{0,38,5},{0,38,5},{11,63,2078},{10,60,892},{11,46,1188},{10,45,900},{9,63,3181},{7,54,1094},{8,45,35},{7,39,1175},{0,55,3496},{1,40,887},{13,63,886},{12,56,88},{13,46,38},{12,44,110},{27,3,3048},{2,62,883},{7,46,25},{1,40,883},{31,19,3048},{1,40,883},{10,59,891},{10,59,891},{10,59,891},{10,44,891},{8,54,657},{8,45,34},{8,45,34}, +{7,34,49},{0,48,756},{3,36,86},{13,49,4},{13,49,4},{13,49,4},{13,40,4},{24,0,648},{6,49,1},{6,49,1},{5,35,2},{31,11,648},{5,35,2},{31,2,882},{10,60,2},{14,45,2},{9,45,1},{31,2,882},{29,29,882},{9,45,1},{0,40,882},{29,29,882},{0,40,882},{10,0,890},{10,0,890},{10,0,890},{10,0,890},{8,42,9},{8,42,9},{8,42,9},{8,32,10},{0,42,1}, +{0,42,1},{12,63,2228},{11,63,902},{12,48,1188},{11,47,908},{10,63,3256},{8,57,1095},{9,47,33},{8,41,1164},{0,59,3364},{2,42,889},{14,63,915},{14,57,90},{14,48,35},{13,47,117},{29,2,3048},{4,63,886},{8,48,25},{3,42,885},{29,25,3048},{3,42,885},{11,62,900},{11,62,900},{11,62,900},{11,46,900},{9,57,652},{9,47,29},{9,47,29},{9,36,45},{0,51,691},{4,37,80},{14,51,2}, 
+{14,51,2},{14,51,2},{14,42,2},{24,6,648},{7,51,1},{7,51,1},{7,37,1},{29,17,648},{7,37,1},{31,8,882},{11,63,2},{15,47,8},{10,47,5},{31,8,882},{31,30,882},{10,47,5},{0,42,884},{31,30,882},{0,42,884},{11,0,900},{11,0,900},{11,0,900},{11,0,900},{9,45,4},{9,45,4},{9,45,4},{9,34,4},{1,44,1},{1,44,1},{13,63,2414},{12,63,907},{13,50,1188}, +{12,49,900},{11,63,3391},{9,59,1095},{10,49,23},{9,43,1164},{0,63,3276},{3,44,889},{15,63,981},{15,59,90},{15,50,35},{14,48,123},{31,1,3048},{6,63,906},{10,49,19},{4,44,885},{30,27,3048},{4,44,885},{12,63,891},{12,63,891},{12,63,891},{12,48,891},{10,59,652},{10,49,22},{10,49,22},{10,38,45},{0,55,659},{5,39,80},{15,53,2},{15,53,2},{15,53,2},{15,44,2},{27,1,648}, +{9,51,2},{9,51,2},{8,39,1},{30,19,648},{8,39,1},{29,20,882},{12,63,17},{16,49,2},{11,49,1},{29,20,882},{31,33,882},{11,49,1},{0,44,884},{31,33,882},{0,44,884},{12,0,890},{12,0,890},{12,0,890},{12,0,890},{10,47,4},{10,47,4},{10,47,4},{10,36,4},{2,46,1},{2,46,1},{15,63,2606},{13,63,987},{14,52,1188},{13,51,900},{13,63,3517},{10,61,1095},{11,51,23}, +{10,45,1164},{1,63,3300},{4,46,892},{17,63,1014},{15,63,94},{16,52,38},{15,50,123},{31,6,3048},{8,63,936},{11,51,19},{5,46,885},{31,29,3048},{5,46,885},{13,63,906},{13,63,906},{13,63,906},{13,50,891},{11,61,652},{11,51,22},{11,51,22},{11,40,45},{0,59,651},{6,41,80},{16,55,4},{16,55,4},{16,55,4},{16,46,5},{28,3,648},{10,53,2},{10,53,2},{9,41,1},{31,21,648}, +{9,41,1},{30,22,882},{14,63,37},{17,51,2},{12,51,1},{30,22,882},{28,40,882},{12,51,1},{0,46,884},{28,40,882},{0,46,884},{13,0,890},{13,0,890},{13,0,890},{13,0,890},{11,49,4},{11,49,4},{11,49,4},{11,38,4},{3,48,0},{3,48,0},{16,63,2792},{15,63,1079},{15,54,1188},{14,53,900},{14,63,3652},{11,63,1095},{12,53,35},{11,47,1164},{3,63,3436},{5,48,887},{18,63,1080}, 
+{17,62,102},{17,54,38},{16,52,110},{30,15,3048},{10,63,996},{11,54,25},{5,48,883},{31,32,3048},{5,48,883},{14,63,939},{14,63,939},{14,63,939},{14,52,891},{12,62,657},{12,53,34},{12,53,34},{12,42,50},{1,61,651},{7,43,80},{17,57,4},{17,57,4},{17,57,4},{17,48,4},{29,5,648},{11,55,2},{11,55,2},{10,43,1},{31,24,648},{10,43,1},{31,24,882},{16,63,80},{18,53,2}, +{13,53,1},{31,24,882},{29,42,882},{13,53,1},{0,48,882},{29,42,882},{0,48,882},{14,0,890},{14,0,890},{14,0,890},{14,0,890},{12,50,9},{12,50,9},{12,50,9},{12,40,9},{4,50,1},{4,50,1},{17,63,3038},{16,63,1268},{16,57,1186},{15,55,908},{15,63,3879},{12,63,1146},{13,55,33},{12,49,1164},{5,63,3667},{6,50,889},{19,63,1205},{18,63,147},{18,56,41},{17,55,117},{31,17,3048}, +{12,63,1110},{12,56,20},{7,50,885},{29,38,3048},{7,50,885},{16,63,979},{16,63,979},{16,63,979},{15,54,900},{13,63,670},{13,55,29},{13,55,29},{13,45,41},{2,63,648},{8,46,81},{18,60,1},{18,60,1},{18,60,1},{18,50,2},{31,4,648},{11,59,4},{11,59,4},{11,45,1},{30,29,648},{11,45,1},{31,30,882},{18,63,146},{19,56,5},{14,55,5},{31,30,882},{31,43,882},{14,55,5}, +{0,50,884},{31,43,882},{0,50,884},{15,0,900},{15,0,900},{15,0,900},{15,0,900},{13,53,4},{13,53,4},{13,53,4},{13,42,5},{5,52,1},{5,52,1},{18,63,3308},{17,63,1502},{17,59,1186},{16,57,898},{17,63,4077},{14,63,1230},{14,57,33},{13,51,1164},{8,63,3820},{7,52,889},{21,63,1368},{19,63,261},{19,58,41},{18,57,117},{30,26,3048},{14,63,1226},{13,58,20},{8,52,885},{30,40,3048}, +{8,52,885},{17,63,1018},{17,63,1018},{17,63,1018},{16,56,890},{14,63,724},{14,57,29},{14,57,29},{14,47,41},{4,63,665},{9,48,88},{19,62,1},{19,62,1},{19,62,1},{19,52,2},{30,13,648},{12,61,1},{12,61,1},{12,47,1},{31,31,648},{12,47,1},{31,35,882},{20,63,193},{20,58,4},{15,57,5},{31,35,882},{27,51,882},{15,57,5},{0,52,884},{27,51,882},{0,52,884},{16,0,890}, 
+{16,0,890},{16,0,890},{16,0,890},{14,55,4},{14,55,4},{14,55,4},{14,44,5},{6,54,1},{6,54,1},{19,63,3614},{18,63,1804},{18,61,1186},{17,59,898},{18,63,4284},{15,63,1417},{15,59,33},{14,53,1164},{9,63,4036},{8,54,892},{22,63,1494},{20,63,405},{20,60,33},{19,59,117},{31,28,3048},{16,63,1395},{14,60,20},{9,54,885},{31,42,3048},{9,54,885},{18,63,1075},{18,63,1075},{18,63,1075}, +{17,58,890},{16,63,787},{15,59,29},{15,59,29},{15,48,45},{6,63,705},{10,49,80},{20,63,5},{20,63,5},{20,63,5},{20,54,5},{31,15,648},{13,63,1},{13,63,1},{13,49,1},{31,34,648},{13,49,1},{31,40,882},{22,63,277},{21,60,4},{16,59,5},{31,40,882},{31,49,882},{16,59,5},{0,54,884},{31,49,882},{0,54,884},{17,0,890},{17,0,890},{17,0,890},{17,0,890},{15,57,4}, +{15,57,4},{15,57,4},{15,46,5},{7,56,1},{7,56,1},{20,63,4014},{19,63,2174},{19,63,1186},{18,61,898},{19,63,4545},{17,63,1725},{16,62,39},{15,55,1164},{11,63,4300},{9,56,892},{23,63,1656},{22,63,585},{21,62,33},{20,60,108},{31,33,3048},{18,63,1563},{15,62,20},{10,56,885},{31,45,3048},{10,56,885},{19,63,1150},{19,63,1150},{19,63,1150},{18,60,890},{17,63,841},{16,62,35},{16,62,35}, +{16,50,50},{8,63,747},{11,51,80},{21,63,20},{21,63,20},{21,63,20},{21,56,5},{31,20,648},{15,63,5},{15,63,5},{14,51,1},{31,37,648},{14,51,1},{29,52,882},{23,63,397},{22,62,4},{17,61,5},{29,52,882},{28,56,882},{17,61,5},{0,56,884},{28,56,882},{0,56,884},{18,0,890},{18,0,890},{18,0,890},{18,0,890},{16,58,10},{16,58,10},{16,58,10},{16,48,9},{8,58,0}, +{8,58,0},{22,63,4123},{21,63,2404},{20,63,1278},{19,63,901},{20,63,4626},{18,63,1849},{17,63,38},{16,57,1006},{14,63,4330},{10,58,771},{24,63,1629},{23,63,715},{22,63,65},{22,62,81},{31,38,2814},{20,63,1505},{17,63,34},{11,58,761},{31,48,2814},{11,58,761},{20,63,1278},{20,63,1278},{20,63,1278},{19,62,901},{18,63,948},{17,63,38},{17,63,38},{17,53,41},{10,63,840},{12,54,81},{22,63,65}, 
+{22,63,65},{22,63,65},{22,59,2},{31,26,648},{17,63,34},{17,63,34},{15,53,1},{30,42,648},{15,53,1},{31,50,761},{25,63,425},{23,63,9},{19,63,1},{31,50,761},{31,55,761},{19,63,1},{0,58,761},{31,55,761},{0,58,761},{19,0,900},{19,0,900},{19,0,900},{19,0,900},{17,61,4},{17,61,4},{17,61,4},{17,50,5},{9,60,2},{9,60,2},{23,63,3735},{22,63,2314},{21,63,1395}, +{20,63,899},{22,63,4090},{19,63,1618},{18,63,104},{17,58,678},{15,63,3826},{11,59,507},{25,63,1285},{24,63,609},{23,63,122},{23,62,26},{30,45,2249},{21,63,1186},{19,63,74},{13,59,482},{31,50,2249},{13,59,482},{21,63,1395},{21,63,1395},{21,63,1395},{20,63,899},{19,63,1086},{18,63,104},{18,63,104},{18,55,41},{12,63,969},{13,56,81},{23,63,122},{23,63,122},{23,63,122},{23,61,2},{31,31,648}, +{19,63,74},{19,63,74},{16,55,1},{31,44,648},{16,55,1},{31,53,481},{27,63,269},{25,63,0},{21,63,0},{31,53,481},{31,57,481},{21,63,0},{0,59,481},{31,57,481},{0,59,481},{20,0,890},{20,0,890},{20,0,890},{20,0,890},{18,63,4},{18,63,4},{18,63,4},{18,52,5},{10,62,2},{10,62,2},{23,63,3399},{23,63,2260},{22,63,1530},{21,63,954},{23,63,3639},{20,63,1402},{19,63,238}, +{18,59,405},{17,63,3443},{13,60,297},{26,63,1009},{25,63,525},{25,63,164},{24,63,5},{29,52,1769},{23,63,918},{21,63,113},{14,60,266},{28,56,1769},{14,60,266},{22,63,1530},{22,63,1530},{22,63,1530},{21,63,954},{21,63,1251},{19,63,238},{19,63,238},{19,57,41},{14,63,1105},{14,58,81},{25,63,164},{25,63,164},{25,63,164},{24,62,5},{31,36,648},{21,63,113},{21,63,113},{17,57,1},{31,47,648}, +{17,57,1},{31,55,265},{28,63,145},{27,63,4},{24,63,1},{31,55,265},{30,60,265},{24,63,1},{0,60,265},{30,60,265},{0,60,265},{21,0,890},{21,0,890},{21,0,890},{21,0,890},{19,63,13},{19,63,13},{19,63,13},{19,54,5},{12,62,8},{12,62,8},{24,63,3069},{24,63,2257},{23,63,1683},{23,63,1054},{24,63,3258},{22,63,1330},{21,63,378},{19,61,213},{20,63,3102},{14,61,166},{27,63,801}, 
+{26,63,477},{26,63,221},{25,63,20},{30,52,1374},{24,63,758},{23,63,181},{16,61,114},{28,58,1374},{16,61,114},{23,63,1683},{23,63,1683},{23,63,1683},{23,63,1054},{22,63,1401},{21,63,378},{21,63,378},{19,59,46},{16,63,1296},{15,60,81},{26,63,221},{26,63,221},{26,63,221},{25,63,20},{30,45,648},{23,63,181},{23,63,181},{18,59,1},{31,50,648},{18,59,1},{31,58,113},{29,63,61},{28,63,0}, +{26,63,1},{31,58,113},{31,60,113},{26,63,1},{0,61,113},{31,60,113},{0,61,113},{22,0,890},{22,0,890},{22,0,890},{22,0,890},{20,63,45},{20,63,45},{20,63,45},{20,56,10},{13,63,25},{13,63,25},{25,63,2860},{25,63,2260},{24,63,1854},{24,63,1210},{25,63,2932},{23,63,1310},{22,63,609},{20,62,81},{21,63,2731},{16,62,94},{28,63,630},{27,63,475},{27,63,306},{26,63,101},{30,56,1032}, +{26,63,612},{25,63,290},{18,62,21},{30,58,1032},{18,62,21},{24,63,1854},{24,63,1854},{24,63,1854},{24,63,1210},{23,63,1620},{22,63,609},{22,63,609},{21,61,44},{18,63,1515},{16,62,78},{27,63,306},{27,63,306},{27,63,306},{26,63,101},{29,54,648},{25,63,290},{25,63,290},{19,61,5},{29,56,648},{19,61,5},{31,61,18},{30,63,10},{30,63,1},{29,63,0},{31,61,18},{31,62,18},{29,63,0}, +{0,62,20},{31,62,18},{0,62,20},{23,0,900},{23,0,900},{23,0,900},{23,0,900},{21,63,104},{21,63,104},{21,63,104},{21,59,5},{15,63,65},{15,63,65},{26,63,2626},{26,63,2206},{25,63,1915},{25,63,1315},{26,63,2641},{24,63,1333},{23,63,789},{22,63,40},{22,63,2445},{17,63,116},{29,63,524},{28,63,406},{28,63,325},{27,63,170},{31,56,771},{27,63,507},{26,63,320},{20,63,0},{30,60,771}, +{20,63,0},{25,63,1915},{25,63,1915},{25,63,1915},{25,63,1315},{24,63,1661},{23,63,789},{23,63,789},{22,62,29},{20,63,1517},{17,63,116},{28,63,325},{28,63,325},{28,63,325},{27,63,170},{31,52,578},{26,63,320},{26,63,320},{20,63,0},{28,60,578},{20,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{24,0,890}, 
+{24,0,890},{24,0,890},{24,0,890},{23,63,164},{23,63,164},{23,63,164},{22,61,5},{17,63,116},{17,63,116},{27,63,2156},{27,63,1884},{26,63,1630},{26,63,1210},{26,63,2081},{25,63,1108},{24,63,705},{23,63,5},{23,63,1927},{19,63,180},{29,63,300},{29,63,236},{29,63,200},{28,63,85},{31,57,451},{28,63,283},{27,63,194},{23,63,1},{29,62,451},{23,63,1},{26,63,1630},{26,63,1630},{26,63,1630}, +{26,63,1210},{25,63,1347},{24,63,705},{24,63,705},{23,63,5},{22,63,1229},{19,63,180},{29,63,200},{29,63,200},{29,63,200},{28,63,85},{30,58,338},{27,63,194},{27,63,194},{23,63,1},{31,58,338},{23,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{25,0,890},{25,0,890},{25,0,890},{25,0,890},{24,63,221}, +{24,63,221},{24,63,221},{23,63,5},{19,63,180},{19,63,180},{28,63,1782},{27,63,1564},{27,63,1395},{27,63,1123},{27,63,1620},{26,63,937},{25,63,651},{24,63,25},{24,63,1560},{21,63,233},{30,63,150},{30,63,134},{29,63,104},{29,63,40},{31,59,216},{29,63,136},{28,63,90},{25,63,1},{30,62,216},{25,63,1},{27,63,1395},{27,63,1395},{27,63,1395},{27,63,1123},{26,63,1101},{25,63,651},{25,63,651}, +{24,63,25},{23,63,998},{21,63,233},{29,63,104},{29,63,104},{29,63,104},{29,63,40},{31,57,162},{28,63,90},{28,63,90},{25,63,1},{29,62,162},{25,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{26,0,890},{26,0,890},{26,0,890},{26,0,890},{25,63,290},{25,63,290},{25,63,290},{24,63,25},{21,63,233}, +{21,63,233},{0,34,1570},{0,27,400},{0,19,25},{0,16,481},{0,23,3372},{0,17,2355},{0,15,1053},{0,11,2425},{0,13,3753},{0,11,2681},{0,34,1570},{0,27,400},{0,19,25},{0,16,481},{7,0,3371},{0,17,2355},{0,15,1053},{0,11,2425},{10,2,3371},{0,11,2425},{0,16,0},{0,16,0},{0,16,0},{0,10,1},{0,8,288},{0,7,149},{0,7,149},{0,5,160},{0,4,332},{0,4,200},{0,16,0}, 
+{0,16,0},{0,16,0},{0,10,1},{1,5,288},{0,7,149},{0,7,149},{0,5,160},{4,0,288},{0,5,160},{10,1,1568},{0,27,400},{0,19,25},{0,16,481},{10,1,1568},{17,0,1568},{0,16,481},{0,13,1568},{17,0,1568},{0,13,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,40,1568},{0,31,296},{0,22,2}, +{0,20,373},{0,27,3968},{0,21,2627},{0,17,1107},{0,13,2720},{0,15,4479},{0,11,3065},{0,40,1568},{0,31,296},{0,22,2},{0,20,373},{7,3,3968},{0,21,2627},{0,17,1107},{0,13,2720},{13,0,3968},{0,13,2720},{0,21,1},{0,21,1},{0,21,1},{0,13,0},{0,11,512},{0,9,269},{0,9,269},{0,5,288},{0,5,593},{0,5,337},{0,21,1},{0,21,1},{0,21,1},{0,13,0},{3,1,512}, +{0,9,269},{0,9,269},{0,5,288},{3,3,512},{0,5,288},{11,3,1568},{0,31,296},{0,22,2},{0,20,373},{11,3,1568},{18,2,1568},{0,20,373},{0,15,1568},{18,2,1568},{0,15,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,45,1568},{0,34,221},{0,24,17},{0,22,274},{0,30,4651},{0,23,2924},{0,19,1209}, +{0,15,3065},{0,17,5292},{0,13,3465},{0,45,1568},{0,34,221},{0,24,17},{0,22,274},{8,3,4651},{0,23,2924},{0,19,1209},{0,15,3065},{14,1,4651},{0,15,3065},{0,27,0},{0,27,0},{0,27,0},{0,16,0},{0,13,802},{0,12,424},{0,12,424},{0,7,433},{0,7,918},{0,7,533},{0,27,0},{0,27,0},{0,27,0},{0,16,0},{4,0,800},{0,12,424},{0,12,424},{0,7,433},{6,1,800}, +{0,7,433},{13,1,1568},{0,34,221},{1,24,2},{0,22,274},{13,1,1568},{19,4,1568},{0,22,274},{0,17,1570},{19,4,1568},{0,17,1570},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,50,1568},{0,38,145},{0,26,82},{0,24,193},{0,34,5419},{0,25,3275},{0,21,1347},{0,16,3410},{0,17,6220},{0,15,3933},{0,50,1568}, +{0,38,145},{1,26,81},{0,24,193},{10,0,5419},{0,25,3275},{0,21,1347},{0,16,3410},{15,2,5419},{0,16,3410},{0,32,0},{0,32,0},{0,32,0},{0,19,0},{0,16,1152},{0,15,605},{0,15,605},{0,9,628},{0,8,1328},{0,7,789},{0,32,0},{0,32,0},{0,32,0},{0,19,0},{3,6,1152},{0,15,605},{0,15,605},{0,9,628},{8,0,1152},{0,9,628},{15,0,1568},{0,38,145},{2,26,2}, 
+{0,24,193},{15,0,1568},{24,1,1568},{0,24,193},{0,19,1570},{24,1,1568},{0,19,1570},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,53,1633},{0,42,166},{1,29,162},{0,26,209},{0,40,5419},{0,29,3012},{0,25,964},{0,18,3152},{0,21,6513},{0,17,3861},{1,53,1569},{1,42,138},{2,28,74},{1,26,186},{11,3,5419}, +{0,29,3012},{0,25,964},{0,18,3152},{18,2,5419},{0,18,3152},{1,34,66},{1,34,66},{1,34,66},{1,21,66},{0,22,1152},{0,19,442},{0,19,442},{0,11,493},{0,11,1480},{0,11,749},{1,34,2},{1,34,2},{1,34,2},{1,21,2},{6,2,1152},{0,19,442},{0,19,442},{0,11,493},{11,0,1152},{0,11,493},{16,2,1568},{0,42,85},{3,28,0},{0,26,128},{16,2,1568},{27,1,1568},{0,26,128}, +{0,21,1568},{27,1,1568},{0,21,1568},{1,0,65},{1,0,65},{1,0,65},{1,0,65},{0,6,0},{0,6,0},{0,6,0},{0,4,1},{0,3,20},{0,3,20},{1,58,1713},{1,44,230},{2,31,354},{1,28,273},{0,45,5419},{0,32,2817},{0,27,682},{0,20,2945},{0,25,6853},{0,19,3825},{2,55,1569},{2,44,138},{3,30,74},{2,28,186},{13,1,5419},{0,32,2817},{0,27,682},{0,20,2945},{19,4,5419}, +{0,20,2945},{1,40,145},{1,40,145},{1,40,145},{1,25,146},{0,27,1154},{0,23,338},{0,23,338},{0,13,394},{0,14,1665},{0,13,755},{2,36,2},{2,36,2},{2,36,2},{2,23,2},{8,0,1152},{0,23,338},{0,23,338},{0,13,394},{11,3,1152},{0,13,394},{17,4,1568},{0,46,41},{4,30,1},{0,29,80},{17,4,1568},{27,4,1568},{0,29,80},{0,23,1568},{27,4,1568},{0,23,1568},{1,0,145}, +{1,0,145},{1,0,145},{1,0,145},{0,11,1},{0,11,1},{0,11,1},{0,7,0},{0,5,74},{0,5,74},{2,60,1905},{1,49,393},{2,33,531},{1,31,433},{0,50,5419},{0,36,2609},{0,29,468},{0,22,2756},{0,27,7195},{0,21,3825},{3,57,1569},{3,46,138},{4,32,81},{3,30,186},{15,0,5419},{0,36,2609},{0,29,468},{0,22,2756},{24,1,5419},{0,22,2756},{2,42,337},{2,42,337},{2,42,337}, 
+{2,27,338},{0,32,1154},{0,25,244},{0,25,244},{0,16,289},{0,17,1849},{0,15,797},{3,38,2},{3,38,2},{3,38,2},{3,25,2},{9,2,1152},{0,25,244},{0,25,244},{0,16,289},{16,0,1152},{0,16,289},{18,6,1568},{0,51,16},{5,32,2},{0,31,41},{18,6,1568},{28,6,1568},{0,31,41},{0,25,1568},{28,6,1568},{0,25,1568},{2,0,337},{2,0,337},{2,0,337},{2,0,337},{0,17,0}, +{0,17,0},{0,17,0},{0,10,0},{0,7,164},{0,7,164},{2,63,2145},{2,51,585},{3,35,851},{2,33,618},{0,55,5420},{0,40,2425},{0,32,274},{0,24,2585},{0,29,7609},{0,23,3861},{4,58,1570},{4,47,136},{5,34,81},{4,32,193},{16,1,5419},{0,40,2425},{0,32,274},{0,24,2585},{25,3,5419},{0,24,2585},{2,47,545},{2,47,545},{2,47,545},{2,30,545},{0,38,1152},{0,29,164},{0,29,164}, +{0,18,208},{0,19,2098},{0,17,869},{4,40,0},{4,40,0},{4,40,0},{4,27,1},{11,1,1152},{0,29,164},{0,29,164},{0,18,208},{17,2,1152},{0,18,208},{21,1,1568},{0,54,2},{6,34,2},{0,33,13},{21,1,1568},{29,8,1568},{0,33,13},{0,27,1568},{29,8,1568},{0,27,1568},{2,0,545},{2,0,545},{2,0,545},{2,0,545},{0,22,0},{0,22,0},{0,22,0},{0,13,1},{0,10,289}, +{0,10,289},{3,63,2596},{3,53,934},{3,37,1277},{2,35,964},{0,61,5420},{0,44,2242},{0,35,141},{0,28,2402},{0,32,8131},{0,25,3956},{5,61,1569},{5,50,138},{6,36,74},{5,34,186},{17,4,5419},{0,44,2242},{0,35,141},{0,28,2402},{27,4,5419},{0,28,2402},{3,49,901},{3,49,901},{3,49,901},{3,32,900},{0,44,1152},{0,34,97},{0,34,97},{0,20,145},{0,23,2436},{0,19,989},{5,43,1}, +{5,43,1},{5,43,1},{5,30,2},{13,0,1152},{0,34,97},{0,34,97},{0,20,145},{20,2,1152},{0,20,145},{23,0,1568},{1,57,2},{7,36,0},{0,36,1},{23,0,1568},{31,9,1568},{0,36,1},{0,29,1570},{31,9,1568},{0,29,1570},{3,0,900},{3,0,900},{3,0,900},{3,0,900},{0,28,0},{0,28,0},{0,28,0},{0,17,1},{0,13,461},{0,13,461},{4,63,3146},{3,57,1262},{4,40,1731}, 
+{3,37,1284},{1,63,5484},{0,46,2129},{0,38,53},{0,30,2243},{0,36,8615},{0,27,4070},{6,63,1569},{6,52,138},{7,38,74},{6,36,186},{18,6,5419},{0,46,2129},{0,38,53},{0,30,2243},{28,6,5419},{0,30,2243},{3,55,1252},{3,55,1252},{3,55,1252},{3,35,1253},{0,49,1152},{0,38,53},{0,38,53},{0,23,89},{0,25,2763},{0,21,1139},{6,45,1},{6,45,1},{6,45,1},{6,32,2},{14,2,1152}, +{0,38,53},{0,38,53},{0,23,89},{24,0,1152},{0,23,89},{24,2,1568},{2,59,2},{8,38,1},{1,38,1},{24,2,1568},{27,17,1568},{1,38,1},{0,31,1570},{27,17,1568},{0,31,1570},{3,0,1252},{3,0,1252},{3,0,1252},{3,0,1252},{0,33,0},{0,33,0},{0,33,0},{0,20,0},{0,15,653},{0,15,653},{5,63,3716},{4,58,1599},{4,42,2134},{4,39,1627},{2,63,5655},{0,51,1983},{0,40,33}, +{0,32,2133},{0,38,8925},{0,30,4044},{7,63,1587},{7,54,138},{8,40,80},{7,38,186},{21,1,5419},{0,51,1979},{0,40,29},{0,32,2129},{29,8,5419},{0,32,2129},{4,56,1587},{4,56,1587},{4,56,1587},{4,37,1586},{0,54,1156},{0,41,29},{0,41,29},{0,25,54},{0,27,2988},{0,23,1193},{7,47,1},{7,47,1},{7,47,1},{7,33,2},{16,0,1152},{0,41,25},{0,41,25},{0,25,50},{26,1,1152}, +{0,25,50},{25,4,1568},{3,61,2},{9,40,1},{2,40,1},{25,4,1568},{28,19,1568},{2,40,1},{0,33,1568},{28,19,1568},{0,33,1568},{4,0,1586},{4,0,1586},{4,0,1586},{4,0,1586},{0,38,5},{0,38,5},{0,38,5},{0,23,4},{0,17,754},{0,17,754},{6,63,3890},{5,60,1599},{5,44,2134},{5,41,1627},{3,63,5748},{0,55,1975},{1,42,33},{0,33,2085},{0,42,8569},{0,32,3525},{9,63,1634}, +{8,55,136},{9,42,80},{8,40,208},{22,3,5419},{0,55,1875},{1,42,29},{0,33,1985},{30,10,5419},{0,33,1985},{5,58,1587},{5,58,1587},{5,58,1587},{5,39,1586},{1,56,1156},{1,43,29},{1,43,29},{1,27,54},{0,31,2736},{0,25,907},{8,48,0},{8,48,0},{8,48,0},{8,35,1},{17,2,1152},{0,45,5},{0,45,5},{0,28,17},{27,3,1152},{0,28,17},{26,6,1568},{4,62,1},{10,42,1}, 
+{3,42,1},{26,6,1568},{29,21,1568},{3,42,1},{0,35,1568},{29,21,1568},{0,35,1568},{5,0,1586},{5,0,1586},{5,0,1586},{5,0,1586},{1,40,5},{1,40,5},{1,40,5},{1,25,4},{0,21,610},{0,21,610},{7,63,4136},{6,63,1589},{7,46,2141},{6,44,1613},{4,63,5895},{2,55,1973},{2,45,43},{2,35,2100},{0,46,8199},{0,34,3051},{10,63,1667},{9,57,131},{10,45,74},{9,43,195},{24,2,5419}, +{0,59,1772},{2,45,34},{0,36,1874},{27,17,5419},{0,36,1874},{6,61,1576},{6,61,1576},{6,61,1576},{6,41,1577},{2,59,1161},{2,46,26},{2,46,26},{2,30,49},{0,36,2505},{0,29,616},{9,51,1},{9,51,1},{9,51,1},{9,38,2},{19,1,1152},{0,49,2},{0,49,2},{0,30,4},{29,4,1152},{0,30,4},{28,5,1568},{6,63,13},{11,44,1},{4,44,1},{28,5,1568},{31,22,1568},{4,44,1}, +{0,37,1570},{31,22,1568},{0,37,1570},{6,0,1576},{6,0,1576},{6,0,1576},{6,0,1576},{2,43,9},{2,43,9},{2,43,9},{2,27,10},{0,25,468},{0,25,468},{8,63,4436},{7,63,1625},{7,49,2161},{7,46,1613},{6,63,6079},{3,57,1973},{3,47,43},{3,37,2100},{0,49,7908},{0,37,2671},{11,63,1745},{10,59,131},{11,47,74},{10,45,195},{25,4,5419},{0,61,1699},{3,47,34},{0,38,1787},{28,19,5419}, +{0,38,1787},{7,63,1576},{7,63,1576},{7,63,1576},{7,43,1577},{3,61,1161},{3,47,34},{3,47,34},{3,32,59},{0,38,2294},{0,31,422},{10,53,1},{10,53,1},{10,53,1},{10,40,2},{20,3,1152},{2,49,2},{2,49,2},{1,32,2},{30,6,1152},{1,32,2},{31,0,1568},{8,63,34},{12,46,1},{5,46,1},{31,0,1568},{28,29,1568},{5,46,1},{0,39,1570},{28,29,1568},{0,39,1570},{7,0,1576}, +{7,0,1576},{7,0,1576},{7,0,1576},{3,45,9},{3,45,9},{3,45,9},{3,29,10},{0,29,356},{0,29,356},{9,63,4730},{8,63,1716},{8,50,2134},{8,47,1627},{7,63,6244},{3,61,1977},{4,48,33},{3,40,2100},{0,53,7620},{0,40,2343},{12,63,1832},{11,61,131},{12,48,80},{11,47,195},{26,6,5419},{1,63,1699},{4,48,29},{0,41,1714},{29,21,5419},{0,41,1714},{8,63,1595},{8,63,1595},{8,63,1595}, 
+{8,45,1587},{4,62,1158},{4,49,29},{4,49,29},{4,33,54},{0,42,2098},{0,34,282},{11,55,1},{11,55,1},{11,55,1},{11,42,2},{21,5,1152},{3,51,2},{3,51,2},{2,34,2},{31,8,1152},{2,34,2},{30,9,1568},{9,63,68},{13,48,1},{6,48,1},{30,9,1568},{29,31,1568},{6,48,1},{0,41,1570},{29,31,1568},{0,41,1570},{8,0,1586},{8,0,1586},{8,0,1586},{8,0,1586},{4,47,4}, +{4,47,4},{4,47,4},{4,31,5},{0,32,269},{0,32,269},{11,63,5010},{9,63,1878},{9,52,2134},{9,49,1627},{8,63,6508},{4,62,1965},{5,50,33},{4,42,2079},{0,55,7360},{0,42,2067},{14,63,1952},{12,63,149},{13,50,80},{12,48,208},{29,1,5419},{3,63,1787},{5,50,29},{0,43,1651},{30,23,5419},{0,43,1651},{9,63,1622},{9,63,1622},{9,63,1622},{9,47,1587},{5,63,1164},{5,51,29},{5,51,29}, +{5,35,54},{0,46,1926},{0,36,186},{12,56,0},{12,56,0},{12,56,0},{12,44,1},{24,0,1152},{3,55,2},{3,55,2},{3,36,2},{31,11,1152},{3,36,2},{31,11,1568},{11,63,116},{14,50,1},{7,50,1},{31,11,1568},{29,34,1568},{7,50,1},{0,43,1570},{29,34,1568},{0,43,1570},{9,0,1586},{9,0,1586},{9,0,1586},{9,0,1586},{5,48,5},{5,48,5},{5,48,5},{5,33,4},{0,36,185}, +{0,36,185},{12,63,5316},{11,63,2154},{11,54,2141},{10,52,1613},{10,63,6800},{6,62,1995},{6,53,43},{5,44,2085},{0,59,7068},{0,44,1845},{15,63,2081},{13,63,206},{14,53,74},{13,51,195},{31,0,5419},{5,63,1937},{6,53,34},{0,45,1601},{28,29,5419},{0,45,1601},{10,63,1676},{10,63,1676},{10,63,1676},{10,49,1577},{7,63,1179},{6,54,26},{6,54,26},{6,38,49},{0,49,1764},{0,40,149},{13,59,1}, +{13,59,1},{13,59,1},{13,46,1},{24,6,1152},{4,57,1},{4,57,1},{4,38,4},{29,17,1152},{4,38,4},{30,20,1568},{13,63,205},{15,52,1},{8,52,1},{30,20,1568},{31,35,1568},{8,52,1},{0,45,1576},{31,35,1568},{0,45,1576},{10,0,1576},{10,0,1576},{10,0,1576},{10,0,1576},{6,51,9},{6,51,9},{6,51,9},{6,35,10},{0,40,113},{0,40,113},{13,63,5658},{12,63,2435},{12,56,2148}, 
+{11,54,1613},{11,63,7055},{7,63,2090},{7,55,43},{6,46,2085},{0,63,6820},{0,47,1701},{16,63,2216},{14,63,334},{15,55,74},{14,53,195},{30,9,5419},{7,63,2081},{7,55,34},{0,47,1580},{29,31,5419},{0,47,1580},{11,63,1745},{11,63,1745},{11,63,1745},{11,51,1577},{8,63,1220},{7,56,26},{7,56,26},{7,40,49},{0,53,1624},{0,42,145},{14,61,1},{14,61,1},{14,61,1},{14,48,2},{27,1,1152}, +{5,59,1},{5,59,1},{5,40,4},{30,19,1152},{5,40,4},{31,22,1568},{15,63,289},{16,54,1},{9,54,1},{31,22,1568},{28,42,1568},{9,54,1},{0,47,1576},{28,42,1568},{0,47,1576},{11,0,1576},{11,0,1576},{11,0,1576},{11,0,1576},{7,53,9},{7,53,9},{7,53,9},{7,37,10},{0,44,61},{0,44,61},{14,63,6036},{13,63,2751},{12,58,2119},{12,55,1627},{12,63,7316},{8,63,2228},{8,57,31}, +{7,48,2100},{0,63,6884},{0,49,1606},{17,63,2402},{16,63,500},{16,57,82},{15,55,195},{31,11,5419},{9,63,2195},{8,57,27},{0,49,1570},{29,34,5419},{0,49,1570},{12,63,1811},{12,63,1811},{12,63,1811},{12,53,1587},{9,63,1286},{8,57,22},{8,57,22},{8,41,56},{0,57,1508},{1,44,145},{15,63,1},{15,63,1},{15,63,1},{15,50,2},{28,3,1152},{6,61,1},{6,61,1},{6,42,4},{31,21,1152}, +{6,42,4},{31,27,1568},{17,63,410},{17,56,1},{10,56,1},{31,27,1568},{29,44,1568},{10,56,1},{0,49,1570},{29,44,1568},{0,49,1570},{12,0,1586},{12,0,1586},{12,0,1586},{12,0,1586},{8,55,4},{8,55,4},{8,55,4},{8,39,5},{0,48,34},{0,48,34},{15,63,6450},{14,63,3135},{13,60,2119},{13,57,1627},{13,63,7661},{10,63,2448},{9,59,31},{8,50,2079},{2,63,7196},{0,51,1580},{19,63,2594}, +{17,63,698},{16,59,73},{16,57,194},{31,16,5419},{11,63,2379},{9,59,27},{1,51,1570},{30,36,5419},{1,51,1570},{13,63,1910},{13,63,1910},{13,63,1910},{13,55,1587},{10,63,1388},{9,59,22},{9,59,22},{9,43,56},{0,61,1416},{2,46,145},{16,63,4},{16,63,4},{16,63,4},{16,52,1},{29,5,1152},{7,63,1},{7,63,1},{7,44,4},{31,24,1152},{7,44,4},{31,32,1568},{19,63,530},{18,58,1}, 
+{11,58,1},{31,32,1568},{30,46,1568},{11,58,1},{0,51,1570},{30,46,1568},{0,51,1570},{13,0,1586},{13,0,1586},{13,0,1586},{13,0,1586},{9,57,4},{9,57,4},{9,57,4},{9,41,5},{0,51,10},{0,51,10},{16,63,6900},{15,63,3657},{15,62,2128},{14,60,1616},{15,63,8023},{11,63,2845},{10,61,33},{9,52,2085},{5,63,7651},{1,54,1584},{20,63,2866},{18,63,1011},{18,61,69},{17,59,181},{31,22,5419}, +{13,63,2657},{10,61,24},{2,53,1577},{28,42,5419},{2,53,1577},{15,63,2057},{15,63,2057},{15,63,2057},{14,58,1577},{12,63,1476},{10,61,29},{10,61,29},{10,46,54},{0,63,1324},{4,47,137},{17,63,37},{17,63,37},{17,63,37},{17,54,1},{31,4,1152},{9,63,4},{9,63,4},{7,47,2},{30,29,1152},{7,47,2},{31,38,1568},{21,63,637},{19,60,5},{12,60,4},{31,38,1568},{31,48,1568},{12,60,4}, +{0,53,1576},{31,48,1568},{0,53,1576},{14,0,1576},{14,0,1576},{14,0,1576},{14,0,1576},{10,59,10},{10,59,10},{10,59,10},{10,44,10},{0,56,0},{0,56,0},{18,63,7332},{16,63,4196},{16,63,2175},{15,62,1616},{16,63,8348},{13,63,3285},{11,63,33},{10,54,2085},{8,63,8004},{2,56,1584},{21,63,3112},{20,63,1281},{19,63,69},{18,61,181},{31,27,5419},{15,63,2897},{11,63,24},{3,55,1577},{29,44,5419}, +{3,55,1577},{16,63,2171},{16,63,2171},{16,63,2171},{15,60,1577},{13,63,1590},{11,63,29},{11,63,29},{11,48,49},{2,63,1424},{4,50,145},{19,63,65},{19,63,65},{19,63,65},{18,56,1},{30,13,1152},{11,63,20},{11,63,20},{9,48,4},{31,31,1152},{9,48,4},{30,47,1568},{23,63,785},{20,62,4},{13,62,4},{30,47,1568},{31,51,1568},{13,62,4},{0,55,1576},{31,51,1568},{0,55,1576},{15,0,1576}, +{15,0,1576},{15,0,1576},{15,0,1576},{11,61,10},{11,61,10},{11,61,10},{11,46,10},{1,58,0},{1,58,0},{19,63,7014},{17,63,4230},{17,63,2294},{16,63,1595},{17,63,7865},{14,63,3114},{12,63,85},{12,55,1713},{8,63,7436},{3,57,1268},{22,63,2794},{21,63,1221},{20,63,113},{19,61,114},{31,31,4803},{17,63,2648},{13,63,61},{4,57,1253},{31,44,4803},{4,57,1253},{17,63,2294},{17,63,2294},{17,63,2294}, 
+{16,62,1587},{14,63,1740},{12,63,85},{12,63,85},{12,49,56},{3,63,1571},{5,52,145},{20,63,113},{20,63,113},{20,63,113},{19,58,1},{31,15,1152},{13,63,61},{13,63,61},{10,50,4},{31,34,1152},{10,50,4},{31,47,1250},{24,63,680},{21,63,4},{15,63,0},{31,47,1250},{31,53,1250},{15,63,0},{0,57,1252},{31,53,1250},{0,57,1252},{16,0,1586},{16,0,1586},{16,0,1586},{16,0,1586},{12,63,4}, +{12,63,4},{12,63,4},{12,47,8},{2,60,0},{2,60,0},{19,63,6534},{18,63,4116},{18,63,2435},{17,63,1590},{18,63,7164},{15,63,2809},{14,63,161},{12,57,1256},{10,63,6748},{5,58,909},{23,63,2340},{22,63,1065},{21,63,164},{20,62,41},{31,34,4056},{18,63,2211},{15,63,113},{6,58,884},{31,46,4056},{6,58,884},{18,63,2435},{18,63,2435},{18,63,2435},{17,63,1590},{16,63,1923},{14,63,161},{14,63,161}, +{13,51,56},{6,63,1729},{6,54,145},{21,63,164},{21,63,164},{21,63,164},{20,60,0},{31,20,1152},{15,63,113},{15,63,113},{11,52,4},{31,37,1152},{11,52,4},{31,49,882},{25,63,482},{23,63,0},{18,63,1},{31,49,882},{30,56,882},{18,63,1},{0,58,884},{30,56,882},{0,58,884},{17,0,1586},{17,0,1586},{17,0,1586},{17,0,1586},{13,63,13},{13,63,13},{13,63,13},{13,49,5},{3,62,0}, +{3,62,0},{21,63,6091},{20,63,4022},{19,63,2609},{18,63,1640},{19,63,6490},{16,63,2617},{15,63,318},{14,58,834},{11,63,6135},{6,59,598},{24,63,1881},{23,63,931},{22,63,245},{21,63,5},{31,38,3318},{20,63,1733},{17,63,202},{8,59,545},{31,48,3318},{8,59,545},{19,63,2609},{19,63,2609},{19,63,2609},{18,63,1640},{17,63,2086},{15,63,318},{15,63,318},{14,54,54},{8,63,1868},{8,55,137},{22,63,245}, +{22,63,245},{22,63,245},{21,62,2},{31,26,1152},{17,63,202},{17,63,202},{11,55,2},{30,42,1152},{11,55,2},{31,52,545},{26,63,305},{25,63,4},{20,63,1},{31,52,545},{30,58,545},{20,63,1},{0,59,545},{30,58,545},{0,59,545},{18,0,1576},{18,0,1576},{18,0,1576},{18,0,1576},{15,63,29},{15,63,29},{15,63,29},{14,52,10},{5,62,5},{5,62,5},{22,63,5719},{21,63,3980},{20,63,2834}, 
+{19,63,1745},{20,63,6050},{18,63,2457},{16,63,536},{15,59,515},{14,63,5674},{8,60,385},{25,63,1573},{24,63,861},{23,63,338},{22,63,10},{30,45,2753},{21,63,1438},{19,63,290},{10,60,313},{31,50,2753},{10,60,313},{20,63,2834},{20,63,2834},{20,63,2834},{19,63,1745},{18,63,2284},{16,63,536},{16,63,536},{15,56,54},{10,63,2064},{9,57,137},{23,63,338},{23,63,338},{23,63,338},{22,63,10},{31,31,1152}, +{19,63,290},{19,63,290},{12,57,2},{31,44,1152},{12,57,2},{31,55,313},{28,63,181},{26,63,1},{23,63,0},{31,55,313},{31,58,313},{23,63,0},{0,60,313},{31,58,313},{0,60,313},{19,0,1576},{19,0,1576},{19,0,1576},{19,0,1576},{16,63,52},{16,63,52},{16,63,52},{15,54,10},{7,62,25},{7,62,25},{22,63,5399},{22,63,3974},{21,63,3035},{20,63,1875},{21,63,5619},{19,63,2378},{18,63,776}, +{16,60,294},{15,63,5258},{9,61,225},{26,63,1333},{25,63,813},{24,63,425},{23,63,65},{29,52,2273},{23,63,1218},{20,63,353},{12,61,146},{28,56,2273},{12,61,146},{21,63,3035},{21,63,3035},{21,63,3035},{20,63,1875},{19,63,2518},{18,63,776},{18,63,776},{16,58,49},{11,63,2323},{10,59,137},{24,63,425},{24,63,425},{24,63,425},{23,63,65},{31,36,1152},{20,63,353},{20,63,353},{13,59,2},{31,47,1152}, +{13,59,2},{31,57,145},{28,63,85},{28,63,4},{26,63,1},{31,57,145},{31,60,145},{26,63,1},{0,61,145},{31,60,145},{0,61,145},{20,0,1586},{20,0,1586},{20,0,1586},{20,0,1586},{17,63,85},{17,63,85},{17,63,85},{16,56,5},{8,63,40},{8,63,40},{23,63,5143},{23,63,4004},{22,63,3254},{21,63,2070},{22,63,5274},{20,63,2310},{19,63,1062},{17,61,133},{17,63,5011},{10,63,161},{27,63,1161}, +{26,63,801},{26,63,545},{25,63,164},{30,52,1878},{24,63,1094},{22,63,461},{13,62,42},{28,58,1878},{13,62,42},{22,63,3254},{22,63,3254},{22,63,3254},{21,63,2070},{20,63,2833},{19,63,1062},{19,63,1062},{17,60,49},{14,63,2577},{11,61,137},{26,63,545},{26,63,545},{26,63,545},{25,63,164},{30,45,1152},{22,63,461},{22,63,461},{14,61,2},{31,50,1152},{14,61,2},{31,60,41},{30,63,25},{29,63,1}, 
+{28,63,0},{31,60,41},{31,61,41},{28,63,0},{0,62,41},{31,61,41},{0,62,41},{21,0,1586},{21,0,1586},{21,0,1586},{21,0,1586},{18,63,136},{18,63,136},{18,63,136},{17,58,5},{10,63,80},{10,63,80},{24,63,4882},{24,63,4070},{23,63,3532},{22,63,2360},{24,63,4945},{21,63,2422},{20,63,1433},{18,63,58},{20,63,4717},{12,63,157},{28,63,1026},{27,63,835},{27,63,666},{26,63,305},{30,56,1536}, +{26,63,996},{24,63,628},{16,63,1},{30,58,1536},{16,63,1},{23,63,3532},{23,63,3532},{23,63,3532},{22,63,2360},{22,63,3110},{20,63,1433},{20,63,1433},{18,62,50},{16,63,2939},{12,63,157},{27,63,666},{27,63,666},{27,63,666},{26,63,305},{29,54,1152},{24,63,628},{24,63,628},{16,63,1},{29,56,1152},{16,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0}, +{0,63,0},{31,63,0},{0,63,0},{22,0,1576},{22,0,1576},{22,0,1576},{22,0,1576},{19,63,221},{19,63,221},{19,63,221},{18,60,9},{12,63,157},{12,63,157},{25,63,4212},{24,63,3590},{24,63,3106},{23,63,2201},{24,63,4129},{22,63,2101},{21,63,1301},{19,63,13},{20,63,3869},{14,63,233},{28,63,706},{28,63,562},{28,63,481},{27,63,218},{30,58,1067},{27,63,699},{25,63,442},{18,63,1},{31,58,1067}, +{18,63,1},{24,63,3106},{24,63,3106},{24,63,3106},{23,63,2201},{23,63,2668},{21,63,1301},{21,63,1301},{19,63,13},{18,63,2523},{14,63,233},{28,63,481},{28,63,481},{28,63,481},{27,63,218},{31,50,800},{25,63,442},{25,63,442},{18,63,1},{31,55,800},{18,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{23,0,1576}, +{23,0,1576},{23,0,1576},{23,0,1576},{20,63,325},{20,63,325},{20,63,325},{19,62,9},{14,63,233},{14,63,233},{26,63,3642},{25,63,3132},{25,63,2771},{24,63,2070},{25,63,3444},{23,63,1834},{22,63,1205},{20,63,8},{21,63,3219},{16,63,346},{29,63,456},{28,63,370},{28,63,289},{28,63,145},{31,56,683},{28,63,451},{27,63,290},{21,63,1},{30,60,683},{21,63,1},{25,63,2771},{25,63,2771},{25,63,2771}, 
+{24,63,2070},{24,63,2273},{22,63,1205},{22,63,1205},{20,63,8},{20,63,2121},{16,63,346},{28,63,289},{28,63,289},{28,63,289},{28,63,145},{30,56,512},{27,63,290},{27,63,290},{21,63,1},{30,58,512},{21,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{24,0,1586},{24,0,1586},{24,0,1586},{24,0,1586},{22,63,421}, +{22,63,421},{22,63,421},{20,63,8},{16,63,346},{16,63,346},{26,63,3162},{26,63,2742},{26,63,2486},{25,63,1947},{26,63,2877},{24,63,1641},{23,63,1145},{22,63,52},{22,63,2673},{18,63,458},{29,63,264},{29,63,200},{29,63,164},{28,63,81},{31,58,384},{28,63,243},{28,63,162},{23,63,1},{31,60,384},{23,63,1},{26,63,2486},{26,63,2486},{26,63,2486},{25,63,1947},{24,63,1969},{23,63,1145},{23,63,1145}, +{22,63,52},{20,63,1785},{18,63,458},{29,63,164},{29,63,164},{29,63,164},{28,63,81},{31,55,288},{28,63,162},{28,63,162},{23,63,1},{28,62,288},{23,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{25,0,1586},{25,0,1586},{25,0,1586},{25,0,1586},{23,63,520},{23,63,520},{23,63,520},{22,63,52},{18,63,458}, +{18,63,458},{0,46,2665},{0,36,666},{0,26,37},{0,22,773},{0,31,5885},{0,23,4085},{0,21,1802},{0,15,4214},{0,17,6543},{0,13,4662},{0,46,2665},{0,36,666},{0,26,37},{0,22,773},{9,0,5885},{0,23,4085},{0,21,1802},{0,15,4214},{15,0,5885},{0,15,4214},{0,22,0},{0,22,0},{0,22,0},{0,13,1},{0,11,545},{0,10,289},{0,10,289},{0,6,306},{0,6,630},{0,5,362},{0,22,0}, +{0,22,0},{0,22,0},{0,13,1},{3,1,545},{0,10,289},{0,10,289},{0,6,306},{5,1,545},{0,6,306},{13,2,2665},{0,36,666},{0,26,37},{0,22,773},{13,2,2665},{23,0,2665},{0,22,773},{0,17,2665},{23,0,2665},{0,17,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,51,2665},{0,40,530},{0,28,2}, 
+{0,24,650},{0,34,6670},{0,27,4445},{0,23,1886},{0,16,4577},{0,19,7493},{0,15,5130},{0,51,2665},{0,40,530},{0,28,2},{0,24,650},{10,1,6669},{0,27,4445},{0,23,1886},{0,16,4577},{17,0,6669},{0,16,4577},{0,27,1},{0,27,1},{0,27,1},{0,16,1},{0,14,841},{0,13,442},{0,13,442},{0,7,458},{0,7,965},{0,7,558},{0,27,1},{0,27,1},{0,27,1},{0,16,1},{4,0,841}, +{0,13,442},{0,13,442},{0,7,458},{7,0,841},{0,7,458},{15,1,2665},{0,40,530},{0,28,2},{0,24,650},{15,1,2665},{25,0,2665},{0,24,650},{0,19,2665},{25,0,2665},{0,19,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,56,2665},{0,44,410},{0,30,17},{0,28,522},{0,38,7538},{0,29,4826},{0,25,2006}, +{0,18,5002},{0,21,8547},{0,16,5681},{0,56,2665},{0,44,410},{0,30,17},{0,28,522},{11,1,7538},{0,29,4826},{0,25,2006},{0,18,5002},{17,2,7538},{0,18,5002},{0,32,1},{0,32,1},{0,32,1},{0,19,1},{0,16,1201},{0,15,628},{0,15,628},{0,9,653},{0,9,1382},{0,9,822},{0,32,1},{0,32,1},{0,32,1},{0,19,1},{5,0,1201},{0,15,628},{0,15,628},{0,9,653},{8,0,1201}, +{0,9,653},{16,2,2665},{0,44,410},{1,30,2},{0,28,522},{16,2,2665},{28,0,2665},{0,28,522},{0,21,2665},{28,0,2665},{0,21,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,62,2665},{0,49,325},{0,33,65},{0,30,405},{0,41,8493},{0,31,5261},{0,27,2162},{0,20,5477},{0,23,9705},{0,18,6259},{0,62,2665}, +{0,49,325},{0,33,65},{0,30,405},{11,4,8493},{0,31,5261},{0,27,2162},{0,20,5477},{17,4,8493},{0,20,5477},{0,38,0},{0,38,0},{0,38,0},{0,23,1},{0,19,1625},{0,17,821},{0,17,821},{0,11,898},{0,10,1874},{0,9,1094},{0,38,0},{0,38,0},{0,38,0},{0,23,1},{5,2,1625},{0,17,821},{0,17,821},{0,11,898},{8,2,1625},{0,11,898},{18,1,2665},{0,49,325},{2,32,1}, 
+{0,30,405},{18,1,2665},{29,2,2665},{0,30,405},{0,23,2665},{29,2,2665},{0,23,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,63,2777},{0,51,221},{0,35,178},{0,32,306},{0,45,9669},{0,34,5810},{0,29,2382},{0,22,6054},{0,25,11123},{0,20,6989},{1,63,2741},{0,51,221},{1,35,137},{0,32,306},{13,1,9669}, +{0,34,5810},{0,29,2382},{0,22,6054},{19,4,9669},{0,22,6054},{0,44,0},{0,44,0},{0,44,0},{0,26,1},{0,22,2178},{0,19,1108},{0,19,1108},{0,11,1213},{0,11,2506},{0,11,1469},{0,44,0},{0,44,0},{0,44,0},{0,26,1},{6,2,2178},{0,19,1108},{0,19,1108},{0,11,1213},{11,0,2178},{0,11,1213},{20,0,2665},{0,51,221},{3,34,1},{0,32,306},{20,0,2665},{31,3,2665},{0,32,306}, +{0,25,2669},{31,3,2665},{0,25,2669},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,63,2949},{0,55,209},{1,37,242},{0,35,285},{0,50,9669},{0,38,5486},{0,32,1938},{0,24,5733},{0,27,11477},{0,22,6887},{2,63,2789},{0,55,209},{2,37,137},{0,35,285},{15,0,9669},{0,38,5486},{0,32,1938},{0,24,5733},{24,1,9669}, +{0,24,5733},{1,46,64},{1,46,64},{1,46,64},{1,28,65},{0,27,2180},{0,23,932},{0,23,932},{0,15,1037},{0,14,2691},{0,13,1421},{1,46,0},{1,46,0},{1,46,0},{1,28,1},{8,0,2178},{0,23,932},{0,23,932},{0,15,1037},{11,3,2178},{0,15,1037},{21,2,2665},{0,55,145},{4,36,2},{0,35,221},{21,2,2665},{31,6,2665},{0,35,221},{0,27,2669},{31,6,2665},{0,27,2669},{0,0,64}, +{0,0,64},{0,0,64},{0,0,64},{0,5,1},{0,5,1},{0,5,1},{0,3,1},{0,3,17},{0,3,17},{2,63,3285},{1,57,273},{2,39,434},{1,37,349},{0,56,9670},{0,42,5186},{0,34,1530},{0,26,5430},{0,29,11903},{0,24,6821},{3,63,2873},{1,57,209},{3,39,137},{1,37,285},{16,1,9669},{0,42,5186},{0,34,1530},{0,26,5430},{25,3,9669},{0,26,5430},{1,51,128},{1,51,128},{1,51,128}, 
+{1,31,129},{0,32,2180},{0,27,772},{0,27,772},{0,16,865},{0,17,2875},{0,15,1409},{2,47,1},{2,47,1},{2,47,1},{2,30,1},{9,2,2178},{0,27,772},{0,27,772},{0,16,865},{16,0,2178},{0,16,865},{23,0,2665},{0,59,85},{5,38,2},{0,37,146},{23,0,2665},{31,9,2665},{0,37,146},{0,29,2669},{31,9,2665},{0,29,2669},{1,0,128},{1,0,128},{1,0,128},{1,0,128},{0,11,0}, +{0,11,0},{0,11,0},{0,6,1},{0,5,65},{0,5,65},{3,63,3785},{1,61,405},{2,41,653},{1,39,466},{0,61,9669},{0,44,4909},{0,36,1190},{0,28,5145},{0,32,12358},{0,26,6791},{4,63,2966},{2,59,209},{4,41,154},{2,39,285},{18,0,9669},{0,44,4909},{0,36,1190},{0,28,5145},{30,0,9669},{0,28,5145},{2,53,320},{2,53,320},{2,53,320},{2,33,321},{0,38,2178},{0,31,628},{0,31,628}, +{0,18,730},{0,19,3124},{0,17,1427},{3,49,1},{3,49,1},{3,49,1},{3,32,0},{11,1,2178},{0,31,628},{0,31,628},{0,18,730},{17,2,2178},{0,18,730},{24,2,2665},{0,63,41},{6,40,2},{0,39,89},{24,2,2665},{30,14,2665},{0,39,89},{0,31,2669},{30,14,2665},{0,31,2669},{1,0,320},{1,0,320},{1,0,320},{1,0,320},{0,16,0},{0,16,0},{0,16,0},{0,10,1},{0,7,149}, +{0,7,149},{4,63,4514},{2,63,630},{3,45,1013},{2,41,681},{1,63,9738},{0,49,4610},{0,40,833},{0,30,4849},{0,36,12905},{0,29,6798},{6,63,3101},{4,60,208},{5,43,149},{3,41,286},{18,6,9669},{0,49,4610},{0,40,833},{0,30,4849},{28,6,9669},{0,30,4849},{2,59,545},{2,59,545},{2,59,545},{2,37,545},{0,44,2178},{0,34,493},{0,34,493},{0,22,584},{0,23,3462},{0,19,1493},{4,52,0}, +{4,52,0},{4,52,0},{4,34,1},{13,0,2178},{0,34,493},{0,34,493},{0,22,584},{20,2,2178},{0,22,584},{26,1,2665},{2,63,85},{7,42,4},{0,41,52},{26,1,2665},{31,16,2665},{0,41,52},{0,33,2669},{31,16,2665},{0,33,2669},{2,0,545},{2,0,545},{2,0,545},{2,0,545},{0,22,0},{0,22,0},{0,22,0},{0,13,1},{0,10,289},{0,10,289},{4,63,5330},{3,63,1018},{3,47,1406}, 
+{2,43,966},{2,63,9981},{0,53,4366},{0,42,585},{0,33,4609},{0,38,13451},{0,31,6834},{7,63,3233},{5,62,208},{6,45,149},{4,42,285},{21,1,9669},{0,53,4366},{0,42,585},{0,33,4609},{29,8,9669},{0,33,4609},{3,61,865},{3,61,865},{3,61,865},{3,39,865},{0,49,2178},{0,38,377},{0,38,377},{0,24,461},{0,25,3789},{0,21,1589},{5,54,0},{5,54,0},{5,54,0},{5,36,1},{14,2,2178}, +{0,38,377},{0,38,377},{0,24,461},{24,0,2178},{0,24,461},{28,0,2665},{3,63,153},{8,44,5},{0,44,20},{28,0,2665},{31,19,2665},{0,44,20},{0,35,2669},{31,19,2665},{0,35,2669},{3,0,865},{3,0,865},{3,0,865},{3,0,865},{0,27,1},{0,27,1},{0,27,1},{0,16,1},{0,13,442},{0,13,442},{5,63,6270},{3,63,1626},{4,49,1866},{3,45,1286},{2,63,10381},{0,55,4133},{0,45,401}, +{0,35,4366},{0,42,14023},{0,32,6917},{8,63,3434},{6,63,242},{7,47,149},{5,44,285},{22,3,9669},{0,55,4133},{0,45,401},{0,35,4366},{30,10,9669},{0,35,4366},{3,63,1226},{3,63,1226},{3,63,1226},{3,42,1201},{0,54,2178},{0,42,277},{0,42,277},{0,26,356},{0,27,4170},{0,23,1721},{6,56,0},{6,56,0},{6,56,0},{6,38,1},{16,0,2178},{0,42,277},{0,42,277},{0,26,356},{26,1,2178}, +{0,26,356},{29,2,2665},{5,63,232},{9,46,5},{0,46,5},{29,2,2665},{31,22,2665},{0,46,5},{0,37,2669},{31,22,2665},{0,37,2669},{3,0,1201},{3,0,1201},{3,0,1201},{3,0,1201},{0,32,1},{0,32,1},{0,32,1},{0,19,1},{0,15,628},{0,15,628},{6,63,7374},{4,63,2339},{4,51,2411},{3,48,1715},{3,63,10950},{0,59,3909},{0,47,257},{0,37,4141},{0,44,14641},{0,34,7031},{9,63,3638}, +{7,63,320},{8,49,154},{6,46,285},{24,1,9669},{0,59,3909},{0,47,257},{0,37,4141},{31,12,9669},{0,37,4141},{4,63,1714},{4,63,1714},{4,63,1714},{4,44,1665},{0,59,2180},{0,46,193},{0,46,193},{0,28,269},{0,31,4582},{0,25,1889},{7,58,0},{7,58,0},{7,58,0},{7,40,1},{17,2,2178},{0,46,193},{0,46,193},{0,28,269},{27,3,2178},{0,28,269},{31,0,2665},{8,63,313},{10,48,2}, 
+{1,48,2},{31,0,2665},{30,27,2665},{1,48,2},{0,39,2669},{30,27,2665},{0,39,2669},{3,0,1665},{3,0,1665},{3,0,1665},{3,0,1665},{0,38,0},{0,38,0},{0,38,0},{0,23,1},{0,17,821},{0,17,821},{7,63,8807},{5,63,3388},{5,53,3082},{4,49,2230},{4,63,11766},{0,63,3686},{0,51,134},{0,39,3909},{0,46,15438},{0,37,7214},{11,63,3853},{8,63,457},{9,51,149},{7,49,286},{25,4,9669}, +{0,63,3686},{0,51,134},{0,39,3909},{28,19,9669},{0,39,3909},{5,63,2427},{5,63,2427},{5,63,2427},{4,47,2179},{0,63,2210},{0,49,128},{0,49,128},{0,31,193},{0,34,5117},{0,29,2123},{8,60,0},{8,60,0},{8,60,0},{8,42,1},{19,1,2178},{0,49,128},{0,49,128},{0,31,193},{29,4,2178},{0,31,193},{31,6,2665},{9,63,405},{11,50,4},{2,50,4},{31,6,2665},{31,29,2665},{2,50,4}, +{0,42,2669},{31,29,2665},{0,42,2669},{4,0,2178},{4,0,2178},{4,0,2178},{4,0,2178},{0,44,0},{0,44,0},{0,44,0},{0,26,1},{0,19,1108},{0,19,1108},{7,63,10230},{6,63,4421},{6,55,3705},{4,52,2725},{5,63,12634},{0,63,3719},{0,53,77},{0,41,3727},{0,51,15978},{0,39,7289},{12,63,4050},{10,63,629},{10,53,149},{8,50,285},{26,6,9669},{0,63,3718},{0,53,76},{0,41,3726},{29,21,9669}, +{0,41,3726},{5,63,3050},{5,63,3050},{5,63,3050},{5,49,2690},{1,63,2325},{0,53,73},{0,53,73},{0,33,118},{0,36,5499},{0,31,2266},{9,62,0},{9,62,0},{9,62,0},{9,44,1},{20,3,2178},{0,53,72},{0,53,72},{0,33,117},{30,6,2178},{0,33,117},{31,12,2665},{11,63,521},{12,52,5},{3,52,4},{31,12,2665},{31,32,2665},{3,52,4},{0,44,2669},{31,32,2665},{0,44,2669},{5,0,2689}, +{5,0,2689},{5,0,2689},{5,0,2689},{0,49,1},{0,49,1},{0,49,1},{0,29,2},{0,21,1341},{0,21,1341},{9,63,10738},{7,63,4899},{7,57,3705},{5,54,2725},{6,63,13045},{1,63,4002},{1,55,77},{0,43,3642},{0,53,15510},{0,42,6577},{13,63,4302},{11,63,857},{11,55,149},{9,52,285},{29,1,9669},{2,63,3954},{1,55,76},{0,43,3561},{30,23,9669},{0,43,3561},{6,63,3173},{6,63,3173},{6,63,3173}, 
+{6,51,2690},{2,63,2427},{1,55,73},{1,55,73},{1,35,118},{0,40,5171},{0,32,1846},{10,63,4},{10,63,4},{10,63,4},{10,46,1},{21,5,2178},{0,57,32},{0,57,32},{0,35,72},{31,8,2178},{0,35,72},{31,17,2665},{13,63,680},{13,54,5},{3,55,4},{31,17,2665},{31,35,2665},{3,55,4},{0,46,2669},{31,35,2665},{0,46,2669},{6,0,2689},{6,0,2689},{6,0,2689},{6,0,2689},{1,51,1}, +{1,51,1},{1,51,1},{1,31,2},{0,25,1145},{0,25,1145},{10,63,11278},{8,63,5402},{8,58,3742},{6,56,2725},{7,63,13510},{3,63,4314},{2,57,77},{1,45,3642},{0,57,15046},{0,44,5927},{14,63,4590},{12,63,1171},{12,57,149},{10,54,285},{30,3,9669},{3,63,4265},{2,57,76},{0,45,3414},{31,25,9669},{0,45,3414},{7,63,3314},{7,63,3314},{7,63,3314},{7,53,2690},{4,63,2532},{2,57,73},{2,57,73}, +{2,37,118},{0,44,4867},{0,36,1470},{11,63,25},{11,63,25},{11,63,25},{11,48,1},{24,0,2178},{0,61,8},{0,61,8},{0,38,40},{31,11,2178},{0,38,40},{31,22,2665},{15,63,832},{14,56,5},{4,57,4},{31,22,2665},{30,40,2665},{4,57,4},{0,47,2677},{30,40,2665},{0,47,2677},{7,0,2689},{7,0,2689},{7,0,2689},{7,0,2689},{2,53,1},{2,53,1},{2,53,1},{2,33,2},{0,29,965}, +{0,29,965},{11,63,11942},{10,63,6090},{9,62,3723},{8,58,2734},{9,63,14053},{4,63,4863},{3,59,79},{2,48,3633},{0,61,14558},{0,46,5283},{16,63,4858},{14,63,1556},{13,60,138},{11,57,299},{30,9,9669},{6,63,4594},{3,59,75},{0,48,3233},{29,31,9669},{0,48,3233},{9,63,3505},{9,63,3505},{9,63,3505},{8,55,2690},{5,63,2645},{3,61,72},{3,61,72},{3,39,117},{0,46,4539},{0,38,1091},{12,63,64}, +{12,63,64},{12,63,64},{12,50,1},{24,6,2178},{1,63,10},{1,63,10},{0,40,13},{29,17,2178},{0,40,13},{31,28,2665},{17,63,1053},{15,59,2},{5,59,2},{31,28,2665},{31,42,2665},{5,59,2},{0,50,2669},{31,42,2665},{0,50,2669},{8,0,2689},{8,0,2689},{8,0,2689},{8,0,2689},{3,55,5},{3,55,5},{3,55,5},{3,36,5},{0,34,794},{0,34,794},{12,63,12466},{11,63,6718},{10,62,3738}, 
+{8,60,2723},{10,63,14554},{6,63,5363},{4,61,87},{3,50,3633},{0,63,14190},{0,49,4774},{17,63,5158},{15,63,1938},{14,62,138},{12,59,282},{31,11,9669},{8,63,4806},{4,61,86},{0,50,3110},{29,34,9669},{0,50,3110},{10,63,3658},{10,63,3658},{10,63,3658},{9,57,2690},{6,63,2795},{4,62,66},{4,62,66},{4,41,131},{0,51,4269},{0,40,833},{14,63,100},{14,63,100},{14,63,100},{13,52,1},{27,1,2178}, +{3,63,34},{3,63,34},{0,43,2},{30,19,2178},{0,43,2},{31,33,2665},{19,63,1241},{16,61,5},{6,61,2},{31,33,2665},{31,45,2665},{6,61,2},{0,52,2669},{31,45,2665},{0,52,2669},{9,0,2689},{9,0,2689},{9,0,2689},{9,0,2689},{4,57,1},{4,57,1},{4,57,1},{4,37,2},{0,38,650},{0,38,650},{14,63,13094},{12,63,7445},{11,63,3830},{9,62,2723},{12,63,14998},{8,63,5926},{5,63,87}, +{4,51,3642},{0,63,14254},{0,51,4306},{18,63,5494},{16,63,2414},{15,63,146},{13,61,282},{31,16,9669},{10,63,5138},{5,63,86},{0,52,3005},{30,36,9669},{0,52,3005},{11,63,3829},{11,63,3829},{11,63,3829},{10,59,2690},{7,63,2981},{5,63,86},{5,63,86},{5,43,131},{0,53,4014},{0,44,601},{15,63,145},{15,63,145},{15,63,145},{14,54,1},{28,3,2178},{5,63,85},{5,63,85},{1,45,2},{31,21,2178}, +{1,45,2},{31,38,2665},{20,63,1378},{17,63,5},{7,63,2},{31,38,2665},{31,48,2665},{7,63,2},{0,54,2669},{31,48,2665},{0,54,2669},{10,0,2689},{10,0,2689},{10,0,2689},{10,0,2689},{5,59,1},{5,59,1},{5,59,1},{5,39,2},{0,40,520},{0,40,520},{15,63,12507},{13,63,7370},{12,63,4001},{11,63,2705},{13,63,14148},{8,63,5491},{6,63,154},{5,53,3033},{1,63,13399},{0,53,3297},{19,63,4949}, +{17,63,2261},{16,63,202},{15,61,185},{31,20,8712},{11,63,4644},{7,63,145},{0,54,2365},{31,37,8712},{0,54,2365},{12,63,4001},{12,63,4001},{12,63,4001},{11,61,2690},{9,63,3204},{6,63,154},{6,63,154},{6,45,131},{0,57,3762},{0,46,419},{16,63,202},{16,63,202},{16,63,202},{15,56,1},{29,5,2178},{7,63,145},{7,63,145},{2,47,2},{31,24,2178},{2,47,2},{30,45,2178},{22,63,1145},{18,63,1}, 
+{10,63,1},{30,45,2178},{31,50,2178},{10,63,1},{0,55,2180},{31,50,2178},{0,55,2180},{11,0,2689},{11,0,2689},{11,0,2689},{11,0,2689},{6,61,1},{6,61,1},{6,61,1},{6,41,2},{0,44,400},{0,44,400},{16,63,11658},{14,63,7195},{13,63,4225},{12,63,2693},{14,63,13066},{10,63,5014},{8,63,261},{6,54,2366},{3,63,12366},{0,55,2274},{20,63,4338},{18,63,2037},{17,63,289},{16,62,80},{31,24,7578}, +{13,63,4037},{9,63,202},{0,55,1698},{29,42,7578},{0,55,1698},{13,63,4225},{13,63,4225},{13,63,4225},{12,63,2693},{10,63,3429},{8,63,261},{8,63,261},{7,47,132},{0,61,3509},{0,49,290},{17,63,289},{17,63,289},{17,63,289},{16,59,0},{31,4,2178},{9,63,202},{9,63,202},{3,49,2},{30,29,2178},{3,49,2},{31,44,1625},{23,63,850},{20,63,0},{13,63,1},{31,44,1625},{31,52,1625},{13,63,1}, +{0,56,1625},{31,52,1625},{0,56,1625},{12,0,2689},{12,0,2689},{12,0,2689},{12,0,2689},{7,63,5},{7,63,5},{7,63,5},{7,44,4},{0,49,289},{0,49,289},{16,63,11002},{15,63,7081},{14,63,4450},{13,63,2738},{15,63,12205},{11,63,4663},{9,63,411},{7,55,1813},{4,63,11643},{0,56,1550},{21,63,3802},{20,63,1845},{18,63,388},{17,62,25},{31,27,6661},{15,63,3525},{11,63,290},{0,57,1217},{29,44,6661}, +{0,57,1217},{14,63,4450},{14,63,4450},{14,63,4450},{13,63,2738},{11,63,3675},{9,63,411},{9,63,411},{8,49,131},{0,63,3354},{0,51,222},{18,63,388},{18,63,388},{18,63,388},{17,61,0},{30,13,2178},{11,63,290},{11,63,290},{4,51,2},{31,31,2178},{4,51,2},{31,47,1201},{24,63,653},{22,63,4},{15,63,1},{31,47,1201},{31,53,1201},{15,63,1},{0,57,1201},{31,53,1201},{0,57,1201},{13,0,2689}, +{13,0,2689},{13,0,2689},{13,0,2689},{8,63,17},{8,63,17},{8,63,17},{8,46,2},{0,53,205},{0,53,205},{17,63,10434},{16,63,7010},{15,63,4693},{14,63,2833},{16,63,11374},{12,63,4462},{10,63,629},{8,56,1358},{6,63,10895},{0,57,1002},{22,63,3334},{20,63,1701},{19,63,505},{18,63,0},{31,31,5829},{16,63,3145},{12,63,405},{1,58,842},{31,44,5829},{1,58,842},{15,63,4693},{15,63,4693},{15,63,4693}, 
+{14,63,2833},{12,63,3906},{10,63,629},{10,63,629},{9,51,131},{1,63,3525},{0,54,218},{19,63,505},{19,63,505},{19,63,505},{18,63,0},{31,15,2178},{12,63,405},{12,63,405},{5,53,2},{31,34,2178},{5,53,2},{31,49,841},{25,63,461},{23,63,1},{18,63,0},{31,49,841},{31,55,841},{18,63,0},{0,58,841},{31,55,841},{0,58,841},{14,0,2689},{14,0,2689},{14,0,2689},{14,0,2689},{9,63,50}, +{9,63,50},{9,63,50},{9,48,2},{0,57,137},{0,57,137},{18,63,9934},{17,63,6962},{16,63,4913},{15,63,2978},{17,63,10683},{13,63,4277},{11,63,915},{9,58,974},{8,63,10078},{0,59,630},{23,63,2934},{22,63,1605},{21,63,650},{19,63,25},{31,34,5082},{18,63,2769},{14,63,521},{2,59,546},{31,46,5082},{2,59,546},{16,63,4913},{16,63,4913},{16,63,4913},{15,63,2978},{14,63,4170},{11,63,915},{11,63,915}, +{10,53,131},{3,63,3789},{1,56,218},{21,63,650},{21,63,650},{21,63,650},{19,63,25},{31,20,2178},{14,63,521},{14,63,521},{6,55,2},{31,37,2178},{6,55,2},{31,52,545},{26,63,305},{25,63,4},{20,63,1},{31,52,545},{30,58,545},{20,63,1},{0,59,545},{30,58,545},{0,59,545},{15,0,2689},{15,0,2689},{15,0,2689},{15,0,2689},{11,63,74},{11,63,74},{11,63,74},{10,49,2},{0,59,85}, +{0,59,85},{19,63,9465},{18,63,6955},{17,63,5233},{16,63,3218},{18,63,10003},{14,63,4183},{13,63,1258},{11,58,645},{9,63,9445},{1,61,409},{24,63,2529},{23,63,1525},{22,63,785},{20,63,100},{31,38,4344},{20,63,2345},{16,63,698},{4,60,289},{31,48,4344},{4,60,289},{17,63,5233},{17,63,5233},{17,63,5233},{16,63,3218},{15,63,4491},{13,63,1258},{13,63,1258},{11,56,120},{5,63,4171},{2,59,213},{22,63,785}, +{22,63,785},{22,63,785},{20,63,100},{31,26,2178},{16,63,698},{16,63,698},{7,57,0},{30,42,2178},{7,57,0},{31,55,288},{28,63,162},{26,63,4},{23,63,1},{31,55,288},{28,62,288},{23,63,1},{0,60,288},{28,62,288},{0,60,288},{16,0,2689},{16,0,2689},{16,0,2689},{16,0,2689},{12,63,113},{12,63,113},{12,63,113},{11,52,4},{0,63,45},{0,63,45},{20,63,9219},{19,63,6985},{18,63,5530}, 
+{17,63,3473},{19,63,9496},{15,63,4186},{14,63,1630},{11,60,404},{11,63,8961},{3,61,277},{25,63,2275},{24,63,1509},{23,63,932},{22,63,208},{30,45,3779},{21,63,2086},{18,63,850},{6,61,129},{31,50,3779},{6,61,129},{18,63,5530},{18,63,5530},{18,63,5530},{17,63,3473},{16,63,4770},{14,63,1630},{14,63,1630},{12,57,129},{8,63,4442},{3,61,213},{23,63,932},{23,63,932},{23,63,932},{22,63,208},{31,31,2178}, +{18,63,850},{18,63,850},{8,59,1},{31,44,2178},{8,59,1},{31,58,128},{29,63,72},{28,63,1},{26,63,0},{31,58,128},{31,60,128},{26,63,0},{0,61,128},{31,60,128},{0,61,128},{17,0,2689},{17,0,2689},{17,0,2689},{17,0,2689},{13,63,170},{13,63,170},{13,63,170},{12,54,2},{2,63,89},{2,63,89},{21,63,8929},{20,63,7062},{19,63,5845},{18,63,3778},{20,63,9188},{17,63,4260},{15,63,2070}, +{13,61,234},{13,63,8680},{4,62,212},{26,63,2089},{25,63,1515},{24,63,1073},{23,63,353},{29,52,3299},{22,63,1913},{20,63,965},{8,62,32},{28,56,3299},{8,62,32},{19,63,5845},{19,63,5845},{19,63,5845},{18,63,3778},{17,63,5124},{15,63,2070},{15,63,2070},{13,59,129},{9,63,4725},{4,62,196},{24,63,1073},{24,63,1073},{24,63,1073},{23,63,353},{31,36,2178},{20,63,965},{20,63,965},{9,61,1},{31,47,2178}, +{9,61,1},{31,60,34},{30,63,18},{29,63,4},{28,63,1},{31,60,34},{31,61,34},{28,63,1},{0,62,32},{31,61,34},{0,62,32},{18,0,2689},{18,0,2689},{18,0,2689},{18,0,2689},{14,63,245},{14,63,245},{14,63,245},{13,56,2},{4,63,164},{4,63,164},{22,63,8707},{21,63,7170},{21,63,6209},{19,63,4133},{21,63,8853},{18,63,4387},{17,63,2548},{14,62,154},{14,63,8388},{6,63,244},{27,63,1971}, +{26,63,1557},{25,63,1250},{24,63,565},{30,52,2904},{23,63,1826},{22,63,1145},{10,63,1},{28,58,2904},{10,63,1},{21,63,6209},{21,63,6209},{21,63,6209},{19,63,4133},{19,63,5460},{17,63,2548},{17,63,2548},{14,61,129},{11,63,5085},{6,63,244},{25,63,1250},{25,63,1250},{25,63,1250},{24,63,565},{30,45,2178},{22,63,1145},{22,63,1145},{10,63,1},{31,50,2178},{10,63,1},{31,63,0},{31,63,0},{31,63,0}, 
+{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{19,0,2689},{19,0,2689},{19,0,2689},{19,0,2689},{15,63,338},{15,63,338},{15,63,338},{14,58,2},{6,63,244},{6,63,244},{23,63,7705},{22,63,6418},{21,63,5633},{20,63,3845},{22,63,7654},{19,63,3874},{18,63,2310},{15,63,53},{15,63,7258},{8,63,317},{27,63,1458},{27,63,1186},{26,63,932},{25,63,425},{30,54,2166}, +{25,63,1398},{23,63,850},{13,63,1},{29,58,2166},{13,63,1},{21,63,5633},{21,63,5633},{21,63,5633},{20,63,3845},{19,63,4830},{18,63,2310},{18,63,2310},{15,62,45},{13,63,4506},{8,63,317},{26,63,932},{26,63,932},{26,63,932},{25,63,425},{31,44,1625},{23,63,850},{23,63,850},{13,63,1},{31,52,1625},{13,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0}, +{0,63,0},{31,63,0},{0,63,0},{20,0,2689},{20,0,2689},{20,0,2689},{20,0,2689},{16,63,449},{16,63,449},{16,63,449},{15,60,4},{8,63,317},{8,63,317},{24,63,6881},{23,63,5814},{22,63,5138},{21,63,3650},{23,63,6713},{20,63,3400},{19,63,2142},{16,63,5},{17,63,6397},{9,63,425},{28,63,1075},{27,63,866},{27,63,697},{26,63,320},{30,56,1601},{26,63,1041},{24,63,653},{15,63,1},{30,58,1601}, +{15,63,1},{22,63,5138},{22,63,5138},{22,63,5138},{21,63,3650},{21,63,4313},{19,63,2142},{19,63,2142},{16,63,5},{14,63,3981},{9,63,425},{27,63,697},{27,63,697},{27,63,697},{26,63,320},{31,47,1201},{24,63,653},{24,63,653},{15,63,1},{31,53,1201},{15,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{21,0,2689}, +{21,0,2689},{21,0,2689},{21,0,2689},{18,63,549},{18,63,549},{18,63,549},{16,62,1},{9,63,425},{9,63,425},{24,63,6097},{24,63,5285},{23,63,4693},{22,63,3473},{23,63,5833},{21,63,3067},{20,63,1988},{17,63,10},{18,63,5571},{11,63,541},{28,63,739},{28,63,595},{27,63,505},{27,63,233},{31,54,1121},{26,63,737},{25,63,461},{18,63,0},{29,60,1121},{18,63,0},{23,63,4693},{23,63,4693},{23,63,4693}, 
+{22,63,3473},{22,63,3845},{20,63,1988},{20,63,1988},{17,63,10},{15,63,3542},{11,63,541},{27,63,505},{27,63,505},{27,63,505},{27,63,233},{31,49,841},{25,63,461},{25,63,461},{18,63,0},{31,55,841},{18,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{22,0,2689},{22,0,2689},{22,0,2689},{22,0,2689},{19,63,666}, +{19,63,666},{19,63,666},{17,63,10},{11,63,541},{11,63,541},{25,63,5427},{24,63,4757},{24,63,4273},{23,63,3314},{24,63,5002},{22,63,2788},{21,63,1898},{18,63,65},{20,63,4714},{13,63,698},{29,63,489},{28,63,387},{28,63,306},{28,63,162},{31,56,726},{27,63,482},{26,63,305},{20,63,1},{30,60,726},{20,63,1},{24,63,4273},{24,63,4273},{24,63,4273},{23,63,3314},{22,63,3429},{21,63,1898},{21,63,1898}, +{18,63,65},{17,63,3213},{13,63,698},{28,63,306},{28,63,306},{28,63,306},{28,63,162},{31,52,545},{26,63,305},{26,63,305},{20,63,1},{30,58,545},{20,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{23,0,2689},{23,0,2689},{23,0,2689},{23,0,2689},{20,63,832},{20,63,832},{20,63,832},{18,63,65},{13,63,698}, +{13,63,698},{4,63,33740},{0,63,5184},{0,45,446},{0,43,4126},{3,63,45594},{0,59,24105},{0,42,8295},{0,37,24703},{0,44,64117},{0,34,38807},{2,63,9704},{0,61,2866},{0,44,386},{0,37,3205},{14,4,18065},{0,38,13219},{0,34,6147},{0,24,13496},{25,0,18065},{0,24,13496},{0,31,1},{0,31,1},{0,31,1},{0,19,1},{0,16,1105},{0,15,584},{0,15,584},{0,9,605},{0,8,1273},{0,7,750},{0,31,1}, +{0,31,1},{0,31,1},{0,19,1},{4,2,1105},{0,15,584},{0,15,584},{0,9,605},{8,0,1105},{0,9,605},{21,5,9248},{0,61,2866},{0,44,386},{0,37,3205},{21,5,9248},{31,8,9248},{0,37,3205},{0,28,9256},{31,8,9248},{0,28,9256},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{4,63,38380},{1,63,6614},{0,47,261}, 
+{0,45,3769},{4,63,50747},{0,63,24961},{0,44,8337},{0,39,25658},{0,46,65535},{0,36,41267},{2,63,10152},{0,63,2624},{0,46,221},{0,41,2929},{16,0,19334},{0,42,13795},{0,36,6237},{0,26,14121},{26,1,19334},{0,26,14121},{0,36,1},{0,36,1},{0,36,1},{0,22,0},{0,18,1513},{0,17,769},{0,17,769},{0,9,845},{0,9,1742},{0,9,1014},{0,36,1},{0,36,1},{0,36,1},{0,22,0},{5,1,1513}, +{0,17,769},{0,17,769},{0,9,845},{9,0,1513},{0,9,845},{24,0,9248},{0,63,2624},{0,46,221},{0,41,2929},{24,0,9248},{31,11,9248},{0,41,2929},{0,30,9256},{31,11,9248},{0,30,9256},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{4,63,43788},{1,63,8598},{0,49,121},{0,47,3430},{4,63,56155},{0,63,26241},{0,46,8415}, +{0,41,26663},{0,46,65535},{0,36,43795},{3,63,10706},{0,63,2624},{0,48,116},{0,43,2650},{17,0,20689},{0,44,14404},{0,38,6363},{0,28,14796},{26,3,20689},{0,28,14796},{0,42,0},{0,42,0},{0,42,0},{0,25,0},{0,21,1985},{0,19,1009},{0,19,1009},{0,11,1090},{0,11,2281},{0,11,1346},{0,42,0},{0,42,0},{0,42,0},{0,25,0},{6,1,1985},{0,19,1009},{0,19,1009},{0,11,1090},{9,2,1985}, +{0,11,1090},{25,2,9248},{0,63,2624},{0,48,116},{0,43,2650},{25,2,9248},{27,19,9248},{0,43,2650},{0,32,9250},{27,19,9248},{0,32,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{5,63,49566},{1,63,11350},{0,51,36},{0,50,3105},{4,63,62331},{0,63,28289},{0,48,8549},{0,43,27718},{0,49,65535},{0,38,46395},{4,63,11395}, +{0,63,2880},{0,51,36},{0,45,2389},{17,4,22129},{0,46,15067},{0,40,6525},{0,28,15500},{27,4,22129},{0,28,15500},{0,47,0},{0,47,0},{0,47,0},{0,28,1},{0,24,2521},{0,21,1285},{0,21,1285},{0,13,1385},{0,12,2905},{0,11,1714},{0,47,0},{0,47,0},{0,47,0},{0,28,1},{7,0,2521},{0,21,1285},{0,21,1285},{0,13,1385},{11,1,2521},{0,13,1385},{26,4,9248},{1,63,2866},{0,51,36}, 
+{0,45,2389},{26,4,9248},{28,21,9248},{0,45,2389},{0,34,9250},{28,21,9248},{0,34,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{5,63,56892},{2,63,15166},{0,53,5},{0,52,2726},{5,63,65535},{0,63,31511},{0,51,8735},{0,45,28953},{0,53,65535},{0,40,49505},{4,63,12385},{1,63,3380},{0,53,1},{0,47,2120},{19,1,23851}, +{0,49,15876},{0,42,6761},{0,30,16331},{29,4,23851},{0,30,16331},{0,53,0},{0,53,0},{0,53,0},{0,32,1},{0,27,3200},{0,23,1640},{0,23,1640},{0,15,1769},{0,13,3689},{0,13,2169},{0,53,0},{0,53,0},{0,53,0},{0,32,1},{7,3,3200},{0,23,1640},{0,23,1640},{0,15,1769},{13,0,3200},{0,15,1769},{28,3,9248},{3,63,3204},{0,53,1},{0,47,2120},{28,3,9248},{31,21,9248},{0,47,2120}, +{0,36,9256},{31,21,9248},{0,36,9256},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{6,63,63870},{2,63,19230},{0,55,30},{0,54,2429},{5,63,65535},{1,63,35016},{0,53,8921},{0,47,30108},{0,55,65535},{0,42,52385},{5,63,13379},{2,63,4026},{0,56,18},{0,50,1885},{20,1,25472},{0,53,16616},{0,46,6989},{0,33,17105},{29,6,25472}, +{0,33,17105},{0,58,0},{0,58,0},{0,58,0},{0,35,0},{0,29,3874},{0,25,1994},{0,25,1994},{0,16,2129},{0,15,4454},{0,15,2637},{0,58,0},{0,58,0},{0,58,0},{0,35,0},{8,2,3872},{0,25,1994},{0,25,1994},{0,16,2129},{12,3,3872},{0,16,2129},{29,5,9248},{5,63,3589},{1,55,1},{0,50,1885},{29,5,9248},{31,24,9248},{0,50,1885},{0,38,9256},{31,24,9248},{0,38,9256},{0,0,0}, +{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{6,63,65535},{2,63,24002},{0,57,109},{0,56,2154},{6,63,65535},{1,63,38780},{0,55,8815},{0,48,30807},{0,57,65535},{0,44,54965},{6,63,14345},{2,63,4766},{1,58,54},{0,52,1670},{20,5,26744},{0,55,17059},{0,46,7005},{0,33,17609},{31,6,26744},{0,33,17609},{0,63,5},{0,63,5},{0,63,5}, 
+{0,38,4},{0,32,4420},{0,29,2210},{0,29,2210},{0,18,2378},{0,17,5115},{0,16,2981},{0,63,5},{0,63,5},{0,63,5},{0,38,4},{9,2,4418},{0,29,2210},{0,29,2210},{0,18,2378},{16,0,4418},{0,18,2378},{30,7,9248},{8,63,3904},{2,57,1},{0,52,1666},{30,7,9248},{28,31,9248},{0,52,1666},{0,40,9256},{28,31,9248},{0,40,9256},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,1,1}, +{0,1,1},{0,1,1},{0,1,0},{0,1,1},{0,1,1},{7,63,65535},{3,63,29032},{0,60,314},{0,58,1989},{6,63,65535},{2,63,42151},{0,57,7781},{0,50,30102},{0,59,65535},{0,46,56345},{7,63,14819},{3,63,5416},{2,60,54},{0,54,1565},{23,0,26744},{0,59,16547},{0,49,6177},{0,37,17105},{31,9,26744},{0,37,17105},{1,63,84},{1,63,84},{1,63,84},{1,40,68},{0,38,4418},{0,31,1972},{0,31,1972}, +{0,20,2129},{0,19,5364},{0,18,2915},{1,63,20},{1,63,20},{1,63,20},{1,40,4},{11,1,4418},{0,31,1972},{0,31,1972},{0,20,2129},{17,2,4418},{0,20,2129},{31,9,9248},{8,63,4160},{3,59,1},{0,54,1465},{31,9,9248},{28,34,9248},{0,54,1465},{0,42,9256},{28,34,9248},{0,42,9256},{1,0,68},{1,0,68},{1,0,68},{1,0,68},{0,7,0},{0,7,0},{0,7,0},{0,4,0},{0,3,25}, +{0,3,25},{7,63,65535},{3,63,35719},{1,62,657},{0,60,1985},{7,63,65535},{2,63,46660},{0,59,6696},{0,52,29368},{0,63,65535},{0,49,58301},{9,63,15473},{5,63,6173},{3,62,45},{1,56,1566},{23,6,26744},{0,63,15992},{0,53,5318},{0,39,16547},{30,14,26744},{0,39,16547},{2,63,329},{2,63,329},{2,63,329},{1,44,186},{0,44,4418},{0,36,1709},{0,36,1709},{0,22,1872},{0,23,5702},{0,20,2885},{3,63,34}, +{3,63,34},{3,63,34},{2,42,10},{13,0,4418},{0,36,1709},{0,36,1709},{0,22,1872},{20,2,4418},{0,22,1872},{31,15,9248},{10,63,4570},{4,61,4},{0,56,1268},{31,15,9248},{31,34,9248},{0,56,1268},{0,45,9250},{31,34,9248},{0,45,9250},{1,0,185},{1,0,185},{1,0,185},{1,0,185},{0,13,0},{0,13,0},{0,13,0},{0,8,1},{0,6,97},{0,6,97},{7,63,65535},{4,63,40786},{1,63,1122}, 
+{0,62,2034},{7,63,65535},{2,63,49800},{0,61,5634},{0,54,27965},{0,63,65535},{0,49,58553},{10,63,15531},{6,63,6593},{4,63,61},{2,58,1482},{25,4,26259},{0,63,15284},{0,55,4484},{0,41,15722},{28,19,26259},{0,41,15722},{2,63,633},{2,63,633},{2,63,633},{2,46,378},{0,49,4418},{0,40,1489},{0,40,1489},{0,24,1665},{0,25,6029},{0,22,2897},{4,63,61},{4,63,61},{4,63,61},{3,44,10},{14,2,4418}, +{0,40,1489},{0,40,1489},{0,24,1665},{24,0,4418},{0,24,1665},{31,19,8978},{11,63,4744},{5,63,0},{0,58,1025},{31,19,8978},{30,38,8978},{0,58,1025},{0,46,8986},{30,38,8978},{0,46,8986},{2,0,377},{2,0,377},{2,0,377},{2,0,377},{0,18,0},{0,18,0},{0,18,0},{0,11,0},{0,8,193},{0,8,193},{8,63,65535},{4,63,40898},{1,63,1890},{1,62,2029},{7,63,65535},{3,63,47871},{0,61,4194}, +{0,56,24760},{0,63,65535},{0,49,55881},{11,63,14325},{8,63,6051},{5,63,100},{3,58,1197},{27,1,24371},{0,63,13716},{0,57,3402},{0,41,13914},{30,19,24371},{0,41,13914},{3,63,1058},{3,63,1058},{3,63,1058},{2,49,618},{0,54,4418},{0,44,1285},{0,44,1285},{0,26,1476},{0,27,6410},{0,25,2937},{5,63,100},{5,63,100},{5,63,100},{4,46,5},{16,0,4418},{0,44,1285},{0,44,1285},{0,26,1476},{26,1,4418}, +{0,26,1476},{31,22,7938},{13,63,4225},{7,63,4},{0,60,628},{31,22,7938},{28,42,7938},{0,60,628},{0,47,7946},{28,42,7938},{0,47,7946},{2,0,617},{2,0,617},{2,0,617},{2,0,617},{0,23,1},{0,23,1},{0,23,1},{0,14,0},{0,11,320},{0,11,320},{8,63,65535},{4,63,41266},{1,63,2914},{1,62,2109},{7,63,65535},{3,63,46175},{0,61,3010},{0,56,21624},{0,63,65535},{0,51,53461},{12,63,13140}, +{8,63,5571},{6,63,157},{4,59,932},{28,1,22568},{0,63,12404},{0,57,2474},{0,43,12155},{30,21,22568},{0,43,12155},{4,63,1630},{4,63,1630},{4,63,1630},{3,51,938},{0,59,4420},{0,46,1117},{0,46,1117},{0,30,1280},{0,31,6822},{0,27,3009},{6,63,157},{6,63,157},{6,63,157},{5,48,4},{17,2,4418},{0,46,1117},{0,46,1117},{0,30,1280},{27,3,4418},{0,30,1280},{30,28,6962},{14,63,3709},{8,63,1}, 
+{0,60,340},{30,28,6962},{31,40,6962},{0,60,340},{0,48,6964},{31,40,6962},{0,48,6964},{3,0,937},{3,0,937},{3,0,937},{3,0,937},{0,29,0},{0,29,0},{0,29,0},{0,17,0},{0,13,482},{0,13,482},{9,63,65535},{5,63,41956},{2,63,4257},{1,62,2505},{7,63,65535},{3,63,44573},{0,62,1944},{0,56,18402},{0,63,65535},{0,51,50815},{13,63,11930},{10,63,5125},{7,63,250},{5,59,701},{28,5,20642}, +{1,63,11209},{0,59,1634},{0,45,10346},{31,22,20642},{0,45,10346},{4,63,2350},{4,63,2350},{4,63,2350},{3,54,1361},{0,63,4450},{0,51,914},{0,51,914},{0,32,1097},{0,34,7357},{0,29,3131},{7,63,250},{7,63,250},{7,63,250},{6,50,10},{19,1,4418},{0,51,914},{0,51,914},{0,32,1097},{29,4,4418},{0,32,1097},{31,27,5941},{15,63,3176},{10,63,0},{0,62,116},{31,27,5941},{31,42,5941},{0,62,116}, +{0,49,5945},{31,42,5941},{0,49,5945},{3,0,1360},{3,0,1360},{3,0,1360},{3,0,1360},{0,34,1},{0,34,1},{0,34,1},{0,21,1},{0,17,706},{0,17,706},{9,63,65535},{5,63,42660},{2,63,5617},{1,63,3088},{8,63,65535},{3,63,43421},{0,62,1240},{0,56,15810},{0,63,65535},{0,51,48735},{13,63,10922},{11,63,4753},{8,63,360},{6,61,509},{29,5,19021},{2,63,10246},{0,61,1088},{0,47,8885},{31,24,19021}, +{0,47,8885},{5,63,3131},{5,63,3131},{5,63,3131},{4,56,1818},{1,63,4580},{0,55,754},{0,55,754},{0,35,928},{0,36,7846},{0,31,3281},{8,63,360},{8,63,360},{8,63,360},{7,52,10},{20,3,4418},{0,55,754},{0,55,754},{0,35,928},{30,6,4418},{0,35,928},{31,30,5101},{17,63,2777},{11,63,9},{0,62,20},{31,30,5101},{31,43,5101},{0,62,20},{0,50,5105},{31,43,5101},{0,50,5105},{4,0,1818}, +{4,0,1818},{4,0,1818},{4,0,1818},{0,40,0},{0,40,0},{0,40,0},{0,24,0},{0,17,914},{0,17,914},{9,63,65535},{5,63,43620},{2,63,7233},{1,63,3920},{8,63,65535},{3,63,42525},{0,63,738},{0,58,13413},{0,63,65535},{0,51,46911},{14,63,9978},{11,63,4449},{10,63,452},{8,60,344},{31,2,17485},{3,63,9369},{0,61,704},{0,48,7498},{29,29,17485},{0,48,7498},{6,63,4058},{6,63,4058},{6,63,4058}, 
+{4,59,2315},{2,63,4874},{0,59,610},{0,59,610},{0,37,769},{0,38,8389},{0,32,3497},{10,63,452},{10,63,452},{10,63,452},{8,54,5},{21,5,4418},{0,59,610},{0,59,610},{0,37,769},{31,8,4418},{0,37,769},{31,32,4325},{18,63,2357},{13,63,0},{1,63,0},{31,32,4325},{31,45,4325},{1,63,0},{0,51,4329},{31,45,4325},{0,51,4329},{4,0,2314},{4,0,2314},{4,0,2314},{4,0,2314},{0,45,0}, +{0,45,0},{0,45,0},{0,27,0},{0,19,1184},{0,19,1184},{9,63,65535},{5,63,44836},{2,63,9105},{2,63,4905},{8,63,65535},{3,63,41885},{0,63,482},{0,58,11125},{0,63,65535},{0,51,45343},{15,63,9102},{13,63,4161},{11,63,557},{9,61,212},{30,9,16034},{5,63,8602},{0,63,482},{0,48,6250},{29,31,16034},{0,48,6250},{6,63,5066},{6,63,5066},{6,63,5066},{5,61,2907},{2,63,5322},{0,63,482},{0,63,482}, +{0,39,628},{0,42,8965},{0,36,3717},{11,63,557},{11,63,557},{11,63,557},{9,56,5},{24,0,4418},{0,63,482},{0,63,482},{0,39,628},{31,11,4418},{0,39,628},{31,35,3613},{20,63,1940},{15,63,4},{4,63,1},{31,35,3613},{28,50,3613},{4,63,1},{0,52,3617},{28,50,3613},{0,52,3617},{5,0,2906},{5,0,2906},{5,0,2906},{5,0,2906},{0,50,0},{0,50,0},{0,50,0},{0,30,1},{0,23,1480}, +{0,23,1480},{9,63,65535},{5,63,46510},{3,63,11362},{2,63,6237},{9,63,65535},{3,63,41471},{0,63,500},{0,58,8857},{0,63,65535},{0,53,43697},{16,63,8139},{14,63,3853},{12,63,680},{10,62,89},{30,13,14504},{8,63,7667},{0,63,500},{0,50,4961},{31,31,14504},{0,50,4961},{7,63,6337},{7,63,6337},{7,63,6337},{5,63,3642},{3,63,5962},{0,63,500},{0,63,500},{0,41,493},{0,44,9656},{0,38,3995},{12,63,680}, +{12,63,680},{12,63,680},{10,59,10},{24,6,4418},{0,63,500},{0,63,500},{0,41,493},{29,17,4418},{0,41,493},{31,38,2888},{20,63,1517},{16,63,1},{7,63,1},{31,38,2888},{31,48,2888},{7,63,1},{0,53,2896},{31,48,2888},{0,53,2896},{5,0,3617},{5,0,3617},{5,0,3617},{5,0,3617},{0,56,0},{0,56,0},{0,56,0},{0,34,1},{0,25,1853},{0,25,1853},{10,63,65535},{6,63,48082},{3,63,13570}, 
+{2,63,7693},{9,63,65535},{3,63,41375},{0,63,788},{0,58,7113},{0,63,65535},{0,53,42465},{17,63,7409},{15,63,3625},{13,63,821},{11,62,34},{31,13,13235},{8,63,6899},{2,63,628},{0,52,3956},{30,34,13235},{0,52,3956},{7,63,7681},{7,63,7681},{7,63,7681},{6,63,4437},{4,63,6659},{1,63,738},{1,63,738},{0,43,394},{0,46,10331},{0,40,4289},{13,63,821},{13,63,821},{13,63,821},{11,61,10},{27,1,4418}, +{2,63,628},{2,63,628},{0,43,394},{30,19,4418},{0,43,394},{31,41,2312},{21,63,1217},{18,63,1},{9,63,0},{31,41,2312},{29,52,2312},{9,63,0},{0,54,2320},{29,52,2312},{0,54,2320},{6,0,4337},{6,0,4337},{6,0,4337},{6,0,4337},{0,61,1},{0,61,1},{0,61,1},{0,37,0},{0,27,2225},{0,27,2225},{10,63,65535},{6,63,49890},{3,63,16034},{2,63,9405},{9,63,65535},{4,63,41526},{0,63,1332}, +{0,59,5520},{0,63,65535},{0,53,41489},{18,63,6747},{16,63,3459},{14,63,980},{12,63,5},{30,20,12051},{10,63,6275},{4,63,801},{0,53,3089},{31,35,12051},{0,53,3089},{8,63,9062},{8,63,9062},{8,63,9062},{7,63,5410},{4,63,7555},{1,63,1154},{1,63,1154},{0,46,306},{0,49,11046},{0,42,4619},{14,63,980},{14,63,980},{14,63,980},{12,62,5},{28,3,4418},{4,63,801},{4,63,801},{0,46,306},{31,21,4418}, +{0,46,306},{30,47,1800},{23,63,949},{19,63,4},{12,63,1},{30,47,1800},{31,51,1800},{12,63,1},{0,55,1808},{31,51,1800},{0,55,1808},{6,0,5105},{6,0,5105},{6,0,5105},{6,0,5105},{0,63,36},{0,63,36},{0,63,36},{0,40,0},{0,29,2633},{0,29,2633},{10,63,65535},{6,63,51954},{3,63,18754},{3,63,11330},{9,63,65535},{4,63,41798},{1,63,2082},{0,60,4084},{0,63,65535},{0,53,40769},{19,63,6153}, +{17,63,3297},{16,63,1154},{13,63,20},{31,20,10952},{11,63,5708},{6,63,965},{0,54,2281},{31,37,10952},{0,54,2281},{9,63,10545},{9,63,10545},{9,63,10545},{7,63,6482},{5,63,8549},{2,63,1716},{2,63,1716},{0,48,208},{0,53,11786},{0,44,4985},{16,63,1154},{16,63,1154},{16,63,1154},{13,63,20},{29,5,4418},{6,63,965},{6,63,965},{0,48,208},{31,24,4418},{0,48,208},{31,46,1352},{23,63,725},{21,63,0}, 
+{14,63,1},{31,46,1352},{30,54,1352},{14,63,1},{0,56,1360},{30,54,1352},{0,56,1360},{7,0,5953},{7,0,5953},{7,0,5953},{7,0,5953},{1,63,145},{1,63,145},{1,63,145},{0,43,1},{0,31,3077},{0,31,3077},{10,63,65535},{6,63,54582},{4,63,21886},{3,63,13652},{9,63,65535},{4,63,42410},{1,63,3144},{0,60,2770},{0,63,65535},{0,55,40127},{19,63,5649},{18,63,3157},{17,63,1325},{15,63,74},{31,24,9818}, +{13,63,5241},{8,63,1108},{0,56,1538},{29,42,9818},{0,56,1538},{10,63,12376},{10,63,12376},{10,63,12376},{8,63,7844},{6,63,9861},{3,63,2576},{3,63,2576},{0,50,145},{0,55,12659},{0,46,5441},{17,63,1325},{17,63,1325},{17,63,1325},{15,63,74},{31,4,4418},{8,63,1108},{8,63,1108},{0,50,145},{30,29,4418},{0,50,145},{31,49,925},{25,63,505},{23,63,1},{17,63,1},{31,49,925},{30,56,925},{17,63,1}, +{0,58,929},{30,56,925},{0,58,929},{7,0,6970},{7,0,6970},{7,0,6970},{7,0,6970},{1,63,388},{1,63,388},{1,63,388},{0,47,0},{0,34,3625},{0,34,3625},{10,63,65535},{7,63,57052},{4,63,24910},{3,63,15988},{9,63,65535},{4,63,43226},{1,63,4360},{0,61,1833},{0,63,65535},{0,55,39743},{21,63,5202},{19,63,3073},{18,63,1508},{16,63,180},{31,27,8901},{14,63,4814},{10,63,1300},{0,57,1021},{29,44,8901}, +{0,57,1021},{10,63,14136},{10,63,14136},{10,63,14136},{8,63,9252},{7,63,11195},{3,63,3536},{3,63,3536},{0,53,89},{0,59,13491},{0,49,5921},{18,63,1508},{18,63,1508},{18,63,1508},{16,63,180},{30,13,4418},{10,63,1300},{10,63,1300},{0,53,89},{31,31,4418},{0,53,89},{31,51,613},{26,63,337},{24,63,1},{20,63,1},{31,51,613},{31,56,613},{20,63,1},{0,59,617},{31,56,613},{0,59,617},{8,0,7956}, +{8,0,7956},{8,0,7956},{8,0,7956},{2,63,697},{2,63,697},{2,63,697},{0,50,0},{0,36,4141},{0,36,4141},{11,63,65535},{7,63,59708},{4,63,28190},{3,63,18580},{10,63,65535},{5,63,44295},{1,63,5832},{0,61,1081},{0,63,65535},{0,55,39615},{22,63,4818},{20,63,3017},{19,63,1709},{17,63,325},{31,31,8069},{15,63,4473},{11,63,1514},{0,58,593},{31,44,8069},{0,58,593},{11,63,15965},{11,63,15965},{11,63,15965}, 
+{9,63,10757},{7,63,12667},{4,63,4662},{4,63,4662},{0,55,50},{0,61,14340},{0,51,6395},{19,63,1709},{19,63,1709},{19,63,1709},{17,63,325},{31,15,4418},{11,63,1514},{11,63,1514},{0,55,50},{31,34,4418},{0,55,50},{31,54,365},{27,63,205},{26,63,1},{22,63,1},{31,54,365},{31,58,365},{22,63,1},{0,60,369},{31,58,365},{0,60,369},{8,0,8980},{8,0,8980},{8,0,8980},{8,0,8980},{2,63,1097}, +{2,63,1097},{2,63,1097},{0,53,0},{0,40,4689},{0,40,4689},{11,63,65535},{8,63,58981},{5,63,29551},{4,63,19751},{10,63,65535},{5,63,43215},{2,63,6910},{1,62,614},{0,63,65535},{0,57,34909},{23,63,4502},{21,63,3011},{20,63,1973},{18,63,520},{31,34,7322},{17,63,4242},{13,63,1769},{0,60,274},{31,46,7322},{0,60,274},{12,63,16739},{12,63,16739},{12,63,16739},{10,63,11492},{8,63,13636},{5,63,5510},{5,63,5510}, +{0,58,53},{0,63,14139},{0,53,5981},{20,63,1973},{20,63,1973},{20,63,1973},{18,63,520},{31,20,4418},{13,63,1769},{13,63,1769},{0,58,17},{31,37,4418},{0,58,17},{31,57,181},{28,63,97},{27,63,4},{25,63,0},{31,57,181},{31,59,181},{25,63,0},{0,61,185},{31,59,181},{0,61,185},{9,0,9248},{9,0,9248},{9,0,9248},{9,0,9248},{3,63,1348},{3,63,1348},{3,63,1348},{1,55,4},{0,42,4545}, +{0,42,4545},{12,63,65535},{9,63,57270},{6,63,30345},{5,63,20521},{11,63,65535},{6,63,41449},{3,63,8015},{2,62,242},{0,63,65535},{0,57,28330},{24,63,4181},{22,63,3053},{21,63,2248},{20,63,772},{31,38,6584},{20,63,3941},{15,63,2041},{0,61,77},{31,48,6584},{0,61,77},{13,63,17289},{13,63,17289},{13,63,17289},{11,63,12050},{10,63,14315},{7,63,6389},{7,63,6389},{2,60,41},{0,63,13860},{0,55,5252},{21,63,2248}, +{21,63,2248},{21,63,2248},{20,63,772},{31,26,4418},{15,63,2041},{15,63,2041},{0,60,4},{30,42,4418},{0,60,4},{31,60,50},{30,63,34},{29,63,0},{28,63,1},{31,60,50},{31,61,50},{28,63,1},{0,62,52},{31,61,50},{0,62,52},{10,0,9250},{10,0,9250},{10,0,9250},{10,0,9250},{4,63,1549},{4,63,1549},{4,63,1549},{2,57,2},{0,46,4141},{0,46,4141},{13,63,65535},{9,63,55894},{7,63,31068}, 
+{6,63,21256},{12,63,65535},{8,63,39740},{4,63,9073},{3,63,90},{0,63,65535},{0,59,23356},{24,63,3973},{23,63,3125},{23,63,2500},{21,63,1037},{30,45,6019},{20,63,3701},{17,63,2340},{0,63,4},{31,50,6019},{0,63,4},{14,63,17796},{14,63,17796},{14,63,17796},{12,63,12625},{11,63,14957},{8,63,7139},{8,63,7139},{3,62,41},{0,63,14020},{0,59,4652},{23,63,2500},{23,63,2500},{23,63,2500},{21,63,1037},{31,31,4418}, +{17,63,2340},{17,63,2340},{1,62,4},{31,44,4418},{1,62,4},{31,62,4},{31,63,4},{31,63,4},{30,63,1},{31,62,4},{31,63,4},{30,63,1},{0,63,4},{31,63,4},{0,63,4},{11,0,9250},{11,0,9250},{11,0,9250},{11,0,9250},{6,63,1765},{6,63,1765},{6,63,1765},{3,59,2},{0,51,3816},{0,51,3816},{13,63,65535},{10,63,53236},{8,63,30487},{7,63,21105},{13,63,65535},{8,63,37332},{5,63,9177}, +{4,63,36},{1,63,65535},{0,59,18680},{25,63,3443},{24,63,2741},{23,63,2248},{22,63,980},{31,44,5163},{21,63,3218},{20,63,2117},{3,63,1},{29,54,5163},{3,63,1},{15,63,17289},{15,63,17289},{15,63,17289},{13,63,12512},{12,63,14328},{9,63,7149},{9,63,7149},{4,63,20},{0,63,13376},{0,59,3944},{23,63,2248},{23,63,2248},{23,63,2248},{22,63,980},{31,34,3872},{20,63,2117},{20,63,2117},{3,63,1},{31,46,3872}, +{3,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{12,0,9248},{12,0,9248},{12,0,9248},{12,0,9248},{7,63,1972},{7,63,1972},{7,63,1972},{4,61,5},{0,55,3488},{0,55,3488},{14,63,65535},{11,63,50266},{9,63,29322},{8,63,20567},{13,63,65535},{9,63,35025},{6,63,8985},{5,63,21},{2,63,65535},{0,59,14712},{26,63,2873}, +{25,63,2283},{24,63,1825},{22,63,820},{29,52,4267},{22,63,2657},{20,63,1685},{5,63,1},{28,56,4267},{5,63,1},{16,63,16427},{16,63,16427},{16,63,16427},{14,63,12185},{13,63,13442},{10,63,6915},{10,63,6915},{5,63,5},{1,63,12539},{0,61,3314},{24,63,1825},{24,63,1825},{24,63,1825},{22,63,820},{31,37,3200},{20,63,1685},{20,63,1685},{5,63,1},{27,52,3200},{5,63,1},{31,63,0},{31,63,0},{31,63,0}, 
+{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{13,0,9248},{13,0,9248},{13,0,9248},{13,0,9248},{8,63,2250},{8,63,2250},{8,63,2250},{5,63,5},{0,57,3170},{0,57,3170},{15,63,65535},{12,63,47239},{10,63,28065},{9,63,20104},{14,63,65535},{10,63,32574},{7,63,8839},{6,63,54},{3,63,64890},{0,61,10964},{26,63,2252},{25,63,1806},{25,63,1445},{23,63,650},{29,54,3361}, +{23,63,2091},{21,63,1322},{8,63,0},{29,56,3361},{8,63,0},{17,63,15584},{17,63,15584},{17,63,15584},{15,63,11846},{14,63,12522},{11,63,6697},{11,63,6697},{6,63,50},{3,63,11669},{0,63,2834},{25,63,1445},{25,63,1445},{25,63,1445},{23,63,650},{31,40,2521},{21,63,1322},{21,63,1322},{8,63,0},{31,49,2521},{8,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0}, +{0,63,0},{31,63,0},{0,63,0},{14,0,9250},{14,0,9250},{14,0,9250},{14,0,9250},{9,63,2525},{9,63,2525},{9,63,2525},{6,63,50},{0,63,2834},{0,63,2834},{16,63,65535},{13,63,44559},{11,63,27000},{10,63,19705},{15,63,64179},{11,63,30525},{8,63,8677},{7,63,149},{3,63,60570},{0,61,8308},{27,63,1782},{26,63,1416},{25,63,1157},{24,63,520},{31,49,2646},{23,63,1691},{22,63,1040},{11,63,1},{30,56,2646}, +{11,63,1},{18,63,14889},{18,63,14889},{18,63,14889},{16,63,11585},{15,63,11778},{12,63,6555},{12,63,6555},{7,63,145},{3,63,11061},{0,63,2610},{25,63,1157},{25,63,1157},{25,63,1157},{24,63,520},{31,42,1985},{22,63,1040},{22,63,1040},{11,63,1},{30,52,1985},{11,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{15,0,9250}, +{15,0,9250},{15,0,9250},{15,0,9250},{10,63,2792},{10,63,2792},{10,63,2792},{7,63,145},{0,63,2610},{0,63,2610},{16,63,63318},{14,63,42019},{12,63,25930},{11,63,19324},{16,63,59178},{11,63,28845},{9,63,8605},{8,63,276},{6,63,56253},{0,61,6420},{27,63,1366},{27,63,1094},{26,63,872},{25,63,397},{31,51,2017},{25,63,1298},{23,63,794},{13,63,1},{31,56,2017},{13,63,1},{19,63,14244},{19,63,14244},{19,63,14244}, 
+{17,63,11312},{16,63,11037},{13,63,6429},{13,63,6429},{8,63,260},{6,63,10457},{0,63,2642},{26,63,872},{26,63,872},{26,63,872},{25,63,397},{31,45,1513},{23,63,794},{23,63,794},{13,63,1},{31,52,1513},{13,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{16,0,9248},{16,0,9248},{16,0,9248},{16,0,9248},{12,63,3074}, +{12,63,3074},{12,63,3074},{8,63,260},{0,63,2642},{0,63,2642},{17,63,58848},{15,63,39619},{13,63,24975},{12,63,19007},{16,63,54474},{13,63,27057},{10,63,8569},{9,63,461},{8,63,51302},{0,63,5046},{28,63,979},{27,63,806},{27,63,637},{26,63,292},{30,56,1473},{26,63,953},{24,63,605},{16,63,0},{30,58,1473},{16,63,0},{19,63,13604},{19,63,13604},{19,63,13604},{18,63,11057},{16,63,10429},{14,63,6339},{14,63,6339}, +{10,63,424},{8,63,9713},{1,63,2900},{27,63,637},{27,63,637},{27,63,637},{26,63,292},{31,48,1105},{24,63,605},{24,63,605},{16,63,0},{31,54,1105},{16,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{17,0,9248},{17,0,9248},{17,0,9248},{17,0,9248},{12,63,3330},{12,63,3330},{12,63,3330},{10,63,424},{1,63,2900}, +{1,63,2900}, diff --git a/thirdparty/basisu/transcoder/basisu_transcoder_tables_bc7_m5_alpha.inc b/thirdparty/basisu/transcoder/basisu_transcoder_tables_bc7_m5_alpha.inc new file mode 100644 index 000000000..666985292 --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_transcoder_tables_bc7_m5_alpha.inc @@ -0,0 +1,49 @@ +{7,0,47},{7,0,44},{2,0,15},{2,0,12},{8,2,48},{0,0,0},{15,0,27},{15,1,24},{9,0,7},{15,1,24},{23,3,96},{6,0,3},{24,7,27},{24,7,24},{18,3,6},{18,5,4},{24,5,16},{21,1,6},{32,15,27},{32,15,24},{26,11,6},{26,13,4},{32,13,16},{22,3,1},{41,24,27},{41,24,24},{35,20,6},{35,22,4},{41,22,16},{31,12,1},{49,32,27}, 
+{49,32,24},{43,28,6},{43,30,4},{49,30,16},{39,20,1},{57,40,27},{57,40,24},{51,36,6},{51,38,4},{57,38,16},{47,28,1},{65,48,27},{65,48,24},{59,44,6},{59,46,4},{65,46,16},{55,36,1},{74,57,27},{74,57,24},{68,53,6},{68,55,4},{74,55,16},{64,45,1},{82,65,27},{82,65,24},{76,61,6},{76,63,4},{82,63,16},{72,53,1},{90,73,27},{90,73,24},{84,69,6}, +{84,71,4},{90,71,16},{80,61,1},{98,81,27},{98,81,24},{92,77,6},{92,79,4},{98,79,16},{88,69,1},{107,90,27},{107,90,24},{101,86,6},{101,88,4},{107,88,16},{97,78,1},{115,98,27},{115,98,24},{109,94,6},{109,96,4},{115,96,16},{105,86,1},{123,106,27},{123,106,24},{117,102,6},{117,104,4},{123,104,16},{113,94,1},{131,114,27},{131,114,24},{125,110,6},{125,112,4},{131,112,16}, +{121,102,1},{140,123,27},{140,123,24},{134,119,6},{134,121,4},{140,121,16},{130,111,1},{148,131,27},{148,131,24},{142,127,6},{142,129,4},{148,129,16},{138,119,1},{156,139,27},{156,139,24},{150,135,6},{150,137,4},{156,137,16},{146,127,1},{164,147,27},{164,147,24},{158,143,6},{158,145,4},{164,145,16},{154,135,1},{173,156,27},{173,156,24},{167,152,6},{167,154,4},{173,154,16},{163,144,1},{181,164,27}, +{181,164,24},{175,160,6},{175,162,4},{181,162,16},{171,152,1},{189,172,27},{189,172,24},{183,168,6},{183,170,4},{189,170,16},{179,160,1},{197,180,27},{197,180,24},{191,176,6},{191,178,4},{197,178,16},{187,168,1},{206,189,27},{206,189,24},{200,185,6},{200,187,4},{206,187,16},{196,177,1},{214,197,27},{214,197,24},{208,193,6},{208,195,4},{214,195,16},{204,185,1},{222,205,27},{222,205,24},{216,201,6}, +{216,203,4},{222,203,16},{212,193,1},{230,213,27},{230,213,24},{224,209,6},{224,211,4},{230,211,16},{220,201,1},{239,222,27},{239,222,24},{233,218,6},{233,220,4},{239,220,16},{229,210,1},{247,230,27},{247,230,24},{241,226,6},{241,228,4},{247,228,16},{237,218,1},{255,238,27},{255,238,24},{249,234,6},{249,236,4},{255,236,16},{245,226,1},{255,247,7},{255,248,4},{255,247,7},{255,248,4},{255,0,0}, 
+{253,234,1},{16,0,47},{16,0,44},{5,0,15},{5,0,12},{17,5,48},{0,0,0},{37,1,111},{36,2,108},{13,0,11},{13,3,12},{37,1,96},{3,0,3},{33,0,27},{33,0,24},{31,0,27},{31,1,24},{45,9,96},{11,0,3},{40,7,27},{41,8,24},{39,7,27},{29,0,4},{41,3,16},{19,1,2},{49,16,27},{50,17,24},{38,5,6},{38,6,4},{50,12,16},{40,4,6},{57,24,27}, +{58,25,24},{46,13,6},{46,14,4},{58,20,16},{36,0,1},{65,32,27},{66,33,24},{54,21,6},{54,22,4},{66,28,16},{44,6,1},{73,40,27},{74,41,24},{62,29,6},{62,30,4},{74,36,16},{52,14,1},{82,49,27},{83,50,24},{71,38,6},{71,39,4},{83,45,16},{61,23,1},{90,57,27},{91,58,24},{79,46,6},{79,47,4},{91,53,16},{69,31,1},{98,65,27},{99,66,24},{87,54,6}, +{87,55,4},{99,61,16},{77,39,1},{106,73,27},{107,74,24},{95,62,6},{95,63,4},{107,69,16},{85,47,1},{115,82,27},{116,83,24},{104,71,6},{104,72,4},{116,78,16},{94,56,1},{123,90,27},{124,91,24},{112,79,6},{112,80,4},{124,86,16},{102,64,1},{131,98,27},{132,99,24},{120,87,6},{120,88,4},{132,94,16},{110,72,1},{139,106,27},{140,107,24},{128,95,6},{128,96,4},{140,102,16}, +{118,80,1},{148,115,27},{149,116,24},{137,104,6},{137,105,4},{149,111,16},{127,89,1},{156,123,27},{157,124,24},{145,112,6},{145,113,4},{157,119,16},{135,97,1},{164,131,27},{165,132,24},{153,120,6},{153,121,4},{165,127,16},{143,105,1},{172,139,27},{173,140,24},{161,128,6},{161,129,4},{173,135,16},{151,113,1},{181,148,27},{182,149,24},{170,137,6},{170,138,4},{182,144,16},{160,122,1},{189,156,27}, +{190,157,24},{178,145,6},{178,146,4},{190,152,16},{168,130,1},{197,164,27},{198,165,24},{186,153,6},{186,154,4},{198,160,16},{176,138,1},{205,172,27},{206,173,24},{194,161,6},{194,162,4},{206,168,16},{184,146,1},{214,181,27},{215,182,24},{203,170,6},{203,171,4},{215,177,16},{193,155,1},{222,189,27},{223,190,24},{211,178,6},{211,179,4},{223,185,16},{201,163,1},{230,197,27},{231,198,24},{219,186,6}, 
+{219,187,4},{231,193,16},{209,171,1},{238,205,27},{239,206,24},{227,194,6},{227,195,4},{239,201,16},{217,179,1},{247,214,27},{248,215,24},{236,203,6},{236,204,4},{248,210,16},{226,188,1},{255,222,27},{255,222,24},{244,211,6},{244,212,4},{255,220,16},{234,196,1},{254,218,6},{255,242,28},{252,219,6},{252,220,4},{255,245,16},{242,204,1},{255,238,7},{255,239,4},{255,238,7},{255,239,4},{255,0,0}, +{250,212,1},{28,0,47},{28,0,44},{9,0,15},{9,0,12},{29,9,48},{0,0,0},{53,0,111},{53,0,108},{17,0,15},{17,0,12},{37,7,32},{0,0,0},{66,3,111},{63,7,108},{25,0,11},{25,7,12},{65,5,96},{7,0,3},{51,0,27},{71,15,108},{47,0,27},{33,6,8},{73,13,96},{15,0,3},{61,4,27},{80,24,108},{61,4,27},{60,6,24},{62,0,16},{24,4,3},{69,12,27}, +{88,32,108},{69,12,27},{68,14,24},{70,8,16},{32,2,2},{77,20,27},{96,40,108},{58,2,6},{58,2,4},{78,16,16},{60,0,6},{85,28,27},{104,48,108},{66,10,6},{66,10,4},{86,24,16},{68,8,6},{94,37,27},{113,57,108},{75,19,6},{75,19,4},{95,33,16},{77,17,6},{102,45,27},{121,65,108},{83,27,6},{83,27,4},{103,41,16},{65,3,1},{110,53,27},{129,73,108},{91,35,6}, +{91,35,4},{111,49,16},{73,11,1},{118,61,27},{137,81,108},{99,43,6},{99,43,4},{119,57,16},{81,19,1},{127,70,27},{146,90,108},{108,52,6},{108,52,4},{128,66,16},{90,28,1},{135,78,27},{154,98,108},{116,60,6},{116,60,4},{136,74,16},{98,36,1},{143,86,27},{162,106,108},{124,68,6},{124,68,4},{144,82,16},{106,44,1},{151,94,27},{170,114,108},{132,76,6},{132,76,4},{152,90,16}, +{114,52,1},{160,103,27},{179,123,108},{141,85,6},{141,85,4},{161,99,16},{123,61,1},{168,111,27},{187,131,108},{149,93,6},{149,93,4},{169,107,16},{131,69,1},{176,119,27},{195,139,108},{157,101,6},{157,101,4},{177,115,16},{139,77,1},{184,127,27},{203,147,108},{165,109,6},{165,109,4},{185,123,16},{147,85,1},{193,136,27},{212,156,108},{174,118,6},{174,118,4},{194,132,16},{156,94,1},{201,144,27}, 
+{220,164,108},{182,126,6},{182,126,4},{202,140,16},{164,102,1},{209,152,27},{228,172,108},{190,134,6},{190,134,4},{210,148,16},{172,110,1},{217,160,27},{236,180,108},{198,142,6},{198,142,4},{218,156,16},{180,118,1},{226,169,27},{245,189,108},{207,151,6},{207,151,4},{227,165,16},{189,127,1},{234,177,27},{253,197,108},{215,159,6},{215,159,4},{235,173,16},{197,135,1},{242,185,27},{243,186,24},{223,167,6}, +{223,167,4},{243,181,16},{205,143,1},{250,193,27},{251,194,24},{231,175,6},{231,175,4},{251,189,16},{213,151,1},{255,203,27},{255,206,24},{240,184,6},{240,184,4},{255,208,16},{222,160,1},{251,188,6},{255,230,28},{248,192,6},{248,192,4},{255,233,16},{230,168,1},{255,200,6},{255,202,4},{255,200,6},{255,202,4},{255,0,0},{238,176,1},{255,226,7},{255,227,4},{255,226,7},{255,227,4},{255,0,0}, +{246,184,1},{41,0,47},{41,0,44},{13,0,15},{13,0,12},{42,13,48},{0,0,0},{71,0,111},{71,0,108},{21,0,15},{21,0,12},{50,7,32},{0,0,0},{86,1,111},{85,2,108},{29,1,15},{29,3,12},{87,0,96},{3,0,3},{96,6,111},{93,10,108},{37,0,11},{37,11,12},{95,8,96},{11,0,3},{71,0,27},{102,19,108},{68,0,27},{46,7,8},{104,17,96},{20,0,3},{82,0,27}, +{110,27,108},{81,0,27},{80,2,24},{112,25,96},{28,0,3},{91,7,27},{118,35,108},{91,7,27},{88,10,24},{91,2,16},{36,7,3},{99,15,27},{126,43,108},{99,15,27},{96,18,24},{99,10,16},{44,1,2},{108,24,27},{135,52,108},{108,24,27},{79,0,4},{108,19,16},{53,10,2},{116,32,27},{143,60,108},{87,5,6},{87,7,4},{116,27,16},{90,3,6},{124,40,27},{151,68,108},{95,13,6}, +{95,15,4},{124,35,16},{98,11,6},{132,48,27},{159,76,108},{103,21,6},{103,23,4},{132,43,16},{106,19,6},{141,57,27},{168,85,108},{112,30,6},{112,32,4},{141,52,16},{115,28,6},{149,65,27},{176,93,108},{120,38,6},{120,40,4},{149,60,16},{94,5,1},{157,73,27},{184,101,108},{128,46,6},{128,48,4},{157,68,16},{102,13,1},{165,81,27},{192,109,108},{136,54,6},{136,56,4},{165,76,16}, 
+{110,21,1},{174,90,27},{201,118,108},{145,63,6},{145,65,4},{174,85,16},{119,30,1},{182,98,27},{209,126,108},{153,71,6},{153,73,4},{182,93,16},{127,38,1},{190,106,27},{217,134,108},{161,79,6},{161,81,4},{190,101,16},{135,46,1},{198,114,27},{225,142,108},{169,87,6},{169,89,4},{198,109,16},{143,54,1},{207,123,27},{234,151,108},{178,96,6},{178,98,4},{207,118,16},{152,63,1},{215,131,27}, +{242,159,108},{186,104,6},{186,106,4},{215,126,16},{160,71,1},{223,139,27},{250,167,108},{194,112,6},{194,114,4},{223,134,16},{168,79,1},{231,147,27},{231,147,24},{202,120,6},{202,122,4},{231,142,16},{176,87,1},{240,156,27},{240,156,24},{211,129,6},{211,131,4},{240,151,16},{185,96,1},{248,164,27},{248,164,24},{219,137,6},{219,139,4},{248,159,16},{193,104,1},{254,173,27},{255,174,24},{227,145,6}, +{227,147,4},{255,169,16},{201,112,1},{255,182,27},{255,187,24},{235,153,6},{235,155,4},{255,193,16},{209,120,1},{249,159,6},{255,218,28},{244,162,6},{244,164,4},{255,220,16},{218,129,1},{254,169,6},{254,168,4},{252,170,6},{252,172,4},{255,245,16},{226,137,1},{255,184,6},{255,190,4},{255,184,6},{255,190,4},{255,0,0},{234,145,1},{255,213,7},{255,214,4},{255,213,7},{255,214,4},{255,0,0}, +{242,153,1},{59,0,47},{59,0,44},{18,0,15},{18,0,12},{60,18,48},{0,0,0},{69,0,47},{68,2,44},{26,0,15},{26,0,12},{68,5,32},{0,0,0},{111,0,111},{111,0,108},{34,0,15},{34,0,12},{76,13,32},{0,0,0},{123,3,111},{122,5,108},{42,3,15},{42,6,12},{125,1,96},{6,0,3},{135,8,111},{131,14,108},{50,0,11},{51,15,12},{134,10,96},{15,0,3},{95,0,27}, +{139,22,108},{59,2,11},{59,5,8},{142,18,96},{23,0,3},{105,0,27},{147,30,108},{99,0,27},{67,13,8},{150,26,96},{31,0,3},{115,0,27},{155,38,108},{113,1,27},{110,4,24},{158,34,96},{39,0,3},{125,7,27},{164,47,108},{125,7,27},{119,13,24},{167,43,96},{48,6,3},{133,15,27},{172,55,108},{133,15,27},{127,21,24},{134,5,16},{56,14,3},{141,23,27},{180,63,108},{141,23,27}, 
+{135,29,24},{142,13,16},{64,1,2},{149,31,27},{188,71,108},{149,31,27},{143,37,24},{150,21,16},{72,9,2},{158,40,27},{197,80,108},{118,1,6},{117,6,4},{159,30,16},{120,0,6},{166,48,27},{205,88,108},{126,9,6},{125,14,4},{167,38,16},{130,6,6},{174,56,27},{213,96,108},{134,17,6},{133,22,4},{175,46,16},{138,14,6},{182,64,27},{221,104,108},{142,25,6},{141,30,4},{183,54,16}, +{146,22,6},{191,73,27},{230,113,108},{151,34,6},{150,39,4},{192,63,16},{155,31,6},{199,81,27},{238,121,108},{159,42,6},{158,47,4},{200,71,16},{163,39,6},{207,89,27},{246,129,108},{167,50,6},{166,55,4},{208,79,16},{130,1,1},{215,97,27},{254,137,108},{175,58,6},{174,63,4},{216,87,16},{138,9,1},{224,106,27},{224,106,24},{184,67,6},{183,72,4},{225,96,16},{147,18,1},{232,114,27}, +{232,114,24},{192,75,6},{191,80,4},{233,104,16},{155,26,1},{240,122,27},{240,122,24},{200,83,6},{199,88,4},{241,112,16},{163,34,1},{248,130,27},{248,130,24},{208,91,6},{207,96,4},{249,120,16},{171,42,1},{255,140,27},{254,142,24},{217,100,6},{216,105,4},{255,135,16},{180,51,1},{255,149,27},{255,156,24},{225,108,6},{224,113,4},{255,159,16},{188,59,1},{255,159,27},{253,196,28},{233,116,6}, +{232,121,4},{255,184,16},{196,67,1},{247,120,6},{255,205,28},{241,124,6},{240,129,4},{255,208,16},{204,75,1},{252,132,6},{252,132,4},{250,133,6},{249,138,4},{255,236,16},{213,84,1},{255,144,6},{255,150,4},{255,144,6},{255,150,4},{255,0,0},{221,92,1},{254,185,7},{255,175,4},{253,187,7},{255,175,4},{255,0,0},{229,100,1},{255,196,7},{255,199,4},{255,196,7},{255,199,4},{255,0,0}, +{237,108,1},{80,0,47},{80,0,44},{24,0,15},{24,0,12},{80,24,48},{0,0,0},{88,1,47},{88,1,44},{32,0,15},{32,0,12},{88,4,32},{0,0,0},{138,0,111},{138,0,108},{40,0,15},{40,0,12},{96,12,32},{0,0,0},{153,0,111},{153,0,108},{48,0,15},{48,0,12},{104,20,32},{0,0,0},{166,4,111},{164,8,108},{57,4,15},{57,9,12},{167,3,96},{9,0,3},{176,9,111}, 
+{172,16,108},{65,0,11},{65,17,12},{175,11,96},{17,0,3},{189,13,111},{180,24,108},{73,0,11},{73,1,8},{183,19,96},{25,0,3},{129,0,27},{188,32,108},{83,2,11},{81,9,8},{191,27,96},{33,0,3},{141,0,27},{197,41,108},{132,0,27},{90,18,8},{200,36,96},{42,0,3},{151,0,27},{205,49,108},{148,0,27},{145,3,24},{208,44,96},{50,0,3},{160,3,27},{213,57,108},{160,3,27}, +{153,11,24},{216,52,96},{58,2,3},{168,11,27},{221,65,108},{168,11,27},{161,19,24},{170,0,16},{66,10,3},{177,20,27},{230,74,108},{177,20,27},{170,28,24},{179,7,16},{75,19,3},{185,28,27},{238,82,108},{185,28,27},{178,36,24},{187,15,16},{83,0,2},{193,36,27},{246,90,108},{193,36,27},{186,44,24},{195,23,16},{91,7,2},{201,44,27},{254,98,108},{201,44,27},{147,0,4},{203,31,16}, +{99,15,2},{210,53,27},{211,54,24},{157,1,6},{156,9,4},{212,40,16},{108,24,2},{218,61,27},{219,62,24},{165,9,6},{164,17,4},{220,48,16},{170,6,6},{226,69,27},{227,70,24},{173,17,6},{172,25,4},{228,56,16},{178,14,6},{234,77,27},{235,78,24},{181,25,6},{180,33,4},{236,64,16},{186,22,6},{243,86,27},{244,87,24},{190,34,6},{189,42,4},{245,73,16},{195,31,6},{251,94,27}, +{252,95,24},{198,42,6},{197,50,4},{253,81,16},{203,39,6},{255,104,27},{255,107,24},{206,50,6},{205,58,4},{255,102,16},{211,47,6},{255,113,27},{255,123,24},{214,58,6},{213,66,4},{255,126,16},{219,55,6},{255,126,27},{253,172,28},{223,67,6},{222,75,4},{255,153,16},{174,2,1},{242,66,6},{254,182,28},{231,75,6},{230,83,4},{255,178,16},{182,10,1},{246,79,6},{255,190,28},{239,83,6}, +{238,91,4},{255,202,16},{190,18,1},{251,89,6},{251,88,4},{247,91,6},{246,99,4},{255,227,16},{198,26,1},{255,102,6},{255,108,4},{255,102,6},{255,108,4},{255,0,0},{207,35,1},{255,117,6},{255,132,4},{255,117,6},{255,132,4},{255,0,0},{215,43,1},{254,167,7},{255,156,4},{254,167,7},{255,156,4},{255,0,0},{223,51,1},{255,175,7},{255,181,4},{255,175,7},{255,181,4},{255,0,0}, 
+{231,59,1},{105,0,47},{105,0,44},{33,0,15},{33,0,12},{106,33,48},{0,0,0},{115,0,47},{114,2,44},{41,0,15},{41,0,12},{114,5,32},{0,0,0},{123,2,47},{124,3,44},{49,0,15},{49,0,12},{122,13,32},{0,0,0},{190,0,111},{190,0,108},{57,0,15},{57,0,12},{130,21,32},{0,0,0},{205,0,111},{205,0,108},{66,0,15},{66,0,12},{139,30,32},{0,0,0},{217,4,111}, +{215,7,108},{74,4,15},{74,8,12},{218,3,96},{8,0,3},{229,8,111},{223,15,108},{80,0,11},{82,16,12},{226,11,96},{16,0,3},{237,13,111},{231,23,108},{89,0,11},{90,24,12},{234,19,96},{24,0,3},{251,17,111},{240,32,108},{99,0,11},{99,1,8},{243,28,96},{33,0,3},{169,0,27},{248,40,108},{108,2,11},{107,9,8},{251,36,96},{41,0,3},{181,0,27},{254,49,108},{166,0,27}, +{115,17,8},{188,79,32},{49,0,3},{190,0,27},{255,59,108},{181,0,27},{123,25,8},{196,87,32},{57,0,3},{203,0,27},{203,0,24},{197,0,27},{196,2,24},{205,96,32},{66,0,3},{213,1,27},{212,3,24},{211,2,27},{204,10,24},{213,104,32},{74,1,3},{221,9,27},{220,11,24},{219,10,27},{212,18,24},{221,0,16},{82,9,3},{229,17,27},{228,19,24},{227,18,27},{220,26,24},{229,5,16}, +{90,17,3},{238,26,27},{237,28,24},{236,27,27},{229,35,24},{238,14,16},{99,26,3},{246,34,27},{245,36,24},{244,35,27},{237,43,24},{246,22,16},{107,34,3},{254,42,27},{253,44,24},{252,43,27},{245,51,24},{254,30,16},{115,6,2},{255,52,27},{255,58,24},{255,52,27},{253,59,24},{255,53,16},{123,14,2},{255,64,27},{255,74,24},{196,0,6},{198,100,8},{255,80,16},{132,23,2},{255,74,27}, +{255,89,24},{206,1,6},{206,4,4},{255,105,16},{140,31,2},{255,86,27},{253,147,28},{215,7,6},{214,12,4},{255,129,16},{219,4,6},{255,95,27},{255,155,28},{223,15,6},{222,20,4},{255,153,16},{227,12,6},{242,17,6},{255,166,28},{232,24,6},{231,29,4},{255,181,16},{236,21,6},{247,26,6},{255,175,28},{240,32,6},{239,37,4},{255,205,16},{244,29,6},{251,38,6},{251,37,4},{248,40,6}, 
+{247,45,4},{255,230,16},{252,37,6},{255,50,6},{255,53,4},{255,50,6},{255,53,4},{255,0,0},{189,80,2},{255,65,6},{255,80,4},{255,65,6},{255,80,4},{255,0,0},{198,89,2},{252,131,7},{255,105,4},{252,131,7},{255,105,4},{255,0,0},{206,97,2},{254,139,7},{255,129,4},{253,141,7},{255,129,4},{255,0,0},{214,105,2},{255,150,7},{255,153,4},{255,150,7},{255,153,4},{255,0,0}, +{222,0,1},{181,0,47},{178,0,44},{47,0,15},{47,0,12},{183,47,48},{0,0,0},{190,0,47},{190,0,44},{55,0,15},{55,0,12},{191,55,48},{0,0,0},{199,0,47},{199,0,44},{63,0,15},{63,0,12},{199,63,48},{0,0,0},{207,0,47},{207,1,44},{71,0,15},{71,0,12},{207,4,32},{0,0,0},{216,1,47},{218,3,44},{80,0,15},{80,0,12},{216,13,32},{0,0,0},{225,1,47}, +{227,6,44},{88,0,15},{88,0,12},{224,21,32},{0,0,0},{233,2,47},{235,11,44},{96,1,15},{96,2,12},{232,29,32},{2,0,3},{241,4,47},{243,19,44},{104,5,15},{104,10,12},{240,37,32},{10,0,3},{251,5,47},{252,28,44},{113,9,15},{113,19,12},{249,46,32},{19,0,3},{255,9,47},{255,37,44},{117,0,11},{121,27,12},{255,55,32},{27,0,3},{255,9,47},{255,46,44},{126,0,11}, +{129,35,12},{255,67,32},{35,0,3},{248,0,27},{255,55,44},{137,0,11},{137,43,12},{255,79,32},{43,0,3},{250,0,27},{255,64,44},{147,2,11},{146,6,8},{255,93,32},{52,0,3},{251,0,27},{255,73,44},{155,4,11},{154,14,8},{255,104,32},{60,0,3},{253,0,27},{248,0,24},{233,0,27},{162,22,8},{255,116,32},{68,0,3},{254,0,27},{254,0,24},{248,0,27},{170,30,8},{255,128,32}, +{76,0,3},{255,1,27},{255,7,24},{255,1,27},{179,39,8},{255,22,16},{85,0,3},{255,2,27},{255,22,24},{255,7,27},{187,47,8},{255,47,16},{93,0,3},{255,4,27},{251,100,28},{182,0,7},{195,55,8},{255,71,16},{101,0,3},{255,4,27},{253,108,28},{191,0,7},{203,63,8},{255,95,16},{109,0,3},{255,7,27},{255,118,28},{200,0,7},{212,72,8},{255,123,16},{118,0,3},{246,0,7}, 
+{255,129,28},{209,0,7},{220,80,8},{255,147,16},{126,0,3},{246,0,7},{255,138,28},{218,0,7},{228,88,8},{255,172,16},{134,0,3},{249,3,7},{245,91,8},{228,3,7},{236,96,8},{255,196,16},{142,6,3},{251,14,7},{250,102,8},{237,12,7},{245,105,8},{255,223,16},{151,15,3},{253,22,7},{254,112,8},{245,20,7},{253,113,8},{255,248,16},{159,23,3},{253,31,7},{255,124,8},{249,28,7}, +{255,124,8},{255,0,0},{167,31,3},{254,39,7},{255,10,4},{252,37,7},{255,10,4},{255,0,0},{175,39,3},{255,48,7},{255,38,4},{254,48,7},{255,38,4},{255,0,0},{184,48,3},{255,56,7},{255,62,4},{255,56,7},{255,62,4},{255,0,0},{192,56,3},{255,65,7},{255,86,4},{255,65,7},{255,86,4},{255,0,0},{200,64,3},{255,74,7},{255,111,4},{255,77,7},{255,111,4},{255,0,0}, +{208,5,2}, diff --git a/thirdparty/basisu/transcoder/basisu_transcoder_tables_bc7_m5_color.inc b/thirdparty/basisu/transcoder/basisu_transcoder_tables_bc7_m5_color.inc new file mode 100644 index 000000000..c0780988d --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_transcoder_tables_bc7_m5_color.inc @@ -0,0 +1,481 @@ +{0,7,18},{0,5,2},{0,4,1},{0,3,8},{0,4,35},{0,3,24},{0,3,12},{0,2,29},{0,2,36},{0,2,30},{0,7,18},{0,5,2},{0,4,1},{0,3,8},{2,0,35},{0,3,24},{0,3,12},{0,2,29},{4,0,35},{0,2,29},{0,3,0},{0,3,0},{0,3,0},{0,1,1},{0,1,2},{0,1,2},{0,1,2},{0,1,1},{1,0,3},{0,1,2},{0,3,0}, +{0,3,0},{0,3,0},{0,1,1},{1,0,2},{1,0,2},{1,0,2},{0,1,1},{1,0,2},{0,1,1},{4,0,18},{0,5,2},{0,4,1},{0,3,8},{4,0,18},{7,0,18},{0,3,8},{0,2,20},{7,0,18},{0,2,20},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,15,38},{2,11,20},{2,8,24}, +{1,8,21},{0,16,51},{0,10,19},{0,8,2},{0,6,24},{0,8,76},{0,6,40},{3,13,18},{3,10,2},{3,8,1},{3,7,5},{8,0,51},{1,9,19},{0,8,2},{0,6,24},{16,0,51},{0,6,24},{2,11,20},{2,11,20},{2,11,20},{1,7,20},{0,12,8},{0,7,2},{0,7,2},{0,5,1},{0,5,19},{0,5,10},{3,9,0},{3,9,0},{3,9,0},{3,6,0},{6,0,8}, 
+{1,7,0},{1,7,0},{0,5,1},{12,0,8},{0,5,1},{10,0,18},{2,11,0},{3,8,1},{0,8,1},{10,0,18},{20,0,18},{0,8,1},{0,6,20},{20,0,18},{0,6,20},{1,0,20},{1,0,20},{1,0,20},{1,0,20},{0,9,0},{0,9,0},{0,9,0},{0,4,1},{0,3,8},{0,3,8},{6,19,38},{6,15,20},{6,12,24},{5,12,21},{4,20,51},{4,14,19},{4,12,2}, +{4,10,24},{0,14,52},{1,10,20},{7,17,18},{7,14,2},{7,12,1},{7,11,5},{14,0,51},{5,13,19},{4,12,2},{1,10,20},{29,0,51},{1,10,20},{6,15,20},{6,15,20},{6,15,20},{5,11,20},{4,16,8},{4,11,2},{4,11,2},{4,9,1},{0,12,8},{1,9,2},{7,13,0},{7,13,0},{7,13,0},{7,10,0},{12,0,8},{5,11,0},{5,11,0},{3,9,0},{24,0,8}, +{3,9,0},{15,1,18},{6,15,0},{7,12,1},{3,12,0},{15,1,18},{32,0,18},{3,12,0},{0,10,20},{32,0,18},{0,10,20},{5,0,20},{5,0,20},{5,0,20},{5,0,20},{4,13,0},{4,13,0},{4,13,0},{4,8,1},{1,10,0},{1,10,0},{10,23,38},{10,19,20},{10,16,24},{9,16,21},{8,24,51},{8,18,19},{8,16,2},{8,14,24},{3,18,52},{5,14,20},{11,21,18}, +{11,18,2},{11,16,1},{11,15,5},{20,0,51},{9,17,19},{8,16,2},{5,14,20},{41,0,51},{5,14,20},{10,19,20},{10,19,20},{10,19,20},{9,15,20},{8,20,8},{8,15,2},{8,15,2},{8,13,1},{4,16,8},{5,13,2},{11,17,0},{11,17,0},{11,17,0},{11,14,0},{18,0,8},{9,15,0},{9,15,0},{7,13,0},{36,0,8},{7,13,0},{21,1,18},{10,19,0},{11,16,1}, +{7,16,0},{21,1,18},{44,0,18},{7,16,0},{0,14,20},{44,0,18},{0,14,20},{9,0,20},{9,0,20},{9,0,20},{9,0,20},{8,17,0},{8,17,0},{8,17,0},{8,12,1},{5,14,0},{5,14,0},{14,29,36},{14,24,18},{14,21,23},{14,20,20},{13,28,52},{13,22,20},{13,20,2},{12,19,23},{7,23,51},{9,19,18},{16,25,19},{15,23,1},{16,20,2},{15,20,6},{27,0,51}, +{13,22,19},{13,20,1},{8,19,18},{55,0,51},{8,19,18},{14,24,18},{14,24,18},{14,24,18},{14,19,18},{13,23,9},{13,20,1},{13,20,1},{12,17,2},{9,20,8},{10,18,1},{16,20,1},{16,20,1},{16,20,1},{16,18,1},{24,1,8},{14,19,0},{14,19,0},{12,17,1},{50,0,8},{12,17,1},{28,0,18},{15,23,0},{16,20,1},{12,20,1},{28,0,18},{58,0,18},{12,20,1}, 
+{0,19,18},{58,0,18},{0,19,18},{14,0,18},{14,0,18},{14,0,18},{14,0,18},{13,20,1},{13,20,1},{13,20,1},{12,17,1},{10,18,0},{10,18,0},{18,33,36},{18,28,18},{18,25,23},{18,24,20},{17,32,52},{17,26,20},{17,24,2},{16,23,23},{11,27,51},{13,23,18},{20,29,19},{19,27,1},{20,24,2},{19,24,6},{33,0,51},{17,26,19},{17,24,1},{12,23,18},{66,0,51}, +{12,23,18},{18,28,18},{18,28,18},{18,28,18},{18,23,18},{17,27,9},{17,24,1},{17,24,1},{16,21,2},{13,24,8},{14,22,1},{20,24,1},{20,24,1},{20,24,1},{20,22,1},{30,1,8},{18,23,0},{18,23,0},{16,21,1},{62,0,8},{16,21,1},{34,0,18},{19,27,0},{20,24,1},{16,24,1},{34,0,18},{69,0,18},{16,24,1},{0,23,18},{69,0,18},{0,23,18},{18,0,18}, +{18,0,18},{18,0,18},{18,0,18},{17,24,1},{17,24,1},{17,24,1},{16,21,1},{14,22,0},{14,22,0},{22,37,36},{22,32,18},{22,29,23},{22,28,20},{21,36,52},{21,30,20},{21,28,2},{20,27,23},{15,31,51},{17,27,18},{24,33,19},{23,31,1},{24,28,2},{23,28,6},{39,0,51},{21,30,19},{21,28,1},{16,27,18},{78,0,51},{16,27,18},{22,32,18},{22,32,18},{22,32,18}, +{22,27,18},{21,31,9},{21,28,1},{21,28,1},{20,25,2},{17,28,8},{18,26,1},{24,28,1},{24,28,1},{24,28,1},{24,26,1},{36,1,8},{22,27,0},{22,27,0},{20,25,1},{74,0,8},{20,25,1},{40,0,18},{23,31,0},{24,28,1},{20,28,1},{40,0,18},{82,0,18},{20,28,1},{0,27,18},{82,0,18},{0,27,18},{22,0,18},{22,0,18},{22,0,18},{22,0,18},{21,28,1}, +{21,28,1},{21,28,1},{20,25,1},{18,26,0},{18,26,0},{26,41,36},{26,36,18},{26,33,23},{26,32,20},{25,40,52},{25,34,20},{25,32,2},{24,31,23},{19,35,51},{21,31,18},{28,37,19},{27,35,1},{28,32,2},{27,32,6},{45,0,51},{25,34,19},{25,32,1},{20,31,18},{91,0,51},{20,31,18},{26,36,18},{26,36,18},{26,36,18},{26,31,18},{25,35,9},{25,32,1},{25,32,1}, +{24,29,2},{21,32,8},{22,30,1},{28,32,1},{28,32,1},{28,32,1},{28,30,1},{42,1,8},{26,31,0},{26,31,0},{24,29,1},{86,0,8},{24,29,1},{46,0,18},{27,35,0},{28,32,1},{24,32,1},{46,0,18},{94,0,18},{24,32,1},{0,31,18},{94,0,18},{0,31,18},{26,0,18},{26,0,18},{26,0,18},{26,0,18},{25,32,1},{25,32,1},{25,32,1},{24,29,1},{22,30,0}, 
+{22,30,0},{31,44,38},{31,40,20},{31,37,24},{30,37,21},{29,45,51},{29,39,19},{29,37,2},{29,35,24},{24,39,52},{26,35,20},{32,42,18},{32,39,2},{32,37,1},{32,36,5},{51,0,51},{30,38,19},{29,37,2},{26,35,20},{104,0,51},{26,35,20},{31,40,20},{31,40,20},{31,40,20},{30,36,20},{29,41,8},{29,36,2},{29,36,2},{29,34,1},{25,37,8},{26,34,2},{32,38,0}, +{32,38,0},{32,38,0},{32,35,0},{49,0,8},{30,36,0},{30,36,0},{28,34,0},{100,0,8},{28,34,0},{53,0,18},{31,40,0},{32,37,1},{28,37,0},{53,0,18},{107,0,18},{28,37,0},{0,35,20},{107,0,18},{0,35,20},{30,0,20},{30,0,20},{30,0,20},{30,0,20},{29,38,0},{29,38,0},{29,38,0},{29,33,1},{26,35,0},{26,35,0},{35,48,38},{35,44,20},{35,41,24}, +{34,41,21},{33,49,51},{33,43,19},{33,41,2},{33,39,24},{28,43,52},{30,39,20},{36,46,18},{36,43,2},{36,41,1},{36,40,5},{57,0,51},{34,42,19},{33,41,2},{30,39,20},{117,0,51},{30,39,20},{35,44,20},{35,44,20},{35,44,20},{34,40,20},{33,45,8},{33,40,2},{33,40,2},{33,38,1},{29,41,8},{30,38,2},{36,42,0},{36,42,0},{36,42,0},{36,39,0},{55,0,8}, +{34,40,0},{34,40,0},{32,38,0},{112,0,8},{32,38,0},{59,0,18},{35,44,0},{36,41,1},{32,41,0},{59,0,18},{120,0,18},{32,41,0},{0,39,20},{120,0,18},{0,39,20},{34,0,20},{34,0,20},{34,0,20},{34,0,20},{33,42,0},{33,42,0},{33,42,0},{33,37,1},{30,39,0},{30,39,0},{39,52,38},{39,48,20},{39,45,24},{38,45,21},{37,53,51},{37,47,19},{37,45,2}, +{37,43,24},{32,47,52},{34,43,20},{40,50,18},{40,47,2},{40,45,1},{40,44,5},{63,0,51},{38,46,19},{37,45,2},{34,43,20},{127,1,51},{34,43,20},{39,48,20},{39,48,20},{39,48,20},{38,44,20},{37,49,8},{37,44,2},{37,44,2},{37,42,1},{33,45,8},{34,42,2},{40,46,0},{40,46,0},{40,46,0},{40,43,0},{61,0,8},{38,44,0},{38,44,0},{36,42,0},{124,0,8}, +{36,42,0},{64,0,18},{39,48,0},{40,45,1},{36,45,0},{64,0,18},{126,3,18},{36,45,0},{0,43,20},{126,3,18},{0,43,20},{38,0,20},{38,0,20},{38,0,20},{38,0,20},{37,46,0},{37,46,0},{37,46,0},{37,41,1},{34,43,0},{34,43,0},{43,56,38},{43,52,20},{43,49,24},{42,49,21},{41,57,51},{41,51,19},{41,49,2},{41,47,24},{36,51,52},{38,47,20},{44,54,18}, 
+{44,51,2},{44,49,1},{44,48,5},{69,0,51},{42,50,19},{41,49,2},{38,47,20},{127,7,51},{38,47,20},{43,52,20},{43,52,20},{43,52,20},{42,48,20},{41,53,8},{41,48,2},{41,48,2},{41,46,1},{37,49,8},{38,46,2},{44,50,0},{44,50,0},{44,50,0},{44,47,0},{66,1,8},{42,48,0},{42,48,0},{40,46,0},{126,5,8},{40,46,0},{70,0,18},{43,52,0},{44,49,1}, +{40,49,0},{70,0,18},{126,9,18},{40,49,0},{0,47,20},{126,9,18},{0,47,20},{42,0,20},{42,0,20},{42,0,20},{42,0,20},{41,50,0},{41,50,0},{41,50,0},{41,45,1},{38,47,0},{38,47,0},{47,62,36},{47,57,18},{47,54,23},{47,53,20},{46,61,52},{46,55,20},{46,53,2},{45,52,23},{40,56,51},{42,52,18},{49,58,19},{48,56,1},{49,53,2},{48,53,6},{75,1,51}, +{46,55,19},{46,53,1},{41,52,18},{126,14,51},{41,52,18},{47,57,18},{47,57,18},{47,57,18},{47,52,18},{46,56,9},{46,53,1},{46,53,1},{45,50,2},{42,53,8},{43,51,1},{49,53,1},{49,53,1},{49,53,1},{49,51,1},{73,0,8},{47,52,0},{47,52,0},{45,50,1},{126,12,8},{45,50,1},{77,0,18},{48,56,0},{49,53,1},{45,53,1},{77,0,18},{127,15,18},{45,53,1}, +{0,52,18},{127,15,18},{0,52,18},{47,0,18},{47,0,18},{47,0,18},{47,0,18},{46,53,1},{46,53,1},{46,53,1},{45,50,1},{43,51,0},{43,51,0},{51,65,36},{51,61,18},{51,58,23},{51,57,20},{50,64,52},{50,59,20},{50,57,2},{49,56,23},{44,60,51},{46,56,18},{53,62,19},{52,60,1},{53,57,2},{52,57,6},{81,0,51},{50,59,19},{50,57,1},{45,56,18},{126,20,51}, +{45,56,18},{51,61,18},{51,61,18},{51,61,18},{51,56,18},{50,60,9},{50,57,1},{50,57,1},{49,54,2},{46,57,8},{47,55,1},{53,57,1},{53,57,1},{53,57,1},{53,55,1},{79,0,8},{51,56,0},{51,56,0},{49,54,1},{127,17,8},{49,54,1},{83,0,18},{52,60,0},{53,57,1},{49,57,1},{83,0,18},{127,21,18},{49,57,1},{0,56,18},{127,21,18},{0,56,18},{51,0,18}, 
+{51,0,18},{51,0,18},{51,0,18},{50,57,1},{50,57,1},{50,57,1},{49,54,1},{47,55,0},{47,55,0},{55,69,36},{55,64,19},{55,62,23},{55,61,20},{54,68,52},{54,63,20},{54,61,2},{53,60,23},{48,64,51},{50,60,18},{57,65,19},{56,64,2},{57,61,2},{56,61,6},{87,0,51},{54,63,19},{54,61,1},{49,60,18},{126,26,51},{49,60,18},{55,65,18},{55,65,18},{55,65,18}, +{55,60,18},{54,64,9},{54,61,1},{54,61,1},{53,58,2},{50,61,8},{51,59,1},{57,61,1},{57,61,1},{57,61,1},{57,59,1},{85,0,8},{55,60,0},{55,60,0},{53,58,1},{127,23,8},{53,58,1},{89,0,18},{55,64,1},{57,61,1},{53,61,1},{89,0,18},{127,27,18},{53,61,1},{0,60,18},{127,27,18},{0,60,18},{55,0,18},{55,0,18},{55,0,18},{55,0,18},{54,61,1}, +{54,61,1},{54,61,1},{53,58,1},{51,59,0},{51,59,0},{59,73,36},{59,68,19},{59,66,26},{59,64,22},{58,72,52},{57,67,19},{57,65,2},{57,63,28},{52,68,51},{53,64,21},{61,69,19},{60,67,2},{61,65,1},{60,65,5},{93,0,51},{57,67,18},{57,65,1},{52,64,20},{126,32,51},{52,64,20},{59,69,18},{59,69,18},{59,69,18},{59,64,18},{58,68,9},{58,64,2},{58,64,2}, +{57,62,2},{55,64,9},{55,63,1},{61,65,1},{61,65,1},{61,65,1},{61,63,1},{91,0,8},{58,64,1},{58,64,1},{57,62,1},{127,29,8},{57,62,1},{95,0,18},{60,67,1},{61,65,0},{56,65,0},{95,0,18},{127,33,18},{56,65,0},{0,64,20},{127,33,18},{0,64,20},{59,0,18},{59,0,18},{59,0,18},{59,0,18},{58,65,1},{58,65,1},{58,65,1},{57,62,1},{55,63,0}, +{55,63,0},{63,79,38},{63,73,21},{64,70,28},{63,69,22},{62,78,51},{62,71,18},{62,69,2},{61,68,26},{57,72,51},{58,68,19},{65,74,19},{64,72,1},{65,69,2},{64,69,6},{100,0,51},{62,71,18},{62,69,2},{58,68,18},{127,38,51},{58,68,18},{63,74,20},{63,74,20},{63,74,20},{63,68,21},{62,73,8},{62,69,1},{62,69,1},{62,66,1},{59,69,9},{60,66,2},{65,69,1}, 
+{65,69,1},{65,69,1},{65,67,1},{98,0,8},{63,68,1},{63,68,1},{62,66,1},{127,36,8},{62,66,1},{101,1,18},{64,72,0},{65,69,1},{62,69,1},{101,1,18},{126,40,18},{62,69,1},{0,68,18},{126,40,18},{0,68,18},{63,0,20},{63,0,20},{63,0,20},{63,0,20},{62,70,0},{62,70,0},{62,70,0},{62,66,0},{60,66,1},{60,66,1},{67,82,36},{67,77,18},{67,74,23}, +{67,73,20},{66,81,52},{66,75,20},{66,73,2},{65,72,23},{61,76,51},{62,72,19},{69,78,19},{68,76,1},{69,73,2},{68,73,6},{106,0,51},{66,75,19},{66,73,1},{62,72,18},{127,44,51},{62,72,18},{67,77,18},{67,77,18},{67,77,18},{67,72,18},{66,76,9},{66,73,1},{66,73,1},{65,70,2},{63,73,9},{63,71,2},{69,73,1},{69,73,1},{69,73,1},{69,71,1},{104,0,8}, +{67,72,0},{67,72,0},{65,70,1},{127,42,8},{65,70,1},{107,1,18},{68,76,0},{69,73,1},{65,73,1},{107,1,18},{126,46,18},{65,73,1},{0,72,18},{126,46,18},{0,72,18},{67,0,18},{67,0,18},{67,0,18},{67,0,18},{66,73,1},{66,73,1},{66,73,1},{65,70,1},{63,71,1},{63,71,1},{71,86,36},{71,81,18},{71,78,23},{71,77,20},{70,85,52},{70,79,20},{70,77,2}, +{69,76,23},{64,80,51},{66,76,18},{73,82,19},{72,80,1},{73,77,2},{72,77,6},{112,0,51},{70,79,19},{70,77,1},{65,76,18},{127,50,51},{65,76,18},{71,81,18},{71,81,18},{71,81,18},{71,76,18},{70,80,9},{70,77,1},{70,77,1},{69,74,2},{66,77,8},{67,75,1},{73,77,1},{73,77,1},{73,77,1},{73,75,1},{110,0,8},{71,76,0},{71,76,0},{69,74,1},{126,48,8}, +{69,74,1},{113,0,18},{72,80,0},{73,77,1},{69,77,1},{113,0,18},{126,52,18},{69,77,1},{0,76,18},{126,52,18},{0,76,18},{71,0,18},{71,0,18},{71,0,18},{71,0,18},{70,77,1},{70,77,1},{70,77,1},{69,74,1},{67,75,0},{67,75,0},{75,90,36},{75,85,18},{75,82,23},{75,81,20},{74,89,52},{74,83,20},{74,81,2},{73,80,23},{68,84,51},{70,80,18},{77,86,19}, 
+{76,84,1},{77,81,2},{76,81,6},{118,0,51},{74,83,19},{74,81,1},{69,80,18},{127,56,51},{69,80,18},{75,85,18},{75,85,18},{75,85,18},{75,80,18},{74,84,9},{74,81,1},{74,81,1},{73,78,2},{70,81,8},{71,79,1},{77,81,1},{77,81,1},{77,81,1},{77,79,1},{115,1,8},{75,80,0},{75,80,0},{73,78,1},{126,54,8},{73,78,1},{119,0,18},{76,84,0},{77,81,1}, +{73,81,1},{119,0,18},{126,58,18},{73,81,1},{0,80,18},{126,58,18},{0,80,18},{75,0,18},{75,0,18},{75,0,18},{75,0,18},{74,81,1},{74,81,1},{74,81,1},{73,78,1},{71,79,0},{71,79,0},{80,93,38},{80,89,20},{80,86,24},{79,86,21},{78,94,51},{78,88,19},{78,86,2},{78,84,24},{73,88,52},{75,84,20},{81,91,18},{81,88,2},{81,86,1},{81,85,5},{124,1,51}, +{79,87,19},{78,86,2},{75,84,20},{126,63,51},{75,84,20},{80,89,20},{80,89,20},{80,89,20},{79,85,20},{78,90,8},{78,85,2},{78,85,2},{78,83,1},{74,86,8},{75,83,2},{81,87,0},{81,87,0},{81,87,0},{81,84,0},{122,0,8},{79,85,0},{79,85,0},{77,83,0},{126,61,8},{77,83,0},{126,0,18},{80,89,0},{81,86,1},{77,86,0},{126,0,18},{126,64,18},{77,86,0}, +{0,84,20},{126,64,18},{0,84,20},{79,0,20},{79,0,20},{79,0,20},{79,0,20},{78,87,0},{78,87,0},{78,87,0},{78,82,1},{75,84,0},{75,84,0},{84,97,38},{84,93,20},{84,90,24},{83,90,21},{82,98,51},{82,92,19},{82,90,2},{82,88,24},{77,92,52},{79,88,20},{85,95,18},{85,92,2},{85,90,1},{85,89,5},{127,7,51},{83,91,19},{82,90,2},{79,88,20},{127,68,51}, +{79,88,20},{84,93,20},{84,93,20},{84,93,20},{83,89,20},{82,94,8},{82,89,2},{82,89,2},{82,87,1},{78,90,8},{79,87,2},{85,91,0},{85,91,0},{85,91,0},{85,88,0},{127,2,8},{83,89,0},{83,89,0},{81,87,0},{127,66,8},{81,87,0},{127,10,18},{84,93,0},{85,90,1},{81,90,0},{127,10,18},{126,70,18},{81,90,0},{0,88,20},{126,70,18},{0,88,20},{83,0,20}, 
+{83,0,20},{83,0,20},{83,0,20},{82,91,0},{82,91,0},{82,91,0},{82,86,1},{79,88,0},{79,88,0},{88,101,38},{88,97,20},{88,94,24},{87,94,21},{86,102,51},{86,96,19},{86,94,2},{86,92,24},{81,96,52},{83,92,20},{89,99,18},{89,96,2},{89,94,1},{89,93,5},{127,19,51},{87,95,19},{86,94,2},{83,92,20},{127,74,51},{83,92,20},{88,97,20},{88,97,20},{88,97,20}, +{87,93,20},{86,98,8},{86,93,2},{86,93,2},{86,91,1},{82,94,8},{83,91,2},{89,95,0},{89,95,0},{89,95,0},{89,92,0},{127,14,8},{87,93,0},{87,93,0},{85,91,0},{127,72,8},{85,91,0},{127,22,18},{88,97,0},{89,94,1},{85,94,0},{127,22,18},{126,76,18},{85,94,0},{0,92,20},{126,76,18},{0,92,20},{87,0,20},{87,0,20},{87,0,20},{87,0,20},{86,95,0}, +{86,95,0},{86,95,0},{86,90,1},{83,92,0},{83,92,0},{92,105,38},{92,101,20},{92,98,24},{91,98,21},{90,106,51},{90,100,19},{90,98,2},{90,96,24},{85,100,52},{87,96,20},{93,103,18},{93,100,2},{93,98,1},{93,97,5},{127,31,51},{91,99,19},{90,98,2},{87,96,20},{127,80,51},{87,96,20},{92,101,20},{92,101,20},{92,101,20},{91,97,20},{90,102,8},{90,97,2},{90,97,2}, +{90,95,1},{86,98,8},{87,95,2},{93,99,0},{93,99,0},{93,99,0},{93,96,0},{127,27,8},{91,97,0},{91,97,0},{89,95,0},{126,78,8},{89,95,0},{127,34,18},{92,101,0},{93,98,1},{89,98,0},{127,34,18},{126,82,18},{89,98,0},{0,96,20},{126,82,18},{0,96,20},{91,0,20},{91,0,20},{91,0,20},{91,0,20},{90,99,0},{90,99,0},{90,99,0},{90,94,1},{87,96,0}, +{87,96,0},{96,111,36},{96,106,18},{96,103,23},{96,102,20},{95,110,52},{95,104,20},{95,102,2},{94,101,23},{89,105,51},{91,101,18},{98,107,19},{97,105,1},{98,102,2},{97,102,6},{127,45,51},{95,104,19},{95,102,1},{90,101,18},{126,87,51},{90,101,18},{96,106,18},{96,106,18},{96,106,18},{96,101,18},{95,105,9},{95,102,1},{95,102,1},{94,99,2},{91,102,8},{92,100,1},{98,102,1}, 
+{98,102,1},{98,102,1},{98,100,1},{127,40,8},{96,101,0},{96,101,0},{94,99,1},{126,85,8},{94,99,1},{127,48,18},{97,105,0},{98,102,1},{94,102,1},{127,48,18},{127,88,18},{94,102,1},{0,101,18},{127,88,18},{0,101,18},{96,0,18},{96,0,18},{96,0,18},{96,0,18},{95,102,1},{95,102,1},{95,102,1},{94,99,1},{92,100,0},{92,100,0},{100,115,36},{100,110,18},{100,107,23}, +{100,106,20},{99,114,52},{99,108,20},{99,106,2},{98,105,23},{93,109,51},{95,105,18},{102,111,19},{101,109,1},{102,106,2},{101,106,6},{127,57,51},{99,108,19},{99,106,1},{94,105,18},{126,93,51},{94,105,18},{100,110,18},{100,110,18},{100,110,18},{100,105,18},{99,109,9},{99,106,1},{99,106,1},{98,103,2},{95,106,8},{96,104,1},{102,106,1},{102,106,1},{102,106,1},{102,104,1},{127,53,8}, +{100,105,0},{100,105,0},{98,103,1},{126,91,8},{98,103,1},{127,60,18},{101,109,0},{102,106,1},{98,106,1},{127,60,18},{127,94,18},{98,106,1},{0,105,18},{127,94,18},{0,105,18},{100,0,18},{100,0,18},{100,0,18},{100,0,18},{99,106,1},{99,106,1},{99,106,1},{98,103,1},{96,104,0},{96,104,0},{104,119,36},{104,114,18},{104,111,23},{104,110,20},{103,118,52},{103,112,20},{103,110,2}, +{102,109,23},{97,113,51},{99,109,18},{106,115,19},{105,113,1},{106,110,2},{105,110,6},{127,69,51},{103,112,19},{103,110,1},{98,109,18},{126,99,51},{98,109,18},{104,114,18},{104,114,18},{104,114,18},{104,109,18},{103,113,9},{103,110,1},{103,110,1},{102,107,2},{99,110,8},{100,108,1},{106,110,1},{106,110,1},{106,110,1},{106,108,1},{127,64,8},{104,109,0},{104,109,0},{102,107,1},{126,97,8}, +{102,107,1},{127,72,18},{105,113,0},{106,110,1},{102,110,1},{127,72,18},{127,100,18},{102,110,1},{0,109,18},{127,100,18},{0,109,18},{104,0,18},{104,0,18},{104,0,18},{104,0,18},{103,110,1},{103,110,1},{103,110,1},{102,107,1},{100,108,0},{100,108,0},{108,123,36},{108,118,18},{108,115,23},{108,114,20},{107,122,52},{107,116,20},{107,114,2},{106,113,23},{101,117,51},{103,113,18},{110,119,19}, 
+{109,117,1},{110,114,2},{109,114,6},{127,81,51},{107,116,19},{107,114,1},{102,113,18},{126,105,51},{102,113,18},{108,118,18},{108,118,18},{108,118,18},{108,113,18},{107,117,9},{107,114,1},{107,114,1},{106,111,2},{103,114,8},{104,112,1},{110,114,1},{110,114,1},{110,114,1},{110,112,1},{127,76,8},{108,113,0},{108,113,0},{106,111,1},{126,103,8},{106,111,1},{127,84,18},{109,117,0},{110,114,1}, +{106,114,1},{127,84,18},{127,106,18},{106,114,1},{0,113,18},{127,106,18},{0,113,18},{108,0,18},{108,0,18},{108,0,18},{108,0,18},{107,114,1},{107,114,1},{107,114,1},{106,111,1},{104,112,0},{104,112,0},{113,126,38},{113,122,20},{113,119,24},{112,119,21},{111,127,51},{111,121,19},{111,119,2},{111,117,24},{106,121,52},{108,117,20},{114,124,18},{114,121,2},{114,119,1},{114,118,5},{127,95,51}, +{112,120,19},{111,119,2},{108,117,20},{127,111,51},{108,117,20},{113,122,20},{113,122,20},{113,122,20},{112,118,20},{111,123,8},{111,118,2},{111,118,2},{111,116,1},{107,119,8},{108,116,2},{114,120,0},{114,120,0},{114,120,0},{114,117,0},{127,90,8},{112,118,0},{112,118,0},{110,116,0},{127,109,8},{110,116,0},{127,98,18},{113,122,0},{114,119,1},{110,119,0},{127,98,18},{126,113,18},{110,119,0}, +{0,117,20},{126,113,18},{0,117,20},{112,0,20},{112,0,20},{112,0,20},{112,0,20},{111,120,0},{111,120,0},{111,120,0},{111,115,1},{108,117,0},{108,117,0},{117,127,46},{117,126,20},{117,123,24},{116,123,21},{116,126,63},{115,125,19},{115,123,2},{115,121,24},{110,125,52},{112,121,20},{118,127,20},{118,125,2},{118,123,1},{118,122,5},{127,107,51},{116,124,19},{115,123,2},{112,121,20},{127,117,51}, +{112,121,20},{117,126,20},{117,126,20},{117,126,20},{116,122,20},{115,127,8},{115,122,2},{115,122,2},{115,120,1},{111,123,8},{112,120,2},{118,124,0},{118,124,0},{118,124,0},{118,121,0},{127,102,8},{116,122,0},{116,122,0},{114,120,0},{127,115,8},{114,120,0},{127,110,18},{117,126,0},{118,123,1},{114,123,0},{127,110,18},{126,119,18},{114,123,0},{0,121,20},{126,119,18},{0,121,20},{116,0,20}, 
+{116,0,20},{116,0,20},{116,0,20},{115,124,0},{115,124,0},{115,124,0},{115,119,1},{112,121,0},{112,121,0},{122,126,86},{121,127,40},{121,127,24},{120,127,21},{121,127,88},{119,127,27},{119,127,2},{119,125,24},{116,127,60},{116,125,20},{123,127,30},{122,127,10},{122,127,1},{122,126,5},{127,119,51},{121,127,24},{119,127,2},{116,125,20},{127,123,51},{116,125,20},{121,127,24},{121,127,24},{121,127,24}, +{120,126,20},{120,127,14},{119,126,2},{119,126,2},{119,124,1},{115,127,8},{116,124,2},{122,126,1},{122,126,1},{122,126,1},{122,125,0},{127,115,8},{120,126,0},{120,126,0},{118,124,0},{127,121,8},{118,124,0},{127,122,18},{124,127,8},{122,127,1},{118,127,0},{127,122,18},{126,125,18},{118,127,0},{0,125,20},{126,125,18},{0,125,20},{120,0,20},{120,0,20},{120,0,20},{120,0,20},{119,126,1}, +{119,126,1},{119,126,1},{119,123,1},{116,125,0},{116,125,0},{125,126,38},{125,127,30},{125,127,29},{125,127,21},{125,126,35},{124,127,16},{124,127,12},{123,127,1},{122,127,20},{122,127,2},{126,127,2},{126,127,2},{126,127,1},{126,127,1},{127,125,3},{127,126,3},{125,127,2},{124,127,0},{127,126,3},{124,127,0},{125,126,29},{125,126,29},{125,126,29},{125,127,21},{124,127,24},{124,127,12},{124,127,12}, +{123,127,1},{122,127,11},{122,127,2},{126,127,1},{126,127,1},{126,127,1},{126,127,1},{127,125,2},{127,126,2},{127,126,2},{124,127,0},{127,126,2},{124,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{124,0,20},{124,0,20},{124,0,20},{124,0,20},{124,127,8},{124,127,8},{124,127,8},{123,127,1},{122,127,2}, +{122,127,2},{0,16,72},{0,11,8},{0,8,1},{0,7,25},{0,10,153},{0,8,97},{0,6,50},{0,4,115},{0,5,162},{0,4,124},{0,16,72},{0,11,8},{0,8,1},{0,7,25},{5,0,153},{0,8,97},{0,6,50},{0,4,115},{10,0,153},{0,4,115},{0,7,0},{0,7,0},{0,7,0},{0,4,0},{0,3,13},{0,2,5},{0,2,5},{0,1,10},{0,2,14},{0,1,11},{0,7,0}, 
+{0,7,0},{0,7,0},{0,4,0},{2,0,13},{0,2,5},{0,2,5},{0,1,10},{3,0,13},{0,1,10},{8,0,72},{0,11,8},{0,8,1},{0,7,25},{8,0,72},{16,0,72},{0,7,25},{0,5,74},{16,0,72},{0,5,74},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,27,77},{1,18,5},{1,13,15}, +{1,11,18},{0,21,243},{0,14,108},{0,11,29},{0,8,154},{0,9,287},{0,8,179},{2,25,73},{1,18,1},{2,12,6},{1,11,14},{10,1,243},{0,14,108},{0,11,29},{0,8,154},{21,0,243},{0,8,154},{1,17,5},{1,17,5},{1,17,5},{1,9,5},{0,12,50},{0,8,8},{0,8,8},{0,5,25},{0,5,61},{0,5,34},{2,15,1},{2,15,1},{2,15,1},{1,9,1},{6,0,50}, +{0,8,8},{0,8,8},{0,5,25},{12,0,50},{0,5,25},{14,0,72},{1,18,0},{4,12,1},{0,12,10},{14,0,72},{29,0,72},{0,12,10},{0,9,74},{29,0,72},{0,9,74},{1,0,5},{1,0,5},{1,0,5},{1,0,5},{0,4,0},{0,4,0},{0,4,0},{0,2,0},{0,2,1},{0,2,1},{3,35,133},{3,24,63},{4,17,90},{3,16,66},{0,33,243},{0,19,75},{0,16,2}, +{0,13,110},{0,14,345},{0,12,179},{6,29,73},{5,22,1},{6,16,6},{5,15,14},{16,0,243},{0,19,75},{0,16,2},{0,13,110},{33,0,243},{0,13,110},{3,26,61},{3,26,61},{3,26,61},{3,14,61},{0,24,50},{0,16,1},{0,16,1},{0,10,5},{0,11,101},{0,9,46},{6,19,1},{6,19,1},{6,19,1},{5,13,1},{12,0,50},{1,15,0},{1,15,0},{0,10,5},{24,0,50}, +{0,10,5},{20,0,72},{5,22,0},{8,16,1},{0,16,2},{20,0,72},{41,0,72},{0,16,2},{0,13,74},{41,0,72},{0,13,74},{3,0,61},{3,0,61},{3,0,61},{3,0,61},{0,16,0},{0,16,0},{0,16,0},{0,8,0},{0,5,25},{0,5,25},{7,39,146},{7,28,76},{8,21,107},{6,20,79},{4,37,244},{4,23,76},{4,20,3},{3,16,107},{0,22,292},{0,17,106},{10,33,73}, +{9,26,1},{10,20,6},{9,19,14},{22,0,243},{2,25,72},{4,20,2},{0,17,90},{45,0,243},{0,17,90},{7,30,74},{7,30,74},{7,30,74},{7,18,74},{4,28,51},{4,20,2},{4,20,2},{3,14,6},{0,17,68},{0,14,5},{10,23,1},{10,23,1},{10,23,1},{9,17,1},{18,0,50},{5,19,0},{5,19,0},{0,14,1},{36,0,50},{0,14,1},{26,0,72},{9,26,0},{12,20,1}, 
+{3,20,1},{26,0,72},{53,0,72},{3,20,1},{0,17,74},{53,0,72},{0,17,74},{6,0,74},{6,0,74},{6,0,74},{6,0,74},{4,20,1},{4,20,1},{4,20,1},{4,12,1},{0,14,4},{0,14,4},{11,44,144},{11,32,76},{12,25,105},{11,24,77},{8,43,243},{8,28,75},{8,25,2},{8,21,105},{0,28,248},{1,22,76},{14,38,72},{14,30,1},{15,25,5},{14,24,13},{29,0,243}, +{7,29,72},{8,25,2},{0,22,76},{59,0,243},{0,22,76},{11,35,72},{11,35,72},{11,35,72},{11,23,72},{8,34,50},{8,24,1},{8,24,1},{8,18,5},{1,24,51},{3,19,1},{14,29,0},{14,29,0},{14,29,0},{14,21,1},{24,1,50},{9,24,0},{9,24,0},{3,19,0},{50,0,50},{3,19,0},{33,0,72},{13,31,0},{16,25,0},{6,25,0},{33,0,72},{66,0,72},{6,25,0}, +{0,22,72},{66,0,72},{0,22,72},{11,0,72},{11,0,72},{11,0,72},{11,0,72},{8,26,0},{8,26,0},{8,26,0},{8,17,0},{2,20,0},{2,20,0},{15,48,144},{15,36,76},{16,29,105},{15,28,77},{12,47,243},{12,32,75},{12,29,2},{12,25,105},{2,33,244},{5,26,76},{18,42,72},{18,34,1},{19,29,5},{18,28,13},{35,0,243},{11,33,72},{12,29,2},{1,26,72},{71,0,243}, +{1,26,72},{15,39,72},{15,39,72},{15,39,72},{15,27,72},{12,38,50},{12,28,1},{12,28,1},{12,22,5},{5,28,51},{7,23,1},{18,33,0},{18,33,0},{18,33,0},{18,25,1},{30,1,50},{13,28,0},{13,28,0},{7,23,0},{62,0,50},{7,23,0},{39,0,72},{17,35,0},{20,29,0},{10,29,0},{39,0,72},{78,0,72},{10,29,0},{0,26,72},{78,0,72},{0,26,72},{15,0,72}, +{15,0,72},{15,0,72},{15,0,72},{12,30,0},{12,30,0},{12,30,0},{12,21,0},{6,24,0},{6,24,0},{19,52,144},{19,40,76},{20,33,105},{19,32,77},{16,51,243},{16,36,75},{16,33,2},{16,29,105},{6,37,244},{9,30,76},{22,46,72},{22,38,1},{23,33,5},{22,32,13},{41,0,243},{15,37,72},{16,33,2},{5,30,72},{83,0,243},{5,30,72},{19,43,72},{19,43,72},{19,43,72}, +{19,31,72},{16,42,50},{16,32,1},{16,32,1},{16,26,5},{9,32,51},{11,27,1},{22,37,0},{22,37,0},{22,37,0},{22,29,1},{36,1,50},{17,32,0},{17,32,0},{11,27,0},{74,0,50},{11,27,0},{45,0,72},{21,39,0},{24,33,0},{14,33,0},{45,0,72},{91,0,72},{14,33,0},{0,30,72},{91,0,72},{0,30,72},{19,0,72},{19,0,72},{19,0,72},{19,0,72},{16,34,0}, 
+{16,34,0},{16,34,0},{16,25,0},{10,28,0},{10,28,0},{23,56,144},{23,44,76},{24,37,105},{23,36,77},{20,55,243},{20,40,75},{20,37,2},{20,33,105},{10,41,244},{13,34,76},{26,50,72},{26,42,1},{27,37,5},{26,36,13},{47,0,243},{19,41,72},{20,37,2},{9,34,72},{95,0,243},{9,34,72},{23,47,72},{23,47,72},{23,47,72},{23,35,72},{20,46,50},{20,36,1},{20,36,1}, +{20,30,5},{13,36,51},{15,31,1},{26,41,0},{26,41,0},{26,41,0},{26,33,1},{42,1,50},{21,36,0},{21,36,0},{15,31,0},{86,0,50},{15,31,0},{50,1,72},{25,43,0},{28,37,0},{18,37,0},{50,1,72},{103,0,72},{18,37,0},{0,34,72},{103,0,72},{0,34,72},{23,0,72},{23,0,72},{23,0,72},{23,0,72},{20,38,0},{20,38,0},{20,38,0},{20,29,0},{14,32,0}, +{14,32,0},{28,60,146},{28,49,76},{29,42,107},{27,41,79},{25,58,244},{25,44,76},{25,41,3},{24,37,107},{14,46,244},{17,38,76},{31,54,73},{30,47,1},{31,41,6},{30,40,14},{53,1,243},{23,46,72},{25,41,2},{15,38,74},{109,0,243},{15,38,74},{28,51,74},{28,51,74},{28,51,74},{28,39,74},{25,49,51},{25,41,2},{25,41,2},{24,35,6},{18,40,51},{19,36,1},{31,44,1}, +{31,44,1},{31,44,1},{30,38,1},{49,0,50},{26,40,0},{26,40,0},{21,35,1},{100,0,50},{21,35,1},{57,0,72},{30,47,0},{33,41,1},{24,41,1},{57,0,72},{117,0,72},{24,41,1},{0,38,74},{117,0,72},{0,38,74},{27,0,74},{27,0,74},{27,0,74},{27,0,74},{25,41,1},{25,41,1},{25,41,1},{25,33,1},{19,36,0},{19,36,0},{32,64,146},{32,53,76},{33,46,107}, +{31,45,79},{29,62,244},{29,48,76},{29,45,3},{28,41,107},{18,50,244},{21,42,76},{35,58,73},{34,51,1},{35,45,6},{34,44,14},{59,1,243},{27,50,72},{29,45,2},{19,42,74},{121,0,243},{19,42,74},{32,55,74},{32,55,74},{32,55,74},{32,43,74},{29,53,51},{29,45,2},{29,45,2},{28,39,6},{22,44,51},{23,40,1},{35,48,1},{35,48,1},{35,48,1},{34,42,1},{55,0,50}, 
+{30,44,0},{30,44,0},{25,39,1},{112,0,50},{25,39,1},{63,0,72},{34,51,0},{37,45,1},{28,45,1},{63,0,72},{127,1,72},{28,45,1},{0,42,74},{127,1,72},{0,42,74},{31,0,74},{31,0,74},{31,0,74},{31,0,74},{29,45,1},{29,45,1},{29,45,1},{29,37,1},{23,40,0},{23,40,0},{36,67,146},{36,57,76},{37,50,107},{35,49,79},{33,66,244},{33,52,76},{33,49,3}, +{32,45,107},{22,54,244},{25,46,76},{39,62,73},{38,55,1},{39,49,6},{38,48,14},{65,0,243},{31,54,72},{33,49,2},{23,46,74},{127,3,243},{23,46,74},{36,59,74},{36,59,74},{36,59,74},{36,47,74},{33,57,51},{33,49,2},{33,49,2},{32,43,6},{26,48,51},{27,44,1},{39,52,1},{39,52,1},{39,52,1},{38,46,1},{61,0,50},{34,48,0},{34,48,0},{29,43,1},{124,0,50}, +{29,43,1},{69,0,72},{38,55,0},{41,49,1},{32,49,1},{69,0,72},{127,7,72},{32,49,1},{0,46,74},{127,7,72},{0,46,74},{35,0,74},{35,0,74},{35,0,74},{35,0,74},{33,49,1},{33,49,1},{33,49,1},{33,41,1},{27,44,0},{27,44,0},{40,71,146},{40,61,76},{41,54,107},{39,53,79},{37,70,244},{37,56,76},{37,53,3},{36,49,107},{26,58,244},{29,50,76},{43,65,73}, +{42,59,1},{43,53,6},{42,52,14},{71,0,243},{35,58,72},{37,53,2},{27,50,74},{127,9,243},{27,50,74},{40,63,74},{40,63,74},{40,63,74},{40,51,74},{37,61,51},{37,53,2},{37,53,2},{36,47,6},{30,52,51},{31,48,1},{43,56,1},{43,56,1},{43,56,1},{42,50,1},{66,1,50},{38,52,0},{38,52,0},{33,47,1},{126,5,50},{33,47,1},{75,0,72},{42,59,0},{45,53,1}, +{36,53,1},{75,0,72},{127,13,72},{36,53,1},{0,50,74},{127,13,72},{0,50,74},{39,0,74},{39,0,74},{39,0,74},{39,0,74},{37,53,1},{37,53,1},{37,53,1},{37,45,1},{31,48,0},{31,48,0},{44,77,144},{44,65,77},{45,58,105},{44,57,77},{41,75,243},{41,61,75},{41,58,2},{41,54,105},{31,62,244},{34,55,76},{47,71,72},{47,63,1},{48,58,5},{47,57,13},{78,0,243}, 
+{40,62,72},{41,58,2},{30,55,72},{126,16,243},{30,55,72},{44,68,72},{44,68,72},{44,68,72},{44,56,72},{41,66,50},{41,57,1},{41,57,1},{41,51,5},{34,57,51},{36,52,1},{47,62,0},{47,62,0},{47,62,0},{47,54,1},{73,0,50},{42,57,0},{42,57,0},{36,52,0},{126,12,50},{36,52,0},{81,0,72},{47,63,1},{49,58,0},{39,58,0},{81,0,72},{126,20,72},{39,58,0}, +{0,55,72},{126,20,72},{0,55,72},{44,0,72},{44,0,72},{44,0,72},{44,0,72},{41,59,0},{41,59,0},{41,59,0},{41,50,0},{35,53,0},{35,53,0},{48,81,144},{48,69,77},{49,62,105},{48,61,77},{45,79,243},{45,65,73},{45,62,2},{45,58,105},{34,67,243},{38,59,76},{51,75,72},{51,66,1},{52,62,5},{51,61,13},{83,1,243},{45,65,73},{45,62,2},{34,59,72},{126,22,243}, +{34,59,72},{48,72,72},{48,72,72},{48,72,72},{48,60,72},{45,70,50},{45,61,1},{45,61,1},{45,55,5},{38,61,51},{40,56,1},{51,65,0},{51,65,0},{51,65,0},{51,58,1},{79,0,50},{46,61,0},{46,61,0},{40,56,0},{127,17,50},{40,56,0},{87,0,72},{51,66,1},{53,62,0},{43,62,0},{87,0,72},{126,26,72},{43,62,0},{0,59,72},{126,26,72},{0,59,72},{48,0,72}, +{48,0,72},{48,0,72},{48,0,72},{45,63,0},{45,63,0},{45,63,0},{45,54,0},{39,57,0},{39,57,0},{52,85,144},{52,73,77},{53,67,105},{52,65,77},{49,83,243},{49,69,73},{49,65,2},{49,62,105},{38,71,243},{42,63,76},{55,79,72},{55,70,1},{55,66,5},{54,65,14},{89,1,243},{49,69,73},{49,65,2},{38,63,72},{126,28,243},{38,63,72},{52,76,72},{52,76,72},{52,76,72}, +{52,63,73},{49,74,50},{49,65,1},{49,65,1},{49,59,5},{41,65,50},{44,60,1},{55,69,0},{55,69,0},{55,69,0},{55,62,1},{85,0,50},{50,64,1},{50,64,1},{44,60,0},{127,23,50},{44,60,0},{93,0,72},{55,70,1},{57,65,1},{48,65,1},{93,0,72},{126,32,72},{48,65,1},{0,63,72},{126,32,72},{0,63,72},{52,0,72},{52,0,72},{52,0,72},{52,0,72},{49,67,0}, 
+{49,67,0},{49,67,0},{49,58,0},{43,61,0},{43,61,0},{56,89,144},{56,77,77},{57,71,105},{56,69,77},{53,87,243},{53,73,73},{53,69,2},{52,66,103},{42,75,243},{45,66,79},{59,83,72},{59,74,1},{59,70,5},{58,69,14},{95,1,243},{53,73,73},{53,69,2},{43,66,74},{126,34,243},{43,66,74},{56,80,72},{56,80,72},{56,80,72},{56,67,73},{53,78,50},{53,69,1},{53,69,1}, +{53,63,5},{45,69,50},{47,64,2},{59,73,0},{59,73,0},{59,73,0},{59,66,0},{91,0,50},{55,67,1},{55,67,1},{47,64,1},{127,29,50},{47,64,1},{99,0,72},{59,74,1},{61,69,1},{52,69,1},{99,0,72},{126,38,72},{52,69,1},{0,66,74},{126,38,72},{0,66,74},{56,0,72},{56,0,72},{56,0,72},{56,0,72},{53,71,0},{53,71,0},{53,71,0},{53,62,0},{47,64,1}, +{47,64,1},{61,92,146},{61,80,79},{61,75,103},{60,74,78},{58,91,244},{57,78,74},{58,74,2},{56,70,105},{47,79,243},{49,71,77},{63,88,73},{63,79,2},{64,74,5},{63,73,14},{102,0,243},{57,78,73},{58,74,1},{47,71,72},{126,41,243},{47,71,72},{61,83,74},{61,83,74},{61,83,74},{60,72,74},{58,82,51},{58,73,2},{58,73,2},{57,68,5},{50,73,50},{52,68,1},{63,79,1}, +{63,79,1},{63,79,1},{63,71,1},{98,0,50},{59,72,1},{59,72,1},{53,68,0},{127,36,50},{53,68,0},{106,0,72},{63,79,1},{65,74,0},{56,74,0},{106,0,72},{127,44,72},{56,74,0},{0,71,72},{127,44,72},{0,71,72},{60,0,74},{60,0,74},{60,0,74},{60,0,74},{58,74,1},{58,74,1},{58,74,1},{57,66,1},{52,68,1},{52,68,1},{64,97,144},{64,85,76},{65,78,105}, +{64,77,77},{62,95,244},{61,82,74},{62,78,2},{60,74,105},{51,83,243},{53,75,77},{67,91,72},{67,83,1},{68,78,5},{67,77,13},{108,0,243},{61,82,73},{62,78,1},{51,75,72},{125,47,243},{51,75,72},{64,88,72},{64,88,72},{64,88,72},{64,76,72},{62,86,51},{62,77,2},{62,77,2},{61,72,5},{54,77,50},{56,72,1},{67,82,0},{67,82,0},{67,82,0},{67,74,1},{104,0,50}, 
+{63,76,1},{63,76,1},{57,72,0},{127,42,50},{57,72,0},{112,0,72},{66,84,0},{69,78,0},{60,78,0},{112,0,72},{127,50,72},{60,78,0},{0,75,72},{127,50,72},{0,75,72},{64,0,72},{64,0,72},{64,0,72},{64,0,72},{62,78,1},{62,78,1},{62,78,1},{61,70,1},{56,72,1},{56,72,1},{68,101,144},{68,89,76},{69,82,105},{68,81,77},{65,100,243},{65,85,75},{65,82,2}, +{65,78,105},{55,87,243},{57,79,77},{71,95,72},{71,87,1},{72,82,5},{71,81,13},{114,0,243},{64,86,72},{65,82,2},{55,79,72},{127,52,243},{55,79,72},{68,92,72},{68,92,72},{68,92,72},{68,80,72},{65,91,50},{65,81,1},{65,81,1},{65,75,5},{58,81,50},{60,76,1},{71,86,0},{71,86,0},{71,86,0},{71,78,1},{110,0,50},{66,81,0},{66,81,0},{61,76,0},{126,48,50}, +{61,76,0},{118,0,72},{70,88,0},{73,82,0},{64,82,0},{118,0,72},{127,56,72},{64,82,0},{0,79,72},{127,56,72},{0,79,72},{68,0,72},{68,0,72},{68,0,72},{68,0,72},{65,83,0},{65,83,0},{65,83,0},{65,74,0},{60,76,1},{60,76,1},{72,105,144},{72,93,76},{73,86,105},{72,85,77},{69,104,243},{69,89,75},{69,86,2},{69,82,105},{59,91,243},{61,83,77},{75,99,72}, +{75,91,1},{76,86,5},{75,85,13},{120,0,243},{68,90,72},{69,86,2},{59,83,72},{127,58,243},{59,83,72},{72,96,72},{72,96,72},{72,96,72},{72,84,72},{69,95,50},{69,85,1},{69,85,1},{69,79,5},{62,85,50},{64,80,1},{75,90,0},{75,90,0},{75,90,0},{75,82,1},{115,1,50},{70,85,0},{70,85,0},{64,80,0},{126,54,50},{64,80,0},{124,0,72},{74,92,0},{77,86,0}, +{67,86,0},{124,0,72},{127,62,72},{67,86,0},{0,83,72},{127,62,72},{0,83,72},{72,0,72},{72,0,72},{72,0,72},{72,0,72},{69,87,0},{69,87,0},{69,87,0},{69,78,0},{64,80,1},{64,80,1},{77,109,146},{77,98,76},{78,91,107},{76,90,79},{74,107,244},{74,93,76},{74,90,3},{73,86,107},{63,96,243},{66,87,76},{80,103,73},{79,96,1},{80,90,6},{79,89,14},{127,0,243}, 
+{72,95,72},{74,90,2},{64,87,74},{126,65,243},{64,87,74},{77,100,74},{77,100,74},{77,100,74},{77,88,74},{74,98,51},{74,90,2},{74,90,2},{73,84,6},{67,89,51},{68,85,1},{80,93,1},{80,93,1},{80,93,1},{79,87,1},{122,0,50},{75,89,0},{75,89,0},{70,84,1},{126,61,50},{70,84,1},{127,7,72},{79,96,0},{82,90,1},{73,90,1},{127,7,72},{127,68,72},{73,90,1}, +{0,87,74},{127,68,72},{0,87,74},{76,0,74},{76,0,74},{76,0,74},{76,0,74},{74,90,1},{74,90,1},{74,90,1},{74,82,1},{68,85,0},{68,85,0},{81,113,146},{81,102,76},{82,95,107},{80,94,79},{78,111,244},{78,97,76},{78,94,3},{77,90,107},{67,99,244},{70,91,76},{84,107,73},{83,100,1},{84,94,6},{83,93,14},{127,11,243},{76,99,72},{78,94,2},{68,91,74},{126,71,243}, +{68,91,74},{81,104,74},{81,104,74},{81,104,74},{81,92,74},{78,102,51},{78,94,2},{78,94,2},{77,88,6},{71,93,51},{72,89,1},{84,97,1},{84,97,1},{84,97,1},{83,91,1},{127,2,50},{79,93,0},{79,93,0},{74,88,1},{127,66,50},{74,88,1},{127,19,72},{83,100,0},{86,94,1},{77,94,1},{127,19,72},{127,74,72},{77,94,1},{0,91,74},{127,74,72},{0,91,74},{80,0,74}, +{80,0,74},{80,0,74},{80,0,74},{78,94,1},{78,94,1},{78,94,1},{78,86,1},{72,89,0},{72,89,0},{85,117,146},{85,106,76},{86,99,107},{84,98,79},{82,115,244},{82,101,76},{82,98,3},{81,94,107},{71,103,244},{74,95,76},{88,111,73},{87,104,1},{88,98,6},{87,97,14},{127,24,243},{80,103,72},{82,98,2},{72,95,74},{125,77,243},{72,95,74},{85,108,74},{85,108,74},{85,108,74}, +{85,96,74},{82,106,51},{82,98,2},{82,98,2},{81,92,6},{75,97,51},{76,93,1},{88,101,1},{88,101,1},{88,101,1},{87,95,1},{127,14,50},{83,97,0},{83,97,0},{78,92,1},{127,72,50},{78,92,1},{127,31,72},{87,104,0},{90,98,1},{81,98,1},{127,31,72},{127,80,72},{81,98,1},{0,95,74},{127,80,72},{0,95,74},{84,0,74},{84,0,74},{84,0,74},{84,0,74},{82,98,1}, 
+{82,98,1},{82,98,1},{82,90,1},{76,93,0},{76,93,0},{89,121,146},{89,110,76},{90,103,107},{88,102,79},{86,119,244},{86,105,76},{86,102,3},{85,98,107},{75,107,244},{78,99,76},{92,115,73},{91,108,1},{92,102,6},{91,101,14},{127,36,243},{84,107,72},{86,102,2},{76,99,74},{127,82,243},{76,99,74},{89,112,74},{89,112,74},{89,112,74},{89,100,74},{86,110,51},{86,102,2},{86,102,2}, +{85,96,6},{79,101,51},{80,97,1},{92,105,1},{92,105,1},{92,105,1},{91,99,1},{127,27,50},{87,101,0},{87,101,0},{82,96,1},{126,78,50},{82,96,1},{127,43,72},{91,108,0},{94,102,1},{85,102,1},{127,43,72},{127,86,72},{85,102,1},{0,99,74},{127,86,72},{0,99,74},{88,0,74},{88,0,74},{88,0,74},{88,0,74},{86,102,1},{86,102,1},{86,102,1},{86,94,1},{80,97,0}, +{80,97,0},{93,126,144},{93,114,76},{94,107,105},{93,106,77},{90,125,243},{90,110,75},{90,107,2},{90,103,105},{80,111,244},{83,104,76},{96,120,72},{96,112,1},{97,107,5},{96,106,13},{127,50,243},{89,111,72},{90,107,2},{79,104,72},{127,89,243},{79,104,72},{93,117,72},{93,117,72},{93,117,72},{93,105,72},{90,116,50},{90,106,1},{90,106,1},{90,100,5},{83,106,51},{85,101,1},{96,111,0}, +{96,111,0},{96,111,0},{96,103,1},{127,40,50},{91,106,0},{91,106,0},{85,101,0},{126,85,50},{85,101,0},{127,57,72},{95,113,0},{98,107,0},{88,107,0},{127,57,72},{126,93,72},{88,107,0},{0,104,72},{126,93,72},{0,104,72},{93,0,72},{93,0,72},{93,0,72},{93,0,72},{90,108,0},{90,108,0},{90,108,0},{90,99,0},{84,102,0},{84,102,0},{97,127,152},{97,118,76},{98,111,105}, +{97,110,77},{94,127,244},{94,114,75},{94,111,2},{94,107,105},{84,115,244},{87,108,76},{100,124,72},{100,116,1},{101,111,5},{100,110,13},{127,62,243},{93,115,72},{94,111,2},{83,108,72},{127,95,243},{83,108,72},{97,121,72},{97,121,72},{97,121,72},{97,109,72},{94,120,50},{94,110,1},{94,110,1},{94,104,5},{87,110,51},{89,105,1},{100,115,0},{100,115,0},{100,115,0},{100,107,1},{127,53,50}, 
+{95,110,0},{95,110,0},{89,105,0},{126,91,50},{89,105,0},{127,69,72},{99,117,0},{102,111,0},{92,111,0},{127,69,72},{126,99,72},{92,111,0},{0,108,72},{126,99,72},{0,108,72},{97,0,72},{97,0,72},{97,0,72},{97,0,72},{94,112,0},{94,112,0},{94,112,0},{94,103,0},{88,106,0},{88,106,0},{102,126,184},{101,122,76},{102,115,105},{101,114,77},{100,127,260},{98,118,75},{98,115,2}, +{98,111,105},{88,119,244},{91,112,76},{104,127,74},{104,120,1},{105,115,5},{104,114,13},{127,73,243},{97,119,72},{98,115,2},{87,112,72},{127,101,243},{87,112,72},{101,125,72},{101,125,72},{101,125,72},{101,113,72},{98,124,50},{98,114,1},{98,114,1},{98,108,5},{91,114,51},{93,109,1},{104,119,0},{104,119,0},{104,119,0},{104,111,1},{127,64,50},{99,114,0},{99,114,0},{93,109,0},{126,97,50}, +{93,109,0},{127,81,72},{103,121,0},{106,115,0},{96,115,0},{127,81,72},{126,105,72},{96,115,0},{0,112,72},{126,105,72},{0,112,72},{101,0,72},{101,0,72},{101,0,72},{101,0,72},{98,116,0},{98,116,0},{98,116,0},{98,107,0},{92,110,0},{92,110,0},{106,127,224},{105,126,76},{106,119,105},{105,118,77},{104,127,299},{102,122,75},{102,119,2},{102,115,105},{92,123,244},{95,116,76},{109,127,84}, +{108,124,1},{109,119,5},{108,118,13},{127,86,243},{101,123,72},{102,119,2},{91,116,72},{127,107,243},{91,116,72},{105,126,76},{105,126,76},{105,126,76},{105,117,72},{102,126,52},{102,118,1},{102,118,1},{102,112,5},{95,118,51},{97,113,1},{108,123,0},{108,123,0},{108,123,0},{108,115,1},{127,76,50},{103,118,0},{103,118,0},{97,113,0},{126,103,50},{97,113,0},{127,93,72},{107,125,0},{110,119,0}, +{100,119,0},{127,93,72},{126,111,72},{100,119,0},{0,116,72},{126,111,72},{0,116,72},{105,0,72},{105,0,72},{105,0,72},{105,0,72},{102,120,0},{102,120,0},{102,120,0},{102,111,0},{96,114,0},{96,114,0},{111,127,290},{111,127,103},{111,124,107},{109,123,79},{111,127,345},{107,126,76},{107,123,3},{106,119,107},{98,127,248},{99,120,76},{114,127,113},{113,127,5},{113,123,6},{112,122,14},{127,99,243}, 
+{107,126,75},{107,123,2},{97,120,74},{126,114,243},{97,120,74},{110,126,90},{110,126,90},{110,126,90},{110,121,74},{108,126,67},{107,123,2},{107,123,2},{106,117,6},{100,122,51},{101,118,1},{113,126,1},{113,126,1},{113,126,1},{112,120,1},{127,90,50},{108,122,0},{108,122,0},{103,117,1},{127,109,50},{103,117,1},{127,107,72},{113,127,4},{115,123,1},{106,123,1},{127,107,72},{127,117,72},{106,123,1}, +{0,120,74},{127,117,72},{0,120,74},{109,0,74},{109,0,74},{109,0,74},{109,0,74},{107,123,1},{107,123,1},{107,123,1},{107,115,1},{101,118,0},{101,118,0},{117,127,343},{115,127,179},{114,127,110},{113,126,78},{115,127,387},{111,127,102},{111,127,2},{110,123,90},{105,127,263},{103,124,63},{120,127,134},{118,127,46},{117,127,5},{116,126,9},{127,110,221},{113,127,89},{111,127,1},{101,124,61},{126,119,221}, +{101,124,61},{114,127,110},{114,127,110},{114,127,110},{114,125,74},{112,127,91},{111,127,2},{111,127,2},{110,121,6},{104,126,51},{105,122,1},{117,127,5},{117,127,5},{117,127,5},{116,124,1},{127,102,50},{112,126,0},{112,126,0},{107,121,1},{127,115,50},{107,121,1},{127,118,61},{119,127,25},{119,127,0},{110,127,0},{127,118,61},{126,123,61},{110,127,0},{0,124,61},{126,123,61},{0,124,61},{113,0,74}, +{113,0,74},{113,0,74},{113,0,74},{111,127,1},{111,127,1},{111,127,1},{111,119,1},{105,122,0},{105,122,0},{120,127,239},{119,127,179},{119,127,154},{118,127,83},{120,127,254},{116,127,78},{116,127,29},{114,126,15},{113,127,169},{109,126,5},{123,127,54},{122,127,34},{122,127,25},{121,127,2},{127,119,93},{119,127,33},{119,127,8},{109,126,5},{127,123,93},{109,126,5},{119,126,154},{119,126,154},{119,126,154}, 
+{118,127,83},{117,127,125},{116,127,29},{116,127,29},{114,125,6},{110,127,72},{109,126,1},{122,126,25},{122,126,25},{122,126,25},{121,127,2},{127,115,50},{119,127,8},{119,127,8},{111,125,1},{127,121,50},{111,125,1},{127,124,5},{125,127,1},{125,127,0},{122,127,0},{127,124,5},{126,126,5},{122,127,0},{0,126,5},{126,126,5},{0,126,5},{117,0,74},{117,0,74},{117,0,74},{117,0,74},{115,127,10}, +{115,127,10},{115,127,10},{115,123,1},{109,126,0},{109,126,0},{123,127,140},{123,127,124},{123,127,115},{122,127,83},{123,127,131},{121,127,66},{120,127,50},{119,127,1},{119,127,86},{116,127,8},{126,127,11},{125,127,11},{125,127,10},{125,127,2},{127,124,17},{125,127,6},{124,127,5},{119,127,0},{126,126,17},{119,127,0},{123,127,115},{123,127,115},{123,127,115},{122,127,83},{121,127,98},{120,127,50},{120,127,50}, +{119,127,1},{116,127,57},{116,127,8},{125,126,10},{125,126,10},{125,126,10},{125,127,2},{127,122,13},{124,127,5},{124,127,5},{119,127,0},{126,125,13},{119,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{121,0,74},{121,0,74},{121,0,74},{121,0,74},{120,127,25},{120,127,25},{120,127,25},{119,127,1},{116,127,8}, +{116,127,8},{0,29,200},{0,19,18},{0,14,1},{0,13,73},{0,20,441},{0,11,281},{0,11,134},{0,7,331},{0,8,474},{0,7,356},{0,29,200},{0,19,18},{0,14,1},{0,13,73},{10,0,441},{0,11,281},{0,11,134},{0,7,331},{20,0,441},{0,7,331},{0,13,0},{0,13,0},{0,13,0},{0,7,0},{0,6,41},{0,5,13},{0,5,13},{0,3,25},{0,3,45},{0,3,29},{0,13,0}, +{0,13,0},{0,13,0},{0,7,0},{3,0,41},{0,5,13},{0,5,13},{0,3,25},{6,0,41},{0,3,25},{14,0,200},{0,19,18},{0,14,1},{0,13,73},{14,0,200},{29,0,200},{0,13,73},{0,9,202},{29,0,200},{0,9,202},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,41,200},{0,27,2},{1,19,18}, 
+{0,17,45},{0,27,686},{0,19,346},{0,15,158},{0,10,467},{0,11,762},{0,10,516},{0,41,200},{0,27,2},{1,19,14},{0,17,45},{13,1,686},{0,19,346},{0,15,158},{0,10,467},{27,0,686},{0,10,467},{0,26,0},{0,26,0},{0,26,0},{0,13,0},{0,12,145},{0,11,53},{0,11,53},{0,6,89},{0,5,158},{0,6,105},{0,26,0},{0,26,0},{0,26,0},{0,13,0},{6,0,145}, +{0,11,53},{0,11,53},{0,6,89},{12,0,145},{0,6,89},{20,0,200},{1,26,2},{4,18,1},{0,17,45},{20,0,200},{41,0,200},{0,17,45},{0,13,202},{41,0,200},{0,13,202},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,49,225},{2,32,27},{3,24,71},{1,21,54},{0,39,723},{0,25,283},{0,20,65}, +{0,15,419},{0,17,865},{0,15,519},{4,45,201},{3,32,1},{5,23,19},{3,21,35},{19,0,723},{0,25,283},{0,20,65},{0,15,419},{39,0,723},{0,15,419},{2,34,25},{2,34,25},{2,34,25},{2,17,26},{0,24,162},{0,17,20},{0,17,20},{0,10,61},{0,11,213},{0,9,110},{4,30,1},{4,30,1},{4,30,1},{3,17,1},{12,0,162},{0,17,20},{0,17,20},{0,10,61},{24,0,162}, +{0,10,61},{26,0,200},{3,32,0},{8,22,1},{0,21,18},{26,0,200},{53,0,200},{0,21,18},{0,17,202},{53,0,200},{0,17,202},{2,0,25},{2,0,25},{2,0,25},{2,0,25},{0,10,0},{0,10,0},{0,10,0},{0,5,0},{0,5,9},{0,5,9},{4,57,313},{4,39,121},{5,28,198},{4,25,131},{0,52,723},{0,31,227},{0,26,9},{0,20,362},{0,22,959},{0,19,515},{8,49,201}, +{7,36,1},{9,27,19},{7,25,35},{25,0,723},{0,31,227},{0,26,9},{0,20,362},{52,0,723},{0,20,362},{4,42,113},{4,42,113},{4,42,113},{4,22,114},{0,36,162},{0,24,2},{0,24,2},{0,15,34},{0,17,285},{0,14,122},{8,34,1},{8,34,1},{8,34,1},{7,21,1},{18,0,162},{1,23,2},{1,23,2},{0,15,34},{36,0,162},{0,15,34},{32,0,200},{7,36,0},{12,26,1}, +{0,26,5},{32,0,200},{65,0,200},{0,26,5},{0,21,202},{65,0,200},{0,21,202},{4,0,113},{4,0,113},{4,0,113},{4,0,113},{0,23,0},{0,23,0},{0,23,0},{0,11,0},{0,8,41},{0,8,41},{7,64,400},{7,43,216},{8,32,312},{7,30,213},{2,61,723},{1,38,208},{2,31,2},{0,24,312},{0,31,980},{0,24,440},{12,54,200},{12,40,0},{13,31,20},{11,29,36},{32,0,723}, 
+{0,38,203},{2,31,2},{0,24,296},{65,0,723},{0,24,296},{7,49,200},{7,49,200},{7,49,200},{7,28,200},{2,46,162},{2,31,2},{2,31,2},{2,20,20},{0,22,280},{0,19,84},{12,39,0},{12,39,0},{12,39,0},{12,25,1},{24,1,162},{4,29,0},{4,29,0},{0,20,13},{50,0,162},{0,20,13},{39,0,200},{12,40,0},{16,31,0},{0,31,0},{39,0,200},{78,0,200},{0,31,0}, +{0,26,200},{78,0,200},{0,26,200},{7,0,200},{7,0,200},{7,0,200},{7,0,200},{2,32,0},{2,32,0},{2,32,0},{2,17,0},{0,14,50},{0,14,50},{11,68,400},{11,47,216},{12,36,312},{11,34,213},{6,65,723},{5,42,208},{6,35,2},{4,28,312},{0,37,868},{0,28,304},{16,58,200},{16,44,0},{17,35,20},{15,33,36},{38,0,723},{3,43,200},{6,35,2},{0,29,257},{77,0,723}, +{0,29,257},{11,53,200},{11,53,200},{11,53,200},{11,32,200},{6,50,162},{6,35,2},{6,35,2},{6,24,20},{0,28,216},{0,24,20},{16,43,0},{16,43,0},{16,43,0},{16,29,1},{30,1,162},{8,33,0},{8,33,0},{0,24,4},{62,0,162},{0,24,4},{45,0,200},{16,44,0},{20,35,0},{4,35,0},{45,0,200},{91,0,200},{4,35,0},{0,30,200},{91,0,200},{0,30,200},{11,0,200}, +{11,0,200},{11,0,200},{11,0,200},{6,36,0},{6,36,0},{6,36,0},{6,21,0},{0,22,8},{0,22,8},{15,72,400},{15,51,216},{16,40,312},{15,38,213},{10,69,723},{9,46,208},{10,39,2},{8,32,312},{0,43,788},{0,33,228},{20,62,200},{20,48,0},{21,39,20},{19,37,36},{44,0,723},{7,47,200},{10,39,2},{0,33,224},{89,0,723},{0,33,224},{15,57,200},{15,57,200},{15,57,200}, +{15,36,200},{10,54,162},{10,39,2},{10,39,2},{10,28,20},{0,35,179},{1,29,0},{20,47,0},{20,47,0},{20,47,0},{20,33,1},{36,1,162},{12,37,0},{12,37,0},{1,29,0},{74,0,162},{1,29,0},{50,1,200},{20,48,0},{24,39,0},{8,39,0},{50,1,200},{103,0,200},{8,39,0},{0,34,200},{103,0,200},{0,34,200},{15,0,200},{15,0,200},{15,0,200},{15,0,200},{10,40,0}, 
+{10,40,0},{10,40,0},{10,25,0},{1,29,0},{1,29,0},{19,76,400},{19,55,216},{20,44,312},{19,42,213},{14,73,723},{13,50,208},{14,43,2},{12,36,312},{0,49,740},{1,37,216},{24,66,200},{24,52,0},{25,43,20},{23,41,36},{50,0,723},{11,51,200},{14,43,2},{0,38,209},{101,0,723},{0,38,209},{19,61,200},{19,61,200},{19,61,200},{19,40,200},{14,58,162},{14,43,2},{14,43,2}, +{14,32,20},{1,42,163},{5,33,0},{24,51,0},{24,51,0},{24,51,0},{24,37,1},{42,1,162},{16,41,0},{16,41,0},{5,33,0},{86,0,162},{5,33,0},{56,1,200},{24,52,0},{28,43,0},{12,43,0},{56,1,200},{115,0,200},{12,43,0},{0,38,200},{115,0,200},{0,38,200},{19,0,200},{19,0,200},{19,0,200},{19,0,200},{14,44,0},{14,44,0},{14,44,0},{14,29,0},{5,33,0}, +{5,33,0},{24,80,402},{24,60,212},{25,49,308},{23,47,212},{19,76,724},{19,53,212},{19,47,4},{17,41,308},{1,55,724},{6,42,212},{29,69,201},{28,57,1},{30,48,19},{28,46,35},{56,1,723},{15,56,200},{19,47,3},{1,42,202},{115,0,723},{1,42,202},{24,64,202},{24,64,202},{24,64,202},{24,44,202},{19,61,163},{19,46,3},{19,46,3},{18,36,19},{6,46,163},{9,38,1},{29,55,1}, +{29,55,1},{29,55,1},{28,42,1},{49,0,162},{20,46,0},{20,46,0},{11,37,1},{100,0,162},{11,37,1},{63,0,200},{28,57,0},{33,47,1},{18,47,1},{63,0,200},{127,1,200},{18,47,1},{0,42,202},{127,1,200},{0,42,202},{23,0,202},{23,0,202},{23,0,202},{23,0,202},{19,48,1},{19,48,1},{19,48,1},{19,33,1},{9,38,0},{9,38,0},{28,84,402},{28,64,215},{29,53,308}, +{27,51,212},{23,80,724},{23,57,212},{23,51,4},{21,45,308},{5,59,724},{10,46,212},{33,73,201},{32,61,1},{34,52,19},{32,50,35},{62,1,723},{19,60,200},{23,51,3},{5,46,202},{127,0,723},{5,46,202},{28,68,202},{28,68,202},{28,68,202},{28,48,202},{23,65,163},{23,50,3},{23,50,3},{22,40,19},{10,50,163},{13,42,1},{33,59,1},{33,59,1},{33,59,1},{32,46,1},{55,0,162}, 
+{24,50,0},{24,50,0},{15,41,1},{112,0,162},{15,41,1},{69,0,200},{32,61,0},{37,51,1},{22,51,1},{69,0,200},{127,7,200},{22,51,1},{0,46,202},{127,7,200},{0,46,202},{27,0,202},{27,0,202},{27,0,202},{27,0,202},{23,52,1},{23,52,1},{23,52,1},{23,37,1},{13,42,0},{13,42,0},{32,88,402},{32,68,215},{33,57,308},{31,55,212},{27,84,724},{27,61,212},{27,55,4}, +{25,49,308},{8,64,723},{14,50,212},{37,77,201},{36,64,2},{38,56,19},{36,54,35},{68,0,723},{22,64,201},{27,55,3},{9,50,202},{127,6,723},{9,50,202},{32,72,202},{32,72,202},{32,72,202},{32,52,202},{27,69,163},{27,54,3},{27,54,3},{26,44,19},{14,54,163},{17,46,1},{37,63,1},{37,63,1},{37,63,1},{36,50,1},{61,0,162},{28,54,0},{28,54,0},{19,45,1},{124,0,162}, +{19,45,1},{75,0,200},{36,64,1},{41,55,1},{26,55,1},{75,0,200},{127,13,200},{26,55,1},{0,50,202},{127,13,200},{0,50,202},{31,0,202},{31,0,202},{31,0,202},{31,0,202},{27,56,1},{27,56,1},{27,56,1},{27,41,1},{17,46,0},{17,46,0},{36,92,402},{36,72,215},{37,61,308},{35,59,212},{31,88,724},{30,65,207},{31,59,4},{29,53,308},{12,68,723},{18,54,212},{41,81,201}, +{41,67,2},{42,60,19},{40,58,35},{74,0,723},{28,67,201},{31,59,3},{13,54,202},{127,12,723},{13,54,202},{36,76,202},{36,76,202},{36,76,202},{36,56,202},{31,73,163},{31,58,3},{31,58,3},{30,48,19},{18,58,163},{21,50,1},{41,66,1},{41,66,1},{41,66,1},{40,54,1},{66,1,162},{32,58,0},{32,58,0},{23,49,1},{126,5,162},{23,49,1},{80,1,200},{41,67,1},{45,59,1}, +{30,59,1},{80,1,200},{126,19,200},{30,59,1},{0,54,202},{126,19,200},{0,54,202},{35,0,202},{35,0,202},{35,0,202},{35,0,202},{31,60,1},{31,60,1},{31,60,1},{31,45,1},{21,50,0},{21,50,0},{40,97,400},{40,77,213},{42,64,314},{40,63,213},{35,94,723},{35,70,209},{35,64,6},{33,57,312},{17,72,723},{22,58,216},{45,87,200},{45,72,1},{46,64,21},{44,62,36},{80,1,723}, 
+{33,71,201},{36,63,4},{16,59,200},{126,19,723},{16,59,200},{40,82,200},{40,82,200},{40,82,200},{40,61,200},{35,78,162},{35,63,4},{35,63,4},{35,53,20},{22,63,163},{26,54,0},{45,72,0},{45,72,0},{45,72,0},{45,58,1},{73,0,162},{37,62,0},{37,62,0},{26,54,0},{126,12,162},{26,54,0},{87,0,200},{45,72,1},{49,64,1},{32,64,1},{87,0,200},{126,26,200},{32,64,1}, +{0,59,200},{126,26,200},{0,59,200},{40,0,200},{40,0,200},{40,0,200},{40,0,200},{35,65,0},{35,65,0},{35,65,0},{35,50,0},{26,54,0},{26,54,0},{44,101,400},{44,81,213},{46,68,314},{44,66,213},{39,98,723},{39,74,209},{39,67,2},{37,61,312},{21,76,723},{26,62,216},{49,91,200},{49,76,1},{50,68,21},{48,66,38},{86,1,723},{37,75,201},{39,67,2},{20,63,200},{126,25,723}, +{20,63,200},{44,86,200},{44,86,200},{44,86,200},{44,64,200},{39,82,162},{39,67,1},{39,67,1},{39,57,20},{25,67,162},{30,58,0},{49,76,0},{49,76,0},{49,76,0},{49,62,1},{79,0,162},{41,65,1},{41,65,1},{30,58,0},{127,17,162},{30,58,0},{93,0,200},{49,76,1},{53,67,1},{38,67,1},{93,0,200},{126,32,200},{38,67,1},{0,63,200},{126,32,200},{0,63,200},{44,0,200}, +{44,0,200},{44,0,200},{44,0,200},{39,69,0},{39,69,0},{39,69,0},{39,54,0},{30,58,0},{30,58,0},{48,105,400},{48,85,213},{50,72,314},{48,70,213},{43,102,723},{43,78,209},{43,71,2},{41,65,315},{25,80,723},{29,66,215},{53,95,200},{53,80,1},{54,72,21},{52,70,38},{92,1,723},{41,79,201},{43,71,2},{25,66,202},{126,31,723},{25,66,202},{48,90,200},{48,90,200},{48,90,200}, +{48,68,200},{43,86,162},{43,71,1},{43,71,1},{43,61,20},{29,71,162},{34,62,0},{53,80,0},{53,80,0},{53,80,0},{53,66,0},{85,0,162},{45,69,1},{45,69,1},{34,62,0},{127,23,162},{34,62,0},{99,0,200},{53,80,1},{57,71,1},{42,71,1},{99,0,200},{126,38,200},{42,71,1},{0,66,202},{126,38,200},{0,66,202},{48,0,200},{48,0,200},{48,0,200},{48,0,200},{43,73,0}, 
+{43,73,0},{43,73,0},{43,58,0},{34,62,0},{34,62,0},{52,109,400},{52,89,213},{54,76,314},{52,74,213},{47,106,723},{47,82,209},{47,75,2},{45,69,315},{29,84,723},{33,70,215},{57,99,200},{57,84,1},{58,76,21},{56,74,38},{98,1,723},{45,83,201},{47,75,2},{29,70,202},{126,37,723},{29,70,202},{52,94,200},{52,94,200},{52,94,200},{52,72,200},{47,90,162},{47,75,1},{47,75,1}, +{46,65,21},{33,75,162},{38,65,2},{57,84,0},{57,84,0},{57,84,0},{57,70,0},{91,0,162},{49,73,1},{49,73,1},{39,65,1},{127,29,162},{39,65,1},{105,0,200},{57,84,1},{61,75,1},{46,75,1},{105,0,200},{126,44,200},{46,75,1},{0,70,202},{126,44,200},{0,70,202},{52,0,200},{52,0,200},{52,0,200},{52,0,200},{47,77,0},{47,77,0},{47,77,0},{47,62,0},{38,65,1}, +{38,65,1},{57,113,402},{57,93,215},{58,82,315},{56,79,215},{52,109,724},{51,86,207},{52,80,2},{51,73,314},{33,89,723},{38,75,213},{62,102,201},{62,88,2},{62,81,21},{60,79,38},{105,0,723},{49,88,201},{52,80,1},{33,75,200},{126,44,723},{33,75,200},{57,97,202},{57,97,202},{57,97,202},{56,77,202},{52,94,163},{52,79,2},{52,79,2},{51,69,21},{38,79,162},{42,70,1},{62,87,1}, +{62,87,1},{62,87,1},{62,74,1},{98,0,162},{54,77,1},{54,77,1},{43,70,0},{127,36,162},{43,70,0},{112,0,200},{62,88,1},{65,80,0},{50,80,0},{112,0,200},{127,50,200},{50,80,0},{0,75,200},{127,50,200},{0,75,200},{56,0,202},{56,0,202},{56,0,202},{56,0,202},{52,80,1},{52,80,1},{52,80,1},{51,66,1},{43,69,1},{43,69,1},{61,117,402},{61,97,215},{62,86,315}, +{60,83,215},{56,113,724},{55,90,207},{56,84,2},{55,77,314},{37,93,723},{42,79,213},{65,107,200},{65,93,0},{66,84,20},{64,82,36},{111,0,723},{53,92,201},{56,84,1},{37,79,200},{127,49,723},{37,79,200},{61,101,202},{61,101,202},{61,101,202},{60,81,202},{56,98,163},{56,83,2},{56,83,2},{55,73,21},{42,83,162},{46,74,1},{65,92,0},{65,92,0},{65,92,0},{65,78,1},{104,0,162}, 
+{58,81,1},{58,81,1},{47,74,0},{127,42,162},{47,74,0},{118,0,200},{65,93,0},{69,84,0},{54,84,0},{118,0,200},{127,56,200},{54,84,0},{0,79,200},{127,56,200},{0,79,200},{60,0,202},{60,0,202},{60,0,202},{60,0,202},{56,84,1},{56,84,1},{56,84,1},{55,70,1},{47,73,1},{47,73,1},{64,122,400},{64,100,216},{65,89,312},{64,87,213},{60,117,724},{59,94,207},{60,88,2}, +{59,81,314},{41,97,723},{46,83,213},{69,111,200},{69,97,0},{70,88,20},{68,86,36},{117,0,723},{57,96,201},{60,88,1},{41,83,200},{127,55,723},{41,83,200},{64,106,200},{64,106,200},{64,106,200},{64,85,200},{60,102,163},{60,87,2},{60,87,2},{59,77,21},{46,87,162},{50,78,1},{69,96,0},{69,96,0},{69,96,0},{69,82,1},{110,0,162},{62,85,1},{62,85,1},{51,78,0},{126,48,162}, +{51,78,0},{124,0,200},{69,97,0},{73,88,0},{58,88,0},{124,0,200},{127,62,200},{58,88,0},{0,83,200},{127,62,200},{0,83,200},{64,0,200},{64,0,200},{64,0,200},{64,0,200},{60,88,1},{60,88,1},{60,88,1},{59,74,1},{51,77,1},{51,77,1},{68,126,400},{68,104,216},{69,93,312},{68,91,213},{63,123,724},{63,98,207},{63,92,6},{63,85,314},{45,101,723},{50,87,213},{73,115,200}, +{73,101,0},{74,92,20},{72,90,36},{123,0,723},{61,100,201},{64,91,4},{45,87,200},{127,61,723},{45,87,200},{68,110,200},{68,110,200},{68,110,200},{68,89,200},{63,108,163},{64,91,4},{64,91,4},{63,81,21},{50,91,162},{54,82,1},{73,100,0},{73,100,0},{73,100,0},{73,86,1},{115,1,162},{65,90,0},{65,90,0},{55,82,0},{126,54,162},{55,82,0},{127,5,200},{73,101,0},{77,92,0}, +{62,92,0},{127,5,200},{126,68,200},{62,92,0},{0,87,200},{126,68,200},{0,87,200},{68,0,200},{68,0,200},{68,0,200},{68,0,200},{63,94,1},{63,94,1},{63,94,1},{63,78,1},{55,81,1},{55,81,1},{73,126,410},{73,109,212},{74,98,308},{72,96,212},{68,126,724},{68,102,212},{68,96,4},{66,90,308},{50,105,723},{54,91,215},{78,119,201},{77,106,1},{79,97,19},{77,95,35},{127,5,723}, 
+{64,105,200},{68,96,3},{50,91,202},{126,68,723},{50,91,202},{73,114,202},{73,114,202},{73,114,202},{73,93,202},{68,110,163},{68,95,3},{68,95,3},{67,85,19},{54,96,162},{59,86,2},{78,104,1},{78,104,1},{78,104,1},{77,91,1},{122,0,162},{69,95,0},{69,95,0},{60,86,1},{126,61,162},{60,86,1},{127,19,200},{77,106,0},{82,96,1},{67,96,1},{127,19,200},{127,74,200},{67,96,1}, +{0,91,202},{127,74,200},{0,91,202},{72,0,202},{72,0,202},{72,0,202},{72,0,202},{68,97,1},{68,97,1},{68,97,1},{68,82,1},{59,86,1},{59,86,1},{77,127,434},{77,113,212},{78,102,308},{76,100,212},{72,127,732},{72,106,212},{72,100,4},{70,94,308},{54,109,723},{58,95,215},{82,123,201},{81,110,1},{83,101,19},{81,99,35},{127,18,723},{68,109,200},{72,100,3},{54,95,202},{126,74,723}, +{54,95,202},{77,118,202},{77,118,202},{77,118,202},{77,97,202},{72,114,163},{72,99,3},{72,99,3},{71,89,19},{58,100,162},{63,90,2},{82,108,1},{82,108,1},{82,108,1},{81,95,1},{127,2,162},{73,99,0},{73,99,0},{64,90,1},{127,66,162},{64,90,1},{127,31,200},{81,110,0},{86,100,1},{71,100,1},{127,31,200},{127,80,200},{71,100,1},{0,95,202},{127,80,200},{0,95,202},{76,0,202}, +{76,0,202},{76,0,202},{76,0,202},{72,101,1},{72,101,1},{72,101,1},{72,86,1},{63,90,1},{63,90,1},{82,127,468},{81,117,212},{82,106,308},{80,104,212},{77,127,753},{76,110,212},{76,104,4},{74,98,308},{58,113,723},{62,99,215},{86,127,201},{85,114,1},{87,105,19},{85,103,35},{127,30,723},{72,113,200},{76,104,3},{58,99,202},{127,79,723},{58,99,202},{81,122,202},{81,122,202},{81,122,202}, +{81,101,202},{76,118,163},{76,103,3},{76,103,3},{75,93,19},{62,104,162},{66,95,1},{86,112,1},{86,112,1},{86,112,1},{85,99,1},{127,14,162},{77,103,0},{77,103,0},{68,94,1},{127,72,162},{68,94,1},{127,43,200},{85,114,0},{90,104,1},{75,104,1},{127,43,200},{127,86,200},{75,104,1},{0,99,202},{127,86,200},{0,99,202},{80,0,202},{80,0,202},{80,0,202},{80,0,202},{76,105,1}, 
+{76,105,1},{76,105,1},{76,90,1},{66,95,0},{66,95,0},{86,127,546},{85,121,212},{86,110,308},{84,108,212},{82,127,788},{80,114,212},{80,108,4},{78,102,308},{62,117,723},{67,103,212},{91,127,211},{89,118,1},{91,109,19},{89,107,35},{127,42,723},{76,117,200},{80,108,3},{62,103,202},{127,85,723},{62,103,202},{85,126,202},{85,126,202},{85,126,202},{85,105,202},{80,122,163},{80,107,3},{80,107,3}, +{79,97,19},{67,107,163},{70,99,1},{90,116,1},{90,116,1},{90,116,1},{89,103,1},{127,27,162},{81,107,0},{81,107,0},{72,98,1},{126,78,162},{72,98,1},{127,56,200},{89,118,0},{94,108,1},{79,108,1},{127,56,200},{127,92,200},{79,108,1},{0,103,202},{127,92,200},{0,103,202},{84,0,202},{84,0,202},{84,0,202},{84,0,202},{80,109,1},{80,109,1},{80,109,1},{80,94,1},{70,99,0}, +{70,99,0},{91,127,632},{89,125,216},{90,114,312},{89,112,213},{88,127,852},{83,120,208},{84,113,2},{82,106,312},{66,121,724},{71,107,216},{97,127,244},{94,122,0},{95,113,20},{93,111,36},{127,56,723},{81,121,200},{84,113,2},{65,108,200},{127,92,723},{65,108,200},{89,127,209},{89,127,209},{89,127,209},{89,110,200},{84,126,164},{84,113,2},{84,113,2},{84,102,20},{71,112,163},{75,103,0},{94,121,0}, +{94,121,0},{94,121,0},{94,107,1},{127,40,162},{86,111,0},{86,111,0},{75,103,0},{126,85,162},{75,103,0},{127,69,200},{94,122,0},{98,113,0},{82,113,0},{127,69,200},{126,99,200},{82,113,0},{0,108,200},{126,99,200},{0,108,200},{89,0,200},{89,0,200},{89,0,200},{89,0,200},{84,114,0},{84,114,0},{84,114,0},{84,99,0},{75,103,0},{75,103,0},{97,127,728},{94,127,228},{94,118,312}, +{93,116,213},{94,127,932},{87,124,208},{88,117,2},{86,110,312},{70,125,724},{75,111,216},{101,127,286},{98,126,0},{99,117,20},{97,115,36},{127,67,723},{85,125,200},{88,117,2},{69,112,200},{127,98,723},{69,112,200},{94,127,224},{94,127,224},{94,127,224},{93,114,200},{89,127,174},{88,117,2},{88,117,2},{88,106,20},{75,116,163},{79,107,0},{98,125,0},{98,125,0},{98,125,0},{98,111,1},{127,53,162}, 
+{90,115,0},{90,115,0},{79,107,0},{126,91,162},{79,107,0},{127,81,200},{98,126,0},{102,117,0},{86,117,0},{127,81,200},{126,105,200},{86,117,0},{0,112,200},{126,105,200},{0,112,200},{93,0,200},{93,0,200},{93,0,200},{93,0,200},{88,118,0},{88,118,0},{88,118,0},{88,103,0},{79,107,0},{79,107,0},{100,127,864},{99,127,304},{98,122,312},{97,120,213},{98,127,1043},{92,127,211},{92,121,2}, +{90,114,312},{76,127,732},{79,115,216},{106,127,336},{102,127,20},{103,121,20},{101,119,36},{127,79,723},{90,127,208},{92,121,2},{73,116,200},{127,104,723},{73,116,200},{98,127,257},{98,127,257},{98,127,257},{97,118,200},{94,127,196},{92,121,2},{92,121,2},{92,110,20},{79,120,163},{83,111,0},{102,126,4},{102,126,4},{102,126,4},{102,115,1},{127,64,162},{94,119,0},{94,119,0},{83,111,0},{126,97,162}, +{83,111,0},{127,93,200},{105,127,8},{106,121,0},{90,121,0},{127,93,200},{126,111,200},{90,121,0},{0,116,200},{126,111,200},{0,116,200},{97,0,200},{97,0,200},{97,0,200},{97,0,200},{92,122,0},{92,122,0},{92,122,0},{92,107,0},{83,111,0},{83,111,0},{106,127,992},{103,127,440},{102,126,312},{101,124,213},{103,127,1144},{96,127,272},{96,125,2},{94,118,312},{84,127,804},{83,119,216},{111,127,398}, +{108,127,84},{107,125,20},{105,123,36},{127,92,723},{99,127,248},{96,125,2},{77,120,200},{126,110,723},{77,120,200},{103,127,296},{103,127,296},{103,127,296},{101,122,200},{100,127,244},{96,125,2},{96,125,2},{96,114,20},{83,124,163},{87,115,0},{107,127,13},{107,127,13},{107,127,13},{106,119,1},{127,76,162},{98,123,0},{98,123,0},{87,115,0},{126,103,162},{87,115,0},{127,105,200},{111,127,45},{110,125,0}, +{94,125,0},{127,105,200},{126,117,200},{94,125,0},{0,120,200},{126,117,200},{0,120,200},{101,0,200},{101,0,200},{101,0,200},{101,0,200},{96,126,0},{96,126,0},{96,126,0},{96,111,0},{87,115,0},{87,115,0},{111,127,919},{108,127,515},{107,127,362},{105,127,203},{109,127,1027},{102,127,267},{101,127,9},{99,122,198},{93,127,703},{88,123,121},{115,127,338},{113,127,122},{112,127,34},{110,126,14},{127,102,546}, 
+{108,127,202},{102,127,2},{85,123,113},{127,115,546},{85,123,113},{107,127,362},{107,127,362},{107,127,362},{106,126,202},{104,127,309},{101,127,9},{101,127,9},{100,118,19},{89,127,168},{91,120,1},{112,127,34},{112,127,34},{112,127,34},{110,124,1},{127,90,162},{104,126,2},{104,126,2},{93,119,1},{127,109,162},{93,119,1},{127,115,113},{119,127,41},{116,127,0},{104,127,0},{127,115,113},{127,121,113},{104,127,0}, +{0,123,113},{127,121,113},{0,123,113},{105,0,202},{105,0,202},{105,0,202},{105,0,202},{101,127,5},{101,127,5},{101,127,5},{101,115,1},{91,120,0},{91,120,0},{114,127,751},{111,127,514},{111,127,414},{110,127,218},{114,127,814},{108,127,219},{107,127,65},{103,124,71},{102,127,539},{94,125,27},{120,127,206},{117,127,110},{117,127,61},{115,127,2},{127,110,333},{113,127,121},{110,127,20},{93,125,25},{126,119,333}, +{93,125,25},{111,127,414},{111,127,414},{111,127,414},{110,127,218},{109,127,371},{107,127,65},{107,127,65},{104,122,19},{96,127,203},{95,124,1},{117,127,61},{117,127,61},{117,127,61},{115,127,2},{127,102,162},{110,127,20},{110,127,20},{97,123,1},{127,115,162},{97,123,1},{127,121,25},{122,127,9},{122,127,0},{116,127,0},{127,121,25},{127,124,25},{116,127,0},{0,125,25},{127,124,25},{0,125,25},{109,0,202}, +{109,0,202},{109,0,202},{109,0,202},{106,127,18},{106,127,18},{106,127,18},{105,119,1},{95,124,0},{95,124,0},{117,127,636},{117,127,516},{117,127,467},{115,127,282},{117,127,643},{111,127,241},{111,127,141},{108,126,18},{108,127,434},{99,127,2},{123,127,131},{121,127,105},{121,127,89},{119,127,26},{127,118,193},{119,127,86},{116,127,53},{101,127,0},{126,123,193},{101,127,0},{117,127,467},{117,127,467},{117,127,467}, 
+{115,127,282},{114,127,410},{111,127,141},{111,127,141},{108,126,14},{105,127,254},{99,127,2},{121,127,89},{121,127,89},{121,127,89},{119,127,26},{127,113,145},{116,127,53},{116,127,53},{101,127,0},{127,120,145},{101,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{113,0,202},{113,0,202},{113,0,202},{113,0,202},{111,127,41}, +{111,127,41},{111,127,41},{109,123,1},{101,126,2},{101,126,2},{120,127,412},{120,127,356},{120,127,331},{118,127,238},{120,127,387},{116,127,183},{116,127,134},{113,127,1},{113,127,262},{108,127,18},{124,127,41},{124,127,29},{124,127,25},{122,127,10},{127,122,54},{122,127,22},{122,127,13},{113,127,0},{126,125,54},{113,127,0},{120,127,331},{120,127,331},{120,127,331},{118,127,238},{117,127,266},{116,127,134},{116,127,134}, +{113,127,1},{110,127,161},{108,127,18},{124,127,25},{124,127,25},{124,127,25},{122,127,10},{127,119,41},{122,127,13},{122,127,13},{113,127,0},{127,123,41},{113,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{117,0,202},{117,0,202},{117,0,202},{117,0,202},{114,127,73},{114,127,73},{114,127,73},{113,127,1},{108,127,18}, +{108,127,18},{0,41,421},{0,28,41},{0,21,1},{0,18,157},{0,27,925},{0,19,569},{0,17,285},{0,12,701},{0,12,1005},{0,10,751},{0,41,421},{0,28,41},{0,21,1},{0,18,157},{13,1,925},{0,19,569},{0,17,285},{0,12,701},{27,0,925},{0,12,701},{0,20,0},{0,20,0},{0,20,0},{0,10,0},{0,9,85},{0,8,29},{0,8,29},{0,4,50},{0,5,94},{0,4,59},{0,20,0}, +{0,20,0},{0,20,0},{0,10,0},{5,0,85},{0,8,29},{0,8,29},{0,4,50},{9,0,85},{0,4,50},{20,0,421},{0,28,41},{0,21,1},{0,18,157},{20,0,421},{41,0,421},{0,18,157},{0,14,421},{41,0,421},{0,14,421},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,53,421},{0,37,9},{1,25,17}, 
+{0,21,113},{0,36,1261},{0,22,677},{0,20,305},{0,15,881},{0,17,1383},{0,13,971},{0,53,421},{0,37,9},{1,25,13},{0,21,113},{18,0,1261},{0,22,677},{0,20,305},{0,15,881},{36,0,1261},{0,15,881},{0,32,0},{0,32,0},{0,32,0},{0,15,1},{0,15,221},{0,11,85},{0,11,85},{0,7,130},{0,8,246},{0,7,155},{0,32,0},{0,32,0},{0,32,0},{0,15,1},{8,0,221}, +{0,11,85},{0,11,85},{0,7,130},{15,0,221},{0,7,130},{26,0,421},{0,37,9},{3,25,1},{0,21,113},{26,0,421},{53,0,421},{0,21,113},{0,18,421},{53,0,421},{0,18,421},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,63,426},{1,42,6},{2,29,66},{0,27,74},{0,45,1514},{0,28,706},{0,25,258}, +{0,18,974},{0,19,1706},{0,18,1118},{2,61,422},{1,42,2},{3,30,38},{0,27,74},{22,0,1514},{0,28,706},{0,25,258},{0,18,974},{45,0,1514},{0,18,974},{1,42,5},{1,42,5},{1,42,5},{1,21,5},{0,24,338},{0,19,90},{0,19,90},{0,10,181},{0,11,389},{0,10,230},{2,40,1},{2,40,1},{2,40,1},{1,21,1},{12,0,338},{0,19,90},{0,19,90},{0,10,181},{24,0,338}, +{0,10,181},{32,0,421},{1,42,1},{7,29,1},{0,27,65},{32,0,421},{65,0,421},{0,27,65},{0,22,421},{65,0,421},{0,22,421},{1,0,5},{1,0,5},{1,0,5},{1,0,5},{0,4,0},{0,4,0},{0,4,0},{0,2,0},{0,2,1},{0,2,1},{3,71,482},{3,48,62},{4,34,158},{2,31,117},{0,58,1514},{0,37,594},{0,31,122},{0,23,881},{0,25,1818},{0,21,1086},{6,65,422}, +{5,46,2},{7,34,38},{4,31,74},{28,0,1514},{0,37,594},{0,31,122},{0,23,881},{58,0,1514},{0,23,881},{3,50,61},{3,50,61},{3,50,61},{3,26,61},{0,36,338},{0,25,34},{0,25,34},{0,15,130},{0,17,461},{0,14,226},{6,44,1},{6,44,1},{6,44,1},{5,25,1},{18,0,338},{0,25,34},{0,25,34},{0,15,130},{36,0,338},{0,15,130},{38,0,421},{5,46,1},{11,33,1}, +{0,32,40},{38,0,421},{77,0,421},{0,32,40},{0,26,421},{77,0,421},{0,26,421},{3,0,61},{3,0,61},{3,0,61},{3,0,61},{0,16,0},{0,16,0},{0,16,0},{0,8,0},{0,5,25},{0,5,25},{5,80,621},{5,55,213},{7,40,357},{5,35,241},{0,71,1514},{0,43,497},{0,36,29},{0,27,761},{0,31,1989},{0,26,1079},{10,70,421},{10,50,1},{11,38,41},{8,35,77},{35,0,1514}, 
+{0,43,497},{0,36,29},{0,27,761},{71,0,1514},{0,27,761},{5,60,200},{5,60,200},{5,60,200},{5,32,200},{0,50,338},{0,34,4},{0,34,4},{0,21,80},{0,22,552},{0,19,244},{10,49,0},{10,49,0},{10,49,0},{10,29,0},{24,1,338},{0,34,4},{0,34,4},{0,21,80},{50,0,338},{0,21,80},{45,0,421},{10,50,1},{16,37,1},{0,36,13},{45,0,421},{91,0,421},{0,36,13}, +{0,30,421},{91,0,421},{0,30,421},{5,0,200},{5,0,200},{5,0,200},{5,0,200},{0,30,0},{0,30,0},{0,30,0},{0,15,0},{0,11,74},{0,11,74},{7,89,813},{7,60,421},{10,43,617},{7,40,417},{0,83,1514},{0,51,446},{0,41,3},{0,32,686},{0,37,2165},{0,31,1101},{14,74,421},{14,54,1},{15,42,41},{12,39,77},{41,0,1514},{0,51,446},{0,41,3},{0,32,686},{83,0,1514}, +{0,32,686},{7,67,392},{7,67,392},{7,67,392},{7,37,392},{0,62,338},{0,41,2},{0,41,2},{0,24,52},{0,28,680},{0,23,298},{14,53,0},{14,53,0},{14,53,0},{14,33,0},{30,1,338},{2,39,0},{2,39,0},{0,24,52},{62,0,338},{0,24,52},{51,0,421},{14,54,1},{20,41,1},{0,41,2},{51,0,421},{103,0,421},{0,41,2},{0,34,421},{103,0,421},{0,34,421},{7,0,392}, +{7,0,392},{7,0,392},{7,0,392},{0,42,0},{0,42,0},{0,42,0},{0,21,0},{0,17,146},{0,17,146},{11,93,842},{11,65,453},{12,48,654},{11,44,446},{4,87,1515},{3,54,435},{4,45,4},{0,36,654},{0,43,2030},{0,35,882},{18,78,421},{18,58,1},{19,46,41},{16,43,77},{47,0,1514},{0,57,422},{4,45,3},{0,36,605},{95,0,1514},{0,36,605},{11,71,421},{11,71,421},{11,71,421}, +{11,41,421},{4,66,339},{4,45,3},{4,45,3},{2,29,41},{0,34,581},{0,28,161},{18,57,0},{18,57,0},{18,57,0},{18,37,0},{36,1,338},{6,43,0},{6,43,0},{0,30,20},{74,0,338},{0,30,20},{57,0,421},{18,58,1},{24,45,1},{2,45,1},{57,0,421},{115,0,421},{2,45,1},{0,38,421},{115,0,421},{0,38,421},{11,0,421},{11,0,421},{11,0,421},{11,0,421},{4,46,1}, 
+{4,46,1},{4,46,1},{3,25,1},{0,22,85},{0,22,85},{15,97,842},{16,67,453},{16,52,654},{15,48,446},{8,91,1515},{7,58,435},{8,49,4},{4,40,654},{0,51,1850},{0,40,682},{22,82,421},{22,62,1},{23,50,41},{20,47,77},{53,0,1514},{3,61,421},{8,49,3},{0,41,554},{107,0,1514},{0,41,554},{15,75,421},{15,75,421},{15,75,421},{15,45,421},{8,70,339},{8,49,3},{8,49,3}, +{6,33,41},{0,40,477},{0,34,57},{22,61,0},{22,61,0},{22,61,0},{22,41,0},{42,1,338},{10,47,0},{10,47,0},{0,34,8},{86,0,338},{0,34,8},{63,0,421},{22,62,1},{28,49,1},{6,49,1},{63,0,421},{127,0,421},{6,49,1},{0,42,421},{127,0,421},{0,42,421},{15,0,421},{15,0,421},{15,0,421},{15,0,421},{8,50,1},{8,50,1},{8,50,1},{7,29,1},{0,31,29}, +{0,31,29},{19,102,842},{20,71,451},{21,57,650},{19,52,446},{12,97,1514},{12,63,437},{12,54,2},{9,45,650},{0,57,1710},{0,45,530},{27,86,422},{26,67,1},{28,55,38},{25,52,74},{59,1,1514},{7,66,421},{12,54,2},{0,45,494},{121,0,1514},{0,45,494},{19,81,421},{19,81,421},{19,81,421},{19,49,422},{12,75,338},{12,54,2},{12,54,2},{11,38,38},{0,49,402},{0,39,6},{27,64,1}, +{27,64,1},{27,64,1},{26,46,1},{49,0,338},{14,52,0},{14,52,0},{1,39,1},{100,0,338},{1,39,1},{69,0,421},{26,67,0},{32,54,1},{10,54,0},{69,0,421},{127,7,421},{10,54,0},{0,47,421},{127,7,421},{0,47,421},{19,0,421},{19,0,421},{19,0,421},{19,0,421},{12,56,0},{12,56,0},{12,56,0},{12,33,1},{0,38,5},{0,38,5},{23,106,842},{24,75,451},{25,61,650}, +{23,56,446},{16,101,1514},{15,67,434},{16,58,2},{13,49,650},{0,64,1617},{0,50,462},{31,90,422},{30,71,1},{32,59,38},{29,56,74},{65,0,1514},{11,70,421},{16,58,2},{0,50,461},{127,3,1514},{0,50,461},{23,85,421},{23,85,421},{23,85,421},{23,53,422},{16,79,338},{16,58,2},{16,58,2},{15,42,38},{0,54,349},{3,43,2},{31,68,1},{31,68,1},{31,68,1},{30,50,1},{55,0,338}, 
+{18,56,0},{18,56,0},{5,43,1},{112,0,338},{5,43,1},{75,0,421},{30,71,0},{36,58,1},{14,58,0},{75,0,421},{127,13,421},{14,58,0},{0,51,421},{127,13,421},{0,51,421},{23,0,421},{23,0,421},{23,0,421},{23,0,421},{16,60,0},{16,60,0},{16,60,0},{16,37,1},{3,43,1},{3,43,1},{27,110,842},{28,79,451},{30,64,650},{27,60,446},{20,105,1514},{19,71,434},{20,62,2}, +{17,53,650},{0,70,1553},{1,54,450},{35,94,422},{34,75,1},{36,63,38},{33,60,74},{71,0,1514},{15,74,421},{20,62,2},{0,54,441},{127,9,1514},{0,54,441},{27,89,421},{27,89,421},{27,89,421},{27,57,422},{20,83,338},{20,62,2},{20,62,2},{19,46,38},{0,61,338},{7,47,2},{35,72,1},{35,72,1},{35,72,1},{34,54,1},{61,0,338},{22,60,0},{22,60,0},{9,47,1},{124,0,338}, +{9,47,1},{81,0,421},{34,75,0},{40,62,1},{18,62,0},{81,0,421},{126,19,421},{18,62,0},{0,55,421},{126,19,421},{0,55,421},{27,0,421},{27,0,421},{27,0,421},{27,0,421},{20,64,0},{20,64,0},{20,64,0},{20,41,1},{7,47,1},{7,47,1},{31,114,842},{32,83,451},{33,67,650},{31,64,446},{24,109,1514},{23,75,434},{24,65,6},{21,57,650},{0,76,1521},{5,58,450},{39,98,422}, +{38,79,1},{41,66,42},{37,63,81},{77,0,1514},{19,78,421},{25,65,2},{0,59,425},{127,15,1514},{0,59,425},{31,93,421},{31,93,421},{31,93,421},{31,61,422},{24,87,338},{24,64,5},{24,64,5},{23,50,38},{5,64,339},{11,51,2},{39,76,1},{39,76,1},{39,76,1},{38,58,1},{66,1,338},{25,64,1},{25,64,1},{13,51,1},{126,5,338},{13,51,1},{87,0,421},{38,79,0},{45,65,1}, +{23,65,1},{87,0,421},{126,25,421},{23,65,1},{0,59,421},{126,25,421},{0,59,421},{31,0,421},{31,0,421},{31,0,421},{31,0,421},{24,67,0},{24,67,0},{24,67,0},{24,45,1},{11,51,1},{11,51,1},{36,118,842},{37,88,453},{38,72,646},{35,69,445},{29,112,1515},{27,79,438},{29,69,6},{25,61,654},{3,82,1515},{10,63,450},{43,103,421},{43,83,2},{45,70,41},{42,67,76},{83,1,1514}, 
+{24,82,421},{29,69,5},{2,63,421},{126,22,1514},{2,63,421},{36,96,421},{36,96,421},{36,96,421},{36,65,421},{29,91,339},{29,69,2},{29,69,2},{27,54,41},{10,68,339},{15,56,1},{43,82,0},{43,82,0},{43,82,0},{43,62,0},{73,0,338},{31,67,1},{31,67,1},{16,56,0},{126,12,338},{16,56,0},{93,0,421},{42,84,0},{49,70,0},{26,70,0},{93,0,421},{126,32,421},{26,70,0}, +{0,63,421},{126,32,421},{0,63,421},{36,0,421},{36,0,421},{36,0,421},{36,0,421},{29,71,1},{29,71,1},{29,71,1},{28,50,1},{15,56,1},{15,56,1},{40,122,842},{41,92,453},{42,76,646},{39,73,445},{33,116,1515},{31,83,438},{33,73,6},{30,64,650},{7,86,1515},{15,66,451},{47,107,421},{47,87,2},{49,74,41},{46,71,76},{89,1,1514},{28,86,421},{33,73,5},{5,67,421},{126,28,1514}, +{5,67,421},{40,100,421},{40,100,421},{40,100,421},{40,69,421},{33,95,339},{33,73,2},{33,73,2},{31,58,41},{14,72,339},{19,60,1},{47,86,0},{47,86,0},{47,86,0},{47,66,0},{79,0,338},{35,71,1},{35,71,1},{20,60,0},{127,17,338},{20,60,0},{99,0,421},{46,88,0},{53,74,0},{30,74,0},{99,0,421},{126,38,421},{30,74,0},{0,67,421},{126,38,421},{0,67,421},{40,0,421}, +{40,0,421},{40,0,421},{40,0,421},{33,75,1},{33,75,1},{33,75,1},{32,54,1},{19,60,1},{19,60,1},{44,126,842},{45,96,453},{46,80,646},{43,77,445},{37,120,1515},{35,87,438},{37,77,6},{34,68,650},{11,90,1515},{19,70,451},{51,111,421},{51,91,2},{53,78,41},{50,75,76},{95,1,1514},{32,90,421},{37,77,5},{9,71,421},{126,34,1514},{9,71,421},{44,104,421},{44,104,421},{44,104,421}, +{44,73,421},{37,99,339},{37,77,2},{37,77,2},{35,62,41},{18,76,339},{23,64,1},{51,90,0},{51,90,0},{51,90,0},{51,70,0},{85,0,338},{39,75,1},{39,75,1},{23,64,1},{127,23,338},{23,64,1},{105,0,421},{50,92,0},{57,78,0},{34,78,0},{105,0,421},{126,44,421},{34,78,0},{0,71,421},{126,44,421},{0,71,421},{44,0,421},{44,0,421},{44,0,421},{44,0,421},{37,79,1}, 
+{37,79,1},{37,79,1},{36,58,1},{23,64,0},{23,64,0},{48,126,854},{49,100,453},{50,84,646},{47,81,445},{41,124,1515},{39,91,438},{41,81,6},{38,72,650},{15,94,1515},{23,74,451},{55,115,421},{55,95,2},{57,82,41},{54,79,76},{101,1,1514},{36,94,421},{41,81,5},{13,75,421},{126,40,1514},{13,75,421},{48,108,421},{48,108,421},{48,108,421},{48,77,421},{41,103,339},{41,81,2},{41,81,2}, +{40,65,42},{22,80,339},{27,68,1},{55,94,0},{55,94,0},{55,94,0},{55,74,0},{91,0,338},{43,79,1},{43,79,1},{29,67,1},{127,29,338},{29,67,1},{111,0,421},{54,96,0},{61,82,0},{38,82,0},{111,0,421},{127,49,421},{38,82,0},{0,75,421},{127,49,421},{0,75,421},{48,0,421},{48,0,421},{48,0,421},{48,0,421},{41,83,1},{41,83,1},{41,83,1},{40,62,1},{27,68,0}, +{27,68,0},{53,127,886},{53,104,451},{54,88,650},{52,85,446},{46,126,1521},{44,96,434},{45,86,6},{43,77,646},{18,99,1515},{27,78,453},{60,119,422},{59,100,1},{62,87,42},{59,84,75},{108,0,1514},{40,99,421},{46,86,2},{18,79,421},{125,47,1514},{18,79,421},{52,114,421},{52,114,421},{52,114,421},{52,82,421},{45,108,338},{45,85,5},{45,85,5},{45,70,41},{26,85,339},{31,72,2},{60,97,1}, +{60,97,1},{60,97,1},{60,78,1},{98,0,338},{48,83,1},{48,83,1},{33,72,0},{127,36,338},{33,72,0},{118,0,421},{59,100,0},{65,86,1},{44,86,1},{118,0,421},{127,56,421},{44,86,1},{0,79,421},{127,56,421},{0,79,421},{52,0,421},{52,0,421},{52,0,421},{52,0,421},{45,88,0},{45,88,0},{45,88,0},{45,66,0},{31,73,0},{31,73,0},{58,127,926},{57,108,451},{58,92,650}, +{56,89,446},{51,126,1542},{48,100,434},{49,90,6},{47,81,646},{22,103,1515},{31,82,453},{63,125,422},{63,104,1},{64,91,41},{63,88,75},{114,0,1514},{44,103,421},{50,90,2},{22,83,421},{127,52,1514},{22,83,421},{56,118,421},{56,118,421},{56,118,421},{56,86,421},{49,112,338},{49,89,5},{49,89,5},{49,74,41},{30,89,339},{35,76,2},{63,103,1},{63,103,1},{63,103,1},{63,82,2},{104,0,338}, 
+{52,87,1},{52,87,1},{37,76,0},{127,42,338},{37,76,0},{124,0,421},{63,104,0},{69,90,1},{48,90,1},{124,0,421},{127,62,421},{48,90,1},{0,83,421},{127,62,421},{0,83,421},{56,0,421},{56,0,421},{56,0,421},{56,0,421},{49,92,0},{49,92,0},{49,92,0},{49,70,0},{35,77,0},{35,77,0},{62,127,1010},{61,112,451},{62,96,650},{60,93,446},{56,127,1577},{52,104,434},{53,94,6}, +{51,85,646},{26,107,1515},{35,86,453},{67,126,425},{67,107,1},{68,95,41},{65,92,77},{120,0,1514},{48,107,421},{54,94,2},{26,87,421},{127,58,1514},{26,87,421},{60,122,421},{60,122,421},{60,122,421},{60,90,421},{53,116,338},{53,93,5},{53,93,5},{53,78,41},{34,93,339},{39,80,2},{67,106,0},{67,106,0},{67,106,0},{67,86,0},{110,0,338},{56,91,1},{56,91,1},{41,80,0},{126,48,338}, +{41,80,0},{127,5,421},{67,107,1},{73,94,1},{52,94,1},{127,5,421},{126,68,421},{52,94,1},{0,87,421},{126,68,421},{0,87,421},{60,0,421},{60,0,421},{60,0,421},{60,0,421},{53,96,0},{53,96,0},{53,96,0},{53,74,0},{39,81,0},{39,81,0},{68,127,1098},{64,117,450},{65,101,654},{64,97,446},{61,127,1626},{56,108,434},{57,98,6},{55,89,646},{30,111,1515},{39,90,453},{72,127,437}, +{71,111,1},{72,99,41},{69,96,77},{126,0,1514},{52,111,421},{58,98,2},{30,91,421},{126,64,1514},{30,91,421},{64,125,421},{64,125,421},{64,125,421},{64,94,421},{57,120,338},{57,97,5},{57,97,5},{57,82,41},{38,97,339},{43,84,2},{71,110,0},{71,110,0},{71,110,0},{71,90,0},{115,1,338},{60,95,1},{60,95,1},{45,84,0},{126,54,338},{45,84,0},{127,18,421},{71,111,1},{77,98,1}, +{56,98,1},{127,18,421},{126,74,421},{56,98,1},{0,91,421},{126,74,421},{0,91,421},{64,0,421},{64,0,421},{64,0,421},{64,0,421},{57,100,0},{57,100,0},{57,100,0},{57,78,0},{43,85,0},{43,85,0},{71,127,1214},{69,122,450},{70,106,650},{68,101,446},{66,127,1722},{60,112,438},{62,102,6},{59,93,650},{36,115,1515},{44,95,451},{78,126,470},{75,116,2},{77,104,38},{74,101,74},{127,11,1514}, 
+{57,115,421},{62,102,5},{34,96,421},{126,71,1514},{34,96,421},{68,127,425},{68,127,425},{68,127,425},{68,98,422},{62,124,339},{62,102,2},{62,102,2},{61,86,42},{43,101,339},{48,89,1},{76,114,1},{76,114,1},{76,114,1},{75,95,1},{122,0,338},{63,101,1},{63,101,1},{50,88,1},{126,61,338},{50,88,1},{127,31,421},{75,116,1},{81,103,1},{59,103,0},{127,31,421},{127,80,421},{59,103,0}, +{0,96,421},{127,80,421},{0,96,421},{68,0,421},{68,0,421},{68,0,421},{68,0,421},{62,104,1},{62,104,1},{62,104,1},{62,82,1},{48,89,0},{48,89,0},{77,127,1334},{73,126,450},{74,110,650},{72,105,446},{71,127,1805},{65,116,437},{65,107,2},{63,97,650},{40,119,1515},{48,99,451},{82,127,506},{79,120,2},{81,108,38},{78,105,74},{127,24,1514},{61,119,421},{65,107,2},{38,100,421},{125,77,1514}, +{38,100,421},{73,126,441},{73,126,441},{73,126,441},{72,102,422},{65,127,340},{65,107,2},{65,107,2},{64,91,38},{47,105,339},{52,93,1},{80,118,1},{80,118,1},{80,118,1},{79,99,1},{127,2,338},{67,105,0},{67,105,0},{54,92,1},{127,66,338},{54,92,1},{127,43,421},{79,120,1},{85,107,1},{63,107,0},{127,43,421},{127,86,421},{63,107,0},{0,100,421},{127,86,421},{0,100,421},{72,0,421}, +{72,0,421},{72,0,421},{72,0,421},{65,109,0},{65,109,0},{65,109,0},{65,86,1},{52,93,0},{52,93,0},{82,127,1470},{77,127,462},{78,114,650},{76,109,446},{77,127,1917},{69,120,437},{69,111,2},{66,102,650},{44,123,1515},{52,103,451},{88,127,562},{83,124,2},{85,112,38},{82,109,74},{127,36,1514},{65,122,421},{69,111,2},{42,104,421},{127,82,1514},{42,104,421},{77,127,461},{77,127,461},{77,127,461}, +{76,106,422},{71,127,356},{69,111,2},{69,111,2},{68,95,38},{51,109,339},{56,97,1},{84,122,1},{84,122,1},{84,122,1},{83,103,1},{127,14,338},{71,109,0},{71,109,0},{58,96,1},{127,72,338},{58,96,1},{127,56,421},{83,124,1},{89,111,1},{67,111,0},{127,56,421},{127,92,421},{67,111,0},{0,104,421},{127,92,421},{0,104,421},{76,0,421},{76,0,421},{76,0,421},{76,0,421},{69,113,0}, 
+{69,113,0},{69,113,0},{69,90,1},{56,97,0},{56,97,0},{85,127,1634},{82,127,530},{82,118,650},{80,113,446},{82,127,2030},{73,124,437},{73,115,2},{70,106,650},{48,127,1515},{56,107,451},{92,127,646},{87,127,6},{89,116,38},{86,113,74},{127,48,1514},{69,126,421},{73,115,2},{46,108,421},{127,88,1514},{46,108,421},{82,127,494},{82,127,494},{82,127,494},{80,110,422},{75,127,386},{73,115,2},{73,115,2}, +{72,99,38},{55,113,339},{60,101,1},{88,126,1},{88,126,1},{88,126,1},{87,107,1},{127,27,338},{75,113,0},{75,113,0},{62,100,1},{126,78,338},{62,100,1},{127,67,421},{89,126,5},{93,115,1},{71,115,0},{127,67,421},{127,98,421},{71,115,0},{0,108,421},{127,98,421},{0,108,421},{80,0,421},{80,0,421},{80,0,421},{80,0,421},{73,117,0},{73,117,0},{73,117,0},{73,94,1},{60,101,0}, +{60,101,0},{91,127,1838},{87,127,682},{86,122,654},{85,118,446},{88,127,2198},{78,127,450},{78,119,4},{74,110,654},{57,127,1557},{60,111,453},{97,127,741},{93,127,57},{93,120,41},{90,117,77},{127,62,1514},{76,127,446},{78,119,3},{51,112,421},{127,95,1514},{51,112,421},{86,127,554},{86,127,554},{86,127,554},{85,115,421},{80,127,435},{78,119,3},{78,119,3},{76,103,41},{59,118,339},{64,105,1},{93,126,8}, +{93,126,8},{93,126,8},{92,111,0},{127,40,338},{80,117,0},{80,117,0},{65,105,0},{126,85,338},{65,105,0},{127,81,421},{96,127,29},{98,119,1},{76,119,1},{127,81,421},{126,105,421},{76,119,1},{0,112,421},{126,105,421},{0,112,421},{85,0,421},{85,0,421},{85,0,421},{85,0,421},{78,120,1},{78,120,1},{78,120,1},{77,99,1},{64,105,1},{64,105,1},{97,127,2070},{92,127,882},{90,126,654}, +{89,122,446},{91,127,2382},{84,127,546},{82,123,4},{78,114,654},{64,127,1658},{62,116,453},{103,127,837},{99,127,161},{97,124,41},{94,121,77},{127,73,1514},{84,127,521},{82,123,3},{55,116,421},{127,101,1514},{55,116,421},{91,127,605},{91,127,605},{91,127,605},{89,119,421},{85,127,485},{82,123,3},{82,123,3},{80,107,41},{63,122,339},{68,109,1},{97,127,20},{97,127,20},{97,127,20},{96,115,0},{127,53,338}, 
+{84,121,0},{84,121,0},{69,109,0},{126,91,338},{69,109,0},{127,93,421},{102,127,85},{102,123,1},{80,123,1},{127,93,421},{126,111,421},{80,123,1},{0,116,421},{126,111,421},{0,116,421},{89,0,421},{89,0,421},{89,0,421},{89,0,421},{82,124,1},{82,124,1},{82,124,1},{81,103,1},{68,109,1},{68,109,1},{100,127,2201},{96,127,1101},{95,127,686},{93,126,441},{97,127,2469},{87,127,689},{86,127,3}, +{84,117,617},{73,127,1735},{67,120,421},{109,127,916},{104,127,298},{102,127,52},{98,125,68},{127,86,1459},{93,127,584},{86,127,2},{59,120,392},{127,107,1459},{59,120,392},{95,127,686},{95,127,686},{95,127,686},{93,123,421},{91,127,557},{86,127,3},{86,127,3},{84,111,41},{66,126,338},{72,113,1},{102,126,52},{102,126,52},{102,126,52},{100,119,0},{127,64,338},{88,125,0},{88,125,0},{73,113,0},{126,97,338}, +{73,113,0},{127,105,392},{110,127,146},{106,127,0},{84,127,0},{127,105,392},{126,117,392},{84,127,0},{0,120,392},{126,117,392},{0,120,392},{93,0,421},{93,0,421},{93,0,421},{93,0,421},{86,127,2},{86,127,2},{86,127,2},{85,107,1},{72,113,1},{72,113,1},{103,127,1901},{101,127,1079},{100,127,761},{97,127,421},{100,127,2093},{93,127,537},{91,127,29},{87,120,357},{81,127,1425},{72,122,213},{111,127,638}, +{108,127,244},{106,127,80},{103,126,17},{127,93,1064},{99,127,392},{93,127,4},{67,122,200},{126,111,1064},{67,122,200},{100,127,761},{100,127,761},{100,127,761},{97,127,421},{95,127,659},{91,127,29},{91,127,29},{88,115,41},{73,127,350},{76,117,1},{106,127,80},{106,127,80},{106,127,80},{104,123,0},{127,76,338},{93,127,4},{93,127,4},{77,117,0},{126,103,338},{77,117,0},{127,111,200},{116,127,74},{112,127,0}, 
+{96,127,0},{127,111,200},{126,120,200},{96,127,0},{0,122,200},{126,120,200},{0,122,200},{97,0,421},{97,0,421},{97,0,421},{97,0,421},{91,127,13},{91,127,13},{91,127,13},{89,111,1},{76,117,1},{76,117,1},{109,127,1646},{106,127,1086},{104,127,881},{102,127,450},{106,127,1730},{99,127,474},{96,127,122},{93,123,158},{87,127,1166},{79,124,62},{114,127,446},{111,127,225},{111,127,125},{108,127,2},{127,102,722}, +{105,127,254},{102,127,34},{76,124,61},{127,115,722},{76,124,61},{104,127,881},{104,127,881},{104,127,881},{102,127,450},{100,127,770},{96,127,122},{96,127,122},{93,120,38},{81,127,426},{81,121,2},{111,127,125},{111,127,125},{111,127,125},{108,127,2},{127,90,338},{102,127,34},{102,127,34},{83,121,1},{127,109,338},{83,121,1},{127,118,61},{119,127,25},{119,127,0},{110,127,0},{127,118,61},{126,123,61},{110,127,0}, +{0,124,61},{126,123,61},{0,124,61},{101,0,421},{101,0,421},{101,0,421},{101,0,421},{95,127,40},{95,127,40},{95,127,40},{94,115,1},{81,121,1},{81,121,1},{111,127,1450},{111,127,1109},{109,127,974},{105,127,542},{111,127,1505},{102,127,514},{102,127,258},{98,125,66},{96,127,1026},{84,126,6},{120,127,342},{117,127,230},{117,127,181},{113,127,37},{127,110,509},{111,127,213},{108,127,90},{85,126,5},{126,119,509}, +{85,126,5},{109,127,974},{109,127,974},{109,127,974},{105,127,542},{106,127,882},{102,127,258},{102,127,258},{97,124,38},{90,127,530},{85,125,2},{117,127,181},{117,127,181},{117,127,181},{113,127,37},{127,102,338},{108,127,90},{108,127,90},{87,125,1},{127,115,338},{87,125,1},{127,124,5},{125,127,1},{125,127,0},{122,127,0},{127,124,5},{126,126,5},{122,127,0},{0,126,5},{126,126,5},{0,126,5},{105,0,421}, 
+{105,0,421},{105,0,421},{105,0,421},{100,127,65},{100,127,65},{100,127,65},{98,119,1},{85,125,1},{85,125,1},{114,127,1179},{114,127,971},{112,127,881},{111,127,545},{114,127,1170},{108,127,457},{107,127,305},{102,126,17},{102,127,793},{90,127,9},{121,127,209},{120,127,155},{120,127,130},{118,127,45},{127,116,294},{116,127,134},{113,127,85},{95,127,0},{126,122,294},{95,127,0},{112,127,881},{112,127,881},{112,127,881}, +{111,127,545},{111,127,765},{107,127,305},{107,127,305},{102,126,13},{96,127,465},{90,127,9},{120,127,130},{120,127,130},{120,127,130},{118,127,45},{127,110,221},{113,127,85},{113,127,85},{95,127,0},{126,119,221},{95,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{109,0,421},{109,0,421},{109,0,421},{109,0,421},{106,127,113}, +{106,127,113},{106,127,113},{102,123,1},{90,127,9},{90,127,9},{117,127,871},{117,127,751},{115,127,701},{113,127,502},{117,127,822},{111,127,364},{111,127,264},{106,127,1},{105,127,537},{99,127,41},{123,127,75},{123,127,59},{123,127,50},{121,127,17},{127,121,113},{119,127,54},{119,127,29},{107,127,0},{127,124,113},{107,127,0},{115,127,701},{115,127,701},{115,127,701},{113,127,502},{114,127,561},{111,127,264},{111,127,264}, +{106,127,1},{102,127,329},{99,127,41},{123,127,50},{123,127,50},{123,127,50},{121,127,17},{127,116,85},{119,127,29},{119,127,29},{107,127,0},{126,122,85},{107,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{113,0,421},{113,0,421},{113,0,421},{113,0,421},{109,127,157},{109,127,157},{109,127,157},{106,127,1},{99,127,41}, +{99,127,41},{0,59,882},{0,40,100},{0,30,4},{0,24,340},{0,39,1896},{0,25,1188},{0,23,590},{0,15,1444},{0,19,2040},{0,15,1544},{0,59,882},{0,40,100},{0,30,4},{0,24,340},{19,0,1896},{0,25,1188},{0,23,590},{0,15,1444},{39,0,1896},{0,15,1444},{0,27,0},{0,27,0},{0,27,0},{0,13,1},{0,13,162},{0,11,58},{0,11,58},{0,7,97},{0,5,179},{0,6,116},{0,27,0}, 
+{0,27,0},{0,27,0},{0,13,1},{7,0,162},{0,11,58},{0,11,58},{0,7,97},{13,0,162},{0,7,97},{29,0,882},{0,40,100},{0,30,4},{0,24,340},{29,0,882},{59,0,882},{0,24,340},{0,19,884},{59,0,882},{0,19,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,71,882},{0,49,40},{1,34,12}, +{0,30,260},{0,47,2355},{0,31,1332},{0,28,612},{0,18,1700},{0,22,2556},{0,18,1844},{0,71,882},{0,49,40},{1,34,8},{0,30,260},{23,0,2355},{0,31,1332},{0,28,612},{0,18,1700},{47,0,2355},{0,18,1700},{0,39,0},{0,39,0},{0,39,0},{0,19,0},{0,20,338},{0,14,130},{0,14,130},{0,10,205},{0,8,371},{0,9,244},{0,39,0},{0,39,0},{0,39,0},{0,19,0},{10,0,338}, +{0,14,130},{0,14,130},{0,10,205},{20,0,338},{0,10,205},{35,0,882},{0,49,40},{3,34,0},{0,30,260},{35,0,882},{71,0,882},{0,30,260},{0,23,884},{71,0,882},{0,23,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,83,882},{0,55,8},{1,40,60},{0,33,200},{0,56,2899},{0,37,1508},{0,31,660}, +{0,21,2004},{0,25,3176},{0,21,2200},{0,83,882},{0,55,8},{2,38,52},{0,33,200},{27,1,2899},{0,37,1508},{0,31,660},{0,21,2004},{56,0,2899},{0,21,2004},{0,52,0},{0,52,0},{0,52,0},{0,25,0},{0,26,578},{0,19,208},{0,19,208},{0,13,353},{0,11,635},{0,10,414},{0,52,0},{0,52,0},{0,52,0},{0,25,0},{13,0,578},{0,19,208},{0,19,208},{0,13,353},{26,0,578}, +{0,13,353},{41,0,882},{0,55,8},{7,38,0},{0,33,200},{41,0,882},{83,0,882},{0,33,200},{0,27,884},{83,0,882},{0,27,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,91,902},{1,62,20},{3,44,133},{0,39,172},{0,66,3048},{0,43,1416},{0,37,488},{0,27,1944},{0,31,3460},{0,26,2254},{3,89,882}, +{3,60,2},{5,44,68},{2,38,168},{33,0,3048},{0,43,1416},{0,37,488},{0,27,1944},{66,0,3048},{0,27,1944},{2,60,20},{2,60,20},{2,60,20},{2,30,20},{0,36,648},{0,28,160},{0,28,160},{0,15,340},{0,17,771},{0,15,440},{3,58,0},{3,58,0},{3,58,0},{3,30,0},{18,0,648},{0,28,160},{0,28,160},{0,15,340},{36,0,648},{0,15,340},{47,0,882},{1,62,0},{11,42,0}, 
+{0,39,136},{47,0,882},{95,0,882},{0,39,136},{0,31,884},{95,0,882},{0,31,884},{1,0,20},{1,0,20},{1,0,20},{1,0,20},{0,9,0},{0,9,0},{0,9,0},{0,4,1},{0,3,8},{0,3,8},{4,101,995},{4,67,114},{6,48,283},{3,44,242},{0,80,3048},{0,51,1224},{0,43,267},{0,32,1794},{0,37,3651},{0,30,2195},{8,93,883},{7,65,2},{9,48,67},{6,42,171},{39,1,3048}, +{0,51,1224},{0,43,267},{0,32,1794},{80,0,3048},{0,32,1794},{4,69,113},{4,69,113},{4,69,113},{4,36,113},{0,50,648},{0,34,74},{0,34,74},{0,21,250},{0,22,862},{0,19,434},{8,61,1},{8,61,1},{8,61,1},{8,34,1},{24,1,648},{0,34,74},{0,34,74},{0,21,250},{50,0,648},{0,21,250},{53,1,882},{6,65,1},{15,47,1},{0,44,89},{53,1,882},{109,0,882},{0,44,89}, +{0,36,882},{109,0,882},{0,36,882},{4,0,113},{4,0,113},{4,0,113},{4,0,113},{0,23,0},{0,23,0},{0,23,0},{0,11,0},{0,8,41},{0,8,41},{6,109,1147},{6,72,274},{8,53,499},{5,47,367},{0,92,3048},{0,57,1096},{0,47,129},{0,36,1635},{0,43,3859},{0,34,2183},{12,97,883},{11,69,2},{13,52,67},{10,46,171},{45,1,3048},{0,57,1096},{0,47,129},{0,36,1635},{92,0,3048}, +{0,36,1635},{6,77,265},{6,77,265},{6,77,265},{6,41,265},{0,62,648},{0,43,26},{0,43,26},{0,26,193},{0,28,990},{0,23,458},{12,65,1},{12,65,1},{12,65,1},{12,38,1},{30,1,648},{0,43,26},{0,43,26},{0,26,193},{62,0,648},{0,26,193},{59,1,882},{10,69,1},{19,51,1},{0,49,58},{59,1,882},{121,0,882},{0,49,58},{0,40,882},{121,0,882},{0,40,882},{6,0,265}, +{6,0,265},{6,0,265},{6,0,265},{0,35,0},{0,35,0},{0,35,0},{0,17,0},{0,14,97},{0,14,97},{8,117,1363},{8,77,506},{10,58,795},{7,52,559},{0,104,3048},{0,63,1000},{0,53,41},{0,39,1523},{0,46,4095},{0,38,2195},{16,101,883},{15,73,2},{17,56,67},{14,50,171},{51,0,3048},{0,63,1000},{0,53,41},{0,39,1523},{104,0,3048},{0,39,1523},{8,85,481},{8,85,481},{8,85,481}, 
+{8,46,481},{0,74,648},{0,49,2},{0,49,2},{0,30,130},{0,31,1146},{0,28,494},{16,69,1},{16,69,1},{16,69,1},{16,42,1},{36,1,648},{0,49,2},{0,49,2},{0,30,130},{74,0,648},{0,30,130},{65,0,882},{14,73,1},{23,55,1},{0,53,25},{65,0,882},{127,3,882},{0,53,25},{0,44,882},{127,3,882},{0,44,882},{8,0,481},{8,0,481},{8,0,481},{8,0,481},{0,47,0}, +{0,47,0},{0,47,0},{0,23,0},{0,19,169},{0,19,169},{10,125,1643},{11,83,802},{13,61,1159},{9,56,826},{0,117,3048},{0,70,933},{0,58,11},{0,45,1395},{0,54,4321},{0,43,2227},{20,105,883},{19,77,2},{21,60,67},{18,54,171},{57,0,3048},{0,70,933},{0,58,11},{0,45,1395},{117,0,3048},{0,45,1395},{10,93,761},{10,93,761},{10,93,761},{10,51,761},{0,86,648},{0,56,4},{0,56,4}, +{0,35,89},{0,37,1322},{0,31,578},{20,73,1},{20,73,1},{20,73,1},{20,46,1},{42,1,648},{3,54,0},{3,54,0},{0,35,89},{86,0,648},{0,35,89},{71,0,882},{18,77,1},{27,59,1},{0,58,10},{71,0,882},{127,9,882},{0,58,10},{0,48,882},{127,9,882},{0,48,882},{10,0,761},{10,0,761},{10,0,761},{10,0,761},{0,59,0},{0,59,0},{0,59,0},{0,29,0},{0,22,277}, +{0,22,277},{14,126,1784},{14,89,929},{17,65,1329},{13,60,945},{3,124,3048},{2,75,910},{3,62,14},{0,50,1329},{0,60,4212},{0,47,1974},{24,110,882},{24,80,5},{27,64,70},{23,59,168},{63,2,3048},{0,77,893},{4,63,10},{0,50,1293},{126,2,3048},{0,50,1293},{14,99,884},{14,99,884},{14,99,884},{13,56,884},{3,94,648},{3,62,10},{3,62,10},{1,40,68},{0,46,1256},{0,37,428},{24,78,0}, +{24,78,0},{24,78,0},{24,51,0},{49,0,648},{7,59,0},{7,59,0},{0,39,52},{100,0,648},{0,39,52},{78,0,882},{23,81,1},{32,63,0},{0,63,1},{78,0,882},{126,16,882},{0,63,1},{0,52,884},{126,16,882},{0,52,884},{13,0,884},{13,0,884},{13,0,884},{13,0,884},{3,66,0},{3,66,0},{3,66,0},{3,34,0},{0,31,232},{0,31,232},{19,126,1814},{18,93,929},{21,69,1329}, 
+{17,65,942},{7,127,3051},{6,79,910},{7,66,14},{4,54,1329},{0,67,3975},{0,52,1640},{28,114,882},{28,84,5},{30,67,66},{27,63,168},{69,1,3048},{1,83,882},{9,66,10},{0,53,1205},{126,8,3048},{0,53,1205},{18,103,884},{18,103,884},{18,103,884},{17,60,884},{7,98,648},{8,65,9},{8,65,9},{5,44,68},{0,51,1059},{0,43,236},{28,82,0},{28,82,0},{28,82,0},{28,55,0},{55,0,648}, +{11,63,0},{11,63,0},{0,45,20},{112,0,648},{0,45,20},{83,1,882},{27,85,1},{36,66,1},{4,66,1},{83,1,882},{126,22,882},{4,66,1},{0,56,884},{126,22,882},{0,56,884},{17,0,884},{17,0,884},{17,0,884},{17,0,884},{7,70,0},{7,70,0},{7,70,0},{7,38,0},{0,37,136},{0,37,136},{23,127,1854},{22,97,929},{25,73,1329},{21,69,942},{12,127,3064},{10,83,910},{11,70,14}, +{8,58,1329},{0,73,3751},{0,57,1374},{32,118,882},{32,88,5},{34,71,66},{30,67,165},{75,1,3048},{5,87,882},{13,70,10},{0,59,1125},{126,14,3048},{0,59,1125},{22,107,884},{22,107,884},{22,107,884},{21,64,884},{11,102,648},{12,69,9},{12,69,9},{9,48,68},{0,57,923},{0,49,108},{32,86,0},{32,86,0},{32,86,0},{32,59,0},{61,0,648},{16,65,1},{16,65,1},{0,49,8},{124,0,648}, +{0,49,8},{89,1,882},{31,89,1},{40,70,1},{8,70,1},{89,1,882},{126,28,882},{8,70,1},{0,60,884},{126,28,882},{0,60,884},{21,0,884},{21,0,884},{21,0,884},{21,0,884},{11,74,0},{11,74,0},{11,74,0},{11,42,0},{0,46,68},{0,46,68},{29,127,1934},{26,101,929},{29,77,1329},{25,73,942},{17,127,3091},{14,87,910},{15,74,14},{12,62,1329},{0,79,3559},{0,62,1174},{36,122,882}, +{36,92,5},{38,75,66},{34,71,165},{81,0,3048},{9,91,882},{17,74,10},{0,62,1053},{126,20,3048},{0,62,1053},{26,111,884},{26,111,884},{26,111,884},{26,67,884},{15,106,648},{16,73,9},{16,73,9},{13,52,68},{0,64,810},{0,54,26},{36,90,0},{36,90,0},{36,90,0},{36,63,0},{66,1,648},{20,69,1},{20,69,1},{0,54,1},{126,5,648},{0,54,1},{95,1,882},{35,93,1},{44,74,1}, 
+{12,74,1},{95,1,882},{126,34,882},{12,74,1},{0,64,882},{126,34,882},{0,64,882},{25,0,884},{25,0,884},{25,0,884},{25,0,884},{15,78,0},{15,78,0},{15,78,0},{15,46,0},{0,51,18},{0,51,18},{33,127,2036},{31,106,931},{33,81,1331},{29,77,942},{23,127,3145},{19,92,910},{19,79,14},{17,65,1329},{0,87,3375},{0,66,1021},{41,126,883},{40,98,2},{43,80,65},{39,76,166},{88,0,3048}, +{14,95,882},{21,78,9},{0,66,996},{127,26,3048},{0,66,996},{30,116,882},{30,116,882},{30,116,882},{30,72,882},{20,109,649},{20,77,10},{20,77,10},{18,57,67},{0,70,729},{1,59,3},{41,94,1},{41,94,1},{41,94,1},{40,67,1},{73,0,648},{24,74,1},{24,74,1},{4,58,1},{126,12,648},{4,58,1},{102,0,882},{39,98,1},{48,79,0},{15,79,0},{102,0,882},{126,41,882},{15,79,0}, +{0,68,884},{126,41,882},{0,68,884},{30,0,882},{30,0,882},{30,0,882},{30,0,882},{20,82,1},{20,82,1},{20,82,1},{19,51,1},{0,60,0},{0,60,0},{38,127,2134},{35,110,931},{37,85,1331},{33,81,942},{29,127,3217},{23,96,910},{23,83,14},{21,69,1329},{0,93,3247},{0,71,949},{45,127,891},{44,102,2},{47,84,65},{43,80,166},{94,0,3048},{18,99,882},{25,82,9},{0,72,948},{127,32,3048}, +{0,72,948},{34,120,882},{34,120,882},{34,120,882},{34,76,882},{24,113,649},{24,81,10},{24,81,10},{22,61,67},{0,79,673},{5,63,3},{45,98,1},{45,98,1},{45,98,1},{44,71,1},{79,0,648},{28,78,1},{28,78,1},{8,62,1},{127,17,648},{8,62,1},{108,0,882},{43,102,1},{52,83,0},{19,83,0},{108,0,882},{125,47,882},{19,83,0},{0,72,884},{125,47,882},{0,72,884},{34,0,882}, +{34,0,882},{34,0,882},{34,0,882},{24,86,1},{24,86,1},{24,86,1},{23,55,1},{3,64,1},{3,64,1},{42,127,2284},{39,114,931},{41,89,1331},{37,85,942},{33,127,3316},{27,100,910},{27,87,14},{25,73,1329},{0,99,3151},{1,76,929},{50,127,909},{48,106,2},{51,88,65},{47,84,166},{100,0,3048},{22,103,882},{29,86,9},{0,75,916},{127,38,3048},{0,75,916},{38,124,882},{38,124,882},{38,124,882}, 
+{38,80,882},{28,117,649},{28,85,10},{28,85,10},{27,64,66},{0,85,649},{8,66,5},{49,102,1},{49,102,1},{49,102,1},{48,75,1},{85,0,648},{32,82,1},{32,82,1},{11,66,0},{127,23,648},{11,66,0},{114,0,882},{47,106,1},{56,87,0},{23,87,0},{114,0,882},{127,52,882},{23,87,0},{0,76,884},{127,52,882},{0,76,884},{38,0,882},{38,0,882},{38,0,882},{38,0,882},{28,90,1}, +{28,90,1},{28,90,1},{27,59,1},{8,67,1},{8,67,1},{47,127,2414},{43,118,931},{45,93,1331},{41,89,942},{38,127,3409},{31,104,910},{31,91,14},{29,77,1329},{0,105,3087},{5,80,929},{55,127,939},{52,110,2},{55,92,65},{51,88,166},{106,0,3048},{26,107,882},{33,90,9},{0,80,893},{127,44,3048},{0,80,893},{42,127,883},{42,127,883},{42,127,883},{42,84,882},{32,121,649},{32,89,10},{32,89,10}, +{31,68,66},{4,89,649},{12,70,5},{53,106,1},{53,106,1},{53,106,1},{52,79,1},{91,0,648},{36,86,1},{36,86,1},{15,70,0},{127,29,648},{15,70,0},{120,0,882},{51,110,1},{60,91,0},{27,91,0},{120,0,882},{127,58,882},{27,91,0},{0,80,884},{127,58,882},{0,80,884},{42,0,882},{42,0,882},{42,0,882},{42,0,882},{32,94,1},{32,94,1},{32,94,1},{31,63,1},{12,71,1}, +{12,71,1},{52,127,2584},{47,122,929},{50,98,1329},{46,94,942},{44,127,3547},{35,108,910},{36,95,14},{34,82,1331},{0,112,3055},{9,84,931},{61,127,996},{57,113,5},{59,96,66},{55,92,165},{112,1,3048},{30,112,882},{38,95,10},{0,85,883},{126,51,3048},{0,85,883},{47,127,893},{47,127,893},{47,127,893},{47,88,884},{36,127,648},{37,94,9},{37,94,9},{35,72,65},{9,93,649},{17,75,2},{57,111,0}, +{57,111,0},{57,111,0},{57,83,0},{98,0,648},{41,90,1},{41,90,1},{21,74,1},{127,36,648},{21,74,1},{127,0,882},{56,114,1},{64,96,1},{33,95,1},{127,0,882},{126,65,882},{33,95,1},{0,85,882},{126,65,882},{0,85,882},{46,0,884},{46,0,884},{46,0,884},{46,0,884},{36,99,0},{36,99,0},{36,99,0},{36,67,0},{17,75,1},{17,75,1},{58,127,2792},{51,126,929},{54,102,1329}, 
+{50,98,942},{49,127,3672},{39,112,910},{40,99,14},{38,86,1331},{3,117,3049},{13,88,931},{65,127,1061},{61,117,5},{63,100,66},{59,96,165},{118,1,3048},{34,116,882},{42,99,10},{2,89,882},{126,57,3048},{2,89,882},{52,127,916},{52,127,916},{52,127,916},{51,92,884},{41,127,654},{41,98,9},{41,98,9},{39,76,65},{13,97,649},{21,79,2},{61,115,0},{61,115,0},{61,115,0},{61,87,0},{104,0,648}, +{45,94,1},{45,94,1},{25,78,1},{127,42,648},{25,78,1},{127,11,882},{60,118,1},{68,100,1},{37,99,1},{127,11,882},{126,71,882},{37,99,1},{0,89,882},{126,71,882},{0,89,882},{50,0,884},{50,0,884},{50,0,884},{50,0,884},{40,103,0},{40,103,0},{40,103,0},{40,71,0},{21,79,1},{21,79,1},{61,127,2984},{55,127,949},{58,106,1329},{54,102,942},{55,127,3832},{43,116,910},{44,103,14}, +{42,90,1331},{7,121,3049},{17,92,931},{71,127,1149},{64,122,3},{66,105,67},{63,100,165},{124,1,3048},{38,120,882},{46,103,10},{6,93,882},{126,63,3048},{6,93,882},{55,127,948},{55,127,948},{55,127,948},{55,96,884},{46,126,682},{45,102,9},{45,102,9},{43,80,65},{17,101,649},{25,83,2},{65,118,1},{65,118,1},{65,118,1},{65,91,1},{110,0,648},{49,98,1},{49,98,1},{29,82,1},{126,48,648}, +{29,82,1},{127,24,882},{63,123,1},{72,104,1},{41,103,1},{127,24,882},{125,77,882},{41,103,1},{0,93,882},{125,77,882},{0,93,882},{54,0,884},{54,0,884},{54,0,884},{54,0,884},{44,107,0},{44,107,0},{44,107,0},{44,75,0},{25,83,1},{25,83,1},{65,127,3214},{61,127,1021},{62,110,1329},{58,106,942},{61,127,4024},{47,120,910},{48,107,14},{46,94,1331},{11,125,3049},{21,96,931},{74,127,1245}, +{68,126,3},{70,109,67},{67,103,171},{127,7,3048},{42,124,882},{50,107,10},{10,97,882},{127,68,3048},{10,97,882},{61,127,996},{61,127,996},{61,127,996},{59,100,884},{50,127,714},{49,106,9},{49,106,9},{47,84,65},{21,105,649},{29,87,2},{69,122,1},{69,122,1},{69,122,1},{69,95,1},{115,1,648},{53,102,1},{53,102,1},{33,86,1},{126,54,648},{33,86,1},{127,36,882},{67,127,0},{76,108,1}, 
+{45,107,1},{127,36,882},{127,82,882},{45,107,1},{0,97,882},{127,82,882},{0,97,882},{58,0,884},{58,0,884},{58,0,884},{58,0,884},{48,111,0},{48,111,0},{48,111,0},{48,79,0},{29,87,1},{29,87,1},{71,127,3494},{65,127,1174},{66,114,1320},{62,110,942},{65,127,4231},{52,125,910},{52,112,14},{50,98,1329},{19,127,3067},{26,101,929},{79,127,1364},{73,127,26},{75,114,68},{72,108,168},{127,21,3048}, +{48,127,885},{54,111,9},{16,101,884},{127,75,3048},{16,101,884},{65,127,1053},{65,127,1053},{65,127,1053},{63,105,882},{56,127,769},{53,110,10},{53,110,10},{52,89,66},{25,110,649},{33,91,5},{73,126,1},{73,126,1},{73,126,1},{73,100,0},{122,0,648},{57,107,1},{57,107,1},{36,91,0},{126,61,648},{36,91,0},{127,50,882},{76,127,18},{81,112,0},{48,112,0},{127,50,882},{127,89,882},{48,112,0}, +{0,101,884},{127,89,882},{0,101,884},{63,0,882},{63,0,882},{63,0,882},{63,0,882},{53,115,1},{53,115,1},{53,115,1},{53,83,1},{33,92,1},{33,92,1},{74,127,3782},{70,127,1374},{70,118,1320},{66,113,945},{71,127,4455},{57,127,926},{56,116,14},{54,102,1329},{25,127,3139},{30,105,929},{85,127,1476},{79,127,97},{79,118,68},{76,112,168},{127,33,3048},{57,127,925},{58,115,9},{20,105,884},{126,81,3048}, +{20,105,884},{68,127,1125},{68,127,1125},{68,127,1125},{66,109,884},{61,127,827},{57,114,10},{57,114,10},{56,93,66},{29,114,649},{37,95,5},{78,126,8},{78,126,8},{78,126,8},{77,104,0},{127,2,648},{61,111,1},{61,111,1},{40,95,0},{127,66,648},{40,95,0},{127,62,882},{81,127,68},{85,116,0},{52,116,0},{127,62,882},{127,95,882},{52,116,0},{0,105,884},{127,95,882},{0,105,884},{66,0,884}, +{66,0,884},{66,0,884},{66,0,884},{57,119,1},{57,119,1},{57,119,1},{57,87,1},{37,96,1},{37,96,1},{79,127,4024},{75,127,1640},{74,122,1320},{70,117,945},{74,127,4699},{63,127,1030},{60,120,14},{58,106,1329},{34,127,3259},{34,109,929},{91,127,1620},{84,127,236},{83,122,68},{80,116,168},{127,45,3048},{64,127,1003},{62,119,9},{24,109,884},{126,87,3048},{24,109,884},{74,127,1205},{74,127,1205},{74,127,1205}, 
+{70,113,884},{65,127,910},{61,118,10},{61,118,10},{60,97,66},{33,118,649},{41,99,5},{82,127,20},{82,127,20},{82,127,20},{81,108,0},{127,14,648},{64,116,0},{64,116,0},{44,99,0},{127,72,648},{44,99,0},{127,73,882},{90,127,136},{89,120,0},{56,120,0},{127,73,882},{127,101,882},{56,120,0},{0,109,884},{127,101,882},{0,109,884},{70,0,884},{70,0,884},{70,0,884},{70,0,884},{61,123,1}, +{61,123,1},{61,123,1},{61,91,1},{41,100,1},{41,100,1},{85,127,4328},{79,127,1925},{78,126,1320},{74,121,945},{79,127,4920},{67,127,1215},{64,123,14},{62,110,1329},{43,127,3435},{38,113,929},{94,127,1784},{90,127,428},{87,126,68},{84,120,168},{127,57,3048},{70,127,1131},{65,124,10},{28,113,884},{126,93,3048},{28,113,884},{77,127,1293},{77,127,1293},{77,127,1293},{74,117,884},{71,127,1006},{64,123,10},{64,123,10}, +{63,100,70},{37,122,649},{45,103,5},{87,126,52},{87,126,52},{87,126,52},{85,112,0},{127,27,648},{68,120,0},{68,120,0},{48,103,0},{126,78,648},{48,103,0},{127,86,882},{96,127,232},{93,124,0},{60,124,0},{127,86,882},{127,107,882},{60,124,0},{0,113,884},{127,107,882},{0,113,884},{74,0,884},{74,0,884},{74,0,884},{74,0,884},{64,126,1},{64,126,1},{64,126,1},{64,95,0},{45,104,1}, +{45,104,1},{88,127,4403},{84,127,2227},{82,127,1395},{78,125,922},{85,127,4875},{73,127,1326},{69,127,11},{65,115,1174},{54,127,3438},{44,116,802},{100,127,1746},{95,127,578},{92,127,89},{88,124,126},{127,69,2814},{79,127,1146},{71,127,4},{33,117,761},{126,99,2814},{33,117,761},{82,127,1395},{82,127,1395},{82,127,1395},{79,121,883},{77,127,1137},{69,127,11},{69,127,11},{67,106,67},{42,126,649},{50,108,2},{92,127,89}, +{92,127,89},{92,127,89},{90,116,1},{127,40,648},{73,124,0},{73,124,0},{54,107,1},{126,85,648},{54,107,1},{127,96,761},{105,127,277},{98,127,0},{67,127,0},{127,96,761},{127,112,761},{67,127,0},{0,117,761},{127,112,761},{0,117,761},{79,0,882},{79,0,882},{79,0,882},{79,0,882},{69,127,10},{69,127,10},{69,127,10},{68,100,1},{50,108,1},{50,108,1},{94,127,3955},{89,127,2195},{88,127,1523}, 
+{83,127,886},{91,127,4323},{79,127,1139},{74,127,41},{71,116,799},{60,127,2958},{49,118,506},{103,127,1386},{99,127,494},{97,127,130},{92,125,50},{127,76,2249},{87,127,870},{78,127,2},{42,119,481},{126,103,2249},{42,119,481},{88,127,1523},{88,127,1523},{88,127,1523},{83,125,883},{82,127,1251},{74,127,41},{74,127,41},{71,110,67},{48,127,670},{54,112,2},{97,127,130},{97,127,130},{97,127,130},{94,120,1},{127,53,648}, +{78,127,2},{78,127,2},{58,111,1},{126,91,648},{58,111,1},{127,102,481},{108,127,169},{104,127,0},{79,127,0},{127,102,481},{127,115,481},{79,127,0},{0,119,481},{127,115,481},{0,119,481},{83,0,882},{83,0,882},{83,0,882},{83,0,882},{74,127,25},{74,127,25},{74,127,25},{72,104,1},{54,112,1},{54,112,1},{97,127,3571},{93,127,2183},{91,127,1635},{87,127,891},{94,127,3827},{84,127,1035},{79,127,114}, +{74,119,499},{67,127,2577},{55,121,274},{106,127,1098},{102,127,458},{101,127,193},{97,126,9},{127,86,1769},{93,127,654},{84,127,26},{50,121,265},{127,107,1769},{50,121,265},{91,127,1635},{91,127,1635},{91,127,1635},{87,127,891},{85,127,1387},{79,127,114},{79,127,114},{75,114,67},{57,127,734},{58,116,2},{101,127,193},{101,127,193},{101,127,193},{98,124,1},{127,64,648},{84,127,26},{84,127,26},{62,115,1},{126,97,648}, +{62,115,1},{127,108,265},{113,127,97},{110,127,0},{92,127,0},{127,108,265},{127,118,265},{92,127,0},{0,121,265},{127,118,265},{0,121,265},{87,0,882},{87,0,882},{87,0,882},{87,0,882},{79,127,50},{79,127,50},{79,127,50},{76,108,1},{58,116,1},{58,116,1},{100,127,3267},{97,127,2195},{95,127,1794},{92,127,954},{97,127,3435},{87,127,975},{84,127,267},{79,121,283},{73,127,2281},{60,123,114},{111,127,838}, 
+{107,127,434},{106,127,250},{102,127,2},{127,93,1374},{99,127,502},{93,127,74},{58,123,113},{126,111,1374},{58,123,113},{95,127,1794},{95,127,1794},{95,127,1794},{92,127,954},{91,127,1539},{84,127,267},{84,127,267},{79,118,67},{64,127,840},{62,120,2},{106,127,250},{106,127,250},{106,127,250},{102,127,2},{127,76,648},{93,127,74},{93,127,74},{65,119,1},{126,103,648},{65,119,1},{127,115,113},{119,127,41},{116,127,0}, +{104,127,0},{127,115,113},{127,121,113},{104,127,0},{0,123,113},{127,121,113},{0,123,113},{91,0,882},{91,0,882},{91,0,882},{91,0,882},{83,127,89},{83,127,89},{83,127,89},{80,112,1},{62,120,1},{62,120,1},{103,127,3032},{101,127,2254},{100,127,1944},{96,127,1080},{103,127,3096},{93,127,1012},{90,127,488},{83,124,133},{81,127,2104},{65,126,20},{114,127,666},{111,127,425},{111,127,325},{108,127,52},{127,102,1032}, +{105,127,404},{99,127,160},{67,125,20},{127,115,1032},{67,125,20},{100,127,1944},{100,127,1944},{100,127,1944},{96,127,1080},{97,127,1736},{90,127,488},{90,127,488},{83,122,68},{73,127,1011},{67,124,2},{111,127,325},{111,127,325},{111,127,325},{108,127,52},{127,90,648},{99,127,160},{99,127,160},{69,124,0},{127,109,648},{69,124,0},{127,122,18},{124,127,8},{122,127,1},{118,127,0},{127,122,18},{126,125,18},{118,127,0}, +{0,125,20},{126,125,18},{0,125,20},{95,0,884},{95,0,884},{95,0,884},{95,0,884},{88,127,136},{88,127,136},{88,127,136},{85,116,0},{65,126,0},{65,126,0},{109,127,2756},{106,127,2200},{103,127,2004},{102,127,1188},{106,127,2760},{96,127,1060},{96,127,660},{87,126,60},{87,127,1900},{72,127,8},{117,127,534},{117,127,414},{114,127,353},{111,127,104},{127,110,771},{108,127,352},{108,127,208},{75,127,0},{126,119,771}, 
+{75,127,0},{103,127,2004},{103,127,2004},{103,127,2004},{102,127,1188},{100,127,1784},{96,127,660},{96,127,660},{89,125,52},{81,127,1112},{72,127,8},{114,127,353},{114,127,353},{114,127,353},{111,127,104},{127,101,578},{108,127,208},{108,127,208},{75,127,0},{127,114,578},{75,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{99,0,884}, +{99,0,884},{99,0,884},{99,0,884},{94,127,200},{94,127,200},{94,127,200},{89,120,0},{72,127,8},{72,127,8},{111,127,2214},{109,127,1844},{109,127,1700},{105,127,1096},{109,127,2180},{102,127,900},{99,127,612},{93,126,12},{93,127,1468},{78,127,40},{120,127,306},{118,127,244},{117,127,205},{116,127,65},{127,113,451},{113,127,211},{111,127,125},{87,127,0},{127,120,451},{87,127,0},{109,127,1700},{109,127,1700},{109,127,1700}, +{105,127,1096},{103,127,1460},{99,127,612},{99,127,612},{93,126,8},{87,127,872},{78,127,40},{117,127,205},{117,127,205},{117,127,205},{116,127,65},{127,107,338},{111,127,125},{111,127,125},{87,127,0},{127,117,338},{87,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{103,0,884},{103,0,884},{103,0,884},{103,0,884},{97,127,260}, +{97,127,260},{97,127,260},{93,124,0},{78,127,40},{78,127,40},{114,127,1798},{111,127,1521},{111,127,1421},{108,127,1028},{111,127,1675},{105,127,792},{104,127,590},{97,127,4},{96,127,1128},{87,127,100},{123,127,150},{121,127,116},{120,127,97},{119,127,29},{127,118,216},{119,127,99},{116,127,58},{99,127,0},{126,123,216},{99,127,0},{111,127,1421},{111,127,1421},{111,127,1421},{108,127,1028},{109,127,1188},{104,127,590},{104,127,590}, 
+{97,127,4},{93,127,696},{87,127,100},{120,127,97},{120,127,97},{120,127,97},{119,127,29},{127,113,162},{116,127,58},{116,127,58},{99,127,0},{127,120,162},{99,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{107,0,884},{107,0,884},{107,0,884},{107,0,884},{103,127,340},{103,127,340},{103,127,340},{97,127,4},{87,127,100}, +{87,127,100},{0,78,1568},{0,54,170},{0,40,4},{0,33,596},{0,53,3371},{0,34,2124},{0,31,1048},{0,21,2552},{0,25,3628},{0,21,2748},{0,78,1568},{0,54,170},{0,40,4},{0,33,596},{26,0,3371},{0,34,2124},{0,31,1048},{0,21,2552},{53,0,3371},{0,21,2552},{0,36,0},{0,36,0},{0,36,0},{0,18,0},{0,18,288},{0,14,106},{0,14,106},{0,9,180},{0,8,315},{0,7,206},{0,36,0}, +{0,36,0},{0,36,0},{0,18,0},{9,0,288},{0,14,106},{0,14,106},{0,9,180},{18,0,288},{0,9,180},{39,0,1568},{0,54,170},{0,40,4},{0,33,596},{39,0,1568},{78,0,1568},{0,33,596},{0,26,1568},{78,0,1568},{0,26,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,91,1568},{0,63,90},{0,45,8}, +{0,39,500},{0,61,3968},{0,40,2316},{0,35,1078},{0,24,2880},{0,28,4304},{0,24,3136},{0,91,1568},{0,63,90},{1,44,8},{0,39,500},{30,0,3968},{0,40,2316},{0,35,1078},{0,24,2880},{61,0,3968},{0,24,2880},{0,48,0},{0,48,0},{0,48,0},{0,24,0},{0,24,512},{0,19,180},{0,19,180},{0,10,313},{0,11,563},{0,10,362},{0,48,0},{0,48,0},{0,48,0},{0,24,0},{12,0,512}, +{0,19,180},{0,19,180},{0,10,313},{24,0,512},{0,10,313},{45,0,1568},{0,63,90},{2,44,0},{0,39,500},{45,0,1568},{91,0,1568},{0,39,500},{0,30,1568},{91,0,1568},{0,30,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,103,1568},{0,70,37},{1,49,48},{0,42,400},{0,68,4651},{0,46,2540},{0,40,1128}, 
+{0,27,3256},{0,31,5084},{0,27,3580},{0,103,1568},{0,70,37},{1,49,44},{0,42,400},{33,1,4651},{0,46,2540},{0,40,1128},{0,27,3256},{68,0,4651},{0,27,3256},{0,61,0},{0,61,0},{0,61,0},{0,30,0},{0,30,800},{0,25,292},{0,25,292},{0,13,485},{0,14,883},{0,13,566},{0,61,0},{0,61,0},{0,61,0},{0,30,0},{15,0,800},{0,25,292},{0,25,292},{0,13,485},{30,0,800}, +{0,13,485},{50,1,1568},{0,70,37},{6,48,0},{0,42,400},{50,1,1568},{103,0,1568},{0,42,400},{0,34,1568},{103,0,1568},{0,34,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,115,1568},{0,76,5},{1,54,145},{0,47,325},{0,77,5419},{0,51,2755},{0,43,1208},{0,30,3680},{0,34,5968},{0,30,4080},{0,115,1568}, +{0,76,5},{3,53,121},{0,47,325},{38,0,5419},{0,51,2755},{0,43,1208},{0,30,3680},{77,0,5419},{0,30,3680},{0,72,0},{0,72,0},{0,72,0},{0,36,0},{0,36,1152},{0,28,424},{0,28,424},{0,18,720},{0,17,1275},{0,15,824},{0,72,0},{0,72,0},{0,72,0},{0,36,0},{18,0,1152},{0,28,424},{0,28,424},{0,18,720},{36,0,1152},{0,18,720},{56,1,1568},{0,76,5},{10,52,0}, +{0,47,325},{56,1,1568},{115,0,1568},{0,47,325},{0,38,1568},{115,0,1568},{0,38,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,125,1609},{2,82,42},{5,58,254},{1,51,318},{0,91,5419},{0,57,2480},{0,49,835},{0,36,3427},{0,40,6191},{0,35,3991},{5,119,1569},{4,80,6},{8,58,122},{2,52,302},{45,0,5419}, +{0,57,2480},{0,49,835},{0,36,3427},{91,0,5419},{0,36,3427},{2,82,41},{2,82,41},{2,82,41},{2,41,42},{0,50,1152},{0,37,274},{0,37,274},{0,21,586},{0,22,1366},{0,20,782},{5,76,1},{5,76,1},{5,76,1},{5,40,1},{24,1,1152},{0,37,274},{0,37,274},{0,21,586},{50,0,1152},{0,21,586},{63,0,1568},{2,82,1},{15,56,1},{0,53,245},{63,0,1568},{127,1,1568},{0,53,245}, 
+{0,42,1570},{127,1,1568},{0,42,1570},{2,0,41},{2,0,41},{2,0,41},{2,0,41},{0,13,0},{0,13,0},{0,13,0},{0,7,0},{0,5,13},{0,5,13},{5,126,1735},{4,87,150},{7,63,426},{3,56,382},{0,103,5419},{0,64,2265},{0,54,557},{0,39,3243},{0,46,6415},{0,39,3919},{9,123,1569},{8,84,6},{12,62,122},{6,56,302},{50,1,5419},{0,64,2265},{0,54,557},{0,39,3243},{103,0,5419}, +{0,39,3243},{4,90,145},{4,90,145},{4,90,145},{4,46,146},{0,62,1152},{0,43,170},{0,43,170},{0,27,482},{0,28,1494},{0,24,770},{9,80,1},{9,80,1},{9,80,1},{9,44,1},{30,1,1152},{0,43,170},{0,43,170},{0,27,482},{62,0,1152},{0,27,482},{69,0,1568},{6,86,1},{19,60,1},{0,56,181},{69,0,1568},{127,7,1568},{0,56,181},{0,46,1570},{127,7,1568},{0,46,1570},{4,0,145}, +{4,0,145},{4,0,145},{4,0,145},{0,26,0},{0,26,0},{0,26,0},{0,13,0},{0,11,53},{0,11,53},{9,127,1991},{7,93,322},{9,66,678},{5,61,518},{0,115,5419},{0,70,2081},{0,60,341},{0,45,3035},{0,51,6641},{0,42,3891},{13,127,1569},{12,88,6},{15,66,122},{10,60,302},{56,1,5419},{0,70,2081},{0,60,341},{0,45,3035},{115,0,5419},{0,45,3035},{6,98,313},{6,98,313},{6,98,313}, +{6,51,314},{0,74,1152},{0,51,80},{0,51,80},{0,30,394},{0,31,1650},{0,28,782},{13,84,1},{13,84,1},{13,84,1},{13,48,1},{36,1,1152},{0,51,80},{0,51,80},{0,30,394},{74,0,1152},{0,30,394},{75,0,1568},{10,90,1},{23,64,0},{0,62,125},{75,0,1568},{127,13,1568},{0,62,125},{0,50,1570},{127,13,1568},{0,50,1570},{6,0,313},{6,0,313},{6,0,313},{6,0,313},{0,38,0}, +{0,38,0},{0,38,0},{0,18,1},{0,14,117},{0,14,117},{12,127,2387},{9,98,566},{11,71,1002},{7,64,721},{0,127,5419},{0,76,1929},{0,64,181},{0,50,2886},{0,57,6921},{0,47,3879},{17,127,1577},{16,92,6},{19,70,122},{13,64,309},{62,1,5419},{0,76,1929},{0,64,181},{0,50,2886},{127,0,5419},{0,50,2886},{8,106,545},{8,106,545},{8,106,545},{8,56,545},{0,86,1152},{0,57,32},{0,57,32}, 
+{0,36,306},{0,37,1826},{0,34,822},{17,88,1},{17,88,1},{17,88,1},{17,52,1},{42,1,1152},{0,57,32},{0,57,32},{0,36,306},{86,0,1152},{0,36,306},{80,1,1568},{14,94,1},{27,68,0},{0,66,80},{80,1,1568},{126,19,1568},{0,66,80},{0,54,1570},{126,19,1568},{0,54,1570},{8,0,545},{8,0,545},{8,0,545},{8,0,545},{0,50,0},{0,50,0},{0,50,0},{0,24,1},{0,19,193}, +{0,19,193},{15,127,3004},{11,106,925},{14,77,1461},{9,69,1030},{3,127,5520},{0,84,1769},{0,70,66},{0,53,2697},{0,63,7276},{0,51,3898},{23,127,1602},{21,97,5},{23,74,125},{19,67,306},{69,0,5419},{0,84,1769},{0,70,66},{0,53,2697},{127,7,5419},{0,53,2697},{11,114,884},{11,114,884},{11,114,884},{10,62,884},{0,100,1152},{0,65,5},{0,65,5},{0,42,232},{0,43,2064},{0,37,896},{21,93,0}, +{21,93,0},{21,93,0},{21,57,0},{49,0,1152},{0,65,5},{0,65,5},{0,42,232},{100,0,1152},{0,42,232},{87,0,1568},{19,98,1},{31,73,1},{0,71,45},{87,0,1568},{126,26,1568},{0,71,45},{0,59,1568},{126,26,1568},{0,59,1568},{10,0,884},{10,0,884},{10,0,884},{10,0,884},{0,64,0},{0,64,0},{0,64,0},{0,31,0},{0,25,320},{0,25,320},{17,127,3702},{13,111,1317},{17,80,1945}, +{11,74,1374},{6,127,5788},{0,90,1681},{0,75,18},{0,59,2537},{0,70,7631},{0,55,3952},{29,127,1650},{25,101,5},{27,78,125},{23,71,306},{75,0,5419},{0,90,1681},{0,75,18},{0,59,2537},{127,13,5419},{0,59,2537},{13,122,1252},{13,122,1252},{13,122,1252},{13,66,1252},{0,112,1152},{0,72,5},{0,72,5},{0,45,164},{0,51,2291},{0,43,992},{25,97,0},{25,97,0},{25,97,0},{25,61,0},{55,0,1152}, +{3,70,1},{3,70,1},{0,45,164},{112,0,1152},{0,45,164},{93,0,1568},{23,102,1},{35,77,1},{0,75,18},{93,0,1568},{126,32,1568},{0,75,18},{0,63,1568},{126,32,1568},{0,63,1568},{12,0,1252},{12,0,1252},{12,0,1252},{12,0,1252},{0,75,0},{0,75,0},{0,75,0},{0,37,0},{0,31,464},{0,31,464},{23,127,4370},{16,114,1661},{19,85,2353},{14,77,1674},{12,127,6128},{0,99,1621},{2,80,18}, 
+{0,62,2397},{0,73,7815},{0,60,3858},{32,127,1702},{29,105,5},{31,82,125},{27,75,306},{80,1,5419},{0,99,1617},{2,80,14},{0,62,2393},{126,19,5419},{0,62,2393},{15,127,1572},{15,127,1572},{15,127,1572},{15,71,1568},{1,122,1152},{1,79,17},{1,79,17},{0,50,121},{0,54,2403},{0,46,996},{29,101,0},{29,101,0},{29,101,0},{29,64,0},{61,0,1152},{7,74,1},{7,74,1},{0,50,117},{124,0,1152}, +{0,50,117},{99,0,1568},{27,106,1},{39,81,1},{0,80,5},{99,0,1568},{126,38,1568},{0,80,5},{0,66,1570},{126,38,1568},{0,66,1570},{15,0,1568},{15,0,1568},{15,0,1568},{15,0,1568},{1,86,0},{1,86,0},{1,86,0},{1,43,0},{0,34,544},{0,34,544},{26,127,4586},{20,118,1661},{23,89,2353},{18,81,1674},{17,127,6275},{3,102,1617},{6,84,18},{1,67,2355},{0,81,7401},{0,64,3371},{38,127,1766}, +{33,109,5},{35,86,125},{31,79,306},{86,1,5419},{0,105,1577},{6,84,14},{0,66,2259},{126,25,5419},{0,66,2259},{20,127,1585},{20,127,1585},{20,127,1585},{19,75,1568},{5,126,1152},{5,83,17},{5,83,17},{4,54,121},{0,60,2155},{0,51,682},{33,105,0},{33,105,0},{33,105,0},{33,68,0},{66,1,1152},{11,78,1},{11,78,1},{0,55,80},{126,5,1152},{0,55,80},{105,0,1568},{31,110,1},{43,85,1}, +{0,84,1},{105,0,1568},{126,44,1568},{0,84,1},{0,70,1570},{126,44,1568},{0,70,1570},{19,0,1568},{19,0,1568},{19,0,1568},{19,0,1568},{5,90,0},{5,90,0},{5,90,0},{5,47,0},{0,43,388},{0,43,388},{32,127,4866},{25,123,1659},{27,93,2355},{23,86,1670},{23,127,6489},{8,107,1615},{10,88,18},{5,71,2353},{0,87,7039},{0,70,2897},{44,127,1865},{37,113,6},{40,91,122},{36,84,305},{93,0,5419}, +{2,111,1569},{10,88,17},{0,72,2124},{126,32,5419},{0,72,2124},{24,127,1619},{24,127,1619},{24,127,1619},{24,79,1570},{10,127,1161},{10,88,14},{10,88,14},{8,58,122},{0,67,1905},{0,57,409},{38,109,1},{38,109,1},{38,109,1},{37,73,1},{73,0,1152},{15,83,1},{15,83,1},{0,59,41},{126,12,1152},{0,59,41},{112,0,1568},{35,115,1},{48,89,0},{4,89,0},{112,0,1568},{127,50,1568},{4,89,0}, 
+{0,75,1568},{127,50,1568},{0,75,1568},{23,0,1570},{23,0,1570},{23,0,1570},{23,0,1570},{10,93,1},{10,93,1},{10,93,1},{10,51,1},{0,51,232},{0,51,232},{35,127,5154},{29,127,1659},{31,97,2355},{27,90,1670},{26,127,6689},{12,111,1615},{14,92,18},{9,75,2353},{0,93,6751},{0,74,2541},{49,127,1955},{41,117,6},{44,95,122},{40,88,305},{99,0,5419},{6,115,1569},{14,92,17},{0,75,2020},{126,38,5419}, +{0,75,2020},{29,127,1650},{29,127,1650},{29,127,1650},{28,83,1570},{15,127,1179},{14,92,14},{14,92,14},{12,62,122},{0,76,1713},{0,63,225},{42,113,1},{42,113,1},{42,113,1},{41,77,1},{79,0,1152},{19,87,1},{19,87,1},{0,64,20},{127,17,1152},{0,64,20},{118,0,1568},{39,119,1},{52,93,0},{8,93,0},{118,0,1568},{127,56,1568},{8,93,0},{0,79,1568},{127,56,1568},{0,79,1568},{27,0,1570}, +{27,0,1570},{27,0,1570},{27,0,1570},{14,97,1},{14,97,1},{14,97,1},{14,55,1},{0,57,136},{0,57,136},{41,127,5426},{33,127,1711},{35,101,2355},{31,94,1670},{32,127,6905},{16,115,1615},{18,96,18},{13,79,2353},{0,102,6483},{0,79,2241},{52,127,2075},{45,121,6},{48,99,122},{44,92,305},{105,0,5419},{10,119,1569},{18,96,17},{0,80,1929},{126,44,5419},{0,80,1929},{33,127,1707},{33,127,1707},{33,127,1707}, +{32,87,1570},{20,127,1209},{18,96,14},{18,96,14},{15,66,125},{0,81,1526},{0,67,97},{46,117,1},{46,117,1},{46,117,1},{45,81,1},{85,0,1152},{23,91,1},{23,91,1},{0,69,4},{127,23,1152},{0,69,4},{124,0,1568},{43,123,1},{56,97,0},{12,97,0},{124,0,1568},{127,62,1568},{12,97,0},{0,83,1568},{127,62,1568},{0,83,1568},{31,0,1570},{31,0,1570},{31,0,1570},{31,0,1570},{18,101,1}, +{18,101,1},{18,101,1},{18,59,1},{0,64,65},{0,64,65},{44,127,5762},{38,127,1811},{39,105,2355},{35,98,1670},{38,127,7153},{20,119,1615},{22,100,18},{17,83,2353},{0,108,6243},{0,83,2009},{58,127,2195},{49,125,6},{52,103,122},{48,96,305},{111,0,5419},{14,123,1569},{22,100,17},{0,85,1856},{127,49,5419},{0,85,1856},{38,127,1762},{38,127,1762},{38,127,1762},{36,91,1570},{26,127,1265},{22,100,14},{22,100,14}, 
+{19,70,125},{0,87,1398},{0,73,25},{50,121,1},{50,121,1},{50,121,1},{49,85,1},{91,0,1152},{27,95,1},{27,95,1},{0,73,0},{127,29,1152},{0,73,0},{127,5,1568},{47,127,1},{60,101,0},{16,101,0},{127,5,1568},{126,68,1568},{16,101,0},{0,87,1568},{126,68,1568},{0,87,1568},{35,0,1570},{35,0,1570},{35,0,1570},{35,0,1570},{22,105,1},{22,105,1},{22,105,1},{22,63,1},{0,70,25}, +{0,70,25},{52,127,6088},{43,127,2009},{44,110,2353},{39,102,1674},{44,127,7451},{24,123,1617},{27,105,18},{22,88,2355},{0,113,5999},{0,89,1811},{65,127,2385},{54,127,25},{56,107,125},{52,100,306},{118,0,5419},{19,127,1569},{27,105,14},{0,89,1762},{127,56,5419},{0,89,1762},{42,127,1856},{42,127,1856},{42,127,1856},{40,96,1568},{30,127,1344},{26,104,17},{26,104,17},{24,75,122},{0,96,1281},{1,78,6},{54,126,0}, +{54,126,0},{54,126,0},{54,89,0},{98,0,1152},{32,99,1},{32,99,1},{6,77,1},{127,36,1152},{6,77,1},{127,19,1568},{54,127,25},{64,105,1},{21,105,1},{127,19,1568},{127,74,1568},{21,105,1},{0,91,1570},{127,74,1568},{0,91,1570},{40,0,1568},{40,0,1568},{40,0,1568},{40,0,1568},{26,111,0},{26,111,0},{26,111,0},{26,67,0},{0,79,1},{0,79,1},{55,127,6416},{48,127,2241},{48,114,2353}, +{43,106,1674},{49,127,7696},{28,127,1617},{31,109,18},{26,92,2355},{0,122,5827},{0,94,1711},{68,127,2521},{60,127,97},{60,111,125},{56,104,306},{124,0,5419},{25,127,1601},{31,109,14},{0,94,1707},{127,62,5419},{0,94,1707},{47,127,1929},{47,127,1929},{47,127,1929},{44,100,1568},{35,127,1414},{30,108,17},{30,108,17},{28,79,122},{0,102,1209},{5,82,6},{58,127,4},{58,127,4},{58,127,4},{58,93,0},{104,0,1152}, +{36,103,1},{36,103,1},{10,81,1},{127,42,1152},{10,81,1},{127,31,1568},{63,127,65},{68,109,1},{25,109,1},{127,31,1568},{127,80,1568},{25,109,1},{0,95,1570},{127,80,1568},{0,95,1570},{44,0,1568},{44,0,1568},{44,0,1568},{44,0,1568},{30,115,0},{30,115,0},{30,115,0},{30,71,0},{4,83,1},{4,83,1},{61,127,6800},{53,127,2541},{52,118,2353},{47,110,1674},{52,127,7996},{34,127,1665},{35,113,18}, 
+{30,96,2355},{0,126,5709},{0,98,1659},{74,127,2705},{64,127,225},{65,115,122},{60,108,306},{127,5,5419},{34,127,1665},{35,113,14},{0,98,1650},{126,68,5419},{0,98,1650},{52,127,2020},{52,127,2020},{52,127,2020},{48,104,1568},{41,127,1510},{34,112,17},{34,112,17},{32,83,122},{0,108,1169},{9,86,6},{63,126,20},{63,126,20},{63,126,20},{62,97,0},{110,0,1152},{40,107,1},{40,107,1},{14,85,1},{126,48,1152}, +{14,85,1},{127,43,1568},{70,127,136},{72,113,1},{29,113,1},{127,43,1568},{127,86,1568},{29,113,1},{0,99,1570},{127,86,1568},{0,99,1570},{48,0,1568},{48,0,1568},{48,0,1568},{48,0,1568},{34,119,0},{34,119,0},{34,119,0},{34,75,0},{8,87,1},{8,87,1},{65,127,7186},{57,127,2897},{56,122,2353},{51,114,1674},{58,127,8300},{40,127,1809},{39,117,18},{34,100,2355},{5,127,5791},{4,102,1659},{79,127,2875}, +{70,127,409},{69,119,122},{63,114,310},{127,18,5419},{43,127,1777},{39,117,14},{0,103,1619},{126,74,5419},{0,103,1619},{55,127,2124},{55,127,2124},{55,127,2124},{52,108,1568},{44,127,1634},{38,116,17},{38,116,17},{36,87,122},{1,115,1153},{13,90,6},{68,127,41},{68,127,41},{68,127,41},{66,101,1},{115,1,1152},{44,111,1},{44,111,1},{18,89,1},{126,54,1152},{18,89,1},{127,56,1568},{76,127,232},{76,117,1}, +{33,117,1},{127,56,1568},{127,92,1568},{33,117,1},{0,103,1570},{127,92,1568},{0,103,1570},{52,0,1568},{52,0,1568},{52,0,1568},{52,0,1568},{38,123,0},{38,123,0},{38,123,0},{38,79,0},{12,91,1},{12,91,1},{68,127,7650},{63,127,3371},{60,126,2355},{56,119,1670},{65,127,8695},{46,127,2083},{43,121,18},{38,104,2353},{14,127,6049},{6,107,1661},{82,127,3112},{76,127,682},{73,123,121},{67,117,305},{127,31,5419}, 
+{51,127,1977},{43,121,17},{0,107,1585},{127,80,5419},{0,107,1585},{61,127,2259},{61,127,2259},{61,127,2259},{57,112,1570},{52,127,1755},{43,121,14},{43,121,14},{40,91,125},{6,119,1153},{18,94,5},{72,127,80},{72,127,80},{72,127,80},{70,106,0},{122,0,1152},{48,116,1},{48,116,1},{21,94,0},{126,61,1152},{21,94,0},{127,69,1568},{84,127,388},{80,122,0},{37,122,0},{127,69,1568},{126,99,1568},{37,122,0}, +{0,108,1568},{126,99,1568},{0,108,1568},{56,0,1570},{56,0,1570},{56,0,1570},{56,0,1570},{43,126,1},{43,126,1},{43,126,1},{42,84,1},{16,96,1},{16,96,1},{74,127,8066},{67,127,3858},{65,127,2397},{60,123,1670},{68,127,9035},{51,127,2458},{47,125,18},{42,108,2353},{22,127,6379},{10,111,1661},{88,127,3320},{79,127,977},{77,127,121},{71,121,305},{127,43,5419},{60,127,2185},{47,125,17},{0,112,1572},{127,86,5419}, +{0,112,1572},{65,127,2393},{65,127,2393},{65,127,2393},{61,116,1570},{55,127,1891},{47,125,14},{47,125,14},{44,95,125},{10,123,1153},{22,98,5},{77,127,117},{77,127,117},{77,127,117},{74,110,0},{127,2,1152},{52,120,1},{52,120,1},{25,98,0},{127,66,1152},{25,98,0},{127,81,1568},{93,127,544},{84,126,0},{41,126,0},{127,81,1568},{126,105,1568},{41,126,0},{0,112,1568},{126,105,1568},{0,112,1568},{60,0,1570}, +{60,0,1570},{60,0,1570},{60,0,1570},{47,127,5},{47,127,5},{47,127,5},{46,88,1},{20,100,1},{20,100,1},{79,127,7660},{72,127,3952},{68,127,2537},{64,125,1620},{74,127,8515},{57,127,2310},{52,127,18},{47,110,1945},{28,127,5939},{16,114,1317},{91,127,2984},{84,127,992},{82,127,164},{76,123,201},{127,53,4803},{64,127,1931},{54,126,5},{5,114,1252},{126,91,4803},{5,114,1252},{68,127,2537},{68,127,2537},{68,127,2537}, 
+{64,120,1569},{61,127,2043},{52,127,18},{52,127,18},{48,99,125},{14,127,1153},{26,102,5},{82,127,164},{82,127,164},{82,127,164},{78,114,0},{127,14,1152},{56,124,1},{56,124,1},{29,102,0},{127,72,1152},{29,102,0},{127,89,1250},{96,127,464},{90,127,0},{51,127,0},{127,89,1250},{125,109,1250},{51,127,0},{0,114,1252},{125,109,1250},{0,114,1252},{64,0,1568},{64,0,1568},{64,0,1568},{64,0,1568},{52,127,18}, +{52,127,18},{52,127,18},{50,92,1},{24,104,1},{24,104,1},{82,127,7060},{76,127,3898},{74,127,2697},{68,126,1576},{79,127,7756},{60,127,2062},{57,127,66},{50,113,1461},{37,127,5307},{21,116,925},{97,127,2504},{90,127,896},{85,127,232},{80,125,96},{127,60,4056},{70,127,1563},{60,127,5},{13,116,884},{127,94,4056},{13,116,884},{74,127,2697},{74,127,2697},{74,127,2697},{68,124,1569},{65,127,2214},{57,127,66},{57,127,66}, +{52,103,125},{22,127,1186},{30,106,5},{85,127,232},{85,127,232},{85,127,232},{82,118,0},{127,27,1152},{62,126,5},{62,126,5},{33,106,0},{126,78,1152},{33,106,0},{127,95,882},{102,127,320},{96,127,0},{63,127,0},{127,95,882},{127,111,882},{63,127,0},{0,116,884},{127,111,882},{0,116,884},{68,0,1568},{68,0,1568},{68,0,1568},{68,0,1568},{56,127,45},{56,127,45},{56,127,45},{54,96,1},{28,108,1}, +{28,108,1},{85,127,6483},{79,127,3828},{79,127,2867},{73,127,1574},{82,127,6979},{67,127,1846},{63,127,181},{56,116,1002},{43,127,4714},{29,118,566},{100,127,2034},{93,127,822},{91,127,306},{86,125,26},{127,69,3318},{76,127,1233},{70,127,32},{20,119,545},{126,99,3318},{20,119,545},{79,127,2867},{79,127,2867},{79,127,2867},{73,127,1574},{71,127,2425},{63,127,181},{63,127,181},{57,108,122},{31,127,1273},{34,111,6},{91,127,306}, 
+{91,127,306},{91,127,306},{87,122,1},{127,40,1152},{70,127,32},{70,127,32},{39,110,1},{126,85,1152},{39,110,1},{127,101,545},{108,127,193},{102,127,1},{76,127,0},{127,101,545},{127,114,545},{76,127,0},{0,119,545},{127,114,545},{0,119,545},{72,0,1570},{72,0,1570},{72,0,1570},{72,0,1570},{61,127,80},{61,127,80},{61,127,80},{59,100,0},{33,112,1},{33,112,1},{88,127,6059},{84,127,3891},{82,127,3035}, +{76,127,1634},{85,127,6411},{70,127,1754},{67,127,341},{61,118,678},{51,127,4330},{34,120,322},{103,127,1698},{98,127,782},{97,127,394},{90,127,2},{127,76,2753},{84,127,1018},{76,127,80},{28,121,313},{126,103,2753},{28,121,313},{82,127,3035},{82,127,3035},{82,127,3035},{76,127,1634},{77,127,2641},{67,127,341},{67,127,341},{61,112,122},{37,127,1401},{38,115,6},{97,127,394},{97,127,394},{97,127,394},{91,126,1},{127,53,1152}, +{76,127,80},{76,127,80},{43,114,1},{126,91,1152},{43,114,1},{127,107,313},{111,127,116},{108,127,1},{89,127,0},{127,107,313},{127,117,313},{89,127,0},{0,121,313},{127,117,313},{0,121,313},{76,0,1570},{76,0,1570},{76,0,1570},{76,0,1570},{65,127,125},{65,127,125},{65,127,125},{63,104,0},{37,116,1},{37,116,1},{94,127,5691},{88,127,3919},{88,127,3243},{81,127,1739},{88,127,5947},{76,127,1722},{73,127,557}, +{64,120,426},{57,127,3994},{39,122,150},{106,127,1434},{102,127,770},{100,127,482},{95,127,17},{127,86,2273},{90,127,850},{84,127,170},{36,123,145},{127,107,2273},{36,123,145},{88,127,3243},{88,127,3243},{88,127,3243},{81,127,1739},{79,127,2835},{73,127,557},{73,127,557},{65,115,122},{46,127,1561},{42,119,6},{100,127,482},{100,127,482},{100,127,482},{95,127,17},{127,64,1152},{84,127,170},{84,127,170},{47,118,1},{126,97,1152}, 
+{47,118,1},{127,113,145},{116,127,53},{114,127,0},{101,127,0},{127,113,145},{127,120,145},{101,127,0},{0,123,145},{127,120,145},{0,123,145},{80,0,1570},{80,0,1570},{80,0,1570},{80,0,1570},{71,127,181},{71,127,181},{71,127,181},{67,108,1},{41,120,1},{41,120,1},{97,127,5379},{92,127,3991},{91,127,3427},{87,127,1907},{94,127,5539},{79,127,1783},{79,127,822},{69,122,254},{64,127,3745},{44,125,42},{111,127,1210}, +{106,127,782},{106,127,586},{99,127,82},{127,93,1878},{96,127,746},{90,127,274},{44,125,41},{126,111,1878},{44,125,41},{91,127,3427},{91,127,3427},{91,127,3427},{87,127,1907},{85,127,3051},{79,127,822},{79,127,822},{69,119,122},{54,127,1798},{46,123,6},{106,127,586},{106,127,586},{106,127,586},{99,127,82},{127,76,1152},{90,127,274},{90,127,274},{51,122,1},{126,103,1152},{51,122,1},{127,119,41},{122,127,13},{120,127,0}, +{113,127,0},{127,119,41},{127,123,41},{113,127,0},{0,125,41},{127,123,41},{0,125,41},{84,0,1570},{84,0,1570},{84,0,1570},{84,0,1570},{74,127,245},{74,127,245},{74,127,245},{71,112,1},{45,124,1},{45,124,1},{100,127,5128},{97,127,4080},{97,127,3680},{90,127,2152},{97,127,5200},{87,127,1964},{84,127,1208},{73,126,145},{73,127,3580},{51,127,5},{114,127,1062},{111,127,797},{111,127,697},{105,127,212},{127,102,1536}, +{102,127,708},{99,127,424},{54,127,0},{127,115,1536},{54,127,0},{97,127,3680},{97,127,3680},{97,127,3680},{90,127,2152},{91,127,3328},{84,127,1208},{84,127,1208},{74,124,121},{63,127,2089},{51,127,5},{111,127,697},{111,127,697},{111,127,697},{105,127,212},{127,90,1152},{99,127,424},{99,127,424},{54,127,0},{127,109,1152},{54,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0}, 
+{0,127,0},{127,127,0},{0,127,0},{89,0,1568},{89,0,1568},{89,0,1568},{89,0,1568},{79,127,320},{79,127,320},{79,127,320},{75,117,0},{51,127,5},{51,127,5},{103,127,4416},{100,127,3580},{100,127,3256},{96,127,2040},{100,127,4372},{90,127,1736},{87,127,1128},{79,126,57},{76,127,2956},{57,127,37},{117,127,738},{114,127,566},{114,127,485},{108,127,148},{127,107,1067},{108,127,484},{102,127,292},{66,127,0},{127,117,1067}, +{66,127,0},{100,127,3256},{100,127,3256},{100,127,3256},{96,127,2040},{94,127,2852},{87,127,1128},{87,127,1128},{78,125,45},{70,127,1739},{57,127,37},{114,127,485},{114,127,485},{114,127,485},{108,127,148},{127,96,800},{102,127,292},{102,127,292},{66,127,0},{127,112,800},{66,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{93,0,1568}, +{93,0,1568},{93,0,1568},{93,0,1568},{85,127,400},{85,127,400},{85,127,400},{79,121,0},{57,127,37},{57,127,37},{106,127,3784},{103,127,3136},{103,127,2880},{99,127,1924},{103,127,3648},{93,127,1564},{92,127,1078},{82,127,8},{81,127,2480},{64,127,90},{117,127,482},{117,127,362},{117,127,313},{113,127,97},{127,110,683},{111,127,321},{108,127,180},{78,127,0},{126,119,683},{78,127,0},{103,127,2880},{103,127,2880},{103,127,2880}, +{99,127,1924},{97,127,2448},{92,127,1078},{92,127,1078},{83,126,8},{76,127,1451},{64,127,90},{117,127,313},{117,127,313},{117,127,313},{113,127,97},{127,102,512},{108,127,180},{108,127,180},{78,127,0},{127,115,512},{78,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{97,0,1568},{97,0,1568},{97,0,1568},{97,0,1568},{88,127,500}, 
+{88,127,500},{88,127,500},{83,125,0},{64,127,90},{64,127,90},{109,127,3232},{106,127,2748},{106,127,2552},{102,127,1832},{106,127,3028},{99,127,1420},{96,127,1048},{87,127,4},{87,127,2032},{73,127,170},{120,127,262},{120,127,206},{118,127,180},{116,127,53},{127,115,384},{116,127,179},{113,127,106},{90,127,0},{127,121,384},{90,127,0},{106,127,2552},{106,127,2552},{106,127,2552},{102,127,1832},{103,127,2112},{96,127,1048},{96,127,1048}, +{87,127,4},{81,127,1260},{73,127,170},{118,127,180},{118,127,180},{118,127,180},{116,127,53},{127,108,288},{113,127,106},{113,127,106},{90,127,0},{127,118,288},{90,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{101,0,1568},{101,0,1568},{101,0,1568},{101,0,1568},{94,127,596},{94,127,596},{94,127,596},{87,127,4},{73,127,170}, +{73,127,170},{0,104,2665},{0,73,274},{0,53,4},{0,45,985},{0,69,5885},{0,46,3677},{0,41,1789},{0,27,4441},{0,31,6341},{0,27,4765},{0,104,2665},{0,73,274},{0,53,4},{0,45,985},{34,0,5885},{0,46,3677},{0,41,1789},{0,27,4441},{69,0,5885},{0,27,4441},{0,50,0},{0,50,0},{0,50,0},{0,24,1},{0,24,545},{0,19,193},{0,19,193},{0,12,337},{0,11,598},{0,10,387},{0,50,0}, +{0,50,0},{0,50,0},{0,24,1},{12,0,545},{0,19,193},{0,19,193},{0,12,337},{24,0,545},{0,12,337},{51,0,2665},{0,73,274},{0,53,4},{0,45,985},{51,0,2665},{104,0,2665},{0,45,985},{0,35,2665},{104,0,2665},{0,35,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,117,2665},{0,81,160},{1,57,12}, +{0,50,872},{0,78,6669},{0,51,3898},{0,46,1825},{0,33,4865},{0,34,7249},{0,30,5277},{0,117,2665},{0,81,160},{1,57,8},{0,50,872},{39,0,6669},{0,51,3898},{0,46,1825},{0,33,4865},{78,0,6669},{0,33,4865},{0,62,0},{0,62,0},{0,62,0},{0,30,1},{0,30,841},{0,25,305},{0,25,305},{0,13,514},{0,14,926},{0,13,595},{0,62,0},{0,62,0},{0,62,0},{0,30,1},{15,0,841}, 
+{0,25,305},{0,25,305},{0,13,514},{30,0,841},{0,13,514},{57,0,2665},{0,81,160},{3,57,0},{0,50,872},{57,0,2665},{117,0,2665},{0,50,872},{0,39,2665},{117,0,2665},{0,39,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,127,2669},{0,87,80},{1,63,60},{0,53,740},{0,86,7538},{0,57,4178},{0,49,1889}, +{0,36,5333},{0,40,8241},{0,33,5845},{0,127,2669},{0,87,80},{2,61,52},{0,53,740},{42,1,7538},{0,57,4178},{0,49,1889},{0,36,5333},{86,0,7538},{0,36,5333},{0,74,0},{0,74,0},{0,74,0},{0,36,1},{0,36,1201},{0,28,445},{0,28,445},{0,18,745},{0,17,1326},{0,15,861},{0,74,0},{0,74,0},{0,74,0},{0,36,1},{18,0,1201},{0,28,445},{0,28,445},{0,18,745},{36,0,1201}, +{0,18,745},{63,0,2665},{0,87,80},{7,61,0},{0,53,740},{63,0,2665},{127,1,2665},{0,53,740},{0,43,2665},{127,1,2665},{0,43,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,127,2797},{0,93,32},{2,65,154},{0,59,628},{0,94,8493},{0,63,4490},{0,54,1965},{0,39,5849},{0,43,9333},{0,36,6469},{3,127,2761}, +{0,93,32},{3,66,126},{0,59,628},{46,0,8493},{0,63,4490},{0,54,1965},{0,39,5849},{94,0,8493},{0,39,5849},{0,86,0},{0,86,0},{0,86,0},{0,42,1},{0,42,1625},{0,34,605},{0,34,605},{0,21,1009},{0,19,1781},{0,18,1161},{0,86,0},{0,86,0},{0,86,0},{0,42,1},{21,0,1625},{0,34,605},{0,34,605},{0,21,1009},{42,0,1625},{0,21,1009},{69,0,2665},{0,93,32},{11,64,1}, +{0,59,628},{69,0,2665},{127,7,2665},{0,59,628},{0,47,2665},{127,7,2665},{0,47,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,127,3157},{0,102,2},{2,71,304},{0,62,514},{0,103,9669},{0,67,4865},{0,60,2107},{0,42,6485},{0,46,10689},{0,39,7245},{6,127,3029},{0,102,2},{5,71,260},{0,62,514},{50,1,9669}, 
+{0,67,4865},{0,60,2107},{0,42,6485},{103,0,9669},{0,42,6485},{0,100,0},{0,100,0},{0,100,0},{0,49,0},{0,50,2178},{0,40,820},{0,40,820},{0,24,1348},{0,22,2392},{0,21,1556},{0,100,0},{0,100,0},{0,100,0},{0,49,0},{24,1,2178},{0,40,820},{0,40,820},{0,24,1348},{50,0,2178},{0,24,1348},{76,0,2665},{0,102,2},{15,69,0},{0,62,514},{76,0,2665},{126,14,2665},{0,62,514}, +{0,51,2665},{126,14,2665},{0,51,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{6,127,3509},{2,108,32},{4,76,424},{1,67,473},{0,115,9669},{0,73,4529},{0,64,1649},{0,45,6209},{0,51,10919},{0,45,7109},{12,127,3141},{4,106,2},{9,75,260},{1,67,469},{56,1,9669},{0,73,4529},{0,64,1649},{0,45,6209},{115,0,9669}, +{0,45,6209},{2,108,32},{2,108,32},{2,108,32},{2,54,32},{0,62,2178},{0,46,628},{0,46,628},{0,27,1184},{0,28,2520},{0,26,1502},{4,104,0},{4,104,0},{4,104,0},{4,53,0},{30,1,2178},{0,46,628},{0,46,628},{0,27,1184},{62,0,2178},{0,27,1184},{81,0,2665},{2,108,0},{19,73,0},{0,66,421},{81,0,2665},{126,20,2665},{0,66,421},{0,55,2665},{126,20,2665},{0,55,2665},{2,0,32}, +{2,0,32},{2,0,32},{2,0,32},{0,12,0},{0,12,0},{0,12,0},{0,6,0},{0,5,10},{0,5,10},{12,127,3989},{4,114,130},{7,79,616},{3,72,525},{0,127,9669},{0,81,4178},{0,67,1265},{0,50,5950},{0,57,11199},{0,49,7029},{17,127,3253},{8,110,2},{13,79,260},{5,71,469},{62,1,9669},{0,81,4178},{0,67,1265},{0,50,5950},{127,0,9669},{0,50,5950},{4,116,128},{4,116,128},{4,116,128}, +{4,59,128},{0,74,2178},{0,54,442},{0,54,442},{0,33,1040},{0,31,2676},{0,30,1460},{8,108,0},{8,108,0},{8,108,0},{8,57,0},{36,1,2178},{0,54,442},{0,54,442},{0,33,1040},{74,0,2178},{0,33,1040},{87,0,2665},{6,112,0},{23,77,0},{0,72,325},{87,0,2665},{126,26,2665},{0,72,325},{0,59,2665},{126,26,2665},{0,59,2665},{4,0,128},{4,0,128},{4,0,128},{4,0,128},{0,24,0}, 
+{0,24,0},{0,24,0},{0,12,0},{0,8,50},{0,8,50},{15,127,4613},{6,118,304},{9,84,880},{4,76,645},{3,127,9761},{0,87,3898},{0,73,913},{0,56,5686},{0,63,11511},{0,53,6939},{20,127,3401},{12,114,2},{17,83,260},{9,75,469},{68,0,9669},{0,87,3898},{0,73,913},{0,56,5686},{127,6,9669},{0,56,5686},{6,124,288},{6,124,288},{6,124,288},{6,63,289},{0,86,2178},{0,60,306},{0,60,306}, +{0,36,900},{0,37,2852},{0,34,1452},{12,112,0},{12,112,0},{12,112,0},{12,61,0},{42,1,2178},{0,60,306},{0,60,306},{0,36,900},{86,0,2178},{0,36,900},{93,0,2665},{10,116,0},{27,81,0},{0,77,260},{93,0,2665},{126,32,2665},{0,77,260},{0,63,2665},{126,32,2665},{0,63,2665},{6,0,288},{6,0,288},{6,0,288},{6,0,288},{0,36,0},{0,36,0},{0,36,0},{0,18,0},{0,14,106}, +{0,14,106},{17,127,5446},{9,124,575},{12,90,1267},{8,80,862},{7,127,10073},{0,96,3625},{0,79,594},{0,59,5393},{0,70,11905},{0,57,6894},{26,127,3570},{16,119,3},{21,87,259},{14,80,470},{75,0,9669},{0,96,3625},{0,79,594},{0,59,5393},{127,13,9669},{0,59,5393},{9,127,558},{9,127,558},{9,127,558},{8,69,545},{0,100,2178},{0,67,185},{0,67,185},{0,42,754},{0,43,3090},{0,38,1478},{17,115,1}, +{17,115,1},{17,115,1},{16,65,1},{49,0,2178},{0,67,185},{0,67,185},{0,42,754},{100,0,2178},{0,42,754},{100,0,2665},{15,120,0},{32,85,1},{0,82,193},{100,0,2665},{127,38,2665},{0,82,193},{0,67,2665},{127,38,2665},{0,67,2665},{8,0,545},{8,0,545},{8,0,545},{8,0,545},{0,50,0},{0,50,0},{0,50,0},{0,24,1},{0,19,193},{0,19,193},{20,127,6378},{12,127,915},{15,93,1695}, +{9,84,1118},{12,127,10506},{0,102,3401},{0,84,366},{0,65,5186},{0,76,12289},{0,61,6870},{32,127,3738},{20,123,3},{25,91,259},{18,84,470},{80,1,9669},{0,102,3401},{0,84,366},{0,65,5186},{126,19,9669},{0,65,5186},{12,127,914},{12,127,914},{12,127,914},{10,74,841},{0,112,2178},{0,76,97},{0,76,97},{0,45,650},{0,51,3317},{0,43,1514},{21,119,1},{21,119,1},{21,119,1},{20,69,1},{55,0,2178}, 
+{0,76,97},{0,76,97},{0,45,650},{112,0,2178},{0,45,650},{106,0,2665},{19,124,0},{36,89,1},{0,86,130},{106,0,2665},{127,44,2665},{0,86,130},{0,71,2665},{127,44,2665},{0,71,2665},{10,0,841},{10,0,841},{10,0,841},{10,0,841},{0,62,0},{0,62,0},{0,62,0},{0,30,1},{0,25,305},{0,25,305},{23,127,7454},{15,127,1431},{17,97,2214},{11,89,1438},{15,127,11102},{0,108,3209},{0,90,198}, +{0,69,4913},{0,81,12639},{0,65,6875},{38,127,3938},{24,127,3},{29,95,259},{22,88,470},{86,1,9669},{0,108,3209},{0,90,198},{0,69,4913},{126,25,9669},{0,69,4913},{15,127,1382},{15,127,1382},{15,127,1382},{12,79,1201},{0,124,2178},{0,84,37},{0,84,37},{0,50,549},{0,54,3565},{0,46,1598},{25,123,1},{25,123,1},{25,123,1},{24,73,1},{61,0,2178},{0,84,37},{0,84,37},{0,50,549},{124,0,2178}, +{0,50,549},{112,0,2665},{24,127,2},{40,93,1},{0,91,89},{112,0,2665},{127,50,2665},{0,91,89},{0,75,2665},{127,50,2665},{0,75,2665},{12,0,1201},{12,0,1201},{12,0,1201},{12,0,1201},{0,74,0},{0,74,0},{0,74,0},{0,36,1},{0,28,445},{0,28,445},{26,127,8674},{18,127,2131},{19,102,2786},{13,93,1839},{17,127,11833},{0,113,3038},{0,95,86},{0,72,4685},{0,84,13079},{0,70,6909},{41,127,4130}, +{30,127,35},{33,99,259},{26,92,470},{92,1,9669},{0,113,3038},{0,95,86},{0,72,4685},{126,31,9669},{0,72,4685},{17,127,1941},{17,127,1941},{17,127,1941},{14,84,1625},{3,127,2222},{0,90,5},{0,90,5},{0,56,445},{0,60,3861},{0,51,1674},{29,127,1},{29,127,1},{29,127,1},{28,77,1},{66,1,2178},{0,90,5},{0,90,5},{0,56,445},{126,5,2178},{0,56,445},{118,0,2665},{31,127,20},{44,97,1}, +{0,95,50},{118,0,2665},{127,56,2665},{0,95,50},{0,79,2665},{127,56,2665},{0,79,2665},{14,0,1625},{14,0,1625},{14,0,1625},{14,0,1625},{0,86,0},{0,86,0},{0,86,0},{0,42,1},{0,34,605},{0,34,605},{32,127,10209},{21,127,3140},{22,108,3525},{15,98,2360},{20,127,12902},{0,122,2885},{0,101,21},{0,78,4450},{0,93,13598},{0,73,6982},{47,127,4373},{34,127,134},{38,104,260},{30,96,469},{99,0,9669}, 
+{0,122,2885},{0,101,21},{0,78,4450},{126,38,9669},{0,78,4450},{20,127,2717},{20,127,2717},{20,127,2717},{16,90,2180},{6,127,2420},{0,98,1},{0,98,1},{0,62,353},{0,64,4230},{0,57,1814},{34,126,13},{34,126,13},{34,126,13},{33,81,1},{73,0,2178},{2,96,1},{2,96,1},{0,62,353},{126,12,2178},{0,62,353},{125,0,2665},{40,127,74},{48,102,0},{0,101,20},{125,0,2665},{126,63,2665},{0,101,20}, +{0,84,2665},{126,63,2665},{0,84,2665},{16,0,2180},{16,0,2180},{16,0,2180},{16,0,2180},{0,100,0},{0,100,0},{0,100,0},{0,49,0},{0,40,820},{0,40,820},{35,127,11582},{25,127,4131},{25,111,4166},{17,103,2825},{26,127,13903},{1,126,2826},{1,105,18},{0,83,4290},{0,96,13969},{0,79,6967},{52,127,4577},{40,127,270},{42,108,260},{34,100,469},{105,0,9669},{1,126,2825},{1,105,17},{0,83,4289},{126,44,9669}, +{0,83,4289},{23,127,3434},{23,127,3434},{23,127,3434},{19,94,2666},{9,127,2709},{1,105,14},{1,105,14},{0,66,275},{0,70,4491},{0,60,1893},{38,127,29},{38,127,29},{38,127,29},{37,85,1},{79,0,2178},{6,100,1},{6,100,1},{0,66,274},{127,17,2178},{0,66,274},{127,7,2665},{46,127,146},{52,106,0},{0,105,8},{127,7,2665},{127,68,2665},{0,105,8},{0,88,2665},{127,68,2665},{0,88,2665},{19,0,2665}, +{19,0,2665},{19,0,2665},{19,0,2665},{1,110,1},{1,110,1},{1,110,1},{0,55,1},{0,43,985},{0,43,985},{38,127,12090},{30,127,4561},{29,115,4166},{21,107,2825},{29,127,14311},{5,127,2910},{5,109,18},{0,86,4166},{0,105,13477},{0,84,6285},{58,127,4833},{46,127,470},{46,112,260},{38,104,469},{111,0,9669},{5,127,2909},{5,109,17},{0,86,4085},{127,49,9669},{0,86,4085},{26,127,3590},{26,127,3590},{26,127,3590}, +{23,98,2666},{12,127,2853},{5,109,14},{5,109,14},{3,69,259},{0,76,4147},{0,64,1475},{44,127,61},{44,127,61},{44,127,61},{41,89,1},{85,0,2178},{10,104,1},{10,104,1},{0,69,202},{127,23,2178},{0,69,202},{127,19,2665},{54,127,260},{56,110,0},{0,110,0},{127,19,2665},{127,74,2665},{0,110,0},{0,92,2665},{127,74,2665},{0,92,2665},{23,0,2665},{23,0,2665},{23,0,2665},{23,0,2665},{5,114,1}, 
+{5,114,1},{5,114,1},{4,59,1},{0,51,745},{0,51,745},{44,127,12610},{34,127,5039},{33,119,4166},{25,111,2825},{35,127,14719},{11,127,3118},{9,113,18},{4,90,4166},{0,113,13003},{0,87,5661},{61,127,5093},{51,127,736},{50,116,260},{42,108,469},{117,0,9669},{14,127,3073},{9,113,17},{0,89,3929},{127,55,9669},{0,89,3929},{32,127,3726},{32,127,3726},{32,127,3726},{27,102,2666},{17,127,2979},{9,113,14},{9,113,14}, +{7,73,259},{0,84,3784},{0,70,1091},{47,127,97},{47,127,97},{47,127,97},{45,93,1},{91,0,2178},{14,108,1},{14,108,1},{0,75,146},{127,29,2178},{0,75,146},{127,31,2665},{60,127,388},{60,114,0},{3,114,0},{127,31,2665},{127,80,2665},{3,114,0},{0,96,2665},{127,80,2665},{0,96,2665},{27,0,2665},{27,0,2665},{27,0,2665},{27,0,2665},{9,118,1},{9,118,1},{9,118,1},{8,63,1},{0,60,565}, +{0,60,565},{49,127,13154},{40,127,5661},{37,123,4166},{30,116,2823},{41,127,15213},{16,127,3497},{14,118,18},{8,94,4166},{0,116,12489},{0,93,5039},{68,127,5409},{57,127,1091},{54,120,259},{47,113,470},{124,0,9669},{22,127,3341},{14,118,14},{0,95,3726},{127,62,9669},{0,95,3726},{38,127,3929},{38,127,3929},{38,127,3929},{31,107,2665},{23,127,3156},{13,117,17},{13,117,17},{11,77,260},{0,90,3467},{0,76,736},{52,127,146}, +{52,127,146},{52,127,146},{49,98,1},{98,0,2178},{19,112,1},{19,112,1},{0,80,97},{127,36,2178},{0,80,97},{127,45,2665},{67,127,565},{64,119,1},{9,118,1},{127,45,2665},{126,87,2665},{9,118,1},{0,100,2665},{126,87,2665},{0,100,2665},{31,0,2665},{31,0,2665},{31,0,2665},{31,0,2665},{13,123,0},{13,123,0},{13,123,0},{13,67,0},{0,67,388},{0,67,388},{52,127,13734},{43,127,6285},{41,127,4166}, 
+{34,120,2823},{47,127,15677},{22,127,3905},{18,122,18},{12,98,4166},{0,125,12093},{0,97,4561},{71,127,5701},{60,127,1475},{58,124,259},{51,117,470},{127,5,9669},{31,127,3601},{18,122,14},{0,101,3590},{126,68,9669},{0,101,3590},{41,127,4085},{41,127,4085},{41,127,4085},{35,111,2665},{29,127,3332},{17,121,17},{17,121,17},{15,81,260},{0,99,3211},{0,81,470},{58,127,202},{58,127,202},{58,127,202},{53,102,1},{104,0,2178}, +{23,116,1},{23,116,1},{0,83,61},{127,42,2178},{0,83,61},{127,57,2665},{76,127,745},{68,123,1},{13,122,1},{127,57,2665},{126,93,2665},{13,122,1},{0,104,2665},{126,93,2665},{0,104,2665},{35,0,2665},{35,0,2665},{35,0,2665},{35,0,2665},{17,127,0},{17,127,0},{17,127,0},{17,71,0},{0,73,260},{0,73,260},{58,127,14302},{48,127,6967},{44,127,4290},{38,124,2823},{52,127,16094},{28,127,4409},{22,126,18}, +{16,102,4166},{0,126,11883},{0,102,4131},{77,127,6005},{67,127,1893},{61,127,275},{55,121,470},{127,18,9669},{40,127,3909},{22,126,14},{0,104,3434},{126,74,9669},{0,104,3434},{44,127,4289},{44,127,4289},{44,127,4289},{39,115,2665},{35,127,3540},{21,125,17},{21,125,17},{19,85,260},{0,105,2979},{0,87,270},{61,127,274},{61,127,274},{61,127,274},{57,106,1},{110,0,2178},{27,120,1},{27,120,1},{0,89,29},{126,48,2178}, +{0,89,29},{127,69,2665},{81,127,985},{72,127,1},{17,126,1},{127,69,2665},{126,99,2665},{17,126,1},{0,108,2665},{126,99,2665},{0,108,2665},{39,0,2665},{39,0,2665},{39,0,2665},{39,0,2665},{22,126,8},{22,126,8},{22,126,8},{21,75,0},{0,81,146},{0,81,146},{61,127,13635},{53,127,6982},{49,127,4450},{42,125,2742},{55,127,15195},{34,127,4106},{26,127,21},{19,105,3525},{2,127,10776},{0,106,3140},{79,127,5396}, 
+{70,127,1814},{65,127,353},{59,122,321},{127,27,8712},{46,127,3462},{29,127,1},{0,107,2717},{126,78,8712},{0,107,2717},{49,127,4450},{49,127,4450},{49,127,4450},{43,119,2665},{38,127,3736},{26,127,21},{26,127,21},{23,89,260},{0,113,2772},{0,93,134},{65,127,353},{65,127,353},{65,127,353},{61,110,1},{115,1,2178},{31,124,1},{31,124,1},{0,93,13},{126,54,2178},{0,93,13},{127,76,2178},{87,127,820},{78,127,0}, +{27,127,0},{127,76,2178},{126,103,2178},{27,127,0},{0,110,2180},{126,103,2178},{0,110,2180},{43,0,2665},{43,0,2665},{43,0,2665},{43,0,2665},{26,127,20},{26,127,20},{26,127,20},{25,79,0},{0,87,74},{0,87,74},{65,127,12750},{57,127,6909},{55,127,4685},{48,126,2678},{61,127,14070},{37,127,3711},{32,127,86},{25,108,2786},{5,127,9739},{0,109,2131},{85,127,4658},{76,127,1674},{71,127,445},{64,123,173},{127,36,7578}, +{51,127,2949},{37,127,5},{0,110,1941},{127,82,7578},{0,110,1941},{55,127,4685},{55,127,4685},{55,127,4685},{48,123,2666},{44,127,3987},{32,127,86},{32,127,86},{28,94,259},{0,119,2571},{0,97,35},{71,127,445},{71,127,445},{71,127,445},{66,114,1},{122,0,2178},{37,127,5},{37,127,5},{0,98,1},{126,61,2178},{0,98,1},{127,83,1625},{93,127,605},{84,127,1},{40,127,0},{127,83,1625},{126,106,1625},{40,127,0}, +{0,113,1625},{126,106,1625},{0,113,1625},{48,0,2665},{48,0,2665},{48,0,2665},{48,0,2665},{32,127,50},{32,127,50},{32,127,50},{30,83,1},{0,96,20},{0,96,20},{68,127,12050},{62,127,6875},{58,127,4913},{52,127,2666},{65,127,13165},{43,127,3423},{37,127,198},{30,110,2214},{11,127,8987},{0,112,1431},{88,127,4082},{79,127,1547},{77,127,549},{68,124,81},{127,43,6661},{57,127,2525},{43,127,37},{0,112,1382},{127,86,6661}, 
+{0,112,1382},{58,127,4913},{58,127,4913},{58,127,4913},{52,127,2666},{49,127,4197},{37,127,198},{37,127,198},{32,98,259},{0,125,2435},{0,103,3},{77,127,549},{77,127,549},{77,127,549},{70,118,1},{127,2,2178},{43,127,37},{43,127,37},{3,102,1},{127,66,2178},{3,102,1},{127,89,1201},{99,127,445},{90,127,1},{53,127,0},{127,89,1201},{125,109,1201},{53,127,0},{0,115,1201},{125,109,1201},{0,115,1201},{52,0,2665}, +{52,0,2665},{52,0,2665},{52,0,2665},{36,127,89},{36,127,89},{36,127,89},{34,87,1},{0,103,2},{0,103,2},{74,127,11418},{65,127,6870},{62,127,5186},{56,127,2701},{68,127,12313},{46,127,3251},{43,127,366},{34,112,1695},{14,127,8383},{0,115,915},{91,127,3578},{84,127,1514},{79,127,650},{73,126,21},{127,53,5829},{63,127,2165},{51,127,97},{0,115,914},{126,91,5829},{0,115,914},{62,127,5186},{62,127,5186},{62,127,5186}, +{56,127,2701},{55,127,4461},{43,127,366},{43,127,366},{36,102,259},{2,127,2427},{4,107,3},{79,127,650},{79,127,650},{79,127,650},{74,122,1},{127,14,2178},{51,127,97},{51,127,97},{7,106,1},{127,72,2178},{7,106,1},{127,95,841},{102,127,305},{96,127,1},{64,127,0},{127,95,841},{127,111,841},{64,127,0},{0,117,841},{127,111,841},{0,117,841},{56,0,2665},{56,0,2665},{56,0,2665},{56,0,2665},{41,127,130}, +{41,127,130},{41,127,130},{38,91,1},{3,108,0},{3,108,0},{77,127,10830},{69,127,6894},{68,127,5393},{60,127,2786},{71,127,11565},{54,127,3154},{48,127,594},{37,115,1267},{25,127,7825},{3,118,575},{94,127,3146},{87,127,1478},{85,127,754},{77,127,1},{127,60,5082},{70,127,1869},{57,127,185},{0,118,558},{127,94,5082},{0,118,558},{68,127,5393},{68,127,5393},{68,127,5393},{60,127,2786},{58,127,4725},{48,127,594},{48,127,594}, 
+{40,106,259},{8,127,2587},{8,111,3},{85,127,754},{85,127,754},{85,127,754},{78,126,1},{127,27,2178},{57,127,185},{57,127,185},{11,110,1},{126,78,2178},{11,110,1},{127,101,545},{108,127,193},{102,127,1},{76,127,0},{127,101,545},{127,114,545},{76,127,0},{0,119,545},{127,114,545},{0,119,545},{60,0,2665},{60,0,2665},{60,0,2665},{60,0,2665},{45,127,193},{45,127,193},{45,127,193},{42,95,1},{7,112,0}, +{7,112,0},{79,127,10221},{74,127,6939},{71,127,5686},{64,127,2954},{77,127,10836},{57,127,3109},{54,127,913},{42,117,880},{34,127,7300},{8,120,304},{100,127,2736},{93,127,1452},{91,127,900},{83,127,29},{127,69,4344},{76,127,1611},{67,127,306},{2,121,288},{126,99,4344},{2,121,288},{71,127,5686},{71,127,5686},{71,127,5686},{64,127,2954},{65,127,5051},{54,127,913},{54,127,913},{44,110,260},{14,127,2856},{13,115,2},{91,127,900}, +{91,127,900},{91,127,900},{83,127,29},{127,40,2178},{67,127,306},{67,127,306},{15,115,0},{126,85,2178},{15,115,0},{127,108,288},{113,127,106},{109,127,0},{90,127,0},{127,108,288},{127,118,288},{90,127,0},{0,121,288},{127,118,288},{0,121,288},{64,0,2665},{64,0,2665},{64,0,2665},{64,0,2665},{50,127,260},{50,127,260},{50,127,260},{46,100,0},{11,117,0},{11,117,0},{85,127,9781},{79,127,6942},{77,127,5950}, +{70,127,3146},{79,127,10205},{63,127,3141},{57,127,1265},{47,119,616},{40,127,6924},{13,123,130},{103,127,2436},{97,127,1460},{94,127,1040},{87,127,104},{127,76,3779},{81,127,1496},{73,127,442},{10,123,128},{126,103,3779},{10,123,128},{77,127,5950},{77,127,5950},{77,127,5950},{70,127,3146},{68,127,5347},{57,127,1265},{57,127,1265},{48,114,260},{25,127,3115},{17,119,2},{94,127,1040},{94,127,1040},{94,127,1040},{87,127,104},{127,53,2178}, 
+{73,127,442},{73,127,442},{19,119,0},{126,91,2178},{19,119,0},{127,115,128},{116,127,50},{115,127,0},{102,127,0},{127,115,128},{127,121,128},{102,127,0},{0,123,128},{127,121,128},{0,123,128},{68,0,2665},{68,0,2665},{68,0,2665},{68,0,2665},{55,127,325},{55,127,325},{55,127,325},{50,104,0},{15,121,0},{15,121,0},{88,127,9417},{82,127,7109},{82,127,6209},{73,127,3406},{85,127,9733},{67,127,3260},{63,127,1649}, +{50,122,424},{46,127,6644},{19,125,32},{106,127,2208},{101,127,1502},{100,127,1184},{93,127,232},{127,86,3299},{87,127,1400},{81,127,628},{19,125,32},{127,107,3299},{19,125,32},{82,127,6209},{82,127,6209},{82,127,6209},{73,127,3406},{74,127,5659},{63,127,1649},{63,127,1649},{52,118,260},{34,127,3419},{21,123,2},{100,127,1184},{100,127,1184},{100,127,1184},{93,127,232},{127,64,2178},{81,127,628},{81,127,628},{23,123,0},{126,97,2178}, +{23,123,0},{127,121,32},{122,127,10},{121,127,0},{115,127,0},{127,121,32},{127,124,32},{115,127,0},{0,125,32},{127,124,32},{0,125,32},{72,0,2665},{72,0,2665},{72,0,2665},{72,0,2665},{61,127,421},{61,127,421},{61,127,421},{54,108,0},{19,125,0},{19,125,0},{91,127,9133},{88,127,7245},{85,127,6485},{79,127,3710},{88,127,9325},{73,127,3460},{67,127,2107},{55,124,304},{54,127,6532},{25,127,2},{111,127,2038}, +{106,127,1556},{103,127,1348},{96,127,416},{127,93,2904},{93,127,1368},{87,127,820},{27,127,0},{126,111,2904},{27,127,0},{85,127,6485},{85,127,6485},{85,127,6485},{79,127,3710},{79,127,5949},{67,127,2107},{67,127,2107},{56,122,260},{40,127,3771},{25,127,2},{103,127,1348},{103,127,1348},{103,127,1348},{96,127,416},{127,76,2178},{87,127,820},{87,127,820},{27,127,0},{126,103,2178},{27,127,0},{127,127,0},{127,127,0},{127,127,0}, 
+{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{76,0,2665},{76,0,2665},{76,0,2665},{76,0,2665},{65,127,514},{65,127,514},{65,127,514},{58,112,0},{25,127,2},{25,127,2},{94,127,8049},{91,127,6469},{88,127,5849},{84,127,3561},{91,127,8053},{76,127,3106},{73,127,1965},{62,125,154},{60,127,5562},{34,127,32},{111,127,1507},{109,127,1161},{106,127,1009},{102,127,305},{127,98,2166}, +{99,127,1009},{93,127,605},{40,127,0},{126,113,2166},{40,127,0},{88,127,5849},{88,127,5849},{88,127,5849},{84,127,3561},{82,127,5209},{73,127,1965},{73,127,1965},{61,124,126},{46,127,3225},{34,127,32},{106,127,1009},{106,127,1009},{106,127,1009},{102,127,305},{127,83,1625},{93,127,605},{93,127,605},{40,127,0},{126,106,1625},{40,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0}, +{0,127,0},{127,127,0},{0,127,0},{80,0,2665},{80,0,2665},{80,0,2665},{80,0,2665},{68,127,628},{68,127,628},{68,127,628},{63,116,1},{34,127,32},{34,127,32},{97,127,7165},{94,127,5845},{91,127,5333},{87,127,3401},{94,127,7033},{79,127,2823},{79,127,1862},{64,126,60},{64,127,4795},{40,127,80},{114,127,1107},{111,127,830},{111,127,730},{105,127,221},{127,102,1601},{102,127,737},{99,127,445},{53,127,0},{127,115,1601}, +{53,127,0},{91,127,5333},{91,127,5333},{91,127,5333},{87,127,3401},{85,127,4629},{79,127,1862},{79,127,1862},{66,125,52},{54,127,2834},{40,127,80},{111,127,730},{111,127,730},{111,127,730},{105,127,221},{127,89,1201},{99,127,445},{99,127,445},{53,127,0},{125,109,1201},{53,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{84,0,2665}, 
+{84,0,2665},{84,0,2665},{84,0,2665},{74,127,740},{74,127,740},{74,127,740},{66,120,0},{40,127,80},{40,127,80},{100,127,6361},{97,127,5277},{94,127,4865},{90,127,3265},{97,127,6117},{84,127,2641},{79,127,1814},{70,126,12},{70,127,4123},{46,127,160},{117,127,779},{114,127,595},{114,127,514},{108,127,153},{127,105,1121},{105,127,513},{102,127,305},{64,127,0},{126,117,1121},{64,127,0},{94,127,4865},{94,127,4865},{94,127,4865}, +{90,127,3265},{91,127,4117},{79,127,1814},{79,127,1814},{70,126,8},{60,127,2474},{46,127,160},{114,127,514},{114,127,514},{114,127,514},{108,127,153},{127,95,841},{102,127,305},{102,127,305},{64,127,0},{127,111,841},{64,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{88,0,2665},{88,0,2665},{88,0,2665},{88,0,2665},{79,127,853}, +{79,127,853},{79,127,853},{70,124,0},{46,127,160},{46,127,160},{103,127,5637},{100,127,4765},{100,127,4441},{93,127,3153},{100,127,5305},{90,127,2465},{86,127,1789},{74,127,4},{73,127,3543},{54,127,274},{117,127,507},{117,127,387},{115,127,337},{111,127,101},{127,110,726},{108,127,337},{108,127,193},{76,127,0},{126,119,726},{76,127,0},{100,127,4441},{100,127,4441},{100,127,4441},{93,127,3153},{94,127,3657},{86,127,1789},{86,127,1789}, +{74,127,4},{67,127,2182},{54,127,274},{115,127,337},{115,127,337},{115,127,337},{111,127,101},{127,101,545},{108,127,193},{108,127,193},{76,127,0},{127,114,545},{76,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{92,0,2665},{92,0,2665},{92,0,2665},{92,0,2665},{82,127,985},{82,127,985},{82,127,985},{74,127,4},{54,127,274}, 
+{54,127,274},{17,127,39416},{1,127,2073},{0,90,164},{0,86,5261},{17,127,49709},{0,113,21212},{0,85,8480},{0,75,26137},{0,87,56335},{0,67,37225},{12,127,10274},{0,122,1413},{0,90,148},{0,75,3940},{57,0,18065},{0,76,12036},{0,68,6166},{0,45,14098},{117,0,18065},{0,45,14098},{0,71,0},{0,71,0},{0,71,0},{0,35,0},{0,35,1105},{0,28,405},{0,28,405},{0,15,689},{0,14,1226},{0,15,789},{0,71,0}, +{0,71,0},{0,71,0},{0,35,0},{17,0,1105},{0,28,405},{0,28,405},{0,15,689},{35,0,1105},{0,15,689},{85,0,9248},{0,122,1413},{0,90,148},{0,75,3940},{85,0,9248},{127,23,9248},{0,75,3940},{0,57,9250},{127,23,9248},{0,57,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{20,127,44736},{2,127,2677},{0,95,64}, +{0,92,4941},{17,127,55213},{0,119,21820},{0,89,8426},{0,78,27209},{0,90,60583},{0,73,39465},{15,127,10870},{0,125,1213},{0,93,52},{0,78,3656},{61,0,19334},{0,81,12395},{0,73,6176},{0,50,14795},{124,0,19334},{0,50,14795},{0,83,0},{0,83,0},{0,83,0},{0,41,0},{0,41,1513},{0,34,565},{0,34,565},{0,18,937},{0,19,1661},{0,18,1081},{0,83,0},{0,83,0},{0,83,0},{0,41,0},{20,0,1513}, +{0,34,565},{0,34,565},{0,18,937},{41,0,1513},{0,18,937},{91,0,9248},{0,125,1213},{0,93,52},{0,78,3656},{91,0,9248},{127,29,9248},{0,78,3656},{0,61,9250},{127,29,9248},{0,61,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{20,127,50624},{2,127,4005},{0,99,4},{0,95,4545},{20,127,61345},{0,125,22460},{0,93,8410}, +{0,83,28480},{0,96,65039},{0,76,41789},{17,127,11556},{1,127,1217},{0,99,4},{0,83,3425},{64,0,20689},{0,87,12835},{0,76,6216},{0,53,15539},{126,3,20689},{0,53,15539},{0,95,0},{0,95,0},{0,95,0},{0,47,0},{0,47,1985},{0,37,745},{0,37,745},{0,21,1225},{0,22,2185},{0,21,1421},{0,95,0},{0,95,0},{0,95,0},{0,47,0},{23,0,1985},{0,37,745},{0,37,745},{0,21,1225},{47,0,1985}, 
+{0,21,1225},{97,0,9248},{2,127,1205},{0,99,4},{0,83,3425},{97,0,9248},{127,35,9248},{0,83,3425},{0,65,9248},{127,35,9248},{0,65,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{23,127,56952},{5,127,6081},{0,103,9},{0,98,4221},{20,127,65535},{0,126,23535},{0,97,8416},{0,86,29640},{0,99,65535},{0,79,44233},{20,127,12416}, +{2,127,1397},{1,103,8},{0,86,3181},{69,0,22129},{0,90,13307},{0,81,6266},{0,56,16331},{127,7,22129},{0,56,16331},{0,107,0},{0,107,0},{0,107,0},{0,53,0},{0,53,2521},{0,43,953},{0,43,953},{0,24,1553},{0,22,2777},{0,24,1809},{0,107,0},{0,107,0},{0,107,0},{0,53,0},{26,0,2521},{0,43,953},{0,43,953},{0,24,1553},{53,0,2521},{0,24,1553},{103,0,9248},{2,127,1381},{2,103,0}, +{0,86,3181},{103,0,9248},{127,41,9248},{0,86,3181},{0,69,9248},{127,41,9248},{0,69,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{26,127,64790},{5,127,9105},{1,108,78},{0,104,3845},{23,127,65535},{2,127,25370},{0,102,8458},{0,89,31046},{0,105,65535},{0,81,47079},{23,127,13542},{5,127,1881},{2,109,50},{0,92,2897},{73,0,23851}, +{0,96,13865},{0,87,6374},{0,59,17289},{126,12,23851},{0,59,17289},{0,121,0},{0,121,0},{0,121,0},{0,59,1},{0,61,3200},{0,51,1210},{0,51,1210},{0,27,1972},{0,25,3528},{0,27,2296},{0,121,0},{0,121,0},{0,121,0},{0,59,1},{30,0,3200},{0,51,1210},{0,51,1210},{0,27,1972},{61,0,3200},{0,27,1972},{110,0,9248},{11,127,1693},{7,107,1},{0,92,2897},{110,0,9248},{126,48,9248},{0,92,2897}, +{0,73,9250},{126,48,9248},{0,73,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{26,127,65535},{5,127,12609},{1,111,198},{0,107,3541},{26,127,65535},{2,127,27706},{0,105,8522},{0,92,32390},{0,113,65535},{0,87,49743},{26,127,14690},{8,127,2501},{3,113,126},{0,98,2665},{77,0,25472},{0,102,14385},{0,90,6486},{0,62,18185},{127,15,25472}, 
+{0,62,18185},{0,127,16},{0,127,16},{0,127,16},{0,65,0},{0,66,3872},{0,51,1450},{0,51,1450},{0,30,2384},{0,28,4268},{0,27,2776},{1,127,13},{1,127,13},{1,127,13},{0,65,0},{33,0,3872},{0,51,1450},{0,51,1450},{0,30,2384},{66,0,3872},{0,30,2384},{115,1,9248},{19,127,2041},{11,111,1},{0,98,2665},{115,1,9248},{126,54,9248},{0,98,2665},{0,77,9250},{126,54,9248},{0,77,9250},{0,0,0}, +{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{29,127,65535},{8,127,16605},{1,116,349},{0,112,3274},{26,127,65535},{2,127,30470},{0,109,8346},{0,98,33226},{0,113,65535},{0,90,52035},{29,127,15746},{11,127,3213},{5,117,217},{0,101,2445},{81,0,26744},{0,108,14657},{0,96,6398},{0,66,18739},{126,20,26744},{0,66,18739},{3,127,116},{3,127,116},{3,127,116}, +{0,71,4},{0,74,4418},{0,57,1586},{0,57,1586},{0,33,2664},{0,31,4916},{0,30,3140},{3,127,80},{3,127,80},{3,127,80},{1,70,1},{36,1,4418},{0,57,1586},{0,57,1586},{0,33,2664},{74,0,4418},{0,33,2664},{121,1,9248},{25,127,2377},{15,115,1},{0,101,2441},{121,1,9248},{126,60,9248},{0,101,2441},{0,81,9250},{126,60,9248},{0,81,9250},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,3,0}, +{0,3,0},{0,3,0},{0,1,1},{1,0,2},{1,0,2},{32,127,65535},{11,127,21205},{2,120,605},{0,115,3106},{29,127,65535},{2,127,33062},{0,114,7330},{0,101,32522},{0,119,65535},{0,93,52995},{35,127,16210},{14,127,3809},{9,121,217},{0,107,2325},{87,0,26744},{0,116,14054},{0,99,5606},{0,72,18275},{126,26,26744},{0,72,18275},{6,127,292},{6,127,292},{6,127,292},{2,76,52},{0,86,4418},{0,64,1313},{0,64,1313}, +{0,39,2440},{0,37,5092},{0,36,3044},{9,127,128},{9,127,128},{9,127,128},{5,74,1},{42,1,4418},{0,64,1313},{0,64,1313},{0,39,2440},{86,0,4418},{0,39,2440},{127,1,9248},{34,127,2741},{19,119,1},{0,107,2225},{127,1,9248},{127,65,9248},{0,107,2225},{0,85,9250},{127,65,9248},{0,85,9250},{2,0,52},{2,0,52},{2,0,52},{2,0,52},{0,15,0},{0,15,0},{0,15,0},{0,7,1},{0,5,18}, 
+{0,5,18},{35,127,65535},{11,127,27154},{3,124,1078},{0,121,3041},{32,127,65535},{5,127,36765},{0,119,6285},{0,107,31809},{0,125,65535},{0,96,54246},{38,127,16729},{22,127,4594},{14,126,218},{4,111,2318},{94,0,26744},{0,122,13481},{0,105,4785},{0,75,17772},{127,32,26744},{0,75,17772},{9,127,614},{9,127,614},{9,127,614},{5,81,181},{0,100,4418},{0,73,1037},{0,73,1037},{0,42,2210},{0,43,5330},{0,41,2986},{13,127,193}, +{13,127,193},{13,127,193},{9,79,1},{49,0,4418},{0,73,1037},{0,73,1037},{0,42,2210},{100,0,4418},{0,42,2210},{127,14,9248},{40,127,3177},{23,124,0},{0,110,2009},{127,14,9248},{127,72,9248},{0,110,2009},{0,90,9248},{127,72,9248},{0,90,9248},{5,0,181},{5,0,181},{5,0,181},{5,0,181},{0,29,0},{0,29,0},{0,29,0},{0,14,0},{0,11,65},{0,11,65},{35,127,65535},{14,127,31662},{4,127,1705}, +{0,124,3025},{35,127,65535},{8,127,39325},{0,122,5245},{0,110,30421},{0,125,65535},{0,102,54322},{44,127,16749},{25,127,5034},{17,127,245},{7,114,2214},{99,0,26259},{0,125,12725},{0,111,3981},{0,78,16984},{126,38,26259},{0,78,16984},{10,127,1025},{10,127,1025},{10,127,1025},{7,86,365},{0,112,4418},{0,81,797},{0,81,797},{0,47,2025},{0,51,5557},{0,45,2950},{17,127,245},{17,127,245},{17,127,245},{13,83,1},{55,0,4418}, +{0,81,797},{0,81,797},{0,47,2025},{112,0,4418},{0,47,2025},{127,25,8978},{46,127,3385},{28,127,0},{0,115,1732},{127,25,8978},{127,77,8978},{0,115,1732},{0,93,8980},{127,77,8978},{0,93,8980},{7,0,365},{7,0,365},{7,0,365},{7,0,365},{0,41,0},{0,41,0},{0,41,0},{0,20,0},{0,17,137},{0,17,137},{38,127,65535},{14,127,32078},{6,127,2618},{1,125,2789},{35,127,65535},{8,127,37485},{0,123,3805}, 
+{0,110,27013},{0,125,65535},{0,102,51330},{49,127,15435},{31,127,4714},{23,127,317},{12,116,1830},{104,0,24371},{0,125,11205},{0,113,2881},{0,83,15117},{127,42,24371},{0,83,15117},{12,127,1538},{12,127,1538},{12,127,1538},{9,91,613},{0,124,4418},{0,87,605},{0,87,605},{0,53,1825},{0,54,5805},{0,49,2946},{23,127,317},{23,127,317},{23,127,317},{17,87,1},{61,0,4418},{0,87,605},{0,87,605},{0,53,1825},{124,0,4418}, +{0,53,1825},{127,31,7938},{51,127,3029},{34,127,0},{0,118,1224},{127,31,7938},{127,80,7938},{0,118,1224},{0,95,7940},{127,80,7938},{0,95,7940},{9,0,613},{9,0,613},{9,0,613},{9,0,613},{0,53,0},{0,53,0},{0,53,0},{0,26,0},{0,22,221},{0,22,221},{38,127,65535},{14,127,32750},{6,127,3706},{3,124,2753},{35,127,65535},{8,127,35901},{0,124,2601},{0,110,23861},{0,125,65535},{0,102,48594},{52,127,14211}, +{34,127,4410},{26,127,405},{16,117,1482},{107,1,22568},{2,127,9869},{0,116,1973},{0,86,13349},{126,46,22568},{0,86,13349},{17,127,2137},{17,127,2137},{17,127,2137},{11,96,925},{3,127,4462},{0,93,445},{0,93,445},{0,56,1649},{0,60,6101},{0,54,2966},{26,127,405},{26,127,405},{26,127,405},{21,91,1},{66,1,4418},{0,93,445},{0,93,445},{0,56,1649},{126,5,4418},{0,56,1649},{127,37,6962},{57,127,2645},{40,127,0}, +{0,118,808},{127,37,6962},{127,83,6962},{0,118,808},{0,97,6964},{127,83,6962},{0,97,6964},{11,0,925},{11,0,925},{11,0,925},{11,0,925},{0,65,0},{0,65,0},{0,65,0},{0,32,0},{0,25,337},{0,25,337},{38,127,65535},{14,127,33812},{7,127,5233},{4,125,2961},{35,127,65535},{8,127,34425},{0,125,1509},{0,112,20542},{0,125,65535},{0,105,45810},{55,127,12917},{40,127,4114},{32,127,505},{22,117,1140},{112,0,20642}, 
+{2,127,8609},{0,119,1157},{0,89,11489},{127,50,20642},{0,89,11489},{20,127,2969},{20,127,2969},{20,127,2969},{13,102,1352},{6,127,4660},{0,102,289},{0,102,289},{0,62,1445},{0,64,6470},{0,57,3018},{32,127,505},{32,127,505},{32,127,505},{26,95,1},{73,0,4418},{0,102,289},{0,102,289},{0,62,1445},{126,12,4418},{0,62,1445},{127,43,5941},{63,127,2248},{46,127,1},{0,121,433},{127,43,5941},{127,86,5941},{0,121,433}, +{0,100,5941},{127,86,5941},{0,100,5941},{13,0,1352},{13,0,1352},{13,0,1352},{13,0,1352},{0,78,0},{0,78,0},{0,78,0},{0,39,0},{0,31,500},{0,31,500},{38,127,65535},{19,127,34934},{9,127,6748},{5,126,3381},{38,127,65535},{11,127,33369},{0,125,789},{0,112,17822},{0,125,65535},{0,105,43458},{58,127,11849},{43,127,3870},{38,127,617},{27,119,868},{115,1,19021},{5,127,7741},{0,120,621},{0,92,9957},{126,54,19021}, +{0,92,9957},{23,127,3833},{23,127,3833},{23,127,3833},{15,107,1800},{9,127,4984},{0,108,185},{0,108,185},{0,66,1282},{0,70,6822},{0,63,3090},{38,127,617},{38,127,617},{38,127,617},{30,99,1},{79,0,4418},{0,108,185},{0,108,185},{0,66,1282},{127,17,4418},{0,66,1282},{127,50,5101},{67,127,1921},{52,127,0},{0,124,205},{127,50,5101},{127,89,5101},{0,124,205},{0,102,5101},{127,89,5101},{0,102,5101},{15,0,1800}, +{15,0,1800},{15,0,1800},{15,0,1800},{0,91,0},{0,91,0},{0,91,0},{0,45,0},{0,37,676},{0,37,676},{41,127,65535},{19,127,36134},{9,127,8476},{5,126,4005},{38,127,65535},{11,127,32489},{0,126,309},{0,115,15210},{0,125,65535},{0,105,41362},{61,127,10853},{46,127,3674},{41,127,725},{31,120,632},{120,0,17485},{11,127,6965},{0,123,249},{0,95,8537},{127,58,17485},{0,95,8537},{23,127,4809},{23,127,4809},{23,127,4809}, 
+{17,112,2312},{12,127,5444},{0,116,89},{0,116,89},{0,72,1130},{0,76,7206},{0,67,3194},{41,127,725},{41,127,725},{41,127,725},{34,103,1},{85,0,4418},{0,116,89},{0,116,89},{0,72,1130},{127,23,4418},{0,72,1130},{127,56,4325},{70,127,1625},{58,127,0},{0,124,61},{127,56,4325},{127,92,4325},{0,124,61},{0,104,4325},{127,92,4325},{0,104,4325},{17,0,2312},{17,0,2312},{17,0,2312},{17,0,2312},{0,103,0}, +{0,103,0},{0,103,0},{0,50,1},{0,40,872},{0,40,872},{41,127,65535},{19,127,37590},{9,127,10460},{6,127,4841},{38,127,65535},{11,127,31865},{0,127,65},{0,115,12842},{0,125,65535},{0,105,39522},{65,127,9918},{51,127,3500},{47,127,853},{35,121,436},{124,0,16034},{14,127,6273},{0,125,53},{0,98,7229},{127,62,16034},{0,98,7229},{26,127,5881},{26,127,5881},{26,127,5881},{19,117,2888},{15,127,6040},{0,125,37},{0,125,37}, +{0,75,986},{0,81,7565},{0,70,3318},{47,127,853},{47,127,853},{47,127,853},{38,107,1},{91,0,4418},{0,125,37},{0,125,37},{0,75,986},{127,29,4418},{0,75,986},{127,62,3613},{76,127,1345},{64,127,0},{0,127,1},{127,62,3613},{127,95,3613},{0,127,1},{0,106,3613},{127,95,3613},{0,106,3613},{19,0,2888},{19,0,2888},{19,0,2888},{19,0,2888},{0,115,0},{0,115,0},{0,115,0},{0,56,1},{0,46,1096}, +{0,46,1096},{41,127,65535},{22,127,39522},{12,127,12842},{7,127,6030},{38,127,65535},{11,127,31469},{0,127,65},{0,118,10460},{0,125,65535},{0,108,37590},{68,127,8954},{57,127,3318},{52,127,986},{40,123,258},{127,2,14504},{22,127,5594},{2,127,37},{0,101,5881},{127,66,14504},{0,101,5881},{29,127,7229},{29,127,7229},{29,127,7229},{21,122,3614},{17,127,6824},{1,127,53},{1,127,53},{0,80,853},{0,87,8059},{0,76,3500},{52,127,986}, 
+{52,127,986},{52,127,986},{42,112,1},{98,0,4418},{2,127,37},{2,127,37},{0,80,853},{127,36,4418},{0,80,853},{127,69,2888},{81,127,1096},{70,127,1},{11,127,0},{127,69,2888},{126,99,2888},{11,127,0},{0,108,2888},{126,99,2888},{0,108,2888},{21,0,3613},{21,0,3613},{21,0,3613},{21,0,3613},{0,127,1},{0,127,1},{0,127,1},{0,63,0},{0,51,1345},{0,51,1345},{41,127,65535},{22,127,41362},{12,127,15210}, +{8,127,7309},{41,127,65535},{11,127,31389},{1,127,309},{0,118,8476},{0,125,65535},{0,108,36134},{71,127,8174},{60,127,3194},{55,127,1130},{45,123,137},{127,10,13235},{28,127,5018},{11,127,89},{0,101,4809},{126,70,13235},{0,101,4809},{32,127,8537},{32,127,8537},{32,127,8537},{23,127,4326},{20,127,7700},{4,127,249},{4,127,249},{0,86,725},{0,93,8531},{0,81,3674},{55,127,1130},{55,127,1130},{55,127,1130},{46,116,1},{104,0,4418}, +{11,127,89},{11,127,89},{0,86,725},{127,42,4418},{0,86,725},{127,75,2312},{87,127,872},{76,127,1},{24,127,0},{127,75,2312},{126,102,2312},{24,127,0},{0,110,2312},{126,102,2312},{0,110,2312},{23,0,4325},{23,0,4325},{23,0,4325},{23,0,4325},{3,127,61},{3,127,61},{3,127,61},{0,69,0},{0,54,1625},{0,54,1625},{44,127,65535},{22,127,43458},{15,127,17822},{8,127,8829},{41,127,65535},{14,127,31497},{2,127,789}, +{0,118,6748},{0,125,65535},{0,108,34934},{77,127,7454},{64,127,3090},{61,127,1282},{50,125,53},{127,19,12051},{34,127,4506},{16,127,185},{0,104,3833},{127,74,12051},{0,104,3833},{35,127,9957},{35,127,9957},{35,127,9957},{25,127,5150},{23,127,8712},{5,127,621},{5,127,621},{0,89,617},{0,96,9035},{0,84,3870},{61,127,1282},{61,127,1282},{61,127,1282},{50,120,1},{110,0,4418},{16,127,185},{16,127,185},{0,89,617},{126,48,4418}, 
+{0,89,617},{127,81,1800},{90,127,676},{82,127,0},{36,127,0},{127,81,1800},{126,105,1800},{36,127,0},{0,112,1800},{126,105,1800},{0,112,1800},{25,0,5101},{25,0,5101},{25,0,5101},{25,0,5101},{3,127,205},{3,127,205},{3,127,205},{0,75,0},{0,60,1921},{0,60,1921},{44,127,65535},{22,127,45810},{15,127,20542},{10,127,10546},{41,127,65535},{14,127,31833},{2,127,1509},{0,120,5233},{0,125,65535},{0,113,33812},{79,127,6740}, +{70,127,3018},{65,127,1445},{54,126,9},{127,27,10952},{40,127,4058},{25,127,289},{0,107,2969},{126,78,10952},{0,107,2969},{38,127,11489},{38,127,11489},{38,127,11489},{28,127,6114},{26,127,9860},{8,127,1157},{8,127,1157},{0,95,505},{0,102,9555},{0,87,4114},{65,127,1445},{65,127,1445},{65,127,1445},{54,124,1},{115,1,4418},{25,127,289},{25,127,289},{0,95,505},{126,54,4418},{0,95,505},{127,87,1352},{96,127,500},{88,127,0}, +{48,127,0},{127,87,1352},{126,108,1352},{48,127,0},{0,114,1352},{126,108,1352},{0,114,1352},{27,0,5941},{27,0,5941},{27,0,5941},{27,0,5941},{6,127,433},{6,127,433},{6,127,433},{0,80,1},{0,64,2248},{0,64,2248},{44,127,65535},{25,127,48594},{17,127,23861},{11,127,12725},{41,127,65535},{14,127,32517},{3,127,2601},{0,121,3706},{0,126,65535},{0,113,32750},{82,127,6098},{73,127,2966},{71,127,1649},{59,127,4},{127,36,9818}, +{46,127,3638},{34,127,445},{0,110,2137},{127,82,9818},{0,110,2137},{41,127,13349},{41,127,13349},{41,127,13349},{31,127,7380},{29,127,11310},{11,127,1973},{11,127,1973},{0,101,405},{0,108,10181},{0,93,4410},{71,127,1649},{71,127,1649},{71,127,1649},{59,127,4},{122,0,4418},{34,127,445},{34,127,445},{0,101,405},{126,61,4418},{0,101,405},{127,93,925},{102,127,337},{95,127,0},{62,127,0},{127,93,925},{126,111,925},{62,127,0}, 
+{0,116,925},{126,111,925},{0,116,925},{29,0,6964},{29,0,6964},{29,0,6964},{29,0,6964},{9,127,808},{9,127,808},{9,127,808},{0,87,0},{0,70,2645},{0,70,2645},{44,127,65535},{25,127,51330},{17,127,27013},{11,127,14917},{41,127,65535},{14,127,33397},{4,127,3805},{0,121,2618},{1,126,65535},{0,113,32078},{88,127,5594},{79,127,2891},{74,127,1825},{64,127,45},{127,43,8901},{54,127,3373},{40,127,605},{0,115,1538},{127,86,8901}, +{0,115,1538},{44,127,15117},{44,127,15117},{44,127,15117},{34,127,8664},{32,127,12746},{14,127,2881},{14,127,2881},{0,104,317},{0,113,10694},{0,96,4714},{74,127,1825},{74,127,1825},{74,127,1825},{64,127,45},{127,2,4418},{40,127,605},{40,127,605},{0,104,317},{127,66,4418},{0,104,317},{127,99,613},{105,127,221},{101,127,0},{73,127,0},{127,99,613},{126,114,613},{73,127,0},{0,118,613},{126,114,613},{0,118,613},{31,0,7940}, +{31,0,7940},{31,0,7940},{31,0,7940},{9,127,1224},{9,127,1224},{9,127,1224},{0,93,0},{0,76,3029},{0,76,3029},{49,127,65535},{25,127,54322},{17,127,30421},{13,127,17348},{44,127,65535},{14,127,34533},{5,127,5245},{0,123,1705},{2,127,65535},{0,113,31662},{91,127,5146},{82,127,2950},{79,127,1994},{67,127,145},{127,53,8069},{60,127,3125},{46,127,797},{0,117,1025},{126,91,8069},{0,117,1025},{49,127,16984},{49,127,16984},{49,127,16984}, +{37,127,10100},{35,127,14318},{16,127,3981},{16,127,3981},{0,110,245},{0,119,11302},{0,102,5034},{79,127,1994},{79,127,1994},{79,127,1994},{67,127,145},{127,14,4418},{46,127,797},{46,127,797},{0,110,245},{127,72,4418},{0,110,245},{127,105,365},{111,127,136},{107,127,0},{86,127,0},{127,105,365},{126,117,365},{86,127,0},{0,120,365},{126,117,365},{0,120,365},{33,0,8980},{33,0,8980},{33,0,8980},{33,0,8980},{12,127,1732}, 
+{12,127,1732},{12,127,1732},{0,99,0},{0,81,3385},{0,81,3385},{49,127,65535},{28,127,54246},{20,127,31809},{16,127,18396},{49,127,65535},{19,127,34118},{8,127,6285},{3,124,1078},{2,127,65535},{0,116,27154},{94,127,4770},{86,127,2986},{82,127,2210},{73,127,281},{127,60,7322},{67,127,2945},{54,127,1037},{0,118,614},{127,94,7322},{0,118,614},{52,127,17772},{52,127,17772},{52,127,17772},{40,127,10764},{38,127,15150},{22,127,4785},{22,127,4785}, +{1,113,218},{0,125,10994},{0,105,4594},{82,127,2210},{82,127,2210},{82,127,2210},{73,127,281},{127,27,4418},{54,127,1037},{54,127,1037},{0,114,193},{126,78,4418},{0,114,193},{127,111,181},{116,127,65},{113,127,0},{98,127,0},{127,111,181},{126,120,181},{98,127,0},{0,122,181},{126,120,181},{0,122,181},{37,0,9248},{37,0,9248},{37,0,9248},{37,0,9248},{17,127,2009},{17,127,2009},{17,127,2009},{3,104,0},{0,87,3177}, +{0,87,3177},{52,127,65535},{34,127,52995},{26,127,32522},{21,127,19126},{49,127,65535},{22,127,32935},{13,127,7330},{7,125,605},{2,127,64493},{0,116,21205},{97,127,4436},{91,127,3044},{88,127,2440},{79,127,505},{127,69,6584},{73,127,2811},{63,127,1313},{0,121,292},{126,99,6584},{0,121,292},{55,127,18275},{55,127,18275},{55,127,18275},{46,127,11259},{44,127,15797},{28,127,5606},{28,127,5606},{6,118,217},{0,125,10589},{0,113,3809},{88,127,2440}, +{88,127,2440},{88,127,2440},{79,127,505},{127,40,4418},{63,127,1313},{63,127,1313},{0,118,128},{126,85,4418},{0,118,128},{127,119,50},{122,127,18},{119,127,1},{111,127,0},{127,119,50},{127,123,50},{111,127,0},{0,124,52},{127,123,50},{0,124,52},{41,0,9250},{41,0,9250},{41,0,9250},{41,0,9250},{20,127,2225},{20,127,2225},{20,127,2225},{8,108,1},{0,93,2741},{0,93,2741},{55,127,65535},{37,127,52035},{29,127,33226}, 
+{25,127,19751},{52,127,65535},{28,127,31983},{18,127,8346},{11,126,349},{2,127,60061},{0,119,16605},{100,127,4216},{97,127,3140},{94,127,2664},{84,127,776},{127,76,6019},{76,127,2779},{70,127,1586},{0,124,116},{126,103,6019},{0,124,116},{61,127,18739},{61,127,18739},{61,127,18739},{51,127,11794},{49,127,16315},{31,127,6398},{31,127,6398},{10,122,217},{2,127,10565},{0,116,3213},{94,127,2664},{94,127,2664},{94,127,2664},{84,127,776},{127,53,4418}, +{70,127,1586},{70,127,1586},{0,124,80},{126,91,4418},{0,124,80},{127,125,2},{127,126,2},{125,127,1},{124,127,0},{127,125,2},{127,126,2},{124,127,0},{0,126,4},{127,126,2},{0,126,4},{45,0,9250},{45,0,9250},{45,0,9250},{45,0,9250},{26,127,2441},{26,127,2441},{26,127,2441},{12,112,1},{0,102,2377},{0,102,2377},{58,127,65535},{40,127,49743},{35,127,32390},{28,127,19671},{55,127,65535},{31,127,30383},{22,127,8522}, +{15,126,198},{2,127,55505},{0,122,12609},{103,127,3648},{100,127,2776},{97,127,2384},{87,127,740},{127,83,5163},{84,127,2468},{73,127,1450},{0,127,16},{126,106,5163},{0,127,16},{65,127,18185},{65,127,18185},{65,127,18185},{54,127,11714},{52,127,15699},{37,127,6486},{37,127,6486},{14,124,126},{2,127,9785},{0,119,2501},{97,127,2384},{97,127,2384},{97,127,2384},{87,127,740},{127,60,3872},{73,127,1450},{73,127,1450},{0,126,13},{127,94,3872}, +{0,126,13},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{49,0,9250},{49,0,9250},{49,0,9250},{49,0,9250},{29,127,2665},{29,127,2665},{29,127,2665},{16,116,1},{0,108,2041},{0,108,2041},{61,127,65535},{46,127,47079},{38,127,31046},{33,127,19370},{58,127,65535},{34,127,28647},{25,127,8458},{19,126,78},{2,127,51393},{0,122,9105},{106,127,3012}, 
+{100,127,2296},{100,127,1972},{90,127,612},{127,86,4267},{87,127,2028},{76,127,1210},{5,127,0},{127,107,4267},{5,127,0},{65,127,17289},{65,127,17289},{65,127,17289},{57,127,11462},{55,127,14739},{40,127,6374},{40,127,6374},{18,125,50},{2,127,8937},{0,122,1881},{100,127,1972},{100,127,1972},{100,127,1972},{90,127,612},{127,66,3200},{76,127,1210},{76,127,1210},{5,127,0},{127,97,3200},{5,127,0},{127,127,0},{127,127,0},{127,127,0}, +{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{53,0,9250},{53,0,9250},{53,0,9250},{53,0,9250},{35,127,2897},{35,127,2897},{35,127,2897},{20,120,1},{0,116,1693},{0,116,1693},{65,127,65535},{48,127,44233},{41,127,29640},{37,127,18961},{61,127,65535},{40,127,26823},{30,127,8416},{24,127,9},{8,127,47133},{0,122,6081},{109,127,2377},{103,127,1809},{103,127,1553},{96,127,481},{127,90,3361}, +{90,127,1593},{84,127,953},{19,127,0},{127,109,3361},{19,127,0},{71,127,16331},{71,127,16331},{71,127,16331},{63,127,11212},{61,127,13721},{46,127,6266},{46,127,6266},{24,126,8},{5,127,8245},{0,125,1397},{103,127,1553},{103,127,1553},{103,127,1553},{96,127,481},{127,72,2521},{84,127,953},{84,127,953},{19,127,0},{127,100,2521},{19,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0}, +{0,127,0},{127,127,0},{0,127,0},{58,0,9248},{58,0,9248},{58,0,9248},{58,0,9248},{41,127,3181},{41,127,3181},{41,127,3181},{24,125,0},{0,125,1381},{0,125,1381},{65,127,65535},{51,127,41789},{44,127,28480},{40,127,18673},{65,127,65535},{43,127,25251},{34,127,8410},{28,127,4},{14,127,43557},{0,125,4005},{111,127,1843},{106,127,1421},{106,127,1225},{99,127,373},{127,95,2646},{96,127,1241},{90,127,745},{31,127,0},{127,111,2646}, 
+{31,127,0},{74,127,15539},{74,127,15539},{74,127,15539},{64,127,10979},{65,127,12914},{51,127,6216},{51,127,6216},{28,127,4},{11,127,7709},{0,126,1217},{106,127,1225},{106,127,1225},{106,127,1225},{99,127,373},{127,78,1985},{90,127,745},{90,127,745},{31,127,0},{127,103,1985},{31,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{62,0,9248}, +{62,0,9248},{62,0,9248},{62,0,9248},{44,127,3425},{44,127,3425},{44,127,3425},{28,127,4},{0,125,1205},{0,125,1205},{68,127,65535},{54,127,39465},{49,127,27209},{46,127,18393},{65,127,64563},{46,127,23799},{38,127,8426},{32,127,64},{14,127,40437},{0,125,2677},{111,127,1411},{111,127,1070},{109,127,937},{102,127,281},{127,99,2017},{99,127,937},{93,127,565},{43,127,0},{126,114,2017},{43,127,0},{77,127,14795},{77,127,14795},{77,127,14795}, +{70,127,10779},{68,127,12146},{54,127,6176},{54,127,6176},{34,127,52},{14,127,7281},{2,127,1213},{109,127,937},{109,127,937},{109,127,937},{102,127,281},{127,84,1513},{93,127,565},{93,127,565},{43,127,0},{127,106,1513},{43,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{65,0,9250},{65,0,9250},{65,0,9250},{65,0,9250},{49,127,3656}, +{49,127,3656},{49,127,3656},{34,127,52},{2,127,1213},{2,127,1213},{71,127,63180},{60,127,37225},{52,127,26137},{48,127,18128},{68,127,59595},{51,127,22636},{42,127,8480},{37,127,164},{22,127,37455},{0,126,2073},{114,127,1019},{111,127,766},{111,127,666},{105,127,205},{127,102,1473},{102,127,681},{99,127,405},{56,127,0},{127,115,1473},{56,127,0},{79,127,14066},{79,127,14066},{79,127,14066},{73,127,10571},{71,127,11450},{59,127,6166},{59,127,6166}, 
+{37,127,148},{25,127,6914},{5,127,1413},{111,127,666},{111,127,666},{111,127,666},{105,127,205},{127,90,1105},{99,127,405},{99,127,405},{56,127,0},{127,109,1105},{56,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{69,0,9250},{69,0,9250},{69,0,9250},{69,0,9250},{52,127,3940},{52,127,3940},{52,127,3940},{37,127,148},{5,127,1413}, +{5,127,1413}, diff --git a/thirdparty/basisu/transcoder/basisu_transcoder_tables_dxt1_5.inc b/thirdparty/basisu/transcoder/basisu_transcoder_tables_dxt1_5.inc new file mode 100644 index 000000000..205758b3d --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_transcoder_tables_dxt1_5.inc @@ -0,0 +1,494 @@ +// Copyright (C) 2017-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+{0,2,18},{0,1,9},{0,1,0},{0,1,9},{0,1,40},{0,1,22},{0,1,13},{0,1,61},{0,1,47},{0,1,65},{0,2,18},{0,1,9},{0,1,0},{0,1,9},{0,1,40},{0,1,22},{0,1,13},{0,1,61},{1,0,40},{0,1,61},{0,1,0},{0,1,0},{0,1,0},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,1,0}, +{0,1,0},{0,1,0},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{1,0,18},{0,1,9},{0,1,0},{0,1,9},{1,0,18},{2,0,18},{0,1,9},{0,1,36},{2,0,18},{0,1,36},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,5,54},{0,3,40},{0,2,61}, +{0,2,36},{0,4,51},{0,2,37},{0,2,1},{0,2,52},{0,2,77},{0,1,73},{1,3,22},{1,2,13},{1,2,4},{1,2,13},{0,4,51},{1,2,34},{0,2,1},{0,2,52},{4,0,51},{0,2,52},{0,4,37},{0,4,37},{0,4,37},{0,2,36},{0,3,8},{0,2,1},{0,2,1},{0,1,5},{0,1,30},{0,1,9},{1,2,4},{1,2,4},{1,2,4},{1,1,8},{1,1,8}, +{0,2,1},{0,2,1},{0,1,5},{3,0,8},{0,1,5},{2,1,18},{0,3,4},{1,2,0},{0,2,0},{2,1,18},{5,0,18},{0,2,0},{0,2,36},{5,0,18},{0,2,36},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,2,1},{0,2,1},{0,2,1},{0,1,1},{0,1,5},{0,1,5},{1,6,54},{1,4,40},{1,3,61},{1,3,36},{1,5,51},{1,3,37},{1,3,1}, +{1,3,52},{0,3,72},{0,3,40},{2,4,22},{2,3,13},{2,3,4},{2,3,13},{1,5,51},{0,4,29},{1,3,1},{0,3,40},{7,0,51},{0,3,40},{1,5,37},{1,5,37},{1,5,37},{1,3,36},{1,4,8},{1,3,1},{1,3,1},{1,2,5},{0,3,8},{1,2,9},{2,3,4},{2,3,4},{2,3,4},{2,2,8},{3,0,8},{1,3,1},{1,3,1},{1,2,5},{6,0,8}, +{1,2,5},{3,2,18},{1,4,4},{2,3,0},{1,3,0},{3,2,18},{6,1,18},{1,3,0},{0,3,36},{6,1,18},{0,3,36},{1,0,36},{1,0,36},{1,0,36},{1,0,36},{1,3,1},{1,3,1},{1,3,1},{1,2,1},{0,3,4},{0,3,4},{2,7,54},{2,5,40},{2,4,62},{2,4,38},{2,6,51},{2,4,27},{2,4,3},{2,3,69},{0,5,60},{1,4,54},{3,5,22}, +{3,4,9},{3,4,6},{3,4,21},{5,0,51},{2,4,27},{2,4,3},{0,4,50},{10,0,51},{0,4,50},{2,6,37},{2,6,37},{2,6,37},{2,4,37},{2,5,8},{2,4,2},{2,4,2},{2,3,5},{1,4,8},{2,3,9},{3,4,5},{3,4,5},{3,4,5},{3,3,8},{4,1,8},{2,4,2},{2,4,2},{2,3,5},{9,0,8},{2,3,5},{3,5,18},{2,5,4},{3,4,2}, 
+{2,4,2},{3,5,18},{7,2,18},{2,4,2},{0,4,50},{7,2,18},{0,4,50},{2,0,36},{2,0,36},{2,0,36},{2,0,36},{2,4,1},{2,4,1},{2,4,1},{2,3,1},{1,4,4},{1,4,4},{3,8,68},{3,6,60},{4,5,68},{3,5,50},{3,7,53},{3,6,28},{3,5,5},{3,5,53},{3,5,68},{2,5,38},{4,6,22},{4,5,13},{4,5,4},{4,5,13},{6,1,52}, +{3,6,27},{3,5,4},{1,5,37},{13,0,52},{1,5,37},{3,7,50},{3,7,50},{3,7,50},{3,5,50},{3,6,11},{3,5,5},{3,5,5},{3,4,6},{2,5,11},{3,4,9},{4,5,4},{4,5,4},{4,5,4},{4,4,8},{4,4,8},{3,5,4},{3,5,4},{3,4,5},{12,0,8},{3,4,5},{7,0,18},{4,5,9},{4,5,0},{3,5,0},{7,0,18},{14,0,18},{3,5,0}, +{0,5,36},{14,0,18},{0,5,36},{3,0,50},{3,0,50},{3,0,50},{3,0,50},{3,6,2},{3,6,2},{3,6,2},{3,4,2},{2,5,2},{2,5,2},{4,9,54},{4,7,40},{4,6,61},{4,6,36},{4,8,51},{4,6,37},{4,6,1},{4,6,52},{2,7,68},{3,6,38},{5,7,22},{5,6,13},{5,6,4},{5,6,13},{8,0,51},{5,6,34},{4,6,1},{2,6,37},{16,0,51}, +{2,6,37},{4,8,37},{4,8,37},{4,8,37},{4,6,36},{4,7,8},{4,6,1},{4,6,1},{4,5,5},{3,6,11},{4,5,9},{5,6,4},{5,6,4},{5,6,4},{5,5,8},{5,5,8},{4,6,1},{4,6,1},{4,5,5},{15,0,8},{4,5,5},{8,1,18},{4,7,4},{5,6,0},{4,6,0},{8,1,18},{17,0,18},{4,6,0},{0,6,36},{17,0,18},{0,6,36},{4,0,36}, +{4,0,36},{4,0,36},{4,0,36},{4,6,1},{4,6,1},{4,6,1},{4,5,1},{3,6,2},{3,6,2},{5,10,54},{5,8,40},{5,7,61},{5,7,36},{5,9,51},{5,7,37},{5,7,1},{5,7,52},{3,8,60},{4,7,40},{6,8,22},{6,7,13},{6,7,4},{6,7,13},{9,1,51},{4,8,29},{5,7,1},{3,7,37},{19,0,51},{3,7,37},{5,9,37},{5,9,37},{5,9,37}, +{5,7,36},{5,8,8},{5,7,1},{5,7,1},{5,6,5},{4,7,8},{5,6,9},{6,7,4},{6,7,4},{6,7,4},{6,6,8},{9,0,8},{5,7,1},{5,7,1},{5,6,5},{18,0,8},{5,6,5},{10,0,18},{5,8,4},{6,7,0},{5,7,0},{10,0,18},{18,1,18},{5,7,0},{0,7,36},{18,1,18},{0,7,36},{5,0,36},{5,0,36},{5,0,36},{5,0,36},{5,7,1}, +{5,7,1},{5,7,1},{5,6,1},{4,7,4},{4,7,4},{6,11,54},{6,9,40},{6,8,62},{6,8,38},{6,10,51},{6,8,27},{6,8,3},{6,7,69},{4,9,60},{5,8,54},{7,9,22},{7,8,9},{7,8,6},{7,8,21},{11,0,51},{6,8,27},{6,8,3},{4,8,50},{22,0,51},{4,8,50},{6,10,37},{6,10,37},{6,10,37},{6,8,37},{6,9,8},{6,8,2},{6,8,2}, 
+{6,7,5},{5,8,8},{6,7,9},{7,8,5},{7,8,5},{7,8,5},{7,7,8},{10,1,8},{6,8,2},{6,8,2},{6,7,5},{21,0,8},{6,7,5},{11,1,18},{6,9,4},{7,8,2},{6,8,2},{11,1,18},{19,2,18},{6,8,2},{0,8,50},{19,2,18},{0,8,50},{6,0,36},{6,0,36},{6,0,36},{6,0,36},{6,8,1},{6,8,1},{6,8,1},{6,7,1},{5,8,4}, +{5,8,4},{7,12,68},{7,10,60},{8,9,68},{7,9,50},{7,11,53},{7,10,28},{7,9,5},{7,9,53},{7,9,68},{6,9,38},{8,10,22},{8,9,13},{8,9,4},{8,9,13},{12,1,52},{7,10,27},{7,9,4},{5,9,37},{25,0,52},{5,9,37},{7,11,50},{7,11,50},{7,11,50},{7,9,50},{7,10,11},{7,9,5},{7,9,5},{7,8,6},{6,9,11},{7,8,9},{8,9,4}, +{8,9,4},{8,9,4},{8,8,8},{12,0,8},{7,9,4},{7,9,4},{7,8,5},{24,0,8},{7,8,5},{13,0,18},{8,9,9},{8,9,0},{7,9,0},{13,0,18},{26,0,18},{7,9,0},{0,9,36},{26,0,18},{0,9,36},{7,0,50},{7,0,50},{7,0,50},{7,0,50},{7,10,2},{7,10,2},{7,10,2},{7,8,2},{6,9,2},{6,9,2},{8,13,54},{8,11,40},{8,10,61}, +{8,10,36},{8,12,51},{8,10,37},{8,10,1},{8,10,52},{6,11,68},{7,10,38},{9,11,22},{9,10,13},{9,10,4},{9,10,13},{12,4,51},{9,10,34},{8,10,1},{6,10,37},{28,0,51},{6,10,37},{8,12,37},{8,12,37},{8,12,37},{8,10,36},{8,11,8},{8,10,1},{8,10,1},{8,9,5},{7,10,11},{8,9,9},{9,10,4},{9,10,4},{9,10,4},{9,9,8},{13,1,8}, +{8,10,1},{8,10,1},{8,9,5},{27,0,8},{8,9,5},{14,1,18},{8,11,4},{9,10,0},{8,10,0},{14,1,18},{29,0,18},{8,10,0},{0,10,36},{29,0,18},{0,10,36},{8,0,36},{8,0,36},{8,0,36},{8,0,36},{8,10,1},{8,10,1},{8,10,1},{8,9,1},{7,10,2},{7,10,2},{9,14,54},{9,12,40},{9,11,61},{9,11,36},{9,13,51},{9,11,37},{9,11,1}, +{9,11,52},{7,12,60},{8,11,40},{10,12,22},{10,11,13},{10,11,4},{10,11,13},{13,5,51},{8,12,29},{9,11,1},{7,11,37},{31,0,51},{7,11,37},{9,13,37},{9,13,37},{9,13,37},{9,11,36},{9,12,8},{9,11,1},{9,11,1},{9,10,5},{8,11,8},{9,10,9},{10,11,4},{10,11,4},{10,11,4},{10,10,8},{15,0,8},{9,11,1},{9,11,1},{9,10,5},{30,0,8}, 
+{9,10,5},{15,2,18},{9,12,4},{10,11,0},{9,11,0},{15,2,18},{30,1,18},{9,11,0},{0,11,36},{30,1,18},{0,11,36},{9,0,36},{9,0,36},{9,0,36},{9,0,36},{9,11,1},{9,11,1},{9,11,1},{9,10,1},{8,11,4},{8,11,4},{10,15,54},{10,13,40},{10,12,62},{10,12,38},{10,14,51},{10,12,27},{10,12,3},{10,11,69},{8,13,60},{9,12,54},{11,13,22}, +{11,12,9},{11,12,6},{11,12,21},{17,0,51},{10,12,27},{10,12,3},{8,12,50},{30,2,51},{8,12,50},{10,14,37},{10,14,37},{10,14,37},{10,12,37},{10,13,8},{10,12,2},{10,12,2},{10,11,5},{9,12,8},{10,11,9},{11,12,5},{11,12,5},{11,12,5},{11,11,8},{16,1,8},{10,12,2},{10,12,2},{10,11,5},{31,1,8},{10,11,5},{15,5,18},{10,13,4},{11,12,2}, +{10,12,2},{15,5,18},{31,2,18},{10,12,2},{0,12,50},{31,2,18},{0,12,50},{10,0,36},{10,0,36},{10,0,36},{10,0,36},{10,12,1},{10,12,1},{10,12,1},{10,11,1},{9,12,4},{9,12,4},{11,16,68},{11,14,60},{12,13,68},{11,13,50},{11,15,53},{11,14,28},{11,13,5},{11,13,53},{11,13,68},{10,13,38},{12,14,22},{12,13,13},{12,13,4},{12,13,13},{18,1,52}, +{11,14,27},{11,13,4},{9,13,37},{31,3,52},{9,13,37},{11,15,50},{11,15,50},{11,15,50},{11,13,50},{11,14,11},{11,13,5},{11,13,5},{11,12,6},{10,13,11},{11,12,9},{12,13,4},{12,13,4},{12,13,4},{12,12,8},{16,4,8},{11,13,4},{11,13,4},{11,12,5},{28,4,8},{11,12,5},{19,0,18},{12,13,9},{12,13,0},{11,13,0},{19,0,18},{30,4,18},{11,13,0}, +{0,13,36},{30,4,18},{0,13,36},{11,0,50},{11,0,50},{11,0,50},{11,0,50},{11,14,2},{11,14,2},{11,14,2},{11,12,2},{10,13,2},{10,13,2},{12,17,54},{12,15,40},{12,14,61},{12,14,36},{12,16,51},{12,14,37},{12,14,1},{12,14,52},{10,15,68},{11,14,38},{13,15,22},{13,14,13},{13,14,4},{13,14,13},{20,0,51},{13,14,34},{12,14,1},{10,14,37},{24,8,51}, +{10,14,37},{12,16,37},{12,16,37},{12,16,37},{12,14,36},{12,15,8},{12,14,1},{12,14,1},{12,13,5},{11,14,11},{12,13,9},{13,14,4},{13,14,4},{13,14,4},{13,13,8},{17,5,8},{12,14,1},{12,14,1},{12,13,5},{31,4,8},{12,13,5},{20,1,18},{12,15,4},{13,14,0},{12,14,0},{20,1,18},{31,5,18},{12,14,0},{0,14,36},{31,5,18},{0,14,36},{12,0,36}, 
+{12,0,36},{12,0,36},{12,0,36},{12,14,1},{12,14,1},{12,14,1},{12,13,1},{11,14,2},{11,14,2},{13,18,54},{13,16,40},{13,15,61},{13,15,36},{13,17,51},{13,15,37},{13,15,1},{13,15,52},{11,16,60},{12,15,40},{14,16,22},{14,15,13},{14,15,4},{14,15,13},{21,1,51},{12,16,29},{13,15,1},{11,15,37},{27,8,51},{11,15,37},{13,17,37},{13,17,37},{13,17,37}, +{13,15,36},{13,16,8},{13,15,1},{13,15,1},{13,14,5},{12,15,8},{13,14,9},{14,15,4},{14,15,4},{14,15,4},{14,14,8},{21,0,8},{13,15,1},{13,15,1},{13,14,5},{30,6,8},{13,14,5},{22,0,18},{13,16,4},{14,15,0},{13,15,0},{22,0,18},{30,7,18},{13,15,0},{0,15,36},{30,7,18},{0,15,36},{13,0,36},{13,0,36},{13,0,36},{13,0,36},{13,15,1}, +{13,15,1},{13,15,1},{13,14,1},{12,15,4},{12,15,4},{14,19,54},{14,17,40},{14,16,62},{14,16,38},{14,18,51},{14,16,27},{14,16,3},{14,15,69},{12,17,60},{13,16,54},{15,17,22},{15,16,9},{15,16,6},{15,16,21},{23,0,51},{14,16,27},{14,16,3},{12,16,50},{30,8,51},{12,16,50},{14,18,37},{14,18,37},{14,18,37},{14,16,37},{14,17,8},{14,16,2},{14,16,2}, +{14,15,5},{13,16,8},{14,15,9},{15,16,5},{15,16,5},{15,16,5},{15,15,8},{22,1,8},{14,16,2},{14,16,2},{14,15,5},{31,7,8},{14,15,5},{23,1,18},{14,17,4},{15,16,2},{14,16,2},{23,1,18},{27,10,18},{14,16,2},{0,16,50},{27,10,18},{0,16,50},{14,0,36},{14,0,36},{14,0,36},{14,0,36},{14,16,1},{14,16,1},{14,16,1},{14,15,1},{13,16,4}, +{13,16,4},{15,20,68},{15,18,60},{16,17,68},{15,17,50},{15,19,53},{15,18,28},{15,17,5},{15,17,53},{15,17,68},{14,17,38},{16,18,22},{16,17,13},{16,17,4},{16,17,13},{24,1,52},{15,18,27},{15,17,4},{13,17,37},{31,9,52},{13,17,37},{15,19,50},{15,19,50},{15,19,50},{15,17,50},{15,18,11},{15,17,5},{15,17,5},{15,16,6},{14,17,11},{15,16,9},{16,17,4}, +{16,17,4},{16,17,4},{16,16,8},{24,0,8},{15,17,4},{15,17,4},{15,16,5},{24,12,8},{15,16,5},{25,0,18},{16,17,9},{16,17,0},{15,17,0},{25,0,18},{30,10,18},{15,17,0},{0,17,36},{30,10,18},{0,17,36},{15,0,50},{15,0,50},{15,0,50},{15,0,50},{15,18,2},{15,18,2},{15,18,2},{15,16,2},{14,17,2},{14,17,2},{16,21,54},{16,19,40},{16,18,61}, 
+{16,18,36},{16,20,51},{16,18,37},{16,18,1},{16,18,52},{14,19,68},{15,18,38},{17,19,22},{17,18,13},{17,18,4},{17,18,13},{24,4,51},{17,18,34},{16,18,1},{14,18,37},{28,12,51},{14,18,37},{16,20,37},{16,20,37},{16,20,37},{16,18,36},{16,19,8},{16,18,1},{16,18,1},{16,17,5},{15,18,11},{16,17,9},{17,18,4},{17,18,4},{17,18,4},{17,17,8},{25,1,8}, +{16,18,1},{16,18,1},{16,17,5},{27,12,8},{16,17,5},{26,1,18},{16,19,4},{17,18,0},{16,18,0},{26,1,18},{31,11,18},{16,18,0},{0,18,36},{31,11,18},{0,18,36},{16,0,36},{16,0,36},{16,0,36},{16,0,36},{16,18,1},{16,18,1},{16,18,1},{16,17,1},{15,18,2},{15,18,2},{17,22,54},{17,20,40},{17,19,61},{17,19,36},{17,21,51},{17,19,37},{17,19,1}, +{17,19,52},{15,20,60},{16,19,40},{18,20,22},{18,19,13},{18,19,4},{18,19,13},{25,5,51},{16,20,29},{17,19,1},{15,19,37},{31,12,51},{15,19,37},{17,21,37},{17,21,37},{17,21,37},{17,19,36},{17,20,8},{17,19,1},{17,19,1},{17,18,5},{16,19,8},{17,18,9},{18,19,4},{18,19,4},{18,19,4},{18,18,8},{27,0,8},{17,19,1},{17,19,1},{17,18,5},{30,12,8}, +{17,18,5},{27,2,18},{17,20,4},{18,19,0},{17,19,0},{27,2,18},{30,13,18},{17,19,0},{0,19,36},{30,13,18},{0,19,36},{17,0,36},{17,0,36},{17,0,36},{17,0,36},{17,19,1},{17,19,1},{17,19,1},{17,18,1},{16,19,4},{16,19,4},{18,23,54},{18,21,40},{18,20,62},{18,20,38},{18,22,51},{18,20,27},{18,20,3},{18,19,69},{16,21,60},{17,20,54},{19,21,22}, +{19,20,9},{19,20,6},{19,20,21},{29,0,51},{18,20,27},{18,20,3},{16,20,50},{30,14,51},{16,20,50},{18,22,37},{18,22,37},{18,22,37},{18,20,37},{18,21,8},{18,20,2},{18,20,2},{18,19,5},{17,20,8},{18,19,9},{19,20,5},{19,20,5},{19,20,5},{19,19,8},{28,1,8},{18,20,2},{18,20,2},{18,19,5},{31,13,8},{18,19,5},{27,5,18},{18,21,4},{19,20,2}, 
+{18,20,2},{27,5,18},{31,14,18},{18,20,2},{0,20,50},{31,14,18},{0,20,50},{18,0,36},{18,0,36},{18,0,36},{18,0,36},{18,20,1},{18,20,1},{18,20,1},{18,19,1},{17,20,4},{17,20,4},{19,24,68},{19,22,60},{20,21,68},{19,21,50},{19,23,53},{19,22,28},{19,21,5},{19,21,53},{19,21,68},{18,21,38},{20,22,22},{20,21,13},{20,21,4},{20,21,13},{30,1,52}, +{19,22,27},{19,21,4},{17,21,37},{31,15,52},{17,21,37},{19,23,50},{19,23,50},{19,23,50},{19,21,50},{19,22,11},{19,21,5},{19,21,5},{19,20,6},{18,21,11},{19,20,9},{20,21,4},{20,21,4},{20,21,4},{20,20,8},{28,4,8},{19,21,4},{19,21,4},{19,20,5},{28,16,8},{19,20,5},{31,0,18},{20,21,9},{20,21,0},{19,21,0},{31,0,18},{30,16,18},{19,21,0}, +{0,21,36},{30,16,18},{0,21,36},{19,0,50},{19,0,50},{19,0,50},{19,0,50},{19,22,2},{19,22,2},{19,22,2},{19,20,2},{18,21,2},{18,21,2},{20,25,54},{20,23,40},{20,22,61},{20,22,36},{20,24,51},{20,22,37},{20,22,1},{20,22,52},{18,23,68},{19,22,38},{21,23,22},{21,22,13},{21,22,4},{21,22,13},{28,8,51},{21,22,34},{20,22,1},{18,22,37},{24,20,51}, +{18,22,37},{20,24,37},{20,24,37},{20,24,37},{20,22,36},{20,23,8},{20,22,1},{20,22,1},{20,21,5},{19,22,11},{20,21,9},{21,22,4},{21,22,4},{21,22,4},{21,21,8},{29,5,8},{20,22,1},{20,22,1},{20,21,5},{31,16,8},{20,21,5},{31,3,18},{20,23,4},{21,22,0},{20,22,0},{31,3,18},{31,17,18},{20,22,0},{0,22,36},{31,17,18},{0,22,36},{20,0,36}, +{20,0,36},{20,0,36},{20,0,36},{20,22,1},{20,22,1},{20,22,1},{20,21,1},{19,22,2},{19,22,2},{21,26,54},{21,24,40},{21,23,61},{21,23,36},{21,25,51},{21,23,37},{21,23,1},{21,23,52},{19,24,60},{20,23,40},{22,24,22},{22,23,13},{22,23,4},{22,23,13},{29,9,51},{20,24,29},{21,23,1},{19,23,37},{27,20,51},{19,23,37},{21,25,37},{21,25,37},{21,25,37}, +{21,23,36},{21,24,8},{21,23,1},{21,23,1},{21,22,5},{20,23,8},{21,22,9},{22,23,4},{22,23,4},{22,23,4},{22,22,8},{31,4,8},{21,23,1},{21,23,1},{21,22,5},{30,18,8},{21,22,5},{31,6,18},{21,24,4},{22,23,0},{21,23,0},{31,6,18},{30,19,18},{21,23,0},{0,23,36},{30,19,18},{0,23,36},{21,0,36},{21,0,36},{21,0,36},{21,0,36},{21,23,1}, 
+{21,23,1},{21,23,1},{21,22,1},{20,23,4},{20,23,4},{22,27,54},{22,25,40},{22,24,62},{22,24,38},{22,26,51},{22,24,27},{22,24,3},{22,23,69},{20,25,60},{21,24,54},{23,25,22},{23,24,9},{23,24,6},{23,24,21},{31,8,51},{22,24,27},{22,24,3},{20,24,50},{30,20,51},{20,24,50},{22,26,37},{22,26,37},{22,26,37},{22,24,37},{22,25,8},{22,24,2},{22,24,2}, +{22,23,5},{21,24,8},{22,23,9},{23,24,5},{23,24,5},{23,24,5},{23,23,8},{31,7,8},{22,24,2},{22,24,2},{22,23,5},{31,19,8},{22,23,5},{31,9,18},{22,25,4},{23,24,2},{22,24,2},{31,9,18},{27,22,18},{22,24,2},{0,24,50},{27,22,18},{0,24,50},{22,0,36},{22,0,36},{22,0,36},{22,0,36},{22,24,1},{22,24,1},{22,24,1},{22,23,1},{21,24,4}, +{21,24,4},{23,28,68},{23,26,60},{24,25,68},{23,25,50},{23,27,53},{23,26,28},{23,25,5},{23,25,53},{23,25,68},{22,25,38},{24,26,22},{24,25,13},{24,25,4},{24,25,13},{31,11,52},{23,26,27},{23,25,4},{21,25,37},{31,21,52},{21,25,37},{23,27,50},{23,27,50},{23,27,50},{23,25,50},{23,26,11},{23,25,5},{23,25,5},{23,24,6},{22,25,11},{23,24,9},{24,25,4}, +{24,25,4},{24,25,4},{24,24,8},{28,16,8},{23,25,4},{23,25,4},{23,24,5},{24,24,8},{23,24,5},{31,12,18},{24,25,9},{24,25,0},{23,25,0},{31,12,18},{30,22,18},{23,25,0},{0,25,36},{30,22,18},{0,25,36},{23,0,50},{23,0,50},{23,0,50},{23,0,50},{23,26,2},{23,26,2},{23,26,2},{23,24,2},{22,25,2},{22,25,2},{24,29,54},{24,27,40},{24,26,61}, +{24,26,36},{24,28,51},{24,26,37},{24,26,1},{24,26,52},{22,27,68},{23,26,38},{25,27,22},{25,26,13},{25,26,4},{25,26,13},{28,20,51},{25,26,34},{24,26,1},{22,26,37},{28,24,51},{22,26,37},{24,28,37},{24,28,37},{24,28,37},{24,26,36},{24,27,8},{24,26,1},{24,26,1},{24,25,5},{23,26,11},{24,25,9},{25,26,4},{25,26,4},{25,26,4},{25,25,8},{29,17,8}, 
+{24,26,1},{24,26,1},{24,25,5},{27,24,8},{24,25,5},{31,15,18},{24,27,4},{25,26,0},{24,26,0},{31,15,18},{31,23,18},{24,26,0},{0,26,36},{31,23,18},{0,26,36},{24,0,36},{24,0,36},{24,0,36},{24,0,36},{24,26,1},{24,26,1},{24,26,1},{24,25,1},{23,26,2},{23,26,2},{25,30,54},{25,28,40},{25,27,61},{25,27,36},{25,29,51},{25,27,37},{25,27,1}, +{25,27,52},{23,28,60},{24,27,40},{26,28,22},{26,27,13},{26,27,4},{26,27,13},{29,21,51},{24,28,29},{25,27,1},{23,27,37},{31,24,51},{23,27,37},{25,29,37},{25,29,37},{25,29,37},{25,27,36},{25,28,8},{25,27,1},{25,27,1},{25,26,5},{24,27,8},{25,26,9},{26,27,4},{26,27,4},{26,27,4},{26,26,8},{31,16,8},{25,27,1},{25,27,1},{25,26,5},{30,24,8}, +{25,26,5},{31,18,18},{25,28,4},{26,27,0},{25,27,0},{31,18,18},{30,25,18},{25,27,0},{0,27,36},{30,25,18},{0,27,36},{25,0,36},{25,0,36},{25,0,36},{25,0,36},{25,27,1},{25,27,1},{25,27,1},{25,26,1},{24,27,4},{24,27,4},{26,31,54},{26,29,40},{26,28,62},{26,28,38},{26,30,51},{26,28,27},{26,28,3},{26,27,69},{24,29,60},{25,28,54},{27,29,22}, +{27,28,9},{27,28,6},{27,28,21},{31,20,51},{26,28,27},{26,28,3},{24,28,50},{30,26,51},{24,28,50},{26,30,37},{26,30,37},{26,30,37},{26,28,37},{26,29,8},{26,28,2},{26,28,2},{26,27,5},{25,28,8},{26,27,9},{27,28,5},{27,28,5},{27,28,5},{27,27,8},{31,19,8},{26,28,2},{26,28,2},{26,27,5},{31,25,8},{26,27,5},{31,21,18},{26,29,4},{27,28,2}, +{26,28,2},{31,21,18},{31,26,18},{26,28,2},{0,28,50},{31,26,18},{0,28,50},{26,0,36},{26,0,36},{26,0,36},{26,0,36},{26,28,1},{26,28,1},{26,28,1},{26,27,1},{25,28,4},{25,28,4},{28,30,86},{27,30,60},{28,29,68},{27,29,50},{27,31,53},{27,30,28},{27,29,5},{27,29,53},{27,29,68},{26,29,38},{28,30,22},{28,29,13},{28,29,4},{28,29,13},{31,23,52}, 
+{27,30,27},{27,29,4},{25,29,37},{31,27,52},{25,29,37},{27,31,50},{27,31,50},{27,31,50},{27,29,50},{27,30,11},{27,29,5},{27,29,5},{27,28,6},{26,29,11},{27,28,9},{28,29,4},{28,29,4},{28,29,4},{28,28,8},{28,28,8},{27,29,4},{27,29,4},{27,28,5},{28,28,8},{27,28,5},{31,24,18},{28,29,9},{28,29,0},{27,29,0},{31,24,18},{30,28,18},{27,29,0}, +{0,29,36},{30,28,18},{0,29,36},{27,0,50},{27,0,50},{27,0,50},{27,0,50},{27,30,2},{27,30,2},{27,30,2},{27,28,2},{26,29,2},{26,29,2},{29,31,86},{28,31,40},{28,30,61},{28,30,36},{28,31,72},{28,30,37},{28,30,1},{28,30,52},{26,31,68},{27,30,38},{29,31,22},{29,30,13},{29,30,4},{29,30,13},{31,26,52},{29,30,34},{28,30,1},{26,30,37},{30,29,52}, +{26,30,37},{28,31,40},{28,31,40},{28,31,40},{28,30,36},{28,31,8},{28,30,1},{28,30,1},{28,29,5},{27,30,11},{28,29,9},{29,30,4},{29,30,4},{29,30,4},{29,29,8},{29,29,8},{28,30,1},{28,30,1},{28,29,5},{31,28,8},{28,29,5},{31,27,18},{28,31,4},{29,30,0},{28,30,0},{31,27,18},{31,29,18},{28,30,0},{0,30,36},{31,29,18},{0,30,36},{28,0,36}, +{28,0,36},{28,0,36},{28,0,36},{28,30,1},{28,30,1},{28,30,1},{28,29,1},{27,30,2},{27,30,2},{30,31,104},{30,31,77},{29,31,61},{29,31,36},{30,31,116},{29,31,37},{29,31,1},{29,31,52},{28,31,72},{28,31,40},{30,31,40},{30,31,13},{30,31,4},{30,31,13},{31,29,52},{30,31,34},{29,31,1},{27,31,37},{31,30,52},{27,31,37},{29,31,61},{29,31,61},{29,31,61}, +{29,31,36},{29,31,26},{29,31,1},{29,31,1},{29,30,5},{28,31,8},{29,30,9},{30,31,4},{30,31,4},{30,31,4},{30,30,8},{31,28,8},{29,31,1},{29,31,1},{29,30,5},{30,30,8},{29,30,5},{31,30,18},{30,31,9},{30,31,0},{29,31,0},{31,30,18},{30,31,18},{29,31,0},{0,31,36},{30,31,18},{0,31,36},{29,0,36},{29,0,36},{29,0,36},{29,0,36},{29,31,1}, 
+{29,31,1},{29,31,1},{29,30,1},{28,31,4},{28,31,4},{31,31,68},{31,31,68},{31,31,68},{30,31,45},{31,31,68},{30,31,34},{30,31,25},{30,31,1},{30,31,23},{30,31,5},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{30,31,1},{31,31,4},{30,31,1},{31,31,68},{31,31,68},{31,31,68},{30,31,45},{30,31,52},{30,31,25},{30,31,25}, +{30,31,1},{30,31,14},{30,31,5},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{31,31,4},{30,31,1},{31,31,4},{30,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{30,0,36},{30,0,36},{30,0,36},{30,0,36},{30,31,16},{30,31,16},{30,31,16},{30,31,1},{30,31,5}, +{30,31,5},{0,4,72},{0,3,10},{0,2,1},{0,2,26},{0,3,154},{0,2,99},{0,2,51},{0,1,115},{0,1,173},{0,1,119},{0,4,72},{0,3,10},{0,2,1},{0,2,26},{1,1,154},{0,2,99},{0,2,51},{0,1,115},{3,0,154},{0,1,115},{0,2,0},{0,2,0},{0,2,0},{0,1,0},{0,1,13},{0,1,4},{0,1,4},{0,0,25},{0,0,25},{0,0,25},{0,2,0}, +{0,2,0},{0,2,0},{0,1,0},{0,1,13},{0,1,4},{0,1,4},{0,0,25},{1,0,13},{0,0,25},{0,4,72},{0,3,10},{0,2,1},{0,2,26},{0,4,72},{4,0,72},{0,2,26},{0,1,90},{4,0,72},{0,1,90},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,7,81},{0,5,13},{0,3,35}, +{0,3,19},{0,5,244},{0,3,115},{0,3,35},{0,2,139},{0,2,308},{0,2,164},{0,7,81},{0,5,13},{1,3,26},{0,3,19},{2,1,244},{0,3,115},{0,3,35},{0,2,139},{5,0,244},{0,2,139},{0,5,9},{0,5,9},{0,5,9},{0,2,18},{0,3,50},{0,2,13},{0,2,13},{0,1,29},{0,1,72},{0,1,33},{0,5,9},{0,5,9},{0,5,9},{0,2,18},{1,1,50}, +{0,2,13},{0,2,13},{0,1,29},{3,0,50},{0,1,29},{1,5,72},{0,5,4},{1,3,1},{0,3,10},{1,5,72},{7,0,72},{0,3,10},{0,2,90},{7,0,72},{0,2,90},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,1,1},{0,1,1},{0,1,1},{0,1,4},{0,1,8},{0,1,8},{1,8,145},{1,6,77},{1,4,98},{1,4,82},{0,8,243},{0,5,76},{0,4,1}, 
+{0,3,115},{0,4,364},{0,3,179},{1,8,81},{1,6,13},{2,4,25},{1,4,18},{4,0,243},{0,5,76},{0,4,1},{0,3,115},{8,0,243},{0,3,115},{1,6,73},{1,6,73},{1,6,73},{1,3,82},{0,6,50},{0,4,1},{0,4,1},{0,2,26},{0,3,114},{0,2,51},{1,6,9},{1,6,9},{1,6,9},{1,3,18},{3,0,50},{0,4,1},{0,4,1},{0,2,26},{6,0,50}, +{0,2,26},{5,0,72},{1,6,4},{2,4,0},{0,4,1},{5,0,72},{10,0,72},{0,4,1},{0,3,90},{10,0,72},{0,3,90},{1,0,73},{1,0,73},{1,0,73},{1,0,73},{0,4,0},{0,4,0},{0,4,0},{0,2,1},{0,2,26},{0,2,26},{2,9,162},{2,7,94},{2,5,115},{2,5,99},{1,9,244},{1,6,77},{1,5,2},{1,4,106},{0,5,300},{0,4,105},{2,9,81}, +{2,7,13},{3,5,25},{2,5,18},{5,1,243},{1,6,76},{1,5,1},{0,4,89},{11,0,243},{0,4,89},{2,7,90},{2,7,90},{2,7,90},{2,4,94},{1,7,51},{1,5,2},{1,5,2},{1,3,27},{0,4,74},{0,3,35},{2,7,9},{2,7,9},{2,7,9},{2,4,13},{4,1,50},{1,5,1},{1,5,1},{0,4,25},{9,0,50},{0,4,25},{6,1,72},{2,7,4},{3,5,0}, +{1,5,1},{6,1,72},{13,0,72},{1,5,1},{0,4,80},{13,0,72},{0,4,80},{2,0,90},{2,0,90},{2,0,90},{2,0,90},{1,5,1},{1,5,1},{1,5,1},{1,3,2},{0,3,10},{0,3,10},{3,10,154},{3,8,84},{3,6,117},{3,6,85},{2,10,244},{2,7,75},{2,6,2},{2,5,106},{0,7,253},{0,5,100},{3,10,90},{3,8,20},{4,6,26},{3,6,21},{7,0,244}, +{2,7,75},{2,6,2},{0,5,91},{14,0,244},{0,5,91},{3,8,80},{3,8,80},{3,8,80},{3,6,84},{2,8,52},{2,6,1},{2,6,1},{2,5,25},{0,6,52},{0,5,19},{3,8,16},{3,8,16},{3,8,16},{3,6,20},{4,4,50},{2,6,1},{2,6,1},{0,5,10},{12,0,50},{0,5,10},{8,0,72},{3,8,4},{4,6,1},{2,6,1},{8,0,72},{16,0,72},{2,6,1}, +{0,5,90},{16,0,72},{0,5,90},{3,0,80},{3,0,80},{3,0,80},{3,0,80},{2,6,1},{2,6,1},{2,6,1},{2,4,1},{0,5,10},{0,5,10},{4,11,162},{4,9,94},{4,7,116},{4,7,100},{3,11,244},{3,8,76},{3,7,2},{3,6,106},{1,8,253},{1,6,100},{4,11,81},{4,9,13},{5,7,26},{4,7,19},{8,1,244},{3,8,76},{3,7,2},{1,6,91},{17,0,244}, 
+{1,6,91},{4,9,90},{4,9,90},{4,9,90},{4,6,99},{3,9,52},{3,7,1},{3,7,1},{3,6,25},{1,7,52},{1,6,19},{4,9,9},{4,9,9},{4,9,9},{4,6,18},{5,5,50},{3,7,1},{3,7,1},{1,6,10},{15,0,50},{1,6,10},{9,1,72},{4,9,4},{5,7,1},{3,7,1},{9,1,72},{19,0,72},{3,7,1},{0,6,90},{19,0,72},{0,6,90},{4,0,90}, +{4,0,90},{4,0,90},{4,0,90},{3,7,1},{3,7,1},{3,7,1},{3,5,1},{1,6,10},{1,6,10},{5,12,162},{5,10,94},{5,8,115},{5,8,99},{4,12,244},{4,9,77},{4,8,2},{4,7,116},{2,9,253},{2,7,100},{5,12,81},{5,10,13},{6,8,25},{5,8,18},{8,4,243},{4,9,76},{4,8,1},{2,7,91},{20,0,243},{2,7,91},{5,10,90},{5,10,90},{5,10,90}, +{5,7,99},{4,10,51},{4,8,2},{4,8,2},{4,6,27},{2,8,50},{2,7,19},{5,10,9},{5,10,9},{5,10,9},{5,7,18},{9,0,50},{4,8,1},{4,8,1},{2,7,10},{18,0,50},{2,7,10},{11,0,72},{5,10,4},{6,8,0},{4,8,1},{11,0,72},{22,0,72},{4,8,1},{0,7,90},{22,0,72},{0,7,90},{5,0,90},{5,0,90},{5,0,90},{5,0,90},{4,8,1}, +{4,8,1},{4,8,1},{4,6,2},{2,7,10},{2,7,10},{6,13,162},{6,11,94},{6,9,115},{6,9,99},{5,13,244},{5,10,77},{5,9,2},{5,8,106},{3,10,253},{3,8,84},{6,13,81},{6,11,13},{7,9,25},{6,9,18},{9,5,243},{5,10,76},{5,9,1},{3,8,80},{23,0,243},{3,8,80},{6,11,90},{6,11,90},{6,11,90},{6,8,94},{5,11,51},{5,9,2},{5,9,2}, +{5,7,27},{3,9,50},{3,8,20},{6,11,9},{6,11,9},{6,11,9},{6,8,13},{10,1,50},{5,9,1},{5,9,1},{3,8,16},{21,0,50},{3,8,16},{12,1,72},{6,11,4},{7,9,0},{5,9,1},{12,1,72},{25,0,72},{5,9,1},{0,8,80},{25,0,72},{0,8,80},{6,0,90},{6,0,90},{6,0,90},{6,0,90},{5,9,1},{5,9,1},{5,9,1},{5,7,2},{3,8,4}, +{3,8,4},{7,14,154},{7,12,84},{7,10,117},{7,10,85},{6,14,244},{6,11,75},{6,10,2},{6,9,106},{4,11,253},{4,9,100},{7,14,90},{7,12,20},{8,10,26},{7,10,21},{13,0,244},{6,11,75},{6,10,2},{4,9,91},{26,0,244},{4,9,91},{7,12,80},{7,12,80},{7,12,80},{7,10,84},{6,12,52},{6,10,1},{6,10,1},{6,9,25},{4,10,52},{4,9,19},{7,12,16}, 
+{7,12,16},{7,12,16},{7,10,20},{12,0,50},{6,10,1},{6,10,1},{4,9,10},{24,0,50},{4,9,10},{12,4,72},{7,12,4},{8,10,1},{6,10,1},{12,4,72},{28,0,72},{6,10,1},{0,9,90},{28,0,72},{0,9,90},{7,0,80},{7,0,80},{7,0,80},{7,0,80},{6,10,1},{6,10,1},{6,10,1},{6,8,1},{4,9,10},{4,9,10},{8,15,162},{8,13,94},{8,11,116}, +{8,11,100},{7,15,244},{7,12,76},{7,11,2},{7,10,106},{5,12,253},{5,10,100},{8,15,81},{8,13,13},{9,11,26},{8,11,19},{14,1,244},{7,12,76},{7,11,2},{5,10,91},{29,0,244},{5,10,91},{8,13,90},{8,13,90},{8,13,90},{8,10,99},{7,13,52},{7,11,1},{7,11,1},{7,10,25},{5,11,52},{5,10,19},{8,13,9},{8,13,9},{8,13,9},{8,10,18},{13,1,50}, +{7,11,1},{7,11,1},{5,10,10},{27,0,50},{5,10,10},{13,5,72},{8,13,4},{9,11,1},{7,11,1},{13,5,72},{31,0,72},{7,11,1},{0,10,90},{31,0,72},{0,10,90},{8,0,90},{8,0,90},{8,0,90},{8,0,90},{7,11,1},{7,11,1},{7,11,1},{7,9,1},{5,10,10},{5,10,10},{9,16,162},{9,14,94},{9,12,115},{9,12,99},{8,16,244},{8,13,77},{8,12,2}, +{8,11,116},{6,13,253},{6,11,100},{9,16,81},{9,14,13},{10,12,25},{9,12,18},{16,0,243},{8,13,76},{8,12,1},{6,11,91},{24,4,243},{6,11,91},{9,14,90},{9,14,90},{9,14,90},{9,11,99},{8,14,51},{8,12,2},{8,12,2},{8,10,27},{6,12,50},{6,11,19},{9,14,9},{9,14,9},{9,14,9},{9,11,18},{15,0,50},{8,12,1},{8,12,1},{6,11,10},{30,0,50}, +{6,11,10},{17,0,72},{9,14,4},{10,12,0},{8,12,1},{17,0,72},{30,2,72},{8,12,1},{0,11,90},{30,2,72},{0,11,90},{9,0,90},{9,0,90},{9,0,90},{9,0,90},{8,12,1},{8,12,1},{8,12,1},{8,10,2},{6,11,10},{6,11,10},{10,17,162},{10,15,94},{10,13,115},{10,13,99},{9,17,244},{9,14,77},{9,13,2},{9,12,106},{7,14,253},{7,12,84},{10,17,81}, +{10,15,13},{11,13,25},{10,13,18},{17,1,243},{9,14,76},{9,13,1},{7,12,80},{27,4,243},{7,12,80},{10,15,90},{10,15,90},{10,15,90},{10,12,94},{9,15,51},{9,13,2},{9,13,2},{9,11,27},{7,13,50},{7,12,20},{10,15,9},{10,15,9},{10,15,9},{10,12,13},{16,1,50},{9,13,1},{9,13,1},{7,12,16},{31,1,50},{7,12,16},{18,1,72},{10,15,4},{11,13,0}, 
+{9,13,1},{18,1,72},{31,3,72},{9,13,1},{0,12,80},{31,3,72},{0,12,80},{10,0,90},{10,0,90},{10,0,90},{10,0,90},{9,13,1},{9,13,1},{9,13,1},{9,11,2},{7,12,4},{7,12,4},{11,18,154},{11,16,84},{11,14,117},{11,14,85},{10,18,244},{10,15,75},{10,14,2},{10,13,106},{8,15,253},{8,13,100},{11,18,90},{11,16,20},{12,14,26},{11,14,21},{19,0,244}, +{10,15,75},{10,14,2},{8,13,91},{30,4,244},{8,13,91},{11,16,80},{11,16,80},{11,16,80},{11,14,84},{10,16,52},{10,14,1},{10,14,1},{10,13,25},{8,14,52},{8,13,19},{11,16,16},{11,16,16},{11,16,16},{11,14,20},{16,4,50},{10,14,1},{10,14,1},{8,13,10},{28,4,50},{8,13,10},{20,0,72},{11,16,4},{12,14,1},{10,14,1},{20,0,72},{24,8,72},{10,14,1}, +{0,13,90},{24,8,72},{0,13,90},{11,0,80},{11,0,80},{11,0,80},{11,0,80},{10,14,1},{10,14,1},{10,14,1},{10,12,1},{8,13,10},{8,13,10},{12,19,162},{12,17,94},{12,15,116},{12,15,100},{11,19,244},{11,16,76},{11,15,2},{11,14,106},{9,16,253},{9,14,100},{12,19,81},{12,17,13},{13,15,26},{12,15,19},{20,1,244},{11,16,76},{11,15,2},{9,14,91},{31,5,244}, +{9,14,91},{12,17,90},{12,17,90},{12,17,90},{12,14,99},{11,17,52},{11,15,1},{11,15,1},{11,14,25},{9,15,52},{9,14,19},{12,17,9},{12,17,9},{12,17,9},{12,14,18},{17,5,50},{11,15,1},{11,15,1},{9,14,10},{31,4,50},{9,14,10},{21,1,72},{12,17,4},{13,15,1},{11,15,1},{21,1,72},{27,8,72},{11,15,1},{0,14,90},{27,8,72},{0,14,90},{12,0,90}, +{12,0,90},{12,0,90},{12,0,90},{11,15,1},{11,15,1},{11,15,1},{11,13,1},{9,14,10},{9,14,10},{13,20,162},{13,18,94},{13,16,115},{13,16,99},{12,20,244},{12,17,77},{12,16,2},{12,15,116},{10,17,253},{10,15,100},{13,20,81},{13,18,13},{14,16,25},{13,16,18},{20,4,243},{12,17,76},{12,16,1},{10,15,91},{28,8,243},{10,15,91},{13,18,90},{13,18,90},{13,18,90}, 
+{13,15,99},{12,18,51},{12,16,2},{12,16,2},{12,14,27},{10,16,50},{10,15,19},{13,18,9},{13,18,9},{13,18,9},{13,15,18},{21,0,50},{12,16,1},{12,16,1},{10,15,10},{30,6,50},{10,15,10},{23,0,72},{13,18,4},{14,16,0},{12,16,1},{23,0,72},{30,8,72},{12,16,1},{0,15,90},{30,8,72},{0,15,90},{13,0,90},{13,0,90},{13,0,90},{13,0,90},{12,16,1}, +{12,16,1},{12,16,1},{12,14,2},{10,15,10},{10,15,10},{14,21,162},{14,19,94},{14,17,115},{14,17,99},{13,21,244},{13,18,77},{13,17,2},{13,16,106},{11,18,253},{11,16,84},{14,21,81},{14,19,13},{15,17,25},{14,17,18},{21,5,243},{13,18,76},{13,17,1},{11,16,80},{31,8,243},{11,16,80},{14,19,90},{14,19,90},{14,19,90},{14,16,94},{13,19,51},{13,17,2},{13,17,2}, +{13,15,27},{11,17,50},{11,16,20},{14,19,9},{14,19,9},{14,19,9},{14,16,13},{22,1,50},{13,17,1},{13,17,1},{11,16,16},{31,7,50},{11,16,16},{24,1,72},{14,19,4},{15,17,0},{13,17,1},{24,1,72},{31,9,72},{13,17,1},{0,16,80},{31,9,72},{0,16,80},{14,0,90},{14,0,90},{14,0,90},{14,0,90},{13,17,1},{13,17,1},{13,17,1},{13,15,2},{11,16,4}, +{11,16,4},{15,22,154},{15,20,84},{15,18,117},{15,18,85},{14,22,244},{14,19,75},{14,18,2},{14,17,106},{12,19,253},{12,17,100},{15,22,90},{15,20,20},{16,18,26},{15,18,21},{25,0,244},{14,19,75},{14,18,2},{12,17,91},{30,10,244},{12,17,91},{15,20,80},{15,20,80},{15,20,80},{15,18,84},{14,20,52},{14,18,1},{14,18,1},{14,17,25},{12,18,52},{12,17,19},{15,20,16}, +{15,20,16},{15,20,16},{15,18,20},{24,0,50},{14,18,1},{14,18,1},{12,17,10},{24,12,50},{12,17,10},{24,4,72},{15,20,4},{16,18,1},{14,18,1},{24,4,72},{28,12,72},{14,18,1},{0,17,90},{28,12,72},{0,17,90},{15,0,80},{15,0,80},{15,0,80},{15,0,80},{14,18,1},{14,18,1},{14,18,1},{14,16,1},{12,17,10},{12,17,10},{16,23,162},{16,21,94},{16,19,116}, 
+{16,19,100},{15,23,244},{15,20,76},{15,19,2},{15,18,106},{13,20,253},{13,18,100},{16,23,81},{16,21,13},{17,19,26},{16,19,19},{26,1,244},{15,20,76},{15,19,2},{13,18,91},{31,11,244},{13,18,91},{16,21,90},{16,21,90},{16,21,90},{16,18,99},{15,21,52},{15,19,1},{15,19,1},{15,18,25},{13,19,52},{13,18,19},{16,21,9},{16,21,9},{16,21,9},{16,18,18},{25,1,50}, +{15,19,1},{15,19,1},{13,18,10},{27,12,50},{13,18,10},{25,5,72},{16,21,4},{17,19,1},{15,19,1},{25,5,72},{31,12,72},{15,19,1},{0,18,90},{31,12,72},{0,18,90},{16,0,90},{16,0,90},{16,0,90},{16,0,90},{15,19,1},{15,19,1},{15,19,1},{15,17,1},{13,18,10},{13,18,10},{17,24,162},{17,22,94},{17,20,115},{17,20,99},{16,24,244},{16,21,77},{16,20,2}, +{16,19,116},{14,21,253},{14,19,100},{17,24,81},{17,22,13},{18,20,25},{17,20,18},{28,0,243},{16,21,76},{16,20,1},{14,19,91},{24,16,243},{14,19,91},{17,22,90},{17,22,90},{17,22,90},{17,19,99},{16,22,51},{16,20,2},{16,20,2},{16,18,27},{14,20,50},{14,19,19},{17,22,9},{17,22,9},{17,22,9},{17,19,18},{27,0,50},{16,20,1},{16,20,1},{14,19,10},{30,12,50}, +{14,19,10},{29,0,72},{17,22,4},{18,20,0},{16,20,1},{29,0,72},{30,14,72},{16,20,1},{0,19,90},{30,14,72},{0,19,90},{17,0,90},{17,0,90},{17,0,90},{17,0,90},{16,20,1},{16,20,1},{16,20,1},{16,18,2},{14,19,10},{14,19,10},{18,25,162},{18,23,94},{18,21,115},{18,21,99},{17,25,244},{17,22,77},{17,21,2},{17,20,106},{15,22,253},{15,20,84},{18,25,81}, +{18,23,13},{19,21,25},{18,21,18},{29,1,243},{17,22,76},{17,21,1},{15,20,80},{27,16,243},{15,20,80},{18,23,90},{18,23,90},{18,23,90},{18,20,94},{17,23,51},{17,21,2},{17,21,2},{17,19,27},{15,21,50},{15,20,20},{18,23,9},{18,23,9},{18,23,9},{18,20,13},{28,1,50},{17,21,1},{17,21,1},{15,20,16},{31,13,50},{15,20,16},{30,1,72},{18,23,4},{19,21,0}, 
+{17,21,1},{30,1,72},{31,15,72},{17,21,1},{0,20,80},{31,15,72},{0,20,80},{18,0,90},{18,0,90},{18,0,90},{18,0,90},{17,21,1},{17,21,1},{17,21,1},{17,19,2},{15,20,4},{15,20,4},{19,26,154},{19,24,84},{19,22,117},{19,22,85},{18,26,244},{18,23,75},{18,22,2},{18,21,106},{16,23,253},{16,21,100},{19,26,90},{19,24,20},{20,22,26},{19,22,21},{31,0,244}, +{18,23,75},{18,22,2},{16,21,91},{30,16,244},{16,21,91},{19,24,80},{19,24,80},{19,24,80},{19,22,84},{18,24,52},{18,22,1},{18,22,1},{18,21,25},{16,22,52},{16,21,19},{19,24,16},{19,24,16},{19,24,16},{19,22,20},{28,4,50},{18,22,1},{18,22,1},{16,21,10},{28,16,50},{16,21,10},{28,8,72},{19,24,4},{20,22,1},{18,22,1},{28,8,72},{24,20,72},{18,22,1}, +{0,21,90},{24,20,72},{0,21,90},{19,0,80},{19,0,80},{19,0,80},{19,0,80},{18,22,1},{18,22,1},{18,22,1},{18,20,1},{16,21,10},{16,21,10},{20,27,162},{20,25,94},{20,23,116},{20,23,100},{19,27,244},{19,24,76},{19,23,2},{19,22,106},{17,24,253},{17,22,100},{20,27,81},{20,25,13},{21,23,26},{20,23,19},{31,3,244},{19,24,76},{19,23,2},{17,22,91},{31,17,244}, +{17,22,91},{20,25,90},{20,25,90},{20,25,90},{20,22,99},{19,25,52},{19,23,1},{19,23,1},{19,22,25},{17,23,52},{17,22,19},{20,25,9},{20,25,9},{20,25,9},{20,22,18},{29,5,50},{19,23,1},{19,23,1},{17,22,10},{31,16,50},{17,22,10},{29,9,72},{20,25,4},{21,23,1},{19,23,1},{29,9,72},{27,20,72},{19,23,1},{0,22,90},{27,20,72},{0,22,90},{20,0,90}, +{20,0,90},{20,0,90},{20,0,90},{19,23,1},{19,23,1},{19,23,1},{19,21,1},{17,22,10},{17,22,10},{21,28,162},{21,26,94},{21,24,115},{21,24,99},{20,28,244},{20,25,77},{20,24,2},{20,23,116},{18,25,253},{18,23,100},{21,28,81},{21,26,13},{22,24,25},{21,24,18},{28,12,243},{20,25,76},{20,24,1},{18,23,91},{28,20,243},{18,23,91},{21,26,90},{21,26,90},{21,26,90}, 
+{21,23,99},{20,26,51},{20,24,2},{20,24,2},{20,22,27},{18,24,50},{18,23,19},{21,26,9},{21,26,9},{21,26,9},{21,23,18},{31,4,50},{20,24,1},{20,24,1},{18,23,10},{30,18,50},{18,23,10},{31,8,72},{21,26,4},{22,24,0},{20,24,1},{31,8,72},{30,20,72},{20,24,1},{0,23,90},{30,20,72},{0,23,90},{21,0,90},{21,0,90},{21,0,90},{21,0,90},{20,24,1}, +{20,24,1},{20,24,1},{20,22,2},{18,23,10},{18,23,10},{22,29,162},{22,27,94},{22,25,115},{22,25,99},{21,29,244},{21,26,77},{21,25,2},{21,24,106},{19,26,253},{19,24,84},{22,29,81},{22,27,13},{23,25,25},{22,25,18},{29,13,243},{21,26,76},{21,25,1},{19,24,80},{31,20,243},{19,24,80},{22,27,90},{22,27,90},{22,27,90},{22,24,94},{21,27,51},{21,25,2},{21,25,2}, +{21,23,27},{19,25,50},{19,24,20},{22,27,9},{22,27,9},{22,27,9},{22,24,13},{31,7,50},{21,25,1},{21,25,1},{19,24,16},{31,19,50},{19,24,16},{31,11,72},{22,27,4},{23,25,0},{21,25,1},{31,11,72},{31,21,72},{21,25,1},{0,24,80},{31,21,72},{0,24,80},{22,0,90},{22,0,90},{22,0,90},{22,0,90},{21,25,1},{21,25,1},{21,25,1},{21,23,2},{19,24,4}, +{19,24,4},{23,30,154},{23,28,84},{23,26,117},{23,26,85},{22,30,244},{22,27,75},{22,26,2},{22,25,106},{20,27,253},{20,25,100},{23,30,90},{23,28,20},{24,26,26},{23,26,21},{31,12,244},{22,27,75},{22,26,2},{20,25,91},{30,22,244},{20,25,91},{23,28,80},{23,28,80},{23,28,80},{23,26,84},{22,28,52},{22,26,1},{22,26,1},{22,25,25},{20,26,52},{20,25,19},{23,28,16}, +{23,28,16},{23,28,16},{23,26,20},{28,16,50},{22,26,1},{22,26,1},{20,25,10},{24,24,50},{20,25,10},{28,20,72},{23,28,4},{24,26,1},{22,26,1},{28,20,72},{28,24,72},{22,26,1},{0,25,90},{28,24,72},{0,25,90},{23,0,80},{23,0,80},{23,0,80},{23,0,80},{22,26,1},{22,26,1},{22,26,1},{22,24,1},{20,25,10},{20,25,10},{24,31,162},{24,29,94},{24,27,116}, 
+{24,27,100},{23,31,244},{23,28,76},{23,27,2},{23,26,106},{21,28,253},{21,26,100},{24,31,81},{24,29,13},{25,27,26},{24,27,19},{31,15,244},{23,28,76},{23,27,2},{21,26,91},{31,23,244},{21,26,91},{24,29,90},{24,29,90},{24,29,90},{24,26,99},{23,29,52},{23,27,1},{23,27,1},{23,26,25},{21,27,52},{21,26,19},{24,29,9},{24,29,9},{24,29,9},{24,26,18},{29,17,50}, +{23,27,1},{23,27,1},{21,26,10},{27,24,50},{21,26,10},{29,21,72},{24,29,4},{25,27,1},{23,27,1},{29,21,72},{31,24,72},{23,27,1},{0,26,90},{31,24,72},{0,26,90},{24,0,90},{24,0,90},{24,0,90},{24,0,90},{23,27,1},{23,27,1},{23,27,1},{23,25,1},{21,26,10},{21,26,10},{25,31,180},{25,30,94},{25,28,115},{25,28,99},{24,31,265},{24,29,77},{24,28,2}, +{24,27,116},{22,29,253},{22,27,100},{26,30,97},{25,30,13},{26,28,25},{25,28,18},{28,24,243},{24,29,76},{24,28,1},{22,27,91},{24,28,243},{22,27,91},{25,30,90},{25,30,90},{25,30,90},{25,27,99},{24,30,51},{24,28,2},{24,28,2},{24,26,27},{22,28,50},{22,27,19},{25,30,9},{25,30,9},{25,30,9},{25,27,18},{31,16,50},{24,28,1},{24,28,1},{22,27,10},{30,24,50}, +{22,27,10},{31,20,72},{25,30,4},{26,28,0},{24,28,1},{31,20,72},{30,26,72},{24,28,1},{0,27,90},{30,26,72},{0,27,90},{25,0,90},{25,0,90},{25,0,90},{25,0,90},{24,28,1},{24,28,1},{24,28,1},{24,26,2},{22,27,10},{22,27,10},{26,31,234},{26,31,94},{26,29,115},{26,29,99},{26,31,325},{25,30,77},{25,29,2},{25,28,106},{23,30,253},{23,28,84},{27,31,97}, +{26,31,13},{27,29,25},{26,29,18},{29,25,243},{25,30,76},{25,29,1},{23,28,80},{27,28,243},{23,28,80},{26,31,90},{26,31,90},{26,31,90},{26,28,94},{25,31,51},{25,29,2},{25,29,2},{25,27,27},{23,29,50},{23,28,20},{26,31,9},{26,31,9},{26,31,9},{26,28,13},{31,19,50},{25,29,1},{25,29,1},{23,28,16},{31,25,50},{23,28,16},{31,23,72},{26,31,4},{27,29,0}, 
+{25,29,1},{31,23,72},{31,27,72},{25,29,1},{0,28,80},{31,27,72},{0,28,80},{26,0,90},{26,0,90},{26,0,90},{26,0,90},{25,29,1},{25,29,1},{25,29,1},{25,27,2},{23,28,4},{23,28,4},{27,31,314},{27,31,105},{27,30,117},{27,30,85},{27,31,347},{26,31,75},{26,30,2},{26,29,106},{24,31,253},{24,29,100},{28,31,115},{28,31,35},{28,30,26},{27,30,21},{31,24,244}, +{26,31,75},{26,30,2},{24,29,91},{30,28,244},{24,29,91},{27,31,89},{27,31,89},{27,31,89},{27,30,84},{26,31,82},{26,30,1},{26,30,1},{26,29,25},{24,30,52},{24,29,19},{28,30,25},{28,30,25},{28,30,25},{27,30,20},{28,28,50},{26,30,1},{26,30,1},{24,29,10},{28,28,50},{24,29,10},{31,26,74},{28,31,10},{28,30,1},{26,30,1},{31,26,74},{30,29,74},{26,30,1}, +{0,29,90},{30,29,74},{0,29,90},{27,0,80},{27,0,80},{27,0,80},{27,0,80},{26,30,1},{26,30,1},{26,30,1},{26,28,1},{24,29,10},{24,29,10},{28,31,371},{28,31,179},{28,31,115},{28,31,99},{28,31,387},{27,31,122},{27,31,1},{27,30,89},{26,31,279},{25,30,83},{29,31,146},{29,31,61},{29,31,25},{28,31,18},{31,27,221},{28,31,98},{27,31,1},{25,30,74},{31,29,221}, +{25,30,74},{28,31,115},{28,31,115},{28,31,115},{28,30,99},{27,31,122},{27,31,1},{27,31,1},{27,30,25},{25,31,52},{25,30,19},{29,31,25},{29,31,25},{29,31,25},{28,30,18},{29,29,50},{27,31,1},{27,31,1},{25,30,10},{31,28,50},{25,30,10},{31,29,61},{30,31,34},{29,31,0},{27,31,0},{31,29,61},{31,30,61},{27,31,0},{0,30,73},{31,30,61},{0,30,73},{28,0,90}, +{28,0,90},{28,0,90},{28,0,90},{27,31,1},{27,31,1},{27,31,1},{27,29,1},{25,30,10},{25,30,10},{29,31,275},{29,31,190},{29,31,154},{29,31,99},{29,31,270},{28,31,99},{28,31,35},{28,31,35},{28,31,195},{26,31,19},{30,31,70},{30,31,43},{30,31,34},{29,31,18},{31,29,94},{29,31,49},{29,31,13},{26,31,10},{31,30,94},{26,31,10},{29,31,154},{29,31,154},{29,31,154}, 
+{29,31,99},{29,31,149},{28,31,35},{28,31,35},{28,30,27},{27,31,77},{26,31,19},{30,31,34},{30,31,34},{30,31,34},{29,31,18},{31,28,50},{29,31,13},{29,31,13},{26,31,10},{30,30,50},{26,31,10},{31,30,9},{31,31,9},{30,31,9},{30,31,0},{31,30,9},{30,31,9},{30,31,0},{0,31,9},{30,31,9},{0,31,9},{29,0,90},{29,0,90},{29,0,90},{29,0,90},{28,31,10}, +{28,31,10},{28,31,10},{28,30,2},{26,31,10},{26,31,10},{30,31,162},{30,31,135},{30,31,126},{30,31,99},{30,31,154},{30,31,100},{29,31,73},{29,31,2},{29,31,109},{28,31,10},{31,31,25},{31,31,25},{31,31,25},{30,31,18},{31,30,22},{30,31,19},{30,31,10},{29,31,1},{30,31,22},{29,31,1},{30,31,126},{30,31,126},{30,31,126},{30,31,99},{30,31,118},{29,31,73},{29,31,73}, +{29,31,2},{29,31,73},{28,31,10},{31,31,25},{31,31,25},{31,31,25},{30,31,18},{31,30,13},{30,31,10},{30,31,10},{29,31,1},{30,31,13},{29,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{30,0,90},{30,0,90},{30,0,90},{30,0,90},{29,31,37},{29,31,37},{29,31,37},{29,31,2},{28,31,10}, +{28,31,10},{0,7,200},{0,5,20},{0,4,20},{0,3,74},{0,5,441},{0,3,282},{0,3,138},{0,2,318},{0,2,487},{0,2,343},{0,7,200},{0,5,20},{0,4,20},{0,3,74},{2,1,441},{0,3,282},{0,3,138},{0,2,318},{5,0,441},{0,2,318},{0,3,1},{0,3,1},{0,3,1},{0,2,1},{0,2,41},{0,1,20},{0,1,20},{0,1,26},{0,1,45},{0,1,30},{0,3,1}, +{0,3,1},{0,3,1},{0,2,1},{1,0,41},{0,1,20},{0,1,20},{0,1,26},{2,0,41},{0,1,26},{1,5,200},{0,5,20},{0,4,20},{0,3,74},{1,5,200},{7,0,200},{0,3,74},{0,2,218},{7,0,200},{0,2,218},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,10,200},{0,7,5},{0,5,32}, +{0,4,41},{0,7,689},{0,5,369},{0,4,162},{0,3,474},{0,3,762},{0,3,538},{0,10,200},{0,7,5},{0,5,32},{0,4,41},{3,1,686},{0,5,369},{0,4,162},{0,3,474},{3,2,686},{0,3,474},{0,6,1},{0,6,1},{0,6,1},{0,3,1},{0,3,145},{0,3,65},{0,3,65},{0,2,101},{0,1,173},{0,1,110},{0,6,1},{0,6,1},{0,6,1},{0,3,1},{1,1,145}, 
+{0,3,65},{0,3,65},{0,2,101},{3,0,145},{0,2,101},{5,0,200},{0,7,5},{1,4,17},{0,4,41},{5,0,200},{10,0,200},{0,4,41},{0,3,218},{10,0,200},{0,3,218},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,13,249},{0,8,59},{1,6,97},{0,5,69},{0,10,728},{0,6,299},{0,5,56}, +{0,4,433},{0,4,884},{0,4,554},{1,11,201},{1,8,6},{1,6,33},{1,5,42},{5,0,728},{0,6,299},{0,5,56},{0,4,433},{10,0,728},{0,4,433},{0,9,50},{0,9,50},{0,9,50},{0,5,53},{0,6,162},{0,4,25},{0,4,25},{0,3,82},{0,3,226},{0,2,115},{1,7,2},{1,7,2},{1,7,2},{1,4,2},{3,0,162},{0,4,25},{0,4,25},{0,3,82},{6,0,162}, +{0,3,82},{6,1,200},{1,8,5},{2,5,17},{0,5,20},{6,1,200},{13,0,200},{0,5,20},{0,4,208},{13,0,200},{0,4,208},{0,0,49},{0,0,49},{0,0,49},{0,0,49},{0,3,1},{0,3,1},{0,3,1},{0,1,4},{0,1,8},{0,1,8},{1,14,313},{1,9,123},{1,7,210},{1,6,133},{0,12,724},{0,8,251},{0,6,18},{0,5,352},{0,6,987},{0,5,521},{2,12,201}, +{2,9,6},{2,7,33},{2,6,42},{4,4,724},{0,8,251},{0,6,18},{0,5,352},{12,0,724},{0,5,352},{1,10,114},{1,10,114},{1,10,114},{1,6,117},{0,9,162},{0,6,2},{0,6,2},{0,4,49},{0,4,291},{0,3,146},{2,8,2},{2,8,2},{2,8,2},{2,5,2},{4,1,162},{0,6,2},{0,6,2},{0,4,49},{9,0,162},{0,4,49},{7,2,200},{2,9,5},{3,6,17}, +{0,6,17},{7,2,200},{14,1,200},{0,6,17},{0,5,208},{14,1,200},{0,5,208},{1,0,113},{1,0,113},{1,0,113},{1,0,113},{0,6,1},{0,6,1},{0,6,1},{0,3,1},{0,2,50},{0,2,50},{2,15,410},{2,11,221},{2,8,324},{2,7,234},{1,14,739},{0,10,233},{0,8,40},{0,6,298},{0,7,1013},{0,6,426},{3,13,202},{3,10,1},{3,8,32},{3,7,41},{7,2,723}, +{0,10,217},{0,8,24},{0,6,282},{14,1,723},{0,6,282},{2,12,209},{2,12,209},{2,12,209},{2,7,209},{0,12,178},{1,7,20},{1,7,20},{0,5,26},{0,6,308},{0,5,91},{3,10,1},{3,10,1},{3,10,1},{3,6,4},{4,4,162},{1,7,4},{1,7,4},{0,5,10},{12,0,162},{0,5,10},{9,1,200},{3,10,1},{4,8,20},{0,8,20},{9,1,200},{19,0,200},{0,8,20}, 
+{0,6,218},{19,0,200},{0,6,218},{2,0,208},{2,0,208},{2,0,208},{2,0,208},{1,7,16},{1,7,16},{1,7,16},{1,4,16},{0,4,53},{0,4,53},{3,16,408},{3,12,225},{3,9,324},{3,8,228},{2,15,739},{1,11,233},{2,8,33},{1,7,298},{0,9,875},{0,7,303},{4,14,201},{4,11,6},{4,9,33},{4,8,42},{7,5,723},{0,11,204},{2,8,17},{0,7,254},{15,2,723}, +{0,7,254},{3,12,209},{3,12,209},{3,12,209},{3,8,212},{1,13,178},{2,8,17},{2,8,17},{1,6,26},{0,7,229},{0,6,17},{4,10,2},{4,10,2},{4,10,2},{4,7,2},{5,5,162},{2,8,1},{2,8,1},{0,6,1},{15,0,162},{0,6,1},{11,0,200},{4,11,5},{5,8,17},{2,8,17},{11,0,200},{22,0,200},{2,8,17},{0,7,218},{22,0,200},{0,7,218},{3,0,208}, +{3,0,208},{3,0,208},{3,0,208},{2,8,16},{2,8,16},{2,8,16},{2,5,16},{0,6,16},{0,6,16},{4,17,418},{4,12,228},{4,10,315},{4,9,238},{3,16,744},{2,11,234},{3,9,33},{2,8,305},{0,11,828},{0,8,228},{5,15,201},{5,12,6},{5,10,33},{5,9,42},{11,0,728},{1,12,204},{3,9,17},{0,8,224},{22,0,728},{0,8,224},{4,13,219},{4,13,219},{4,13,219}, +{4,9,222},{3,12,178},{3,9,17},{3,9,17},{2,7,26},{0,9,178},{0,7,6},{5,11,2},{5,11,2},{5,11,2},{5,8,2},{9,0,162},{3,9,1},{3,9,1},{1,7,1},{18,0,162},{1,7,1},{12,1,200},{5,12,5},{6,9,17},{3,9,17},{12,1,200},{25,0,200},{3,9,17},{0,8,208},{25,0,200},{0,8,208},{4,0,218},{4,0,218},{4,0,218},{4,0,218},{3,9,16}, +{3,9,16},{3,9,16},{3,6,16},{0,7,5},{0,7,5},{5,18,418},{5,13,228},{5,11,315},{5,10,238},{3,19,744},{3,12,225},{3,11,40},{3,9,305},{0,12,749},{0,9,225},{6,16,201},{6,13,6},{6,11,33},{6,10,42},{12,0,724},{2,13,204},{4,10,18},{0,9,209},{24,0,724},{0,9,209},{5,14,219},{5,14,219},{5,14,219},{5,10,222},{3,15,178},{3,11,24},{3,11,24}, +{3,8,32},{1,10,178},{1,8,5},{6,12,2},{6,12,2},{6,12,2},{6,9,2},{10,1,162},{4,10,2},{4,10,2},{2,8,1},{21,0,162},{2,8,1},{14,0,200},{6,13,5},{7,10,17},{4,10,17},{14,0,200},{26,1,200},{4,10,17},{0,9,208},{26,1,200},{0,9,208},{5,0,218},{5,0,218},{5,0,218},{5,0,218},{3,12,17},{3,12,17},{3,12,17},{3,7,20},{1,8,5}, 
+{1,8,5},{6,19,410},{6,15,221},{6,12,324},{6,11,234},{5,18,739},{4,14,233},{4,12,40},{4,10,298},{0,14,724},{2,10,228},{7,17,202},{7,14,1},{7,12,32},{7,11,41},{14,0,723},{3,14,211},{4,12,24},{1,10,218},{26,1,723},{1,10,218},{6,16,209},{6,16,209},{6,16,209},{6,11,209},{4,16,178},{5,11,20},{5,11,20},{4,9,26},{1,12,171},{3,9,10},{7,14,1}, +{7,14,1},{7,14,1},{7,10,4},{12,0,162},{5,11,4},{5,11,4},{3,9,1},{24,0,162},{3,9,1},{13,5,200},{7,14,1},{8,12,20},{3,12,17},{13,5,200},{31,0,200},{3,12,17},{0,10,218},{31,0,200},{0,10,218},{6,0,208},{6,0,208},{6,0,208},{6,0,208},{5,11,16},{5,11,16},{5,11,16},{5,8,16},{3,9,9},{3,9,9},{7,20,408},{7,16,225},{7,13,324}, +{7,12,228},{6,19,739},{5,15,233},{6,12,33},{5,11,298},{1,15,724},{3,11,228},{8,18,201},{8,15,6},{8,13,33},{8,12,42},{15,1,723},{4,15,204},{6,12,17},{2,11,218},{27,2,723},{2,11,218},{7,16,209},{7,16,209},{7,16,209},{7,12,212},{5,17,178},{6,12,17},{6,12,17},{5,10,26},{2,13,171},{3,10,14},{8,14,2},{8,14,2},{8,14,2},{8,11,2},{13,1,162}, +{6,12,1},{6,12,1},{4,10,1},{27,0,162},{4,10,1},{17,0,200},{8,15,5},{9,12,17},{6,12,17},{17,0,200},{30,2,200},{6,12,17},{0,11,218},{30,2,200},{0,11,218},{7,0,208},{7,0,208},{7,0,208},{7,0,208},{6,12,16},{6,12,16},{6,12,16},{6,9,16},{3,11,10},{3,11,10},{8,21,418},{8,16,228},{8,14,315},{8,13,238},{7,20,744},{6,15,234},{7,13,33}, +{6,12,305},{2,16,729},{3,12,225},{9,19,201},{9,16,6},{9,14,33},{9,13,42},{17,0,728},{5,16,204},{7,13,17},{3,12,209},{30,2,728},{3,12,209},{8,17,219},{8,17,219},{8,17,219},{8,13,222},{7,16,178},{7,13,17},{7,13,17},{6,11,26},{3,14,171},{4,11,6},{9,15,2},{9,15,2},{9,15,2},{9,12,2},{15,0,162},{7,13,1},{7,13,1},{5,11,1},{30,0,162}, +{5,11,1},{18,1,200},{9,16,5},{10,13,17},{7,13,17},{18,1,200},{31,3,200},{7,13,17},{0,12,208},{31,3,200},{0,12,208},{8,0,218},{8,0,218},{8,0,218},{8,0,218},{7,13,16},{7,13,16},{7,13,16},{7,10,16},{4,11,5},{4,11,5},{9,22,418},{9,17,228},{9,15,315},{9,14,238},{7,23,744},{7,16,225},{7,15,40},{7,13,305},{3,17,729},{4,13,225},{10,20,201}, 
+{10,17,6},{10,15,33},{10,14,42},{16,4,724},{6,17,204},{8,14,18},{4,13,209},{28,4,724},{4,13,209},{9,18,219},{9,18,219},{9,18,219},{9,14,222},{7,19,178},{7,15,24},{7,15,24},{7,12,32},{5,14,178},{5,12,5},{10,16,2},{10,16,2},{10,16,2},{10,13,2},{16,1,162},{8,14,2},{8,14,2},{6,12,1},{31,1,162},{6,12,1},{19,2,200},{10,17,5},{11,14,17}, +{8,14,17},{19,2,200},{30,5,200},{8,14,17},{0,13,208},{30,5,200},{0,13,208},{9,0,218},{9,0,218},{9,0,218},{9,0,218},{7,16,17},{7,16,17},{7,16,17},{7,11,20},{5,12,5},{5,12,5},{10,23,410},{10,19,221},{10,16,324},{10,15,234},{9,22,739},{8,18,233},{8,16,40},{8,14,298},{4,18,724},{6,14,228},{11,21,202},{11,18,1},{11,16,32},{11,15,41},{19,2,723}, +{7,18,211},{8,16,24},{5,14,218},{30,5,723},{5,14,218},{10,20,209},{10,20,209},{10,20,209},{10,15,209},{8,20,178},{9,15,20},{9,15,20},{8,13,26},{5,16,171},{7,13,10},{11,18,1},{11,18,1},{11,18,1},{11,14,4},{16,4,162},{9,15,4},{9,15,4},{7,13,1},{28,4,162},{7,13,1},{21,1,200},{11,18,1},{12,16,20},{7,16,17},{21,1,200},{27,8,200},{7,16,17}, +{0,14,218},{27,8,200},{0,14,218},{10,0,208},{10,0,208},{10,0,208},{10,0,208},{9,15,16},{9,15,16},{9,15,16},{9,12,16},{7,13,9},{7,13,9},{11,24,408},{11,20,225},{11,17,324},{11,16,228},{10,23,739},{9,19,233},{10,16,33},{9,15,298},{5,19,724},{7,15,228},{12,22,201},{12,19,6},{12,17,33},{12,16,42},{19,5,723},{8,19,204},{10,16,17},{6,15,218},{31,6,723}, +{6,15,218},{11,20,209},{11,20,209},{11,20,209},{11,16,212},{9,21,178},{10,16,17},{10,16,17},{9,14,26},{6,17,171},{7,14,14},{12,18,2},{12,18,2},{12,18,2},{12,15,2},{17,5,162},{10,16,1},{10,16,1},{8,14,1},{31,4,162},{8,14,1},{23,0,200},{12,19,5},{13,16,17},{10,16,17},{23,0,200},{30,8,200},{10,16,17},{0,15,218},{30,8,200},{0,15,218},{11,0,208}, 
+{11,0,208},{11,0,208},{11,0,208},{10,16,16},{10,16,16},{10,16,16},{10,13,16},{7,15,10},{7,15,10},{12,25,418},{12,20,228},{12,18,315},{12,17,238},{11,24,744},{10,19,234},{11,17,33},{10,16,305},{6,20,729},{7,16,225},{13,23,201},{13,20,6},{13,18,33},{13,17,42},{23,0,728},{9,20,204},{11,17,17},{7,16,209},{30,8,728},{7,16,209},{12,21,219},{12,21,219},{12,21,219}, +{12,17,222},{11,20,178},{11,17,17},{11,17,17},{10,15,26},{7,18,171},{8,15,6},{13,19,2},{13,19,2},{13,19,2},{13,16,2},{21,0,162},{11,17,1},{11,17,1},{9,15,1},{30,6,162},{9,15,1},{24,1,200},{13,20,5},{14,17,17},{11,17,17},{24,1,200},{31,9,200},{11,17,17},{0,16,208},{31,9,200},{0,16,208},{12,0,218},{12,0,218},{12,0,218},{12,0,218},{11,17,16}, +{11,17,16},{11,17,16},{11,14,16},{8,15,5},{8,15,5},{13,26,418},{13,21,228},{13,19,315},{13,18,238},{11,27,744},{11,20,225},{11,19,40},{11,17,305},{7,21,729},{8,17,225},{14,24,201},{14,21,6},{14,19,33},{14,18,42},{24,0,724},{10,21,204},{12,18,18},{8,17,209},{24,12,724},{8,17,209},{13,22,219},{13,22,219},{13,22,219},{13,18,222},{11,23,178},{11,19,24},{11,19,24}, +{11,16,32},{9,18,178},{9,16,5},{14,20,2},{14,20,2},{14,20,2},{14,17,2},{22,1,162},{12,18,2},{12,18,2},{10,16,1},{31,7,162},{10,16,1},{26,0,200},{14,21,5},{15,18,17},{12,18,17},{26,0,200},{30,11,200},{12,18,17},{0,17,208},{30,11,200},{0,17,208},{13,0,218},{13,0,218},{13,0,218},{13,0,218},{11,20,17},{11,20,17},{11,20,17},{11,15,20},{9,16,5}, +{9,16,5},{14,27,410},{14,23,221},{14,20,324},{14,19,234},{13,26,739},{12,22,233},{12,20,40},{12,18,298},{8,22,724},{10,18,228},{15,25,202},{15,22,1},{15,20,32},{15,19,41},{26,0,723},{11,22,211},{12,20,24},{9,18,218},{30,11,723},{9,18,218},{14,24,209},{14,24,209},{14,24,209},{14,19,209},{12,24,178},{13,19,20},{13,19,20},{12,17,26},{9,20,171},{11,17,10},{15,22,1}, 
+{15,22,1},{15,22,1},{15,18,4},{24,0,162},{13,19,4},{13,19,4},{11,17,1},{24,12,162},{11,17,1},{25,5,200},{15,22,1},{16,20,20},{11,20,17},{25,5,200},{31,12,200},{11,20,17},{0,18,218},{31,12,200},{0,18,218},{14,0,208},{14,0,208},{14,0,208},{14,0,208},{13,19,16},{13,19,16},{13,19,16},{13,16,16},{11,17,9},{11,17,9},{15,28,408},{15,24,225},{15,21,324}, +{15,20,228},{14,27,739},{13,23,233},{14,20,33},{13,19,298},{9,23,724},{11,19,228},{16,26,201},{16,23,6},{16,21,33},{16,20,42},{27,1,723},{12,23,204},{14,20,17},{10,19,218},{27,14,723},{10,19,218},{15,24,209},{15,24,209},{15,24,209},{15,20,212},{13,25,178},{14,20,17},{14,20,17},{13,18,26},{10,21,171},{11,18,14},{16,22,2},{16,22,2},{16,22,2},{16,19,2},{25,1,162}, +{14,20,1},{14,20,1},{12,18,1},{27,12,162},{12,18,1},{29,0,200},{16,23,5},{17,20,17},{14,20,17},{29,0,200},{30,14,200},{14,20,17},{0,19,218},{30,14,200},{0,19,218},{15,0,208},{15,0,208},{15,0,208},{15,0,208},{14,20,16},{14,20,16},{14,20,16},{14,17,16},{11,19,10},{11,19,10},{16,29,418},{16,24,228},{16,22,315},{16,21,238},{15,28,744},{14,23,234},{15,21,33}, +{14,20,305},{10,24,729},{11,20,225},{17,27,201},{17,24,6},{17,22,33},{17,21,42},{29,0,728},{13,24,204},{15,21,17},{11,20,209},{30,14,728},{11,20,209},{16,25,219},{16,25,219},{16,25,219},{16,21,222},{15,24,178},{15,21,17},{15,21,17},{14,19,26},{11,22,171},{12,19,6},{17,23,2},{17,23,2},{17,23,2},{17,20,2},{27,0,162},{15,21,1},{15,21,1},{13,19,1},{30,12,162}, +{13,19,1},{30,1,200},{17,24,5},{18,21,17},{15,21,17},{30,1,200},{31,15,200},{15,21,17},{0,20,208},{31,15,200},{0,20,208},{16,0,218},{16,0,218},{16,0,218},{16,0,218},{15,21,16},{15,21,16},{15,21,16},{15,18,16},{12,19,5},{12,19,5},{17,30,418},{17,25,228},{17,23,315},{17,22,238},{15,31,744},{15,24,225},{15,23,40},{15,21,305},{11,25,729},{12,21,225},{18,28,201}, 
+{18,25,6},{18,23,33},{18,22,42},{28,4,724},{14,25,204},{16,22,18},{12,21,209},{28,16,724},{12,21,209},{17,26,219},{17,26,219},{17,26,219},{17,22,222},{15,27,178},{15,23,24},{15,23,24},{15,20,32},{13,22,178},{13,20,5},{18,24,2},{18,24,2},{18,24,2},{18,21,2},{28,1,162},{16,22,2},{16,22,2},{14,20,1},{31,13,162},{14,20,1},{31,2,200},{18,25,5},{19,22,17}, +{16,22,17},{31,2,200},{30,17,200},{16,22,17},{0,21,208},{30,17,200},{0,21,208},{17,0,218},{17,0,218},{17,0,218},{17,0,218},{15,24,17},{15,24,17},{15,24,17},{15,19,20},{13,20,5},{13,20,5},{18,31,410},{18,27,221},{18,24,324},{18,23,234},{17,30,739},{16,26,233},{16,24,40},{16,22,298},{12,26,724},{14,22,228},{19,29,202},{19,26,1},{19,24,32},{19,23,41},{31,2,723}, +{15,26,211},{16,24,24},{13,22,218},{30,17,723},{13,22,218},{18,28,209},{18,28,209},{18,28,209},{18,23,209},{16,28,178},{17,23,20},{17,23,20},{16,21,26},{13,24,171},{15,21,10},{19,26,1},{19,26,1},{19,26,1},{19,22,4},{28,4,162},{17,23,4},{17,23,4},{15,21,1},{28,16,162},{15,21,1},{29,9,200},{19,26,1},{20,24,20},{15,24,17},{29,9,200},{27,20,200},{15,24,17}, +{0,22,218},{27,20,200},{0,22,218},{18,0,208},{18,0,208},{18,0,208},{18,0,208},{17,23,16},{17,23,16},{17,23,16},{17,20,16},{15,21,9},{15,21,9},{19,31,426},{19,28,225},{19,25,324},{19,24,228},{18,31,739},{17,27,233},{18,24,33},{17,23,298},{13,27,724},{15,23,228},{20,30,201},{20,27,6},{20,25,33},{20,24,42},{31,5,723},{16,27,204},{18,24,17},{14,23,218},{31,18,723}, +{14,23,218},{19,28,209},{19,28,209},{19,28,209},{19,24,212},{17,29,178},{18,24,17},{18,24,17},{17,22,26},{14,25,171},{15,22,14},{20,26,2},{20,26,2},{20,26,2},{20,23,2},{29,5,162},{18,24,1},{18,24,1},{16,22,1},{31,16,162},{16,22,1},{31,8,200},{20,27,5},{21,24,17},{18,24,17},{31,8,200},{30,20,200},{18,24,17},{0,23,218},{30,20,200},{0,23,218},{19,0,208}, 
+{19,0,208},{19,0,208},{19,0,208},{18,24,16},{18,24,16},{18,24,16},{18,21,16},{15,23,10},{15,23,10},{20,31,468},{20,28,228},{20,26,315},{20,25,238},{19,31,747},{18,27,234},{19,25,33},{18,24,305},{14,28,729},{15,24,225},{21,31,201},{21,28,6},{21,26,33},{21,25,42},{31,8,728},{17,28,204},{19,25,17},{15,24,209},{30,20,728},{15,24,209},{20,29,219},{20,29,219},{20,29,219}, +{20,25,222},{19,28,178},{19,25,17},{19,25,17},{18,23,26},{15,26,171},{16,23,6},{21,27,2},{21,27,2},{21,27,2},{21,24,2},{31,4,162},{19,25,1},{19,25,1},{17,23,1},{30,18,162},{17,23,1},{31,11,200},{21,28,5},{22,25,17},{19,25,17},{31,11,200},{31,21,200},{19,25,17},{0,24,208},{31,21,200},{0,24,208},{20,0,218},{20,0,218},{20,0,218},{20,0,218},{19,25,16}, +{19,25,16},{19,25,16},{19,22,16},{16,23,5},{16,23,5},{21,31,546},{21,29,228},{21,27,315},{21,26,238},{20,31,788},{19,28,225},{19,27,40},{19,25,305},{15,29,729},{16,25,225},{22,31,219},{22,29,6},{22,27,33},{22,26,42},{28,16,724},{18,29,204},{20,26,18},{16,25,209},{24,24,724},{16,25,209},{21,30,219},{21,30,219},{21,30,219},{21,26,222},{19,31,178},{19,27,24},{19,27,24}, +{19,24,32},{17,26,178},{17,24,5},{22,28,2},{22,28,2},{22,28,2},{22,25,2},{31,7,162},{20,26,2},{20,26,2},{18,24,1},{31,19,162},{18,24,1},{31,14,200},{22,29,5},{23,26,17},{20,26,17},{31,14,200},{30,23,200},{20,26,17},{0,25,208},{30,23,200},{0,25,208},{21,0,218},{21,0,218},{21,0,218},{21,0,218},{19,28,17},{19,28,17},{19,28,17},{19,23,20},{17,24,5}, +{17,24,5},{23,31,672},{22,31,221},{22,28,324},{22,27,234},{22,31,888},{20,30,233},{20,28,40},{20,26,298},{16,30,724},{18,26,228},{23,31,272},{23,30,1},{23,28,32},{23,27,41},{31,14,723},{19,30,211},{20,28,24},{17,26,218},{30,23,723},{17,26,218},{22,31,212},{22,31,212},{22,31,212},{22,27,209},{21,30,180},{21,27,20},{21,27,20},{20,25,26},{17,28,171},{19,25,10},{23,30,1}, 
+{23,30,1},{23,30,1},{23,26,4},{28,16,162},{21,27,4},{21,27,4},{19,25,1},{24,24,162},{19,25,1},{29,21,200},{23,30,1},{24,28,20},{19,28,17},{29,21,200},{31,24,200},{19,28,17},{0,26,218},{31,24,200},{0,26,218},{22,0,208},{22,0,208},{22,0,208},{22,0,208},{21,27,16},{21,27,16},{21,27,16},{21,24,16},{19,25,9},{19,25,9},{24,31,770},{23,31,228},{23,29,324}, +{23,28,228},{23,31,932},{21,31,233},{22,28,33},{21,27,298},{17,31,724},{19,27,228},{25,31,299},{24,31,6},{24,29,33},{24,28,42},{31,17,723},{20,31,204},{22,28,17},{18,27,218},{27,26,723},{18,27,218},{23,31,224},{23,31,224},{23,31,224},{23,28,212},{22,31,180},{22,28,17},{22,28,17},{21,26,26},{18,29,171},{19,26,14},{24,30,2},{24,30,2},{24,30,2},{24,27,2},{29,17,162}, +{22,28,1},{22,28,1},{20,26,1},{27,24,162},{20,26,1},{31,20,200},{24,31,5},{25,28,17},{22,28,17},{31,20,200},{30,26,200},{22,28,17},{0,27,218},{30,26,200},{0,27,218},{23,0,208},{23,0,208},{23,0,208},{23,0,208},{22,28,16},{22,28,16},{22,28,16},{22,25,16},{19,27,10},{19,27,10},{25,31,884},{24,31,303},{24,30,315},{24,29,238},{24,31,1025},{22,31,234},{23,29,33}, +{22,28,305},{19,31,747},{19,28,225},{26,31,353},{25,31,27},{25,30,33},{25,29,42},{31,20,728},{22,31,218},{23,29,17},{19,28,209},{30,26,728},{19,28,209},{24,31,254},{24,31,254},{24,31,254},{24,29,222},{23,31,196},{23,29,17},{23,29,17},{22,27,26},{19,30,171},{20,27,6},{25,31,2},{25,31,2},{25,31,2},{25,28,2},{31,16,162},{23,29,1},{23,29,1},{21,27,1},{30,24,162}, +{21,27,1},{31,23,200},{26,31,20},{26,29,17},{23,29,17},{31,23,200},{31,27,200},{23,29,17},{0,28,208},{31,27,200},{0,28,208},{24,0,218},{24,0,218},{24,0,218},{24,0,218},{23,29,16},{23,29,16},{23,29,16},{23,26,16},{20,27,5},{20,27,5},{26,31,1034},{25,31,468},{25,31,315},{25,30,238},{25,31,1172},{24,31,291},{23,31,40},{23,29,305},{21,31,837},{20,29,225},{27,31,409}, 
+{26,31,117},{26,31,33},{26,30,42},{28,28,724},{24,31,266},{24,30,18},{20,29,209},{28,28,724},{20,29,209},{25,31,299},{25,31,299},{25,31,299},{25,30,222},{24,31,237},{23,31,24},{23,31,24},{23,28,32},{21,30,178},{21,28,5},{26,31,17},{26,31,17},{26,31,17},{26,29,2},{31,19,162},{24,30,2},{24,30,2},{22,28,1},{31,25,162},{22,28,1},{31,26,200},{27,31,53},{27,30,17}, +{24,30,17},{31,26,200},{30,29,200},{24,30,17},{0,29,208},{30,29,200},{0,29,208},{25,0,218},{25,0,218},{25,0,218},{25,0,218},{23,31,20},{23,31,20},{23,31,20},{23,27,20},{21,28,5},{21,28,5},{27,31,933},{27,31,570},{26,31,377},{26,31,209},{27,31,1054},{25,31,309},{25,31,20},{24,30,193},{23,31,735},{22,30,123},{28,31,338},{28,31,146},{27,31,49},{27,31,16},{29,29,546}, +{26,31,222},{25,31,4},{21,30,113},{31,28,546},{21,30,113},{26,31,377},{26,31,377},{26,31,377},{26,31,209},{26,31,338},{25,31,20},{25,31,20},{24,29,26},{22,31,173},{23,29,10},{27,31,49},{27,31,49},{27,31,49},{27,30,4},{28,28,162},{25,31,4},{25,31,4},{23,29,1},{28,28,162},{23,29,1},{31,28,113},{29,31,52},{28,31,1},{26,31,1},{31,28,113},{30,30,113},{26,31,1}, +{0,30,113},{30,30,113},{0,30,113},{26,0,208},{26,0,208},{26,0,208},{26,0,208},{25,31,16},{25,31,16},{25,31,16},{25,28,16},{23,29,9},{23,29,9},{28,31,779},{27,31,554},{27,31,433},{27,31,224},{28,31,859},{26,31,270},{26,31,74},{25,30,90},{24,31,590},{23,31,59},{29,31,218},{29,31,133},{28,31,82},{28,31,2},{31,27,333},{27,31,146},{27,31,25},{22,31,49},{31,29,333}, +{22,31,49},{27,31,433},{27,31,433},{27,31,433},{27,31,224},{27,31,378},{26,31,74},{26,31,74},{25,30,26},{24,31,229},{23,30,14},{28,31,82},{28,31,82},{28,31,82},{28,31,2},{29,29,162},{27,31,25},{27,31,25},{24,30,1},{31,28,162},{24,30,1},{31,30,25},{30,31,10},{30,31,1},{28,31,1},{31,30,25},{30,31,25},{28,31,1},{0,31,49},{30,31,25},{0,31,49},{27,0,208}, 
+{27,0,208},{27,0,208},{27,0,208},{26,31,25},{26,31,25},{26,31,25},{26,29,16},{23,31,10},{23,31,10},{29,31,684},{28,31,538},{28,31,474},{28,31,282},{28,31,682},{27,31,283},{27,31,162},{26,31,25},{26,31,482},{24,31,5},{30,31,153},{30,31,126},{30,31,117},{29,31,37},{31,29,193},{29,31,108},{28,31,65},{25,31,0},{31,30,193},{25,31,0},{28,31,474},{28,31,474},{28,31,474}, +{28,31,282},{28,31,426},{27,31,162},{27,31,162},{26,31,25},{26,31,286},{24,31,5},{30,31,117},{30,31,117},{30,31,117},{29,31,37},{31,28,145},{28,31,65},{28,31,65},{25,31,0},{30,30,145},{25,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{28,0,218},{28,0,218},{28,0,218},{28,0,218},{27,31,41}, +{27,31,41},{27,31,41},{27,30,16},{24,31,5},{24,31,5},{29,31,460},{29,31,375},{29,31,339},{29,31,254},{29,31,415},{28,31,202},{28,31,138},{27,31,20},{27,31,295},{26,31,26},{30,31,73},{30,31,46},{30,31,37},{30,31,10},{31,30,54},{30,31,27},{30,31,18},{28,31,1},{30,31,54},{28,31,1},{29,31,339},{29,31,339},{29,31,339},{29,31,254},{29,31,294},{28,31,138},{28,31,138}, +{27,31,20},{27,31,174},{26,31,26},{30,31,37},{30,31,37},{30,31,37},{30,31,10},{31,29,45},{30,31,18},{30,31,18},{28,31,1},{31,30,45},{28,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{29,0,218},{29,0,218},{29,0,218},{29,0,218},{28,31,74},{28,31,74},{28,31,74},{27,31,20},{26,31,26}, +{26,31,26},{0,10,421},{0,7,52},{0,5,1},{0,4,162},{0,7,926},{0,5,590},{0,4,283},{0,3,701},{0,3,1005},{0,3,765},{0,10,421},{0,7,52},{0,5,1},{0,4,162},{3,1,925},{0,5,590},{0,4,283},{0,3,701},{3,2,925},{0,3,701},{0,5,0},{0,5,0},{0,5,0},{0,2,9},{0,2,89},{0,2,34},{0,2,34},{0,1,50},{0,1,93},{0,1,54},{0,5,0}, 
+{0,5,0},{0,5,0},{0,2,9},{1,0,89},{0,2,34},{0,2,34},{0,1,50},{2,0,89},{0,1,50},{5,0,421},{0,7,52},{0,5,1},{0,4,162},{5,0,421},{10,0,421},{0,4,162},{0,3,445},{10,0,421},{0,3,445},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,13,421},{0,9,10},{0,6,26}, +{0,5,117},{0,9,1261},{0,6,701},{0,5,286},{0,4,917},{0,4,1390},{0,3,1005},{0,13,421},{0,9,10},{0,6,26},{0,5,117},{4,1,1261},{0,6,701},{0,5,286},{0,4,917},{9,0,1261},{0,4,917},{0,8,1},{0,8,1},{0,8,1},{0,4,1},{0,4,221},{0,3,89},{0,3,89},{0,2,125},{0,2,246},{0,2,150},{0,8,1},{0,8,1},{0,8,1},{0,4,1},{2,0,221}, +{0,3,89},{0,3,89},{0,2,125},{4,0,221},{0,2,125},{6,1,421},{0,9,10},{1,6,1},{0,5,117},{6,1,421},{13,0,421},{0,5,117},{0,4,433},{13,0,421},{0,4,433},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,16,430},{0,11,14},{0,7,110},{0,7,91},{0,11,1514},{0,7,737},{0,6,259}, +{0,4,1002},{0,5,1710},{0,4,1123},{0,16,430},{0,11,14},{1,7,51},{0,7,91},{5,1,1514},{0,7,737},{0,6,259},{0,4,1002},{11,0,1514},{0,4,1002},{0,11,10},{0,11,10},{0,11,10},{0,5,13},{0,6,338},{0,5,104},{0,5,104},{0,3,194},{0,3,402},{0,2,243},{0,11,10},{0,11,10},{0,11,10},{0,5,13},{3,0,338},{0,5,104},{0,5,104},{0,3,194},{6,0,338}, +{0,3,194},{8,0,421},{0,11,5},{2,7,1},{0,7,82},{8,0,421},{16,0,421},{0,7,82},{0,5,433},{16,0,421},{0,5,433},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,1,1},{0,1,1},{0,1,1},{0,1,4},{0,1,8},{0,1,8},{1,17,494},{1,12,78},{1,8,173},{1,8,154},{0,14,1514},{0,9,602},{0,7,146},{0,6,874},{0,7,1875},{0,5,1066},{1,17,430}, +{1,12,14},{2,8,50},{1,8,90},{7,0,1514},{0,9,602},{0,7,146},{0,6,874},{14,0,1514},{0,6,874},{1,12,74},{1,12,74},{1,12,74},{1,6,77},{0,9,338},{0,6,50},{0,6,50},{0,4,137},{0,4,467},{0,3,258},{1,12,10},{1,12,10},{1,12,10},{1,6,13},{4,1,338},{0,6,50},{0,6,50},{0,4,137},{9,0,338},{0,4,137},{9,1,421},{1,12,5},{3,8,1}, 
+{0,8,49},{9,1,421},{19,0,421},{0,8,49},{0,6,433},{19,0,421},{0,6,433},{1,0,73},{1,0,73},{1,0,73},{1,0,73},{0,4,0},{0,4,0},{0,4,0},{0,2,1},{0,2,26},{0,2,26},{1,20,629},{1,14,233},{2,10,385},{1,9,245},{0,17,1517},{0,11,521},{0,9,26},{0,7,769},{0,8,2025},{0,6,1085},{3,16,437},{2,13,17},{3,9,53},{2,9,81},{8,1,1517}, +{0,11,521},{0,9,26},{0,7,769},{17,0,1517},{0,7,769},{1,15,208},{1,15,208},{1,15,208},{1,8,208},{0,12,338},{0,8,8},{0,8,8},{0,5,74},{0,5,579},{0,5,243},{3,11,16},{3,11,16},{3,11,16},{3,7,16},{4,4,338},{0,8,8},{0,8,8},{0,5,74},{12,0,338},{0,5,74},{11,0,421},{2,13,1},{4,9,1},{0,9,10},{11,0,421},{22,0,421},{0,9,10}, +{0,7,445},{22,0,421},{0,7,445},{1,0,208},{1,0,208},{1,0,208},{1,0,208},{0,7,1},{0,7,1},{0,7,1},{0,4,4},{0,3,80},{0,3,80},{2,21,821},{2,15,425},{2,11,645},{2,10,437},{0,20,1514},{0,12,458},{0,10,2},{0,8,689},{0,9,2198},{0,7,1146},{3,19,437},{3,14,17},{4,10,51},{3,10,81},{8,4,1514},{0,12,458},{0,10,2},{0,8,689},{20,0,1514}, +{0,8,689},{2,16,400},{2,16,400},{2,16,400},{2,9,400},{0,15,338},{0,10,1},{0,10,1},{0,6,41},{0,7,717},{0,6,297},{3,14,16},{3,14,16},{3,14,16},{3,8,20},{5,5,338},{0,10,1},{0,10,1},{0,6,41},{15,0,338},{0,6,41},{12,1,421},{3,14,1},{5,10,1},{0,10,1},{12,1,421},{25,0,421},{0,10,1},{0,8,433},{25,0,421},{0,8,433},{2,0,400}, +{2,0,400},{2,0,400},{2,0,400},{0,10,1},{0,10,1},{0,10,1},{0,5,1},{0,4,157},{0,4,157},{3,22,854},{3,15,459},{3,12,666},{3,11,470},{1,21,1515},{1,13,459},{1,11,3},{0,9,651},{0,11,2070},{0,9,891},{4,20,430},{4,15,14},{5,11,51},{4,11,91},{9,5,1514},{0,14,425},{1,11,2},{0,9,602},{23,0,1514},{0,9,602},{3,17,433},{3,17,433},{3,17,433}, +{3,10,433},{1,16,339},{1,11,2},{1,11,2},{1,7,42},{0,8,613},{0,7,173},{4,15,10},{4,15,10},{4,15,10},{4,9,13},{9,0,338},{1,11,1},{1,11,1},{0,7,29},{18,0,338},{0,7,29},{14,0,421},{4,15,5},{6,11,1},{1,11,1},{14,0,421},{28,0,421},{1,11,1},{0,9,433},{28,0,421},{0,9,433},{3,0,433},{3,0,433},{3,0,433},{3,0,433},{1,11,2}, 
+{1,11,2},{1,11,2},{1,6,2},{0,6,97},{0,6,97},{4,23,866},{4,16,461},{4,12,670},{4,12,494},{2,22,1515},{2,14,459},{2,12,3},{1,10,651},{0,12,1913},{0,10,677},{5,21,430},{5,16,14},{6,12,50},{5,12,90},{13,0,1514},{1,15,425},{2,12,2},{0,10,533},{26,0,1514},{0,10,533},{4,18,446},{4,18,446},{4,18,446},{4,11,446},{2,17,339},{2,12,3},{2,12,3}, +{2,8,42},{0,10,500},{0,8,65},{5,16,10},{5,16,10},{5,16,10},{5,10,13},{10,1,338},{2,12,2},{2,12,2},{0,8,16},{21,0,338},{0,8,16},{15,1,421},{5,16,5},{7,12,1},{2,12,1},{15,1,421},{31,0,421},{2,12,1},{0,10,433},{31,0,421},{0,10,433},{4,0,445},{4,0,445},{4,0,445},{4,0,445},{2,12,2},{2,12,2},{2,12,2},{2,7,2},{0,8,49}, +{0,8,49},{5,24,854},{5,18,458},{5,14,678},{5,13,470},{3,23,1517},{3,15,461},{3,13,5},{2,11,653},{0,14,1758},{0,11,530},{7,20,437},{6,17,17},{7,13,53},{6,13,81},{14,1,1517},{2,16,422},{3,13,5},{0,11,494},{29,0,1517},{0,11,494},{5,19,433},{5,19,433},{5,19,433},{5,12,433},{3,18,340},{3,13,4},{3,13,4},{3,9,41},{0,12,419},{0,10,19},{7,15,16}, +{7,15,16},{7,15,16},{7,11,16},{12,0,338},{3,13,4},{3,13,4},{0,10,10},{24,0,338},{0,10,10},{17,0,421},{6,17,1},{8,13,1},{3,13,1},{17,0,421},{30,2,421},{3,13,1},{0,11,445},{30,2,421},{0,11,445},{5,0,433},{5,0,433},{5,0,433},{5,0,433},{3,14,1},{3,14,1},{3,14,1},{3,8,1},{0,10,10},{0,10,10},{6,25,854},{6,19,458},{6,15,678}, +{6,14,470},{4,24,1515},{4,16,459},{4,14,3},{3,12,666},{0,16,1658},{0,12,459},{7,23,437},{7,18,17},{8,14,51},{7,14,81},{16,0,1514},{3,17,422},{4,14,2},{0,12,458},{24,4,1514},{0,12,458},{6,20,433},{6,20,433},{6,20,433},{6,13,433},{4,19,339},{4,14,2},{4,14,2},{4,10,42},{0,13,365},{0,11,14},{7,18,16},{7,18,16},{7,18,16},{7,12,20},{13,1,338}, +{4,14,1},{4,14,1},{0,11,10},{27,0,338},{0,11,10},{18,1,421},{7,18,1},{9,14,1},{4,14,1},{18,1,421},{31,3,421},{4,14,1},{0,12,433},{31,3,421},{0,12,433},{6,0,433},{6,0,433},{6,0,433},{6,0,433},{4,14,2},{4,14,2},{4,14,2},{4,9,2},{0,11,5},{0,11,5},{7,26,854},{7,19,459},{7,16,666},{7,15,470},{5,25,1515},{5,17,459},{5,15,3}, 
+{4,13,651},{0,17,1577},{1,13,459},{8,24,430},{8,19,14},{9,15,51},{8,15,91},{17,1,1514},{4,18,425},{5,15,2},{0,13,437},{27,4,1514},{0,13,437},{7,21,433},{7,21,433},{7,21,433},{7,14,433},{5,20,339},{5,15,2},{5,15,2},{5,11,42},{0,15,339},{1,12,21},{8,19,10},{8,19,10},{8,19,10},{8,13,13},{15,0,338},{5,15,1},{5,15,1},{1,12,17},{30,0,338}, +{1,12,17},{20,0,421},{8,19,5},{10,15,1},{5,15,1},{20,0,421},{30,5,421},{5,15,1},{0,13,433},{30,5,421},{0,13,433},{7,0,433},{7,0,433},{7,0,433},{7,0,433},{5,15,2},{5,15,2},{5,15,2},{5,10,2},{1,12,5},{1,12,5},{8,27,866},{8,20,461},{8,16,670},{8,16,494},{6,26,1515},{6,18,459},{6,16,3},{5,14,651},{0,19,1530},{2,14,459},{9,25,430}, +{9,20,14},{10,16,50},{9,16,90},{19,0,1514},{5,19,425},{6,16,2},{0,14,434},{30,4,1514},{0,14,434},{8,22,446},{8,22,446},{8,22,446},{8,15,446},{6,21,339},{6,16,3},{6,16,3},{6,12,42},{1,16,339},{2,13,21},{9,20,10},{9,20,10},{9,20,10},{9,14,13},{16,1,338},{6,16,2},{6,16,2},{4,12,16},{31,1,338},{4,12,16},{21,1,421},{9,20,5},{11,16,1}, +{6,16,1},{21,1,421},{31,6,421},{6,16,1},{0,14,433},{31,6,421},{0,14,433},{8,0,445},{8,0,445},{8,0,445},{8,0,445},{6,16,2},{6,16,2},{6,16,2},{6,11,2},{2,13,5},{2,13,5},{9,28,854},{9,22,458},{9,18,678},{9,17,470},{7,27,1517},{7,19,461},{7,17,5},{6,15,653},{1,20,1526},{3,15,461},{11,24,437},{10,21,17},{11,17,53},{10,17,81},{20,1,1517}, +{6,20,422},{7,17,5},{2,15,446},{31,5,1517},{2,15,446},{9,23,433},{9,23,433},{9,23,433},{9,16,433},{7,22,340},{7,17,4},{7,17,4},{7,13,41},{2,17,340},{4,14,19},{11,19,16},{11,19,16},{11,19,16},{11,15,16},{16,4,338},{7,17,4},{7,17,4},{4,14,10},{28,4,338},{4,14,10},{23,0,421},{10,21,1},{12,17,1},{7,17,1},{23,0,421},{30,8,421},{7,17,1}, 
+{0,15,445},{30,8,421},{0,15,445},{9,0,433},{9,0,433},{9,0,433},{9,0,433},{7,18,1},{7,18,1},{7,18,1},{7,12,1},{4,14,10},{4,14,10},{10,29,854},{10,23,458},{10,19,678},{10,18,470},{8,28,1515},{8,20,459},{8,18,3},{7,16,666},{2,21,1526},{4,16,459},{11,27,437},{11,22,17},{12,18,51},{11,18,81},{20,4,1514},{7,21,422},{8,18,2},{2,16,434},{28,8,1514}, +{2,16,434},{10,24,433},{10,24,433},{10,24,433},{10,17,433},{8,23,339},{8,18,2},{8,18,2},{8,14,42},{3,18,340},{4,15,14},{11,22,16},{11,22,16},{11,22,16},{11,16,20},{17,5,338},{8,18,1},{8,18,1},{4,15,10},{31,4,338},{4,15,10},{24,1,421},{11,22,1},{13,18,1},{8,18,1},{24,1,421},{31,9,421},{8,18,1},{0,16,433},{31,9,421},{0,16,433},{10,0,433}, +{10,0,433},{10,0,433},{10,0,433},{8,18,2},{8,18,2},{8,18,2},{8,13,2},{4,15,5},{4,15,5},{11,30,854},{11,23,459},{11,20,666},{11,19,470},{9,29,1515},{9,21,459},{9,19,3},{8,17,651},{3,22,1526},{5,17,459},{12,28,430},{12,23,14},{13,19,51},{12,19,91},{21,5,1514},{8,22,425},{9,19,2},{3,17,434},{31,8,1514},{3,17,434},{11,25,433},{11,25,433},{11,25,433}, +{11,18,433},{9,24,339},{9,19,2},{9,19,2},{9,15,42},{4,19,339},{5,16,21},{12,23,10},{12,23,10},{12,23,10},{12,17,13},{21,0,338},{9,19,1},{9,19,1},{5,16,17},{30,6,338},{5,16,17},{26,0,421},{12,23,5},{14,19,1},{9,19,1},{26,0,421},{30,11,421},{9,19,1},{0,17,433},{30,11,421},{0,17,433},{11,0,433},{11,0,433},{11,0,433},{11,0,433},{9,19,2}, +{9,19,2},{9,19,2},{9,14,2},{5,16,5},{5,16,5},{12,31,866},{12,24,461},{12,20,670},{12,20,494},{10,30,1515},{10,22,459},{10,20,3},{9,18,651},{4,23,1530},{6,18,459},{13,29,430},{13,24,14},{14,20,50},{13,20,90},{25,0,1514},{9,23,425},{10,20,2},{4,18,434},{30,10,1514},{4,18,434},{12,26,446},{12,26,446},{12,26,446},{12,19,446},{10,25,339},{10,20,3},{10,20,3}, 
+{10,16,42},{5,20,339},{6,17,21},{13,24,10},{13,24,10},{13,24,10},{13,18,13},{22,1,338},{10,20,2},{10,20,2},{8,16,16},{31,7,338},{8,16,16},{27,1,421},{13,24,5},{15,20,1},{10,20,1},{27,1,421},{31,12,421},{10,20,1},{0,18,433},{31,12,421},{0,18,433},{12,0,445},{12,0,445},{12,0,445},{12,0,445},{10,20,2},{10,20,2},{10,20,2},{10,15,2},{6,17,5}, +{6,17,5},{13,31,878},{13,26,458},{13,22,678},{13,21,470},{11,31,1517},{11,23,461},{11,21,5},{10,19,653},{5,24,1526},{7,19,461},{15,28,437},{14,25,17},{15,21,53},{14,21,81},{26,1,1517},{10,24,422},{11,21,5},{6,19,446},{31,11,1517},{6,19,446},{13,27,433},{13,27,433},{13,27,433},{13,20,433},{11,26,340},{11,21,4},{11,21,4},{11,17,41},{6,21,340},{8,18,19},{15,23,16}, +{15,23,16},{15,23,16},{15,19,16},{24,0,338},{11,21,4},{11,21,4},{8,18,10},{24,12,338},{8,18,10},{29,0,421},{14,25,1},{16,21,1},{11,21,1},{29,0,421},{30,14,421},{11,21,1},{0,19,445},{30,14,421},{0,19,445},{13,0,433},{13,0,433},{13,0,433},{13,0,433},{11,22,1},{11,22,1},{11,22,1},{11,16,1},{8,18,10},{8,18,10},{14,31,938},{14,27,458},{14,23,678}, +{14,22,470},{12,31,1542},{12,24,459},{12,22,3},{11,20,666},{6,25,1526},{8,20,459},{15,31,437},{15,26,17},{16,22,51},{15,22,81},{28,0,1514},{11,25,422},{12,22,2},{6,20,434},{24,16,1514},{6,20,434},{14,28,433},{14,28,433},{14,28,433},{14,21,433},{12,27,339},{12,22,2},{12,22,2},{12,18,42},{7,22,340},{8,19,14},{15,26,16},{15,26,16},{15,26,16},{15,20,20},{25,1,338}, +{12,22,1},{12,22,1},{8,19,10},{27,12,338},{8,19,10},{30,1,421},{15,26,1},{17,22,1},{12,22,1},{30,1,421},{31,15,421},{12,22,1},{0,20,433},{31,15,421},{0,20,433},{14,0,433},{14,0,433},{14,0,433},{14,0,433},{12,22,2},{12,22,2},{12,22,2},{12,17,2},{8,19,5},{8,19,5},{15,31,998},{15,27,459},{15,24,666},{15,23,470},{14,31,1598},{13,25,459},{13,23,3}, 
+{12,21,651},{7,26,1526},{9,21,459},{16,31,442},{16,27,14},{17,23,51},{16,23,91},{29,1,1514},{12,26,425},{13,23,2},{7,21,434},{27,16,1514},{7,21,434},{15,29,433},{15,29,433},{15,29,433},{15,22,433},{13,28,339},{13,23,2},{13,23,2},{13,19,42},{8,23,339},{9,20,21},{16,27,10},{16,27,10},{16,27,10},{16,21,13},{27,0,338},{13,23,1},{13,23,1},{9,20,17},{30,12,338}, +{9,20,17},{31,2,421},{16,27,5},{18,23,1},{13,23,1},{31,2,421},{30,17,421},{13,23,1},{0,21,433},{30,17,421},{0,21,433},{15,0,433},{15,0,433},{15,0,433},{15,0,433},{13,23,2},{13,23,2},{13,23,2},{13,18,2},{9,20,5},{9,20,5},{16,31,1086},{16,28,461},{16,24,670},{16,24,494},{15,31,1622},{14,26,459},{14,24,3},{13,22,651},{8,27,1530},{10,22,459},{18,31,446}, +{17,28,14},{18,24,50},{17,24,90},{31,0,1514},{13,27,425},{14,24,2},{8,22,434},{30,16,1514},{8,22,434},{16,30,446},{16,30,446},{16,30,446},{16,23,446},{14,29,339},{14,24,3},{14,24,3},{14,20,42},{9,24,339},{10,21,21},{17,28,10},{17,28,10},{17,28,10},{17,22,13},{28,1,338},{14,24,2},{14,24,2},{12,20,16},{31,13,338},{12,20,16},{31,5,421},{17,28,5},{19,24,1}, +{14,24,1},{31,5,421},{31,18,421},{14,24,1},{0,22,433},{31,18,421},{0,22,433},{16,0,445},{16,0,445},{16,0,445},{16,0,445},{14,24,2},{14,24,2},{14,24,2},{14,19,2},{10,21,5},{10,21,5},{18,31,1242},{17,30,458},{17,26,678},{17,25,470},{16,31,1703},{15,27,461},{15,25,5},{14,23,653},{9,28,1526},{11,23,461},{19,31,461},{18,29,17},{19,25,53},{18,25,81},{31,3,1517}, +{14,28,422},{15,25,5},{10,23,446},{31,17,1517},{10,23,446},{17,31,433},{17,31,433},{17,31,433},{17,24,433},{15,30,340},{15,25,4},{15,25,4},{15,21,41},{10,25,340},{12,22,19},{19,27,16},{19,27,16},{19,27,16},{19,23,16},{28,4,338},{15,25,4},{15,25,4},{12,22,10},{28,16,338},{12,22,10},{31,8,421},{18,29,1},{20,25,1},{15,25,1},{31,8,421},{30,20,421},{15,25,1}, 
+{0,23,445},{30,20,421},{0,23,445},{17,0,433},{17,0,433},{17,0,433},{17,0,433},{15,26,1},{15,26,1},{15,26,1},{15,20,1},{12,22,10},{12,22,10},{19,31,1326},{18,31,458},{18,27,678},{18,26,470},{17,31,1838},{16,28,459},{16,26,3},{15,24,666},{10,29,1526},{12,24,459},{20,31,506},{19,30,17},{20,26,51},{19,26,81},{28,12,1514},{15,29,422},{16,26,2},{10,24,434},{28,20,1514}, +{10,24,434},{18,31,442},{18,31,442},{18,31,442},{18,25,433},{16,31,339},{16,26,2},{16,26,2},{16,22,42},{11,26,340},{12,23,14},{19,30,16},{19,30,16},{19,30,16},{19,24,20},{29,5,338},{16,26,1},{16,26,1},{12,23,10},{31,16,338},{12,23,10},{31,11,421},{19,30,1},{21,26,1},{16,26,1},{31,11,421},{31,21,421},{16,26,1},{0,24,433},{31,21,421},{0,24,433},{18,0,433}, +{18,0,433},{18,0,433},{18,0,433},{16,26,2},{16,26,2},{16,26,2},{16,21,2},{12,23,5},{12,23,5},{20,31,1470},{19,31,459},{19,28,666},{19,27,470},{19,31,1911},{17,29,459},{17,27,3},{16,25,651},{11,30,1526},{13,25,459},{21,31,590},{20,31,14},{21,27,51},{20,27,91},{29,13,1514},{16,30,425},{17,27,2},{11,25,434},{31,20,1514},{11,25,434},{19,31,458},{19,31,458},{19,31,458}, +{19,26,433},{17,31,357},{17,27,2},{17,27,2},{17,23,42},{12,27,339},{13,24,21},{20,31,10},{20,31,10},{20,31,10},{20,25,13},{31,4,338},{17,27,1},{17,27,1},{13,24,17},{30,18,338},{13,24,17},{31,14,421},{20,31,5},{22,27,1},{17,27,1},{31,14,421},{30,23,421},{17,27,1},{0,25,433},{30,23,421},{0,25,433},{19,0,433},{19,0,433},{19,0,433},{19,0,433},{17,27,2}, +{17,27,2},{17,27,2},{17,22,2},{13,24,5},{13,24,5},{21,31,1650},{20,31,530},{20,28,670},{20,28,494},{20,31,2030},{18,30,459},{18,28,3},{17,26,651},{12,31,1530},{14,26,459},{23,31,650},{21,31,29},{22,28,50},{21,28,90},{31,12,1514},{17,31,425},{18,28,2},{12,26,434},{30,22,1514},{12,26,434},{20,31,494},{20,31,494},{20,31,494},{20,27,446},{18,31,411},{18,28,3},{18,28,3}, 
+{18,24,42},{13,28,339},{14,25,21},{21,31,13},{21,31,13},{21,31,13},{21,26,13},{31,7,338},{18,28,2},{18,28,2},{16,24,16},{31,19,338},{16,24,16},{31,17,421},{22,31,10},{23,28,1},{18,28,1},{31,17,421},{31,24,421},{18,28,1},{0,26,433},{31,24,421},{0,26,433},{20,0,445},{20,0,445},{20,0,445},{20,0,445},{18,28,2},{18,28,2},{18,28,2},{18,23,2},{14,25,5}, +{14,25,5},{22,31,1902},{21,31,723},{21,30,678},{21,29,470},{21,31,2235},{19,31,461},{19,29,5},{18,27,653},{14,31,1587},{15,27,461},{24,31,734},{23,31,65},{23,29,53},{22,29,81},{31,15,1517},{19,31,461},{19,29,5},{14,27,446},{31,23,1517},{14,27,446},{21,31,554},{21,31,554},{21,31,554},{21,28,433},{20,31,437},{19,29,4},{19,29,4},{19,25,41},{14,29,340},{16,26,19},{23,31,16}, +{23,31,16},{23,31,16},{23,27,16},{28,16,338},{19,29,4},{19,29,4},{16,26,10},{24,24,338},{16,26,10},{31,20,421},{23,31,49},{24,29,1},{19,29,1},{31,20,421},{30,26,421},{19,29,1},{0,27,445},{30,26,421},{0,27,445},{21,0,433},{21,0,433},{21,0,433},{21,0,433},{19,30,1},{19,30,1},{19,30,1},{19,24,1},{16,26,10},{16,26,10},{23,31,2074},{23,31,930},{22,31,678}, +{22,30,470},{23,31,2382},{20,31,570},{20,30,3},{19,28,666},{16,31,1703},{16,28,459},{25,31,854},{24,31,173},{24,30,51},{23,30,81},{28,24,1514},{21,31,554},{20,30,2},{14,28,434},{24,28,1514},{14,28,434},{22,31,629},{22,31,629},{22,31,629},{22,29,433},{21,31,491},{20,30,2},{20,30,2},{20,26,42},{15,30,340},{16,27,14},{24,31,29},{24,31,29},{24,31,29},{23,28,20},{29,17,338}, +{20,30,1},{20,30,1},{16,27,10},{27,24,338},{16,27,10},{31,23,421},{25,31,109},{25,30,1},{20,30,1},{31,23,421},{31,27,421},{20,30,1},{0,28,433},{31,27,421},{0,28,433},{22,0,433},{22,0,433},{22,0,433},{22,0,433},{20,30,2},{20,30,2},{20,30,2},{20,25,2},{16,27,5},{16,27,5},{24,31,2229},{24,31,1146},{23,31,689},{23,31,469},{24,31,2476},{22,31,731},{21,31,2}, 
+{20,29,618},{18,31,1805},{17,29,426},{26,31,953},{25,31,339},{25,31,50},{24,30,82},{29,25,1459},{23,31,620},{21,31,1},{15,29,401},{27,28,1459},{15,29,401},{23,31,689},{23,31,689},{23,31,689},{23,30,433},{22,31,581},{21,31,2},{21,31,2},{21,27,42},{16,31,339},{17,28,21},{25,31,50},{25,31,50},{25,31,50},{24,29,13},{31,16,338},{21,31,1},{21,31,1},{17,28,17},{30,24,338}, +{17,28,17},{31,26,392},{27,31,157},{26,31,0},{21,31,0},{31,26,392},{30,29,392},{21,31,0},{0,29,400},{30,29,392},{0,29,400},{23,0,433},{23,0,433},{23,0,433},{23,0,433},{21,31,2},{21,31,2},{21,31,2},{21,26,2},{17,28,5},{17,28,5},{25,31,1943},{24,31,1130},{24,31,769},{24,31,446},{25,31,2103},{23,31,573},{22,31,42},{22,29,373},{20,31,1481},{18,30,234},{27,31,657}, +{26,31,285},{26,31,89},{25,31,18},{31,23,1064},{24,31,426},{23,31,8},{16,30,209},{31,27,1064},{16,30,209},{24,31,769},{24,31,769},{24,31,769},{24,31,446},{23,31,661},{22,31,42},{22,31,42},{22,28,42},{18,31,365},{18,29,21},{26,31,89},{26,31,89},{26,31,89},{25,30,13},{31,19,338},{23,31,8},{23,31,8},{20,28,16},{31,25,338},{20,28,16},{31,27,202},{28,31,80},{27,31,4}, +{24,31,1},{31,27,202},{31,29,202},{24,31,1},{0,30,208},{31,29,202},{0,30,208},{24,0,445},{24,0,445},{24,0,445},{24,0,445},{22,31,17},{22,31,17},{22,31,17},{22,27,2},{18,29,5},{18,29,5},{27,31,1710},{26,31,1126},{25,31,917},{25,31,469},{26,31,1779},{24,31,507},{24,31,146},{23,30,154},{22,31,1221},{20,30,83},{28,31,450},{27,31,258},{27,31,137},{27,31,16},{29,29,722}, +{26,31,286},{25,31,52},{20,30,74},{31,28,722},{20,30,74},{25,31,917},{25,31,917},{25,31,917},{25,31,469},{25,31,789},{24,31,146},{24,31,146},{23,29,41},{20,31,446},{20,30,19},{27,31,137},{27,31,137},{27,31,137},{27,31,16},{28,28,338},{25,31,52},{25,31,52},{20,30,10},{28,28,338},{20,30,10},{31,29,61},{30,31,34},{29,31,0},{27,31,0},{31,29,61},{31,30,61},{27,31,0}, 
+{0,30,73},{31,30,61},{0,30,73},{25,0,433},{25,0,433},{25,0,433},{25,0,433},{23,31,49},{23,31,49},{23,31,49},{23,28,1},{20,30,10},{20,30,10},{27,31,1486},{27,31,1123},{27,31,1002},{26,31,554},{27,31,1519},{25,31,582},{25,31,293},{24,30,106},{23,31,1090},{20,31,14},{29,31,354},{28,31,258},{28,31,194},{28,31,50},{31,27,509},{27,31,234},{27,31,113},{20,31,10},{31,29,509}, +{20,31,10},{27,31,1002},{27,31,1002},{27,31,1002},{26,31,554},{26,31,915},{25,31,293},{25,31,293},{24,30,42},{22,31,564},{20,31,14},{28,31,194},{28,31,194},{28,31,194},{28,31,50},{29,29,338},{27,31,113},{27,31,113},{20,31,10},{31,28,338},{20,31,10},{31,30,9},{31,31,9},{30,31,9},{30,31,0},{31,30,9},{30,31,9},{30,31,0},{0,31,9},{30,31,9},{0,31,9},{26,0,433}, +{26,0,433},{26,0,433},{26,0,433},{24,31,82},{24,31,82},{24,31,82},{24,29,2},{20,31,5},{20,31,5},{28,31,1197},{28,31,1005},{27,31,917},{27,31,554},{28,31,1213},{26,31,522},{26,31,326},{25,31,17},{24,31,870},{22,31,16},{30,31,241},{29,31,182},{29,31,146},{29,31,61},{31,28,297},{28,31,153},{28,31,89},{23,31,1},{30,30,297},{23,31,1},{27,31,917},{27,31,917},{27,31,917}, +{27,31,554},{27,31,774},{26,31,326},{26,31,326},{25,31,17},{24,31,509},{22,31,16},{29,31,146},{29,31,146},{29,31,146},{29,31,61},{31,27,221},{28,31,89},{28,31,89},{23,31,1},{31,29,221},{23,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{27,0,433},{27,0,433},{27,0,433},{27,0,433},{26,31,130}, +{26,31,130},{26,31,130},{25,30,2},{22,31,16},{22,31,16},{29,31,927},{28,31,765},{28,31,701},{28,31,509},{28,31,845},{27,31,404},{27,31,283},{26,31,2},{26,31,589},{24,31,52},{30,31,97},{30,31,70},{30,31,61},{30,31,34},{31,30,118},{30,31,67},{29,31,40},{26,31,1},{30,31,118},{26,31,1},{28,31,701},{28,31,701},{28,31,701},{28,31,509},{28,31,589},{27,31,283},{27,31,283}, 
+{26,31,2},{25,31,386},{24,31,52},{30,31,61},{30,31,61},{30,31,61},{30,31,34},{31,29,85},{29,31,40},{29,31,40},{26,31,1},{31,30,85},{26,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{28,0,445},{28,0,445},{28,0,445},{28,0,445},{27,31,162},{27,31,162},{27,31,162},{26,31,2},{24,31,52}, +{24,31,52},{0,14,884},{0,10,117},{0,7,10},{0,6,317},{0,10,1899},{0,6,1236},{0,6,573},{0,4,1438},{0,4,2065},{0,4,1559},{0,14,884},{0,10,117},{0,7,10},{0,6,317},{5,0,1899},{0,6,1236},{0,6,573},{0,4,1438},{10,0,1899},{0,4,1438},{0,7,1},{0,7,1},{0,7,1},{0,3,4},{0,3,164},{0,3,68},{0,3,68},{0,2,104},{0,2,189},{0,1,129},{0,7,1}, +{0,7,1},{0,7,1},{0,3,4},{2,0,164},{0,3,68},{0,3,68},{0,2,104},{3,0,164},{0,2,104},{7,0,884},{0,10,117},{0,7,10},{0,6,317},{7,0,884},{14,0,884},{0,6,317},{0,5,890},{14,0,884},{0,5,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,17,884},{0,12,53},{0,8,20}, +{0,7,265},{0,11,2360},{0,8,1384},{0,7,626},{0,5,1683},{0,5,2580},{0,5,1852},{0,17,884},{0,12,53},{0,8,20},{0,7,265},{6,0,2356},{0,8,1384},{0,7,626},{0,5,1683},{10,1,2356},{0,5,1683},{0,10,1},{0,10,1},{0,10,1},{0,5,1},{0,5,338},{0,4,137},{0,4,137},{0,2,200},{0,2,381},{0,2,225},{0,10,1},{0,10,1},{0,10,1},{0,5,1},{2,1,338}, +{0,4,137},{0,4,137},{0,2,200},{5,0,338},{0,2,200},{8,1,884},{0,12,53},{1,8,5},{0,7,265},{8,1,884},{17,0,884},{0,7,265},{0,6,890},{17,0,884},{0,6,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,20,882},{0,14,16},{0,10,85},{0,8,200},{0,14,2904},{0,9,1530},{0,8,684}, +{0,6,1978},{0,6,3220},{0,5,2172},{0,20,882},{0,14,16},{1,9,84},{0,8,200},{7,0,2904},{0,9,1530},{0,8,684},{0,6,1978},{14,0,2904},{0,6,1978},{0,12,1},{0,12,1},{0,12,1},{0,6,4},{0,6,580},{0,5,218},{0,5,218},{0,3,356},{0,3,644},{0,3,420},{0,12,1},{0,12,1},{0,12,1},{0,6,4},{3,0,580},{0,5,218},{0,5,218},{0,3,356},{6,0,580}, 
+{0,3,356},{8,4,882},{0,14,16},{2,9,5},{0,8,200},{8,4,882},{20,0,882},{0,8,200},{0,7,890},{20,0,882},{0,7,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,23,918},{0,15,41},{1,11,153},{0,10,184},{0,16,3048},{0,11,1476},{0,9,473},{0,6,1950},{0,7,3517},{0,6,2206},{1,21,886}, +{1,15,20},{1,11,89},{0,10,184},{8,0,3048},{0,11,1476},{0,9,473},{0,6,1950},{16,0,3048},{0,6,1950},{0,15,37},{0,15,37},{0,15,37},{0,8,40},{0,9,648},{0,7,185},{0,7,185},{0,4,337},{0,4,777},{0,4,458},{1,13,5},{1,13,5},{1,13,5},{1,7,8},{4,1,648},{0,7,185},{0,7,185},{0,4,337},{9,0,648},{0,4,337},{9,5,882},{0,15,5},{3,10,5}, +{0,10,148},{9,5,882},{23,0,882},{0,10,148},{0,8,900},{23,0,882},{0,8,900},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,2,1},{0,2,1},{0,2,1},{0,1,1},{0,1,5},{0,1,5},{1,24,997},{1,17,123},{1,12,318},{1,11,243},{0,19,3051},{0,12,1278},{0,10,274},{0,8,1795},{0,9,3672},{0,7,2222},{2,22,885},{2,16,10},{2,12,101},{1,11,179},{10,0,3051}, +{0,12,1278},{0,10,274},{0,8,1795},{19,0,3051},{0,8,1795},{1,17,114},{1,17,114},{1,17,114},{1,9,114},{0,12,648},{0,9,85},{0,9,85},{0,5,244},{0,5,889},{0,5,413},{2,15,1},{2,15,1},{2,15,1},{2,8,5},{4,4,648},{0,9,85},{0,9,85},{0,5,244},{12,0,648},{0,5,244},{13,0,884},{2,16,9},{4,11,10},{0,11,90},{13,0,884},{26,0,884},{0,11,90}, +{0,9,890},{26,0,884},{0,9,890},{1,0,113},{1,0,113},{1,0,113},{1,0,113},{0,6,1},{0,6,1},{0,6,1},{0,3,1},{0,2,50},{0,2,50},{1,27,1173},{1,18,314},{2,13,510},{1,12,374},{0,22,3051},{0,14,1139},{0,12,153},{0,9,1630},{0,10,3924},{0,8,2199},{3,23,885},{3,17,10},{3,13,101},{2,12,197},{11,0,3051},{0,14,1139},{0,12,153},{0,9,1630},{22,0,3051}, +{0,9,1630},{1,20,290},{1,20,290},{1,20,290},{1,10,293},{0,15,648},{0,10,41},{0,10,41},{0,6,181},{0,7,1027},{0,6,437},{3,16,2},{3,16,2},{3,16,2},{3,9,5},{5,5,648},{0,10,41},{0,10,41},{0,6,181},{15,0,648},{0,6,181},{14,1,884},{3,17,9},{5,12,5},{0,12,53},{14,1,884},{29,0,884},{0,12,53},{0,10,890},{29,0,884},{0,10,890},{1,0,289}, 
+{1,0,289},{1,0,289},{1,0,289},{0,8,1},{0,8,1},{0,8,1},{0,4,1},{0,3,113},{0,3,113},{2,28,1365},{2,19,506},{3,14,830},{2,13,566},{0,25,3051},{0,16,1051},{0,13,36},{0,10,1483},{0,12,4164},{0,9,2174},{4,24,886},{4,18,20},{4,14,89},{3,13,197},{12,1,3051},{0,16,1051},{0,13,36},{0,10,1483},{25,0,3051},{0,10,1483},{2,21,482},{2,21,482},{2,21,482}, +{2,11,485},{0,18,648},{0,12,5},{0,12,5},{0,7,149},{0,8,1182},{0,7,510},{4,16,5},{4,16,5},{4,16,5},{4,10,8},{9,0,648},{0,12,5},{0,12,5},{0,7,149},{18,0,648},{0,7,149},{16,0,882},{3,19,10},{6,13,5},{0,13,20},{16,0,882},{24,4,882},{0,13,20},{0,11,890},{24,4,882},{0,11,890},{2,0,481},{2,0,481},{2,0,481},{2,0,481},{0,11,1}, +{0,11,1},{0,11,1},{0,6,1},{0,5,185},{0,5,185},{2,31,1669},{3,20,818},{3,15,1161},{2,14,838},{0,28,3048},{0,17,949},{0,14,6},{0,11,1395},{0,13,4381},{0,10,2228},{5,25,886},{5,19,20},{5,15,89},{4,14,184},{12,4,3048},{0,17,949},{0,14,6},{0,11,1395},{28,0,3048},{0,11,1395},{2,24,786},{2,24,786},{2,24,786},{2,13,786},{0,21,648},{0,14,2},{0,14,2}, +{0,9,101},{0,9,1352},{0,8,590},{5,17,5},{5,17,5},{5,17,5},{5,11,8},{10,1,648},{0,14,2},{0,14,2},{0,9,101},{21,0,648},{0,9,101},{17,1,882},{4,19,5},{7,14,5},{0,14,5},{17,1,882},{27,4,882},{0,14,5},{0,12,900},{27,4,882},{0,12,900},{2,0,785},{2,0,785},{2,0,785},{2,0,785},{0,14,1},{0,14,1},{0,14,1},{0,7,1},{0,6,305}, +{0,6,305},{3,31,1814},{3,22,968},{4,16,1314},{3,15,945},{1,29,3055},{0,19,936},{1,15,17},{0,12,1314},{0,15,4321},{0,12,2007},{6,26,885},{6,20,10},{6,16,101},{5,15,179},{15,2,3051},{0,19,900},{1,15,13},{0,12,1278},{31,0,3051},{0,12,1278},{3,25,900},{3,25,900},{3,25,900},{3,14,900},{1,22,654},{1,15,8},{1,15,8},{0,10,76},{0,11,1296},{0,9,425},{6,19,1}, 
+{6,19,1},{6,19,1},{6,12,5},{12,0,648},{1,15,4},{1,15,4},{0,10,40},{24,0,648},{0,10,40},{19,0,884},{6,20,9},{8,15,10},{1,15,9},{19,0,884},{30,4,884},{1,15,9},{0,13,890},{30,4,884},{0,13,890},{3,0,900},{3,0,900},{3,0,900},{3,0,900},{1,16,5},{1,16,5},{1,16,5},{1,8,8},{0,7,269},{0,7,269},{5,31,1838},{4,23,948},{5,17,1314}, +{4,16,943},{2,30,3055},{1,20,936},{2,16,13},{1,13,1314},{0,16,4056},{0,13,1620},{7,27,885},{7,21,10},{7,17,101},{6,16,197},{17,0,3051},{0,21,891},{2,16,9},{0,13,1179},{30,2,3051},{0,13,1179},{4,26,891},{4,26,891},{4,26,891},{4,15,891},{2,23,654},{2,16,9},{2,16,9},{1,11,76},{0,13,1107},{0,10,273},{7,20,2},{7,20,2},{7,20,2},{7,13,5},{13,1,648}, +{3,15,5},{3,15,5},{0,11,20},{27,0,648},{0,11,20},{20,1,884},{7,21,9},{9,16,5},{2,16,5},{20,1,884},{31,5,884},{2,16,5},{0,14,890},{31,5,884},{0,14,890},{4,0,890},{4,0,890},{4,0,890},{4,0,890},{2,17,5},{2,17,5},{2,17,5},{2,9,8},{0,9,149},{0,9,149},{6,31,1868},{5,24,948},{6,18,1314},{5,17,943},{3,31,3055},{2,21,936},{3,17,13}, +{2,14,1314},{0,18,3825},{0,14,1354},{8,28,886},{8,22,20},{8,18,89},{7,17,197},{18,1,3051},{1,22,891},{3,17,9},{0,14,1098},{31,3,3051},{0,14,1098},{5,27,891},{5,27,891},{5,27,891},{5,16,891},{3,24,652},{3,17,9},{3,17,9},{2,12,86},{0,14,976},{0,12,126},{8,20,5},{8,20,5},{8,20,5},{8,14,8},{15,0,648},{4,16,5},{4,16,5},{0,12,5},{30,0,648}, +{0,12,5},{20,4,882},{7,23,10},{10,17,5},{3,17,5},{20,4,882},{28,8,882},{3,17,5},{0,15,890},{28,8,882},{0,15,890},{5,0,890},{5,0,890},{5,0,890},{5,0,890},{3,18,5},{3,18,5},{3,18,5},{3,10,8},{0,11,80},{0,11,80},{7,31,1908},{6,25,948},{7,19,1314},{6,18,943},{4,31,3084},{3,22,936},{4,18,15},{3,15,1314},{0,19,3640},{0,15,1175},{9,29,886}, 
+{9,23,20},{9,19,89},{8,18,184},{20,0,3048},{2,23,891},{4,18,6},{0,15,1054},{24,8,3048},{0,15,1054},{6,28,891},{6,28,891},{6,28,891},{6,17,891},{4,25,657},{4,18,11},{4,18,11},{3,13,86},{0,16,852},{0,13,27},{9,21,5},{9,21,5},{9,21,5},{9,15,8},{16,1,648},{4,18,2},{4,18,2},{0,13,2},{31,1,648},{0,13,2},{21,5,882},{8,23,5},{11,18,5}, +{4,18,5},{21,5,882},{31,8,882},{4,18,5},{0,16,900},{31,8,882},{0,16,900},{6,0,890},{6,0,890},{6,0,890},{6,0,890},{4,18,10},{4,18,10},{4,18,10},{4,11,10},{0,13,26},{0,13,26},{8,31,1998},{7,26,968},{8,20,1314},{7,19,945},{6,31,3160},{4,23,936},{5,19,17},{4,16,1314},{0,21,3420},{0,16,1028},{10,30,885},{10,24,10},{10,20,101},{9,19,179},{22,0,3051}, +{3,24,891},{5,19,13},{0,16,1003},{30,7,3051},{0,16,1003},{7,29,900},{7,29,900},{7,29,900},{7,18,900},{5,26,654},{5,19,8},{5,19,8},{4,14,76},{0,18,750},{1,14,24},{10,23,1},{10,23,1},{10,23,1},{10,16,5},{16,4,648},{5,19,4},{5,19,4},{2,14,5},{28,4,648},{2,14,5},{25,0,884},{10,24,9},{12,19,10},{5,19,9},{25,0,884},{30,10,884},{5,19,9}, +{0,17,890},{30,10,884},{0,17,890},{7,0,900},{7,0,900},{7,0,900},{7,0,900},{5,20,5},{5,20,5},{5,20,5},{5,12,8},{0,15,5},{0,15,5},{9,31,2124},{8,27,948},{9,21,1314},{8,20,943},{7,31,3196},{5,24,936},{6,20,13},{5,17,1314},{0,23,3307},{0,17,971},{11,31,885},{11,25,10},{11,21,101},{10,20,197},{23,0,3051},{4,25,891},{6,20,9},{0,17,970},{30,8,3051}, +{0,17,970},{8,30,891},{8,30,891},{8,30,891},{8,19,891},{6,27,654},{6,20,9},{6,20,9},{5,15,76},{0,19,691},{2,15,24},{11,24,2},{11,24,2},{11,24,2},{11,17,5},{17,5,648},{7,19,5},{7,19,5},{3,15,5},{31,4,648},{3,15,5},{26,1,884},{11,25,9},{13,20,5},{6,20,5},{26,1,884},{31,11,884},{6,20,5},{0,18,890},{31,11,884},{0,18,890},{8,0,890}, 
+{8,0,890},{8,0,890},{8,0,890},{6,21,5},{6,21,5},{6,21,5},{6,13,8},{1,16,5},{1,16,5},{10,31,2286},{9,28,948},{10,22,1314},{9,21,943},{8,31,3277},{6,25,936},{7,21,13},{6,18,1314},{0,24,3196},{0,19,948},{12,31,904},{12,26,20},{12,22,89},{11,21,197},{24,1,3051},{5,26,891},{7,21,9},{0,19,939},{31,9,3051},{0,19,939},{9,31,891},{9,31,891},{9,31,891}, +{9,20,891},{7,28,652},{7,21,9},{7,21,9},{6,16,86},{0,21,652},{2,16,18},{12,24,5},{12,24,5},{12,24,5},{12,18,8},{21,0,648},{8,20,5},{8,20,5},{3,16,2},{30,6,648},{3,16,2},{28,0,882},{11,27,10},{14,21,5},{7,21,5},{28,0,882},{24,16,882},{7,21,5},{0,19,890},{24,16,882},{0,19,890},{9,0,890},{9,0,890},{9,0,890},{9,0,890},{7,22,5}, +{7,22,5},{7,22,5},{7,14,8},{2,17,5},{2,17,5},{11,31,2414},{10,29,948},{11,23,1314},{10,22,943},{9,31,3412},{7,26,936},{8,22,15},{7,19,1314},{0,26,3115},{1,20,958},{13,31,958},{13,27,20},{13,23,89},{12,22,184},{24,4,3048},{6,27,891},{8,22,6},{0,20,925},{28,12,3048},{0,20,925},{10,31,894},{10,31,894},{10,31,894},{10,21,891},{8,29,657},{8,22,11},{8,22,11}, +{7,17,86},{1,22,652},{3,17,18},{13,25,5},{13,25,5},{13,25,5},{13,19,8},{22,1,648},{8,22,2},{8,22,2},{4,17,2},{31,7,648},{4,17,2},{29,1,882},{12,27,5},{15,22,5},{8,22,5},{29,1,882},{27,16,882},{8,22,5},{0,20,900},{27,16,882},{0,20,900},{10,0,890},{10,0,890},{10,0,890},{10,0,890},{8,22,10},{8,22,10},{8,22,10},{8,15,10},{3,18,5}, +{3,18,5},{13,31,2606},{11,30,968},{12,24,1314},{11,23,945},{11,31,3519},{8,27,936},{9,23,17},{8,20,1314},{0,28,3085},{2,21,942},{15,31,995},{14,28,10},{14,24,101},{13,23,179},{27,2,3051},{7,28,891},{9,23,13},{0,21,891},{31,12,3051},{0,21,891},{11,31,925},{11,31,925},{11,31,925},{11,22,900},{9,30,654},{9,23,8},{9,23,8},{8,18,76},{2,23,651},{5,18,24},{14,27,1}, 
+{14,27,1},{14,27,1},{14,20,5},{24,0,648},{9,23,4},{9,23,4},{6,18,5},{24,12,648},{6,18,5},{31,0,884},{14,28,9},{16,23,10},{9,23,9},{31,0,884},{30,16,884},{9,23,9},{0,21,890},{30,16,884},{0,21,890},{11,0,900},{11,0,900},{11,0,900},{11,0,900},{9,24,5},{9,24,5},{9,24,5},{9,16,8},{4,19,5},{4,19,5},{14,31,2804},{12,31,948},{13,25,1314}, +{12,24,943},{12,31,3652},{9,28,936},{10,24,13},{9,21,1314},{0,29,3052},{3,22,942},{16,31,1054},{15,29,10},{15,25,101},{14,24,197},{29,0,3051},{8,29,891},{10,24,9},{1,22,891},{30,14,3051},{1,22,891},{12,31,939},{12,31,939},{12,31,939},{12,23,891},{10,31,654},{10,24,9},{10,24,9},{9,19,76},{3,24,652},{6,19,24},{15,28,2},{15,28,2},{15,28,2},{15,21,5},{25,1,648}, +{11,23,5},{11,23,5},{7,19,5},{27,12,648},{7,19,5},{31,3,884},{15,29,9},{17,24,5},{10,24,5},{31,3,884},{31,17,884},{10,24,5},{0,22,890},{31,17,884},{0,22,890},{12,0,890},{12,0,890},{12,0,890},{12,0,890},{10,25,5},{10,25,5},{10,25,5},{10,17,8},{5,20,5},{5,20,5},{15,31,2956},{14,31,979},{14,26,1314},{13,25,943},{13,31,3841},{10,29,936},{11,25,13}, +{10,22,1314},{1,30,3052},{4,23,948},{17,31,1144},{16,30,20},{16,26,89},{15,25,197},{30,1,3051},{9,30,891},{11,25,9},{2,23,891},{31,15,3051},{2,23,891},{14,31,979},{14,31,979},{14,31,979},{13,24,891},{11,31,670},{11,25,9},{11,25,9},{10,20,86},{4,25,652},{6,20,18},{16,28,5},{16,28,5},{16,28,5},{16,22,8},{27,0,648},{12,24,5},{12,24,5},{7,20,2},{30,12,648}, +{7,20,2},{28,12,882},{15,31,10},{18,25,5},{11,25,5},{28,12,882},{28,20,882},{11,25,5},{0,23,890},{28,20,882},{0,23,890},{13,0,890},{13,0,890},{13,0,890},{13,0,890},{11,26,5},{11,26,5},{11,26,5},{11,18,8},{6,21,5},{6,21,5},{16,31,3182},{15,31,1028},{15,27,1314},{14,26,943},{15,31,4020},{11,30,936},{12,26,15},{11,23,1314},{2,31,3052},{5,24,958},{18,31,1270}, 
+{17,31,20},{17,27,89},{16,26,184},{28,8,3048},{10,31,891},{12,26,6},{2,24,901},{24,20,3048},{2,24,901},{15,31,1003},{15,31,1003},{15,31,1003},{14,25,891},{12,31,707},{12,26,11},{12,26,11},{11,21,86},{5,26,652},{7,21,18},{17,29,5},{17,29,5},{17,29,5},{17,23,8},{28,1,648},{12,26,2},{12,26,2},{8,21,2},{31,13,648},{8,21,2},{29,13,882},{16,31,5},{19,26,5}, +{12,26,5},{29,13,882},{31,20,882},{12,26,5},{0,24,900},{31,20,882},{0,24,900},{14,0,890},{14,0,890},{14,0,890},{14,0,890},{12,26,10},{12,26,10},{12,26,10},{12,19,10},{7,22,5},{7,22,5},{17,31,3508},{16,31,1175},{16,28,1314},{15,27,945},{16,31,4209},{12,31,936},{13,27,17},{12,24,1314},{4,31,3100},{6,25,942},{20,31,1368},{18,31,37},{18,28,101},{17,27,179},{31,6,3051}, +{12,31,900},{13,27,13},{4,25,891},{30,19,3051},{4,25,891},{16,31,1054},{16,31,1054},{16,31,1054},{15,26,900},{14,31,780},{13,27,8},{13,27,8},{12,22,76},{6,27,651},{9,22,24},{18,31,1},{18,31,1},{18,31,1},{18,24,5},{28,4,648},{13,27,4},{13,27,4},{10,22,5},{28,16,648},{10,22,5},{31,12,884},{18,31,36},{20,27,10},{13,27,9},{31,12,884},{30,22,884},{13,27,9}, +{0,25,890},{30,22,884},{0,25,890},{15,0,900},{15,0,900},{15,0,900},{15,0,900},{13,28,5},{13,28,5},{13,28,5},{13,20,8},{8,23,5},{8,23,5},{19,31,3790},{17,31,1412},{17,29,1314},{16,28,943},{17,31,4452},{14,31,954},{14,28,13},{13,25,1314},{7,31,3196},{7,26,942},{21,31,1494},{19,31,126},{19,29,101},{18,28,197},{31,8,3051},{14,31,950},{14,28,9},{5,26,891},{30,20,3051}, +{5,26,891},{17,31,1123},{17,31,1123},{17,31,1123},{16,27,891},{15,31,820},{14,28,9},{14,28,9},{13,23,76},{7,28,652},{10,23,24},{19,31,5},{19,31,5},{19,31,5},{19,25,5},{29,5,648},{15,27,5},{15,27,5},{11,23,5},{31,16,648},{11,23,5},{31,15,884},{20,31,80},{21,28,5},{14,28,5},{31,15,884},{31,23,884},{14,28,5},{0,26,890},{31,23,884},{0,26,890},{16,0,890}, 
+{16,0,890},{16,0,890},{16,0,890},{14,29,5},{14,29,5},{14,29,5},{14,21,8},{9,24,5},{9,24,5},{20,31,4072},{18,31,1694},{18,30,1314},{17,29,943},{19,31,4705},{15,31,1064},{15,29,13},{14,26,1314},{8,31,3355},{8,27,948},{22,31,1656},{20,31,276},{20,30,89},{19,29,197},{31,11,3051},{16,31,1054},{15,29,9},{6,27,891},{31,21,3051},{6,27,891},{18,31,1210},{18,31,1210},{18,31,1210}, +{17,28,891},{16,31,897},{15,29,9},{15,29,9},{14,24,86},{8,29,652},{10,24,18},{20,31,20},{20,31,20},{20,31,20},{20,26,8},{31,4,648},{16,28,5},{16,28,5},{11,24,2},{30,18,648},{11,24,2},{28,24,882},{22,31,157},{22,29,5},{15,29,5},{28,24,882},{24,28,882},{15,29,5},{0,27,890},{24,28,882},{0,27,890},{17,0,890},{17,0,890},{17,0,890},{17,0,890},{15,30,5}, +{15,30,5},{15,30,5},{15,22,8},{10,25,5},{10,25,5},{21,31,4390},{19,31,2007},{19,31,1314},{18,30,943},{20,31,4932},{16,31,1287},{16,30,15},{15,27,1314},{11,31,3547},{9,28,958},{23,31,1784},{22,31,465},{21,31,89},{20,30,184},{28,20,3048},{18,31,1188},{16,30,6},{6,28,901},{28,24,3048},{6,28,901},{19,31,1278},{19,31,1278},{19,31,1278},{18,29,891},{17,31,1011},{16,30,11},{16,30,11}, +{15,25,86},{9,30,652},{11,25,18},{21,31,53},{21,31,53},{21,31,53},{21,27,8},{31,7,648},{16,30,2},{16,30,2},{12,25,2},{31,19,648},{12,25,2},{29,25,882},{24,31,269},{23,30,5},{16,30,5},{29,25,882},{27,28,882},{16,30,5},{0,28,900},{27,28,882},{0,28,900},{18,0,890},{18,0,890},{18,0,890},{18,0,890},{16,30,10},{16,30,10},{16,30,10},{16,23,10},{11,26,5}, +{11,26,5},{22,31,4471},{20,31,2295},{20,31,1395},{19,31,936},{21,31,4906},{18,31,1414},{17,31,8},{16,28,1161},{13,31,3570},{11,28,818},{24,31,1769},{23,31,590},{23,31,106},{22,30,146},{29,21,2814},{19,31,1206},{17,31,4},{8,29,786},{31,24,2814},{8,29,786},{20,31,1395},{20,31,1395},{20,31,1395},{19,30,900},{19,31,1134},{17,31,8},{17,31,8},{16,26,76},{10,31,651},{13,26,24},{23,31,106}, 
+{23,31,106},{23,31,106},{22,28,5},{28,16,648},{17,31,4},{17,31,4},{14,26,5},{24,24,648},{14,26,5},{31,24,761},{26,31,317},{24,31,1},{17,31,0},{31,24,761},{30,28,761},{17,31,0},{0,29,785},{30,28,761},{0,29,785},{19,0,900},{19,0,900},{19,0,900},{19,0,900},{17,31,8},{17,31,8},{17,31,8},{17,24,8},{12,27,5},{12,27,5},{23,31,3955},{22,31,2260},{21,31,1530}, +{20,31,891},{22,31,4375},{19,31,1194},{18,31,54},{17,28,805},{15,31,3075},{12,29,498},{25,31,1417},{24,31,510},{24,31,149},{23,30,74},{31,19,2249},{21,31,937},{19,31,5},{11,29,482},{31,25,2249},{11,29,482},{21,31,1530},{21,31,1530},{21,31,1530},{20,31,891},{20,31,1251},{18,31,54},{18,31,54},{17,27,76},{12,31,691},{14,27,24},{24,31,149},{24,31,149},{24,31,149},{23,29,5},{29,17,648}, +{19,31,5},{19,31,5},{15,27,5},{27,24,648},{15,27,5},{29,29,481},{27,31,202},{25,31,4},{20,31,1},{29,29,481},{31,28,481},{20,31,1},{0,29,481},{31,28,481},{0,29,481},{20,0,890},{20,0,890},{20,0,890},{20,0,890},{18,31,29},{18,31,29},{18,31,29},{18,25,8},{13,28,5},{13,28,5},{24,31,3609},{23,31,2199},{22,31,1683},{21,31,915},{23,31,3827},{20,31,1071},{19,31,153}, +{18,29,485},{16,31,2690},{13,30,306},{26,31,1133},{25,31,489},{25,31,200},{24,31,20},{29,25,1769},{22,31,710},{21,31,41},{12,30,290},{27,28,1769},{12,30,290},{22,31,1683},{22,31,1683},{22,31,1683},{21,31,915},{21,31,1401},{19,31,153},{19,31,153},{18,28,86},{14,31,769},{14,28,18},{25,31,200},{25,31,200},{25,31,200},{24,30,8},{31,16,648},{21,31,41},{21,31,41},{15,28,2},{30,24,648}, +{15,28,2},{31,27,265},{28,31,113},{27,31,1},{23,31,1},{31,27,265},{31,29,265},{23,31,1},{0,30,289},{31,29,265},{0,30,289},{21,0,890},{21,0,890},{21,0,890},{21,0,890},{19,31,53},{19,31,53},{19,31,53},{19,26,8},{14,29,5},{14,29,5},{24,31,3305},{24,31,2222},{23,31,1795},{22,31,990},{24,31,3438},{22,31,1087},{21,31,306},{19,30,293},{18,31,2403},{15,30,118},{27,31,857}, 
+{26,31,465},{26,31,269},{25,31,8},{31,23,1374},{24,31,546},{23,31,98},{14,30,114},{31,27,1374},{14,30,114},{23,31,1795},{23,31,1795},{23,31,1795},{22,31,990},{22,31,1587},{21,31,306},{21,31,306},{19,29,86},{16,31,897},{15,29,18},{26,31,269},{26,31,269},{26,31,269},{25,31,8},{31,19,648},{23,31,98},{23,31,98},{16,29,2},{31,25,648},{16,29,2},{31,28,113},{29,31,52},{28,31,1}, +{26,31,1},{31,28,113},{30,30,113},{26,31,1},{0,30,113},{30,30,113},{0,30,113},{22,0,890},{22,0,890},{22,0,890},{22,0,890},{20,31,90},{20,31,90},{20,31,90},{20,27,10},{15,30,5},{15,30,5},{25,31,3092},{25,31,2292},{24,31,1978},{24,31,1123},{25,31,3124},{23,31,1068},{22,31,525},{20,30,140},{20,31,2196},{16,31,41},{28,31,680},{27,31,458},{27,31,337},{26,31,65},{29,29,1032}, +{26,31,456},{24,31,185},{16,31,37},{31,28,1032},{16,31,37},{24,31,1978},{24,31,1978},{24,31,1978},{24,31,1123},{24,31,1769},{22,31,525},{22,31,525},{20,30,76},{18,31,1080},{17,30,24},{27,31,337},{27,31,337},{27,31,337},{26,31,65},{28,28,648},{24,31,185},{24,31,185},{18,30,5},{28,28,648},{18,30,5},{31,30,18},{30,31,9},{30,31,0},{29,31,0},{31,30,18},{30,31,18},{29,31,0}, +{0,31,36},{30,31,18},{0,31,36},{23,0,900},{23,0,900},{23,0,900},{23,0,900},{22,31,164},{22,31,164},{22,31,164},{21,28,8},{16,31,5},{16,31,5},{27,31,2818},{26,31,2254},{25,31,2043},{25,31,1243},{26,31,2829},{24,31,1099},{23,31,684},{21,31,72},{22,31,2007},{18,31,20},{29,31,566},{28,31,420},{28,31,356},{27,31,122},{31,27,771},{27,31,386},{26,31,232},{19,31,1},{31,29,771}, +{19,31,1},{25,31,2043},{25,31,2043},{25,31,2043},{25,31,1243},{25,31,1819},{23,31,684},{23,31,684},{21,31,72},{20,31,1172},{18,31,20},{28,31,356},{28,31,356},{28,31,356},{27,31,122},{31,25,578},{26,31,232},{26,31,232},{19,31,1},{27,30,578},{19,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{24,0,890}, 
+{24,0,890},{24,0,890},{24,0,890},{23,31,200},{23,31,200},{23,31,200},{22,29,8},{18,31,20},{18,31,20},{27,31,2242},{27,31,1879},{26,31,1738},{26,31,1150},{27,31,2209},{24,31,987},{24,31,626},{23,31,20},{23,31,1560},{19,31,53},{29,31,342},{29,31,257},{29,31,221},{28,31,68},{31,28,452},{28,31,228},{27,31,137},{22,31,1},{30,30,452},{22,31,1},{26,31,1738},{26,31,1738},{26,31,1738}, +{26,31,1150},{25,31,1499},{24,31,626},{24,31,626},{23,31,20},{22,31,950},{19,31,53},{29,31,221},{29,31,221},{29,31,221},{28,31,68},{31,26,340},{27,31,137},{27,31,137},{22,31,1},{30,29,340},{22,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{25,0,890},{25,0,890},{25,0,890},{25,0,890},{24,31,265}, +{24,31,265},{24,31,265},{23,30,8},{19,31,53},{19,31,53},{28,31,1844},{27,31,1559},{27,31,1438},{27,31,1075},{27,31,1713},{26,31,853},{25,31,635},{24,31,10},{24,31,1207},{21,31,125},{30,31,172},{30,31,145},{29,31,125},{29,31,40},{31,29,216},{29,31,121},{28,31,68},{24,31,1},{31,30,216},{24,31,1},{27,31,1438},{27,31,1438},{27,31,1438},{27,31,1075},{27,31,1229},{25,31,635},{25,31,635}, +{24,31,10},{23,31,756},{21,31,125},{29,31,125},{29,31,125},{29,31,125},{29,31,40},{31,28,164},{28,31,68},{28,31,68},{24,31,1},{30,30,164},{24,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{26,0,890},{26,0,890},{26,0,890},{26,0,890},{25,31,346},{25,31,346},{25,31,346},{24,31,10},{21,31,125}, +{21,31,125},{0,19,1568},{0,13,202},{0,10,13},{0,8,596},{0,13,3371},{0,9,2162},{0,8,1080},{0,5,2539},{0,6,3648},{0,5,2708},{0,19,1568},{0,13,202},{0,10,13},{0,8,596},{6,1,3371},{0,9,2162},{0,8,1080},{0,5,2539},{13,0,3371},{0,5,2539},{0,9,0},{0,9,0},{0,9,0},{0,4,4},{0,4,290},{0,4,125},{0,4,125},{0,2,164},{0,2,321},{0,2,189},{0,9,0}, 
+{0,9,0},{0,9,0},{0,4,4},{2,1,290},{0,4,125},{0,4,125},{0,2,164},{4,0,290},{0,2,164},{9,1,1568},{0,13,202},{0,10,13},{0,8,596},{9,1,1568},{19,0,1568},{0,8,596},{0,6,1586},{19,0,1568},{0,6,1586},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,22,1568},{0,15,117},{0,11,8}, +{0,9,485},{0,15,3971},{0,10,2369},{0,9,1061},{0,6,2834},{0,7,4341},{0,6,3090},{0,22,1568},{0,15,117},{0,11,8},{0,9,485},{7,1,3968},{0,10,2369},{0,9,1061},{0,6,2834},{11,2,3968},{0,6,2834},{0,12,1},{0,12,1},{0,12,1},{0,6,0},{0,6,512},{0,5,194},{0,5,194},{0,3,320},{0,3,576},{0,2,381},{0,12,1},{0,12,1},{0,12,1},{0,6,0},{3,0,512}, +{0,5,194},{0,5,194},{0,3,320},{6,0,512},{0,3,320},{11,0,1568},{0,15,117},{0,11,8},{0,9,485},{11,0,1568},{22,0,1568},{0,9,485},{0,7,1586},{22,0,1568},{0,7,1586},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,25,1568},{0,17,45},{0,12,58},{0,10,392},{0,17,4652},{0,11,2596},{0,10,1121}, +{0,7,3254},{0,8,5140},{0,6,3570},{0,25,1568},{0,17,45},{0,12,58},{0,10,392},{8,1,4652},{0,11,2596},{0,10,1121},{0,7,3254},{17,0,4652},{0,7,3254},{0,15,1},{0,15,1},{0,15,1},{0,7,4},{0,7,802},{0,6,320},{0,6,320},{0,3,512},{0,3,896},{0,3,576},{0,15,1},{0,15,1},{0,15,1},{0,7,4},{3,2,802},{0,6,320},{0,6,320},{0,3,512},{7,0,802}, +{0,3,512},{12,1,1568},{0,17,45},{1,12,13},{0,10,392},{12,1,1568},{25,0,1568},{0,10,392},{0,8,1576},{25,0,1568},{0,8,1576},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,28,1570},{0,19,16},{0,13,178},{0,11,340},{0,19,5424},{0,13,2843},{0,11,1240},{0,8,3704},{0,9,6003},{0,7,4095},{0,28,1570}, +{0,19,16},{1,13,122},{0,11,340},{7,5,5419},{0,13,2843},{0,11,1240},{0,8,3704},{15,2,5419},{0,8,3704},{0,18,1},{0,18,1},{0,18,1},{0,9,1},{0,9,1152},{0,7,461},{0,7,461},{0,4,709},{0,4,1281},{0,4,830},{0,18,1},{0,18,1},{0,18,1},{0,9,1},{4,1,1152},{0,7,461},{0,7,461},{0,4,709},{9,0,1152},{0,4,709},{14,0,1568},{0,19,16},{2,13,13}, 
+{0,11,340},{14,0,1568},{26,1,1568},{0,11,340},{0,9,1576},{26,1,1568},{0,9,1576},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,29,1633},{1,20,75},{1,14,265},{0,13,313},{0,22,5419},{0,14,2571},{0,12,853},{0,9,3410},{0,10,6244},{0,9,3986},{1,29,1569},{1,20,11},{2,14,117},{0,13,313},{11,0,5419}, +{0,14,2571},{0,12,853},{0,9,3410},{22,0,5419},{0,9,3410},{1,19,65},{1,19,65},{1,19,65},{1,10,65},{0,12,1152},{0,9,289},{0,9,289},{0,5,580},{0,5,1393},{0,5,749},{1,19,1},{1,19,1},{1,19,1},{1,10,1},{4,4,1152},{0,9,289},{0,9,289},{0,5,580},{12,0,1152},{0,5,580},{13,5,1568},{0,21,4},{3,14,13},{0,13,232},{13,5,1568},{31,0,1568},{0,13,232}, +{0,10,1586},{31,0,1568},{0,10,1586},{1,0,65},{1,0,65},{1,0,65},{1,0,65},{0,3,1},{0,3,1},{0,3,1},{0,2,1},{0,1,20},{0,1,20},{1,31,1731},{1,22,149},{2,15,457},{1,14,377},{0,25,5419},{0,16,2347},{0,13,556},{0,10,3179},{0,11,6495},{0,9,3890},{2,30,1569},{2,21,11},{3,15,117},{1,14,313},{12,1,5419},{0,16,2347},{0,13,556},{0,10,3179},{25,0,5419}, +{0,10,3179},{1,22,145},{1,22,145},{1,22,145},{1,11,154},{0,15,1152},{0,11,194},{0,11,194},{0,6,481},{0,7,1531},{0,6,737},{2,20,1},{2,20,1},{2,20,1},{2,11,1},{5,5,1152},{0,11,194},{0,11,194},{0,6,481},{15,0,1152},{0,6,481},{17,0,1568},{1,22,4},{4,15,8},{0,14,157},{17,0,1568},{30,2,1568},{0,14,157},{0,11,1586},{30,2,1568},{0,11,1586},{1,0,145}, +{1,0,145},{1,0,145},{1,0,145},{0,6,1},{0,6,1},{0,6,1},{0,3,1},{0,3,65},{0,3,65},{2,31,1977},{2,23,341},{2,16,707},{1,15,510},{0,28,5424},{0,17,2137},{0,15,373},{0,11,3035},{0,13,6709},{0,10,3860},{3,31,1569},{3,22,11},{4,16,122},{2,15,313},{14,0,5419},{0,17,2137},{0,15,373},{0,11,3035},{26,1,5419},{0,11,3035},{2,23,337},{2,23,337},{2,23,337}, 
+{2,12,341},{0,18,1152},{0,13,104},{0,13,104},{0,8,410},{0,8,1686},{0,7,786},{3,21,1},{3,21,1},{3,21,1},{3,12,2},{9,0,1152},{0,13,104},{0,13,104},{0,8,410},{18,0,1152},{0,8,410},{18,1,1568},{2,23,4},{5,16,13},{0,15,117},{18,1,1568},{31,3,1568},{0,15,117},{0,12,1576},{31,3,1568},{0,12,1576},{2,0,337},{2,0,337},{2,0,337},{2,0,337},{0,9,1}, +{0,9,1},{0,9,1},{0,5,4},{0,4,130},{0,4,130},{3,31,2353},{2,24,570},{3,17,1027},{2,16,714},{0,31,5424},{0,19,1979},{0,16,195},{0,12,2857},{0,14,7039},{0,11,3919},{4,31,1576},{4,23,16},{5,17,122},{3,16,331},{15,1,5419},{0,19,1979},{0,16,195},{0,12,2857},{27,2,5419},{0,12,2857},{2,26,546},{2,26,546},{2,26,546},{2,14,546},{0,21,1152},{0,14,50},{0,14,50}, +{0,9,305},{0,9,1856},{0,8,830},{4,22,1},{4,22,1},{4,22,1},{4,13,1},{10,1,1152},{0,14,50},{0,14,50},{0,9,305},{21,0,1152},{0,9,305},{19,2,1568},{3,24,10},{6,17,13},{0,16,74},{19,2,1568},{30,5,1568},{0,16,74},{0,13,1576},{30,5,1568},{0,13,1576},{2,0,545},{2,0,545},{2,0,545},{2,0,545},{0,12,0},{0,12,0},{0,12,0},{0,6,1},{0,5,205}, +{0,5,205},{4,31,2980},{3,26,925},{3,19,1484},{2,17,1032},{1,31,5504},{0,21,1811},{0,17,61},{0,13,2651},{0,16,7420},{0,13,3876},{6,31,1619},{5,24,11},{6,18,117},{4,17,313},{17,0,5419},{0,21,1811},{0,17,61},{0,13,2651},{30,2,5419},{0,13,2651},{3,27,900},{3,27,900},{3,27,900},{3,15,900},{0,24,1152},{0,16,8},{0,16,8},{0,10,208},{0,11,2124},{0,9,881},{5,23,1}, +{5,23,1},{5,23,1},{5,14,1},{12,0,1152},{0,16,8},{0,16,8},{0,10,208},{24,0,1152},{0,10,208},{21,1,1568},{4,25,4},{7,18,13},{0,17,45},{21,1,1568},{27,8,1568},{0,17,45},{0,14,1586},{27,8,1568},{0,14,1586},{3,0,900},{3,0,900},{3,0,900},{3,0,900},{0,15,1},{0,15,1},{0,15,1},{0,8,4},{0,6,356},{0,6,356},{4,31,3668},{3,27,1320},{4,20,1940}, 
+{3,18,1352},{2,31,5771},{0,23,1737},{0,18,37},{0,14,2486},{0,17,7711},{0,14,3930},{7,31,1635},{6,25,11},{7,19,117},{5,18,313},{18,1,5419},{0,23,1737},{0,18,37},{0,14,2486},{31,3,5419},{0,14,2486},{3,30,1252},{3,30,1252},{3,30,1252},{3,16,1256},{0,27,1152},{0,18,1},{0,18,1},{0,11,164},{0,12,2369},{0,10,1001},{6,24,1},{6,24,1},{6,24,1},{6,15,1},{13,1,1152}, +{0,18,1},{0,18,1},{0,11,164},{27,0,1152},{0,11,164},{23,0,1568},{5,26,4},{8,19,8},{0,18,36},{23,0,1568},{30,8,1568},{0,18,36},{0,15,1586},{30,8,1568},{0,15,1586},{3,0,1252},{3,0,1252},{3,0,1252},{3,0,1252},{0,18,1},{0,18,1},{0,18,1},{0,9,1},{0,7,505},{0,7,505},{5,31,4346},{4,28,1644},{5,21,2372},{4,19,1703},{3,31,6079},{0,24,1644},{0,20,38}, +{0,15,2390},{0,18,7969},{0,15,3907},{8,31,1682},{7,26,11},{8,20,122},{6,19,313},{19,2,5419},{0,24,1640},{0,20,34},{0,15,2386},{30,5,5419},{0,15,2386},{4,31,1587},{4,31,1587},{4,31,1587},{4,17,1590},{0,30,1156},{0,20,29},{0,20,29},{0,12,117},{0,13,2483},{0,11,1044},{7,25,1},{7,25,1},{7,25,1},{7,16,2},{15,0,1152},{1,19,1},{1,19,1},{0,12,113},{30,0,1152}, +{0,12,113},{24,1,1568},{6,27,4},{9,20,13},{0,20,25},{24,1,1568},{31,9,1568},{0,20,25},{0,16,1576},{31,9,1568},{0,16,1576},{4,0,1586},{4,0,1586},{4,0,1586},{4,0,1586},{0,21,5},{0,21,5},{0,21,5},{0,11,8},{0,9,565},{0,9,565},{7,31,4580},{5,29,1644},{6,22,2372},{4,20,1692},{4,31,6228},{1,25,1644},{1,21,38},{1,16,2348},{0,20,7577},{0,16,3408},{9,31,1760}, +{8,27,16},{9,21,122},{7,20,331},{19,5,5419},{0,26,1593},{2,20,27},{0,16,2252},{31,6,5419},{0,16,2252},{5,31,1590},{5,31,1590},{5,31,1590},{5,18,1590},{1,31,1156},{1,21,29},{1,21,29},{1,13,117},{0,15,2241},{0,13,699},{8,26,1},{8,26,1},{8,26,1},{8,17,1},{16,1,1152},{2,20,2},{2,20,2},{0,13,74},{31,1,1152},{0,13,74},{26,0,1568},{7,28,10},{10,21,13}, 
+{0,21,10},{26,0,1568},{30,11,1568},{0,21,10},{0,17,1576},{30,11,1568},{0,17,1576},{5,0,1586},{5,0,1586},{5,0,1586},{5,0,1586},{1,22,5},{1,22,5},{1,22,5},{1,12,8},{0,10,433},{0,10,433},{8,31,4826},{6,30,1644},{7,23,2352},{6,21,1676},{5,31,6463},{2,26,1644},{2,22,38},{2,17,2348},{0,22,7196},{0,17,2863},{11,31,1865},{9,28,11},{10,22,117},{8,21,313},{23,0,5419}, +{1,27,1586},{2,22,29},{0,18,2115},{30,8,5419},{0,18,2115},{6,31,1612},{6,31,1612},{6,31,1612},{6,19,1585},{3,30,1179},{3,21,29},{3,21,29},{2,14,122},{0,17,1953},{0,14,426},{9,27,1},{9,27,1},{9,27,1},{9,18,1},{16,4,1152},{3,21,4},{3,21,4},{0,15,49},{28,4,1152},{0,15,49},{25,5,1568},{8,29,4},{11,22,13},{1,22,4},{25,5,1568},{31,12,1568},{1,22,4}, +{0,18,1586},{31,12,1568},{0,18,1586},{6,0,1576},{6,0,1576},{6,0,1576},{6,0,1576},{2,24,10},{2,24,10},{2,24,10},{2,13,10},{0,13,272},{0,13,272},{9,31,5108},{7,31,1644},{8,24,2372},{7,22,1676},{7,31,6660},{3,27,1644},{3,23,38},{3,18,2348},{0,23,6891},{0,18,2519},{12,31,1952},{10,29,11},{11,23,117},{9,22,313},{24,1,5419},{2,28,1584},{3,23,29},{0,19,2027},{31,9,5419}, +{0,19,2027},{7,31,1640},{7,31,1640},{7,31,1640},{7,20,1580},{4,31,1188},{3,23,34},{3,23,34},{3,15,122},{0,19,1795},{0,15,261},{10,28,1},{10,28,1},{10,28,1},{10,19,1},{17,5,1152},{4,22,1},{4,22,1},{0,16,26},{31,4,1152},{0,16,26},{29,0,1568},{9,30,4},{12,23,8},{2,23,4},{29,0,1568},{30,14,1568},{2,23,4},{0,19,1586},{30,14,1568},{0,19,1586},{7,0,1576}, +{7,0,1576},{7,0,1576},{7,0,1576},{3,24,10},{3,24,10},{3,24,10},{3,14,10},{0,14,170},{0,14,170},{10,31,5426},{8,31,1695},{9,25,2372},{8,23,1703},{8,31,6861},{4,28,1644},{4,24,38},{3,19,2372},{0,25,6573},{0,19,2268},{13,31,2066},{11,30,11},{12,24,122},{10,23,313},{26,0,5419},{3,29,1584},{4,24,34},{0,20,1937},{30,11,5419},{0,20,1937},{8,31,1686},{8,31,1686},{8,31,1686}, 
+{8,21,1590},{5,31,1206},{4,24,29},{4,24,29},{4,16,117},{0,20,1602},{0,17,126},{11,29,1},{11,29,1},{11,29,1},{11,20,2},{21,0,1152},{5,23,1},{5,23,1},{0,17,5},{30,6,1152},{0,17,5},{30,1,1568},{10,31,4},{13,24,13},{3,24,10},{30,1,1568},{31,15,1568},{3,24,10},{0,20,1576},{31,15,1568},{0,20,1576},{8,0,1586},{8,0,1586},{8,0,1586},{8,0,1586},{4,25,5}, +{4,25,5},{4,25,5},{4,15,8},{0,16,90},{0,16,90},{11,31,5658},{9,31,1836},{10,26,2372},{8,24,1692},{9,31,7116},{5,29,1644},{5,25,38},{5,20,2348},{0,26,6379},{0,21,2028},{14,31,2216},{12,31,16},{13,25,122},{11,24,331},{27,1,5419},{2,31,1584},{6,24,27},{0,21,1832},{27,14,5419},{0,21,1832},{9,31,1755},{9,31,1755},{9,31,1755},{9,22,1590},{6,31,1260},{5,25,29},{5,25,29}, +{5,17,117},{0,22,1459},{0,18,38},{12,30,1},{12,30,1},{12,30,1},{12,21,1},{22,1,1152},{6,24,2},{6,24,2},{0,18,2},{31,7,1152},{0,18,2},{31,2,1568},{11,31,13},{14,25,13},{4,25,10},{31,2,1568},{30,17,1568},{4,25,10},{0,21,1576},{30,17,1568},{0,21,1576},{9,0,1586},{9,0,1586},{9,0,1586},{9,0,1586},{5,26,5},{5,26,5},{5,26,5},{5,16,8},{0,18,37}, +{0,18,37},{12,31,6036},{11,31,2033},{11,27,2352},{10,25,1676},{11,31,7423},{6,30,1644},{6,26,38},{6,21,2348},{0,28,6109},{0,22,1794},{15,31,2371},{13,31,50},{14,26,117},{12,25,313},{29,0,5419},{5,31,1586},{6,26,29},{0,22,1730},{30,14,5419},{0,22,1730},{10,31,1865},{10,31,1865},{10,31,1865},{10,23,1585},{7,31,1339},{7,25,29},{7,25,29},{6,18,122},{0,23,1345},{0,19,16},{13,31,1}, +{13,31,1},{13,31,1},{13,22,1},{24,0,1152},{7,25,4},{7,25,4},{2,19,1},{24,12,1152},{2,19,1},{29,9,1568},{14,31,41},{15,26,13},{5,26,4},{29,9,1568},{27,20,1568},{5,26,4},{0,22,1586},{27,20,1568},{0,22,1586},{10,0,1576},{10,0,1576},{10,0,1576},{10,0,1576},{6,28,10},{6,28,10},{6,28,10},{6,17,10},{0,20,13},{0,20,13},{13,31,6450},{12,31,2268},{12,28,2372}, 
+{11,26,1676},{12,31,7676},{7,31,1644},{7,27,38},{7,22,2348},{0,30,5924},{0,23,1695},{17,31,2536},{15,31,139},{15,27,117},{13,26,313},{30,1,5419},{7,31,1635},{7,27,29},{0,23,1686},{31,15,5419},{0,23,1686},{11,31,1937},{11,31,1937},{11,31,1937},{11,24,1580},{9,31,1420},{7,27,34},{7,27,34},{7,19,122},{0,25,1234},{1,20,17},{14,31,10},{14,31,10},{14,31,10},{14,23,1},{25,1,1152}, +{8,26,1},{8,26,1},{2,20,2},{27,12,1152},{2,20,2},{31,8,1568},{15,31,90},{16,27,8},{6,27,4},{31,8,1568},{30,20,1568},{6,27,4},{0,23,1586},{30,20,1568},{0,23,1586},{11,0,1576},{11,0,1576},{11,0,1576},{11,0,1576},{7,28,10},{7,28,10},{7,28,10},{7,18,10},{0,21,10},{0,21,10},{15,31,6772},{13,31,2595},{13,29,2372},{12,27,1703},{13,31,7985},{8,31,1725},{8,28,38}, +{7,23,2372},{0,31,5773},{0,24,1644},{18,31,2722},{16,31,261},{16,28,122},{14,27,313},{31,2,5419},{8,31,1721},{8,28,34},{0,24,1640},{30,17,5419},{0,24,1640},{12,31,2027},{12,31,2027},{12,31,2027},{12,25,1590},{10,31,1510},{8,28,29},{8,28,29},{8,20,117},{0,27,1188},{2,21,17},{15,31,26},{15,31,26},{15,31,26},{15,24,2},{27,0,1152},{9,27,1},{9,27,1},{3,21,2},{30,12,1152}, +{3,21,2},{31,11,1568},{17,31,180},{17,28,13},{7,28,10},{31,11,1568},{31,21,1568},{7,28,10},{0,24,1576},{31,21,1568},{0,24,1576},{12,0,1586},{12,0,1586},{12,0,1586},{12,0,1586},{8,29,5},{8,29,5},{8,29,5},{8,19,8},{1,22,10},{1,22,10},{16,31,7154},{14,31,2955},{14,30,2372},{12,28,1692},{14,31,8348},{10,31,1895},{9,29,38},{9,24,2348},{1,31,5956},{1,25,1644},{19,31,2866}, +{17,31,468},{17,29,122},{15,28,331},{31,5,5419},{10,31,1859},{10,28,27},{0,25,1601},{31,18,5419},{0,25,1601},{13,31,2162},{13,31,2162},{13,31,2162},{13,26,1590},{11,31,1590},{9,29,29},{9,29,29},{9,21,117},{0,28,1161},{3,22,17},{16,31,49},{16,31,49},{16,31,49},{16,25,1},{28,1,1152},{10,28,2},{10,28,2},{4,22,2},{31,13,1152},{4,22,2},{31,14,1568},{19,31,277},{18,29,13}, 
+{8,29,10},{31,14,1568},{30,23,1568},{8,29,10},{0,25,1576},{30,23,1568},{0,25,1576},{13,0,1586},{13,0,1586},{13,0,1586},{13,0,1586},{9,30,5},{9,30,5},{9,30,5},{9,20,8},{2,23,10},{2,23,10},{17,31,7636},{15,31,3408},{15,31,2352},{14,29,1676},{16,31,8673},{11,31,2187},{10,30,38},{10,25,2348},{3,31,6235},{2,26,1644},{20,31,3112},{19,31,754},{18,30,117},{16,29,313},{31,8,5419}, +{12,31,2060},{10,30,29},{0,26,1587},{30,20,5419},{0,26,1587},{15,31,2252},{15,31,2252},{15,31,2252},{14,27,1585},{12,31,1740},{11,29,29},{11,29,29},{10,22,122},{0,30,1163},{4,23,16},{18,31,85},{18,31,85},{18,31,85},{17,26,1},{28,4,1152},{11,29,4},{11,29,4},{6,23,1},{28,16,1152},{6,23,1},{29,21,1568},{20,31,436},{19,30,13},{9,30,4},{29,21,1568},{31,24,1568},{9,30,4}, +{0,26,1586},{31,24,1568},{0,26,1586},{14,0,1576},{14,0,1576},{14,0,1576},{14,0,1576},{10,31,13},{10,31,13},{10,31,13},{10,21,10},{3,24,10},{3,24,10},{18,31,8122},{16,31,3907},{16,31,2390},{15,30,1676},{17,31,9036},{12,31,2576},{11,31,38},{11,26,2348},{6,31,6555},{3,27,1644},{22,31,3392},{20,31,1044},{19,31,117},{17,30,313},{31,11,5419},{15,31,2284},{11,31,29},{0,27,1587},{31,21,5419}, +{0,27,1587},{16,31,2386},{16,31,2386},{16,31,2386},{15,28,1580},{14,31,1924},{11,31,34},{11,31,34},{11,23,122},{1,31,1163},{5,24,17},{19,31,113},{19,31,113},{19,31,113},{18,27,1},{29,5,1152},{12,30,1},{12,30,1},{6,24,2},{31,16,1152},{6,24,2},{31,20,1568},{22,31,593},{20,31,8},{10,31,4},{31,20,1568},{30,26,1568},{10,31,4},{0,27,1586},{30,26,1568},{0,27,1586},{15,0,1576}, +{15,0,1576},{15,0,1576},{15,0,1576},{11,31,25},{11,31,25},{11,31,25},{11,22,10},{4,25,10},{4,25,10},{19,31,7638},{17,31,4060},{17,31,2539},{16,30,1659},{18,31,8553},{14,31,2430},{13,31,37},{11,27,1940},{7,31,6120},{4,28,1320},{23,31,2996},{20,31,1064},{20,31,164},{19,30,186},{29,17,4803},{16,31,2018},{13,31,1},{1,28,1253},{27,24,4803},{1,28,1253},{17,31,2539},{17,31,2539},{17,31,2539}, 
+{16,29,1590},{15,31,2028},{13,31,37},{13,31,37},{12,24,117},{3,31,1170},{6,25,17},{20,31,164},{20,31,164},{20,31,164},{19,28,2},{31,4,1152},{13,31,1},{13,31,1},{7,25,2},{30,18,1152},{7,25,2},{31,22,1250},{24,31,505},{22,31,0},{13,31,0},{31,22,1250},{30,27,1250},{13,31,0},{0,28,1252},{30,27,1250},{0,28,1252},{16,0,1586},{16,0,1586},{16,0,1586},{16,0,1586},{13,31,37}, +{13,31,37},{13,31,37},{12,23,8},{5,26,10},{5,26,10},{20,31,7060},{19,31,3955},{18,31,2710},{17,31,1595},{19,31,7717},{15,31,2140},{14,31,77},{13,28,1480},{8,31,5539},{6,28,925},{23,31,2516},{22,31,945},{21,31,233},{20,30,100},{31,15,4056},{18,31,1656},{15,31,8},{4,28,900},{31,23,4056},{4,28,900},{18,31,2710},{18,31,2710},{18,31,2710},{17,30,1590},{16,31,2193},{14,31,77},{14,31,77}, +{13,25,117},{5,31,1233},{7,26,17},{21,31,233},{21,31,233},{21,31,233},{20,29,1},{31,7,1152},{15,31,8},{15,31,8},{8,26,2},{31,19,1152},{8,26,2},{31,23,884},{25,31,370},{23,31,4},{16,31,1},{31,23,884},{31,27,884},{16,31,1},{0,28,900},{31,27,884},{0,28,900},{17,0,1586},{17,0,1586},{17,0,1586},{17,0,1586},{14,31,52},{14,31,52},{14,31,52},{13,24,8},{6,27,10}, +{6,27,10},{21,31,6535},{20,31,3919},{19,31,2857},{18,31,1585},{20,31,6979},{16,31,1942},{15,31,195},{14,28,990},{11,31,4914},{7,29,562},{24,31,2045},{23,31,830},{22,31,338},{21,31,26},{29,21,3318},{19,31,1314},{17,31,52},{5,29,546},{31,24,3318},{5,29,546},{19,31,2857},{19,31,2857},{19,31,2857},{18,31,1585},{17,31,2436},{15,31,195},{15,31,195},{14,26,122},{7,31,1339},{8,27,16},{22,31,338}, +{22,31,338},{22,31,338},{21,30,1},{28,16,1152},{17,31,52},{17,31,52},{10,27,1},{24,24,1152},{10,27,1},{31,25,545},{26,31,221},{25,31,0},{19,31,0},{31,25,545},{31,28,545},{19,31,0},{0,29,545},{31,28,545},{0,29,545},{18,0,1576},{18,0,1576},{18,0,1576},{18,0,1576},{15,31,74},{15,31,74},{15,31,74},{14,25,10},{7,28,10},{7,28,10},{22,31,6151},{20,31,3935},{20,31,3035}, 
+{19,31,1640},{21,31,6458},{18,31,1902},{16,31,373},{15,29,670},{12,31,4499},{8,29,347},{25,31,1729},{24,31,786},{23,31,410},{22,31,1},{31,19,2753},{20,31,1094},{19,31,113},{8,29,338},{31,25,2753},{8,29,338},{20,31,3035},{20,31,3035},{20,31,3035},{19,31,1640},{19,31,2630},{16,31,373},{16,31,373},{15,27,122},{10,31,1483},{9,28,17},{23,31,410},{23,31,410},{23,31,410},{22,31,1},{29,17,1152}, +{19,31,113},{19,31,113},{10,28,2},{27,24,1152},{10,28,2},{31,26,317},{27,31,130},{26,31,9},{22,31,0},{31,26,317},{30,29,317},{22,31,0},{0,29,337},{30,29,317},{0,29,337},{19,0,1576},{19,0,1576},{19,0,1576},{19,0,1576},{16,31,117},{16,31,117},{16,31,117},{15,26,10},{8,29,10},{8,29,10},{23,31,5691},{22,31,4004},{21,31,3254},{20,31,1755},{22,31,6023},{19,31,1830},{18,31,606}, +{16,29,430},{14,31,4162},{9,30,155},{26,31,1481},{25,31,801},{24,31,505},{23,31,26},{29,25,2273},{22,31,914},{20,31,194},{9,30,146},{27,28,2273},{9,30,146},{21,31,3254},{21,31,3254},{21,31,3254},{20,31,1755},{20,31,2835},{18,31,606},{18,31,606},{16,28,117},{11,31,1665},{10,29,17},{24,31,505},{24,31,505},{24,31,505},{23,31,26},{31,16,1152},{20,31,194},{20,31,194},{11,29,2},{30,24,1152}, +{11,29,2},{31,28,145},{28,31,65},{28,31,1},{25,31,0},{31,28,145},{30,30,145},{25,31,0},{0,30,145},{30,30,145},{0,30,145},{20,0,1586},{20,0,1586},{20,0,1586},{20,0,1586},{17,31,180},{17,31,180},{17,31,180},{16,27,8},{9,30,10},{9,30,10},{24,31,5421},{23,31,3999},{22,31,3491},{21,31,1947},{23,31,5539},{20,31,1879},{19,31,853},{17,30,238},{16,31,3922},{11,30,81},{27,31,1229}, +{26,31,813},{26,31,617},{24,31,100},{31,23,1878},{23,31,822},{22,31,305},{12,30,66},{31,27,1878},{12,30,66},{22,31,3491},{22,31,3491},{22,31,3491},{21,31,1947},{21,31,3081},{19,31,853},{19,31,853},{17,29,117},{14,31,1905},{11,30,17},{26,31,617},{26,31,617},{26,31,617},{24,31,100},{31,19,1152},{22,31,305},{22,31,305},{12,30,2},{31,25,1152},{12,30,2},{31,29,45},{30,31,18},{29,31,4}, 
+{28,31,1},{31,29,45},{31,30,45},{28,31,1},{0,30,65},{31,30,45},{0,30,65},{21,0,1586},{21,0,1586},{21,0,1586},{21,0,1586},{18,31,261},{18,31,261},{18,31,261},{17,28,8},{10,31,10},{10,31,10},{24,31,5178},{24,31,4095},{23,31,3704},{22,31,2201},{24,31,5197},{21,31,2101},{20,31,1240},{18,31,153},{18,31,3760},{12,31,16},{28,31,1088},{27,31,830},{27,31,709},{26,31,245},{29,29,1536}, +{26,31,792},{24,31,461},{14,31,1},{31,28,1536},{14,31,1},{23,31,3704},{23,31,3704},{23,31,3704},{22,31,2201},{23,31,3396},{20,31,1240},{20,31,1240},{18,30,122},{15,31,2208},{12,31,16},{27,31,709},{27,31,709},{27,31,709},{26,31,245},{28,28,1152},{24,31,461},{24,31,461},{14,31,1},{28,28,1152},{14,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0}, +{0,31,0},{31,31,0},{0,31,0},{22,0,1576},{22,0,1576},{22,0,1576},{22,0,1576},{20,31,340},{20,31,340},{20,31,340},{18,29,10},{12,31,16},{12,31,16},{25,31,4468},{24,31,3615},{24,31,3254},{23,31,2060},{24,31,4413},{22,31,1834},{21,31,1205},{19,31,58},{19,31,3127},{14,31,53},{28,31,768},{28,31,576},{28,31,512},{27,31,170},{31,26,1068},{26,31,536},{25,31,338},{16,31,1},{30,29,1068}, +{16,31,1},{24,31,3254},{24,31,3254},{24,31,3254},{23,31,2060},{23,31,2852},{21,31,1205},{21,31,1205},{19,31,58},{16,31,1878},{14,31,53},{28,31,512},{28,31,512},{28,31,512},{27,31,170},{31,24,802},{25,31,338},{25,31,338},{16,31,1},{30,28,802},{16,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{23,0,1576}, +{23,0,1576},{23,0,1576},{23,0,1576},{21,31,421},{21,31,421},{21,31,421},{19,30,10},{14,31,53},{14,31,53},{26,31,3858},{25,31,3188},{25,31,2899},{24,31,1947},{25,31,3700},{23,31,1596},{23,31,1112},{20,31,8},{20,31,2588},{16,31,117},{29,31,498},{28,31,384},{28,31,320},{27,31,122},{31,27,683},{27,31,342},{26,31,212},{19,31,1},{31,29,683},{19,31,1},{25,31,2899},{25,31,2899},{25,31,2899}, 
+{24,31,1947},{24,31,2441},{23,31,1112},{23,31,1112},{20,31,8},{18,31,1568},{16,31,117},{28,31,320},{28,31,320},{28,31,320},{27,31,122},{29,29,512},{26,31,212},{26,31,212},{19,31,1},{31,28,512},{19,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{24,0,1586},{24,0,1586},{24,0,1586},{24,0,1586},{22,31,520}, +{22,31,520},{22,31,520},{20,31,8},{16,31,117},{16,31,117},{27,31,3258},{26,31,2790},{26,31,2594},{25,31,1875},{26,31,3105},{24,31,1447},{23,31,1080},{21,31,20},{22,31,2151},{18,31,208},{29,31,306},{29,31,221},{29,31,185},{28,31,64},{31,28,384},{28,31,192},{27,31,125},{22,31,1},{30,30,384},{22,31,1},{26,31,2594},{26,31,2594},{26,31,2594},{25,31,1875},{25,31,2131},{23,31,1080},{23,31,1080}, +{21,31,20},{20,31,1336},{18,31,208},{29,31,185},{29,31,185},{29,31,185},{28,31,64},{31,27,290},{27,31,125},{27,31,125},{22,31,1},{31,29,290},{22,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{25,0,1586},{25,0,1586},{25,0,1586},{25,0,1586},{23,31,596},{23,31,596},{23,31,596},{21,31,20},{18,31,208}, +{18,31,208},{0,25,2669},{0,18,320},{0,13,5},{0,11,985},{0,17,5885},{0,11,3745},{0,10,1746},{0,7,4421},{0,8,6385},{0,7,4782},{0,25,2669},{0,18,320},{0,13,5},{0,11,985},{8,1,5885},{0,11,3745},{0,10,1746},{0,7,4421},{17,0,5885},{0,7,4421},{0,12,0},{0,12,0},{0,12,0},{0,6,1},{0,6,545},{0,5,205},{0,5,205},{0,3,337},{0,3,609},{0,3,401},{0,12,0}, +{0,12,0},{0,12,0},{0,6,1},{3,0,545},{0,5,205},{0,5,205},{0,3,337},{6,0,545},{0,3,337},{13,0,2669},{0,18,320},{0,13,5},{0,11,985},{13,0,2669},{25,0,2669},{0,11,985},{0,8,2689},{25,0,2669},{0,8,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,28,2665},{0,20,212},{0,14,10}, 
+{0,12,850},{0,19,6669},{0,13,3974},{0,11,1837},{0,8,4865},{0,9,7266},{0,7,5310},{0,28,2665},{0,20,212},{0,14,10},{0,12,850},{9,1,6669},{0,13,3974},{0,11,1837},{0,8,4865},{19,0,6669},{0,8,4865},{0,15,0},{0,15,0},{0,15,0},{0,7,9},{0,7,845},{0,6,337},{0,6,337},{0,3,545},{0,3,945},{0,3,609},{0,15,0},{0,15,0},{0,15,0},{0,7,9},{3,2,841}, +{0,6,337},{0,6,337},{0,3,545},{6,1,841},{0,3,545},{12,4,2665},{0,20,212},{1,14,5},{0,12,850},{12,4,2665},{28,0,2665},{0,12,850},{0,9,2689},{28,0,2665},{0,9,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,31,2665},{0,21,113},{0,15,65},{0,13,709},{0,21,7538},{0,14,4289},{0,12,1907}, +{0,9,5330},{0,10,8294},{0,8,5845},{0,31,2665},{0,21,113},{0,15,65},{0,13,709},{10,1,7538},{0,14,4289},{0,12,1907},{0,9,5330},{21,0,7538},{0,9,5330},{0,18,0},{0,18,0},{0,18,0},{0,9,0},{0,9,1201},{0,7,482},{0,7,482},{0,4,740},{0,4,1334},{0,4,861},{0,18,0},{0,18,0},{0,18,0},{0,9,0},{4,1,1201},{0,7,482},{0,7,482},{0,4,740},{9,0,1201}, +{0,4,740},{13,5,2665},{0,21,113},{2,15,5},{0,13,709},{13,5,2665},{31,0,2665},{0,13,709},{0,10,2689},{31,0,2665},{0,10,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,31,2809},{0,23,52},{0,16,173},{0,14,586},{0,23,8494},{0,15,4610},{0,13,1950},{0,9,5826},{0,11,9409},{0,9,6402},{1,31,2753}, +{0,23,52},{1,16,132},{0,14,586},{11,1,8493},{0,15,4610},{0,13,1950},{0,9,5826},{19,2,8493},{0,9,5826},{0,21,0},{0,21,0},{0,21,0},{0,10,9},{0,10,1629},{0,9,640},{0,9,640},{0,5,985},{0,5,1798},{0,5,1154},{0,21,0},{0,21,0},{0,21,0},{0,10,9},{3,5,1625},{0,9,640},{0,9,640},{0,5,985},{7,2,1625},{0,5,985},{17,0,2665},{0,23,52},{2,16,13}, +{0,14,586},{17,0,2665},{30,2,2665},{0,14,586},{0,11,2689},{30,2,2665},{0,11,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,31,3105},{0,25,8},{1,17,325},{0,15,512},{0,25,9669},{0,17,4961},{0,14,2120},{0,10,6421},{0,12,10774},{0,10,7150},{2,31,3033},{0,25,8},{1,17,261},{0,15,512},{12,1,9669}, 
+{0,17,4961},{0,14,2120},{0,10,6421},{25,0,9669},{0,10,6421},{0,24,0},{0,24,0},{0,24,0},{0,12,0},{0,12,2178},{0,9,865},{0,9,865},{0,6,1313},{0,5,2419},{0,5,1523},{0,24,0},{0,24,0},{0,24,0},{0,12,0},{4,4,2178},{0,9,865},{0,9,865},{0,6,1313},{12,0,2178},{0,6,1313},{19,0,2669},{0,25,8},{4,17,5},{0,15,512},{19,0,2669},{31,3,2669},{0,15,512}, +{0,12,2689},{31,3,2669},{0,12,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,31,3465},{0,27,65},{1,19,434},{0,17,474},{0,28,9670},{0,18,4658},{0,16,1717},{0,11,6209},{0,13,10979},{0,11,7109},{3,31,3101},{1,26,8},{2,18,261},{0,17,474},{14,0,9669},{0,18,4658},{0,16,1717},{0,11,6209},{26,1,9669}, +{0,11,6209},{1,25,64},{1,25,64},{1,25,64},{1,13,64},{0,15,2178},{0,11,680},{0,11,680},{0,7,1189},{0,7,2557},{0,6,1457},{1,25,0},{1,25,0},{1,25,0},{1,13,0},{5,5,2178},{0,11,680},{0,11,680},{0,7,1189},{15,0,2178},{0,7,1189},{20,0,2665},{0,27,1},{5,18,5},{0,17,410},{20,0,2665},{24,8,2665},{0,17,410},{0,13,2689},{24,8,2665},{0,13,2689},{0,0,64}, +{0,0,64},{0,0,64},{0,0,64},{0,3,0},{0,3,0},{0,3,0},{0,2,4},{0,1,13},{0,1,13},{3,31,3917},{1,28,129},{2,20,645},{1,18,538},{0,31,9670},{0,20,4330},{0,17,1246},{0,12,5925},{0,14,11321},{0,12,7014},{4,31,3218},{2,27,8},{3,19,261},{1,18,474},{15,1,9669},{0,20,4330},{0,17,1246},{0,12,5925},{27,2,9669},{0,12,5925},{1,28,128},{1,28,128},{1,28,128}, +{1,15,132},{0,18,2178},{0,13,482},{0,13,482},{0,8,1040},{0,8,2712},{0,7,1470},{2,26,0},{2,26,0},{2,26,0},{2,14,0},{9,0,2178},{0,13,482},{0,13,482},{0,8,1040},{18,0,2178},{0,8,1040},{21,1,2665},{1,28,1},{6,19,5},{0,18,305},{21,1,2665},{27,8,2665},{0,18,305},{0,14,2689},{27,8,2665},{0,14,2689},{1,0,128},{1,0,128},{1,0,128},{1,0,128},{0,6,0}, 
+{0,6,0},{0,6,0},{0,3,0},{0,2,61},{0,2,61},{3,31,4541},{2,29,321},{2,21,904},{1,19,625},{1,31,9749},{0,21,4018},{0,18,914},{0,14,5633},{0,16,11661},{0,13,6859},{5,31,3374},{3,28,10},{4,20,254},{2,19,474},{17,0,9670},{0,21,4018},{0,18,914},{0,14,5633},{30,2,9670},{0,14,5633},{2,29,320},{2,29,320},{2,29,320},{1,16,320},{0,21,2178},{0,15,353},{0,15,353}, +{0,9,881},{0,9,2882},{0,9,1457},{3,27,0},{3,27,0},{3,27,0},{3,15,0},{10,1,2178},{0,15,353},{0,15,353},{0,9,881},{21,0,2178},{0,9,881},{23,0,2665},{2,29,1},{6,20,13},{0,19,245},{23,0,2665},{30,8,2665},{0,19,245},{0,15,2689},{30,8,2665},{0,15,2689},{1,0,320},{1,0,320},{1,0,320},{1,0,320},{0,9,0},{0,9,0},{0,9,0},{0,4,4},{0,4,125}, +{0,4,125},{4,31,5378},{2,31,570},{3,22,1267},{2,20,850},{2,31,10045},{0,23,3745},{0,19,642},{0,14,5354},{0,17,11993},{0,14,6798},{7,31,3553},{4,29,8},{5,21,261},{3,20,491},{18,1,9669},{0,23,3745},{0,19,642},{0,14,5354},{31,3,9669},{0,14,5354},{2,31,561},{2,31,561},{2,31,561},{2,17,546},{0,24,2178},{0,17,205},{0,17,205},{0,10,730},{0,11,3150},{0,9,1457},{4,28,0}, +{4,28,0},{4,28,0},{4,16,0},{12,0,2178},{0,17,205},{0,17,205},{0,10,730},{24,0,2178},{0,10,730},{25,0,2669},{3,30,1},{8,21,5},{0,20,170},{25,0,2669},{31,9,2669},{0,20,170},{0,16,2689},{31,9,2669},{0,16,2689},{2,0,545},{2,0,545},{2,0,545},{2,0,545},{0,12,0},{0,12,0},{0,12,0},{0,6,1},{0,5,205},{0,5,205},{5,31,6330},{3,31,905},{4,23,1718}, +{2,21,1099},{3,31,10453},{0,25,3478},{0,21,369},{0,16,5138},{0,18,12455},{0,15,6867},{8,31,3710},{5,30,8},{6,22,261},{4,21,474},{19,2,9669},{0,25,3478},{0,21,369},{0,16,5138},{30,5,9669},{0,16,5138},{3,31,901},{3,31,901},{3,31,901},{3,18,866},{0,27,2178},{0,19,130},{0,19,130},{0,11,650},{0,12,3395},{0,10,1523},{5,29,0},{5,29,0},{5,29,0},{5,17,0},{13,1,2178}, 
+{0,19,130},{0,19,130},{0,11,650},{27,0,2178},{0,11,650},{24,4,2665},{4,31,1},{9,22,5},{0,21,113},{24,4,2665},{28,12,2665},{0,21,113},{0,17,2689},{28,12,2665},{0,17,2689},{3,0,865},{3,0,865},{3,0,865},{3,0,865},{0,15,0},{0,15,0},{0,15,0},{0,7,9},{0,6,337},{0,6,337},{6,31,7446},{4,31,1458},{4,24,2174},{3,22,1419},{3,31,11045},{0,27,3314},{0,22,195}, +{0,17,4865},{0,20,12855},{0,16,6882},{9,31,3902},{6,31,8},{7,23,261},{5,22,474},{19,5,9669},{0,27,3314},{0,22,195},{0,17,4865},{31,6,9669},{0,17,4865},{3,31,1397},{3,31,1397},{3,31,1397},{3,19,1210},{0,30,2178},{0,20,61},{0,20,61},{0,12,545},{0,13,3645},{0,12,1634},{6,30,0},{6,30,0},{6,30,0},{6,18,0},{15,0,2178},{0,20,61},{0,20,61},{0,12,545},{30,0,2178}, +{0,12,545},{25,5,2665},{6,31,8},{10,23,5},{0,22,74},{25,5,2665},{31,12,2665},{0,22,74},{0,18,2689},{31,12,2665},{0,18,2689},{3,0,1201},{3,0,1201},{3,0,1201},{3,0,1201},{0,18,0},{0,18,0},{0,18,0},{0,9,0},{0,7,482},{0,7,482},{7,31,8578},{4,31,2146},{5,25,2766},{3,23,1835},{4,31,11766},{0,28,3125},{0,23,94},{0,18,4610},{0,21,13238},{0,17,6837},{10,31,4130}, +{7,31,49},{8,24,254},{6,23,474},{23,0,9670},{0,28,3125},{0,23,94},{0,18,4610},{30,8,9670},{0,18,4610},{4,31,1921},{4,31,1921},{4,31,1921},{3,21,1665},{0,31,2228},{0,22,18},{0,22,18},{0,14,425},{0,15,3987},{0,13,1677},{7,31,0},{7,31,0},{7,31,0},{7,19,0},{16,1,2178},{0,22,18},{0,22,18},{0,14,425},{31,1,2178},{0,14,425},{29,0,2665},{7,31,49},{10,24,13}, +{0,23,58},{29,0,2665},{30,14,2665},{0,23,58},{0,19,2689},{30,14,2665},{0,19,2689},{3,0,1665},{3,0,1665},{3,0,1665},{3,0,1665},{0,21,0},{0,21,0},{0,21,0},{0,10,9},{0,9,640},{0,9,640},{7,31,10135},{5,31,3190},{5,26,3543},{4,24,2348},{5,31,12846},{0,30,2961},{0,25,30},{0,19,4421},{0,23,13846},{0,18,6930},{11,31,4345},{8,31,178},{9,25,261},{7,24,491},{24,1,9669}, 
+{0,30,2961},{0,25,30},{0,19,4421},{31,9,9669},{0,19,4421},{5,31,2706},{5,31,2706},{5,31,2706},{4,22,2179},{1,31,2404},{0,24,0},{0,24,0},{0,15,337},{0,16,4356},{0,14,1833},{8,31,9},{8,31,9},{8,31,9},{8,20,0},{16,4,2178},{0,24,0},{0,24,0},{0,15,337},{28,4,2178},{0,15,337},{31,0,2669},{10,31,113},{12,25,5},{0,25,26},{31,0,2669},{31,15,2669},{0,25,26}, +{0,20,2689},{31,15,2669},{0,20,2689},{4,0,2178},{4,0,2178},{4,0,2178},{4,0,2178},{0,24,0},{0,24,0},{0,24,0},{0,12,0},{0,9,865},{0,9,865},{8,31,11466},{6,31,4187},{6,27,4166},{4,25,2818},{6,31,13898},{0,31,2871},{0,26,15},{0,20,4226},{0,24,14214},{0,19,7006},{12,31,4590},{10,31,321},{10,26,261},{8,25,474},{26,0,9669},{0,31,2870},{0,26,14},{0,20,4225},{30,11,9669}, +{0,20,4225},{6,31,3458},{6,31,3458},{6,31,3458},{5,23,2690},{2,31,2691},{0,26,14},{0,26,14},{0,16,261},{0,17,4595},{0,15,1953},{9,31,36},{9,31,36},{9,31,36},{9,21,0},{17,5,2178},{1,25,0},{1,25,0},{0,16,260},{31,4,2178},{0,16,260},{28,8,2665},{11,31,194},{13,26,5},{0,26,5},{28,8,2665},{24,20,2665},{0,26,5},{0,21,2689},{24,20,2665},{0,21,2689},{5,0,2689}, +{5,0,2689},{5,0,2689},{5,0,2689},{0,27,1},{0,27,1},{0,27,1},{0,14,5},{0,11,1037},{0,11,1037},{9,31,12054},{7,31,4578},{7,28,4118},{5,26,2818},{7,31,14214},{1,31,3042},{1,27,15},{0,21,4099},{0,25,13683},{0,21,6322},{14,31,4858},{11,31,514},{11,27,261},{9,26,474},{27,1,9669},{2,31,3014},{1,27,14},{0,21,4018},{27,14,9669},{0,21,4018},{7,31,3554},{7,31,3554},{7,31,3554}, +{6,24,2689},{3,31,2795},{1,27,14},{1,27,14},{1,17,261},{0,19,4269},{0,16,1517},{11,31,73},{11,31,73},{11,31,73},{10,22,0},{21,0,2178},{2,26,0},{2,26,0},{0,17,185},{30,6,2178},{0,17,185},{29,9,2665},{13,31,320},{14,27,5},{0,27,1},{29,9,2665},{27,20,2665},{0,27,1},{0,22,2689},{27,20,2665},{0,22,2689},{6,0,2689},{6,0,2689},{6,0,2689},{6,0,2689},{1,28,1}, 
+{1,28,1},{1,28,1},{1,15,5},{0,13,797},{0,13,797},{11,31,12506},{8,31,5075},{8,29,4166},{6,27,2818},{8,31,14651},{3,31,3255},{2,28,23},{1,22,4099},{0,27,13238},{0,21,5650},{15,31,5054},{12,31,782},{12,28,254},{10,27,474},{29,0,9670},{3,31,3206},{2,28,22},{0,22,3829},{30,14,9670},{0,22,3829},{8,31,3706},{8,31,3706},{8,31,3706},{7,25,2689},{4,31,2946},{2,28,19},{2,28,19}, +{2,18,261},{0,21,3906},{0,17,1106},{12,31,106},{12,31,106},{12,31,106},{11,23,0},{22,1,2178},{3,27,0},{3,27,0},{0,18,128},{31,7,2178},{0,18,128},{31,8,2665},{15,31,445},{14,28,13},{1,28,4},{31,8,2665},{30,20,2665},{1,28,4},{0,23,2689},{30,20,2665},{0,23,2689},{7,0,2689},{7,0,2689},{7,0,2689},{7,0,2689},{2,29,1},{2,29,1},{2,29,1},{2,15,10},{0,14,637}, +{0,14,637},{12,31,13094},{10,31,5782},{9,30,4166},{7,28,2838},{10,31,15213},{4,31,3618},{3,29,25},{2,23,4101},{0,29,12686},{0,23,5075},{16,31,5378},{14,31,1172},{13,29,261},{11,28,491},{30,1,9669},{6,31,3469},{3,29,21},{0,23,3706},{31,15,9669},{0,23,3706},{9,31,3890},{9,31,3890},{9,31,3890},{8,26,2690},{6,31,3157},{3,29,24},{3,29,24},{3,19,254},{0,23,3619},{0,19,782},{13,31,145}, +{13,31,145},{13,31,145},{12,24,0},{24,0,2178},{4,28,0},{4,28,0},{0,19,106},{24,12,2178},{0,19,106},{31,11,2669},{16,31,640},{16,29,5},{2,29,2},{31,11,2669},{31,21,2669},{2,29,2},{0,24,2689},{31,21,2669},{0,24,2689},{8,0,2689},{8,0,2689},{8,0,2689},{8,0,2689},{3,30,5},{3,30,5},{3,30,5},{3,17,8},{0,17,436},{0,17,436},{13,31,13718},{11,31,6325},{10,31,4166}, +{8,29,2818},{11,31,15565},{6,31,4094},{4,30,15},{3,24,4118},{0,31,12355},{0,24,4578},{17,31,5738},{15,31,1517},{14,30,261},{12,29,474},{31,2,9669},{7,31,3761},{4,30,14},{0,24,3554},{30,17,9669},{0,24,3554},{10,31,4085},{10,31,4085},{10,31,4085},{9,27,2690},{7,31,3285},{4,30,14},{4,30,14},{4,20,261},{0,24,3330},{0,20,514},{14,31,208},{14,31,208},{14,31,208},{13,25,0},{25,1,2178}, 
+{5,29,0},{5,29,0},{0,21,64},{27,12,2178},{0,21,64},{28,20,2665},{18,31,829},{17,30,5},{3,30,2},{28,20,2665},{28,24,2665},{3,30,2},{0,25,2689},{28,24,2665},{0,25,2689},{9,0,2689},{9,0,2689},{9,0,2689},{9,0,2689},{4,31,1},{4,31,1},{4,31,1},{4,18,5},{0,18,306},{0,18,306},{14,31,14378},{12,31,7006},{11,31,4226},{9,30,2818},{12,31,16054},{7,31,4578},{5,31,15}, +{4,25,4099},{0,31,12051},{0,25,4089},{19,31,5970},{16,31,1953},{15,31,261},{13,30,474},{31,5,9669},{10,31,4081},{5,31,14},{0,25,3413},{31,18,9669},{0,25,3413},{11,31,4225},{11,31,4225},{11,31,4225},{10,28,2689},{8,31,3476},{5,31,14},{5,31,14},{5,21,261},{0,26,3091},{0,21,289},{15,31,260},{15,31,260},{15,31,260},{14,26,0},{27,0,2178},{6,30,0},{6,30,0},{0,22,25},{30,12,2178}, +{0,22,25},{29,21,2665},{20,31,1037},{18,31,5},{4,31,1},{29,21,2665},{31,24,2665},{4,31,1},{0,26,2689},{31,24,2665},{0,26,2689},{10,0,2689},{10,0,2689},{10,0,2689},{10,0,2689},{5,31,10},{5,31,10},{5,31,10},{5,19,5},{0,20,194},{0,20,194},{15,31,13557},{13,31,7094},{12,31,4421},{10,31,2769},{13,31,15228},{8,31,4270},{6,31,46},{5,26,3476},{0,31,11020},{0,26,3108},{20,31,5400}, +{17,31,1931},{16,31,337},{15,30,320},{31,7,8712},{11,31,3630},{7,31,0},{0,26,2667},{31,19,8712},{0,26,2667},{12,31,4421},{12,31,4421},{12,31,4421},{11,29,2689},{9,31,3722},{6,31,46},{6,31,46},{6,22,261},{0,27,2882},{0,22,173},{16,31,337},{16,31,337},{16,31,337},{15,27,0},{28,1,2178},{7,31,0},{7,31,0},{0,23,9},{31,13,2178},{0,23,9},{31,19,2178},{22,31,881},{19,31,0}, +{7,31,0},{31,19,2178},{31,25,2178},{7,31,0},{0,27,2178},{31,25,2178},{0,27,2178},{11,0,2689},{11,0,2689},{11,0,2689},{11,0,2689},{6,31,37},{6,31,37},{6,31,37},{6,19,10},{0,22,109},{0,22,109},{16,31,12678},{14,31,7003},{13,31,4693},{12,31,2714},{15,31,14026},{10,31,3943},{8,31,94},{6,26,2766},{2,31,10074},{0,27,2146},{20,31,4698},{19,31,1746},{17,31,464},{16,30,164},{29,13,7578}, 
+{12,31,3090},{9,31,20},{0,27,1921},{31,20,7578},{0,27,1921},{13,31,4693},{13,31,4693},{13,31,4693},{12,30,2690},{11,31,3939},{8,31,94},{8,31,94},{7,23,254},{0,29,2650},{0,24,49},{17,31,464},{17,31,464},{17,31,464},{16,28,0},{28,4,2178},{9,31,20},{9,31,20},{0,24,0},{28,16,2178},{0,24,0},{31,21,1625},{23,31,653},{21,31,4},{10,31,1},{31,21,1625},{31,26,1625},{10,31,1}, +{0,27,1665},{31,26,1625},{0,27,1665},{12,0,2689},{12,0,2689},{12,0,2689},{12,0,2689},{8,31,58},{8,31,58},{8,31,58},{7,21,8},{0,24,49},{0,24,49},{17,31,12042},{15,31,6882},{15,31,4946},{13,31,2690},{16,31,13127},{11,31,3615},{9,31,229},{7,27,2174},{3,31,9313},{0,27,1458},{21,31,4150},{19,31,1634},{19,31,545},{17,30,89},{31,11,6661},{14,31,2654},{11,31,61},{0,28,1397},{31,21,6661}, +{0,28,1397},{15,31,4946},{15,31,4946},{15,31,4946},{13,31,2690},{12,31,4170},{9,31,229},{9,31,229},{8,24,261},{0,31,2520},{0,25,10},{19,31,545},{19,31,545},{19,31,545},{17,29,0},{29,5,2178},{11,31,61},{11,31,61},{1,25,0},{31,16,2178},{1,25,0},{31,22,1201},{24,31,482},{22,31,1},{13,31,1},{31,22,1201},{30,27,1201},{13,31,1},{0,28,1201},{30,27,1201},{0,28,1201},{13,0,2689}, +{13,0,2689},{13,0,2689},{13,0,2689},{9,31,85},{9,31,85},{9,31,85},{8,22,5},{0,25,10},{0,25,10},{18,31,11474},{16,31,6867},{15,31,5138},{14,31,2725},{16,31,12279},{12,31,3410},{10,31,419},{8,27,1718},{4,31,8678},{0,28,905},{23,31,3626},{20,31,1550},{20,31,650},{18,31,25},{29,17,5829},{15,31,2306},{12,31,130},{0,28,901},{27,24,5829},{0,28,901},{15,31,5138},{15,31,5138},{15,31,5138}, +{14,31,2725},{13,31,4452},{10,31,419},{10,31,419},{9,25,261},{0,31,2520},{1,26,10},{20,31,650},{20,31,650},{20,31,650},{18,30,0},{31,4,2178},{12,31,130},{12,31,130},{2,26,0},{30,18,2178},{2,26,0},{31,23,845},{25,31,353},{23,31,9},{16,31,0},{31,23,845},{31,27,845},{16,31,0},{0,28,865},{31,27,845},{0,28,865},{14,0,2689},{14,0,2689},{14,0,2689},{14,0,2689},{10,31,130}, 
+{10,31,130},{10,31,130},{9,23,5},{0,27,1},{0,27,1},{19,31,10774},{17,31,6962},{16,31,5378},{15,31,2810},{17,31,11598},{12,31,3346},{12,31,642},{9,28,1222},{6,31,8113},{0,29,570},{23,31,3146},{22,31,1539},{21,31,773},{19,31,0},{31,15,5082},{16,31,2034},{14,31,221},{0,29,554},{31,23,5082},{0,29,554},{16,31,5378},{16,31,5378},{16,31,5378},{15,31,2810},{15,31,4746},{12,31,642},{12,31,642}, +{10,26,261},{2,31,2714},{2,27,10},{21,31,773},{21,31,773},{21,31,773},{19,31,0},{31,7,2178},{14,31,221},{14,31,221},{3,27,0},{31,19,2178},{3,27,0},{31,25,545},{26,31,221},{25,31,0},{19,31,0},{31,25,545},{31,28,545},{19,31,0},{0,29,545},{31,28,545},{0,29,545},{15,0,2689},{15,0,2689},{15,0,2689},{15,0,2689},{11,31,170},{11,31,170},{11,31,170},{10,23,10},{1,28,1}, +{1,28,1},{20,31,10225},{18,31,7025},{18,31,5729},{16,31,2978},{19,31,10792},{14,31,3283},{13,31,982},{10,29,861},{8,31,7588},{2,29,325},{24,31,2729},{23,31,1460},{22,31,932},{20,31,36},{29,21,4344},{19,31,1746},{16,31,353},{2,29,321},{31,24,4344},{2,29,321},{18,31,5729},{18,31,5729},{18,31,5729},{16,31,2978},{16,31,5028},{13,31,982},{13,31,982},{11,27,254},{4,31,2981},{3,28,10},{22,31,932}, +{22,31,932},{22,31,932},{20,31,36},{28,16,2178},{16,31,353},{16,31,353},{4,28,0},{24,24,2178},{4,28,0},{31,27,290},{27,31,125},{27,31,4},{22,31,1},{31,27,290},{31,29,290},{22,31,1},{0,29,320},{31,29,290},{0,29,320},{16,0,2689},{16,0,2689},{16,0,2689},{16,0,2689},{12,31,245},{12,31,245},{12,31,245},{11,25,8},{2,29,5},{2,29,5},{20,31,9825},{19,31,7014},{19,31,5925}, +{17,31,3218},{20,31,10245},{15,31,3285},{14,31,1330},{11,29,612},{10,31,7225},{3,30,133},{25,31,2467},{24,31,1470},{23,31,1040},{21,31,144},{31,19,3779},{20,31,1580},{18,31,500},{3,30,129},{31,25,3779},{3,30,129},{19,31,5925},{19,31,5925},{19,31,5925},{17,31,3218},{17,31,5346},{14,31,1330},{14,31,1330},{12,28,261},{6,31,3267},{4,29,10},{23,31,1040},{23,31,1040},{23,31,1040},{21,31,144},{29,17,2178}, 
+{18,31,500},{18,31,500},{5,29,0},{27,24,2178},{5,29,0},{31,28,128},{29,31,61},{28,31,0},{25,31,1},{31,28,128},{30,30,128},{25,31,1},{0,30,128},{30,30,128},{0,30,128},{17,0,2689},{17,0,2689},{17,0,2689},{17,0,2689},{13,31,338},{13,31,338},{13,31,338},{12,26,5},{3,30,5},{3,30,5},{21,31,9523},{20,31,7109},{20,31,6209},{18,31,3473},{20,31,9749},{16,31,3410},{15,31,1717}, +{12,30,401},{11,31,6964},{4,31,65},{26,31,2273},{25,31,1539},{24,31,1189},{23,31,260},{29,25,3299},{22,31,1490},{20,31,680},{6,30,64},{27,28,3299},{6,30,64},{20,31,6209},{20,31,6209},{20,31,6209},{18,31,3473},{18,31,5700},{15,31,1717},{15,31,1717},{13,29,261},{8,31,3587},{5,30,10},{24,31,1189},{24,31,1189},{24,31,1189},{23,31,260},{31,16,2178},{20,31,680},{20,31,680},{6,30,0},{30,24,2178}, +{6,30,0},{31,30,34},{30,31,13},{30,31,4},{28,31,0},{31,30,34},{30,31,34},{28,31,0},{0,30,64},{30,31,34},{0,30,64},{18,0,2689},{18,0,2689},{18,0,2689},{18,0,2689},{15,31,421},{15,31,421},{15,31,421},{13,27,5},{4,31,1},{4,31,1},{23,31,9201},{21,31,7314},{21,31,6530},{19,31,3778},{21,31,9420},{18,31,3652},{16,31,2193},{14,30,325},{14,31,6804},{6,31,10},{27,31,2057}, +{26,31,1605},{25,31,1378},{24,31,442},{31,23,2904},{23,31,1452},{22,31,881},{7,31,0},{31,27,2904},{7,31,0},{21,31,6530},{21,31,6530},{21,31,6530},{19,31,3778},{19,31,5956},{16,31,2193},{16,31,2193},{14,30,261},{10,31,3957},{6,31,10},{25,31,1378},{25,31,1378},{25,31,1378},{24,31,442},{31,19,2178},{22,31,881},{22,31,881},{7,31,0},{31,25,2178},{7,31,0},{31,31,0},{31,31,0},{31,31,0}, +{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{19,0,2689},{19,0,2689},{19,0,2689},{19,0,2689},{16,31,512},{16,31,512},{16,31,512},{14,27,10},{6,31,10},{6,31,10},{23,31,8049},{22,31,6550},{22,31,5925},{20,31,3589},{23,31,8137},{19,31,3220},{18,31,2050},{15,31,173},{15,31,5805},{8,31,52},{27,31,1544},{27,31,1181},{26,31,1040},{25,31,353},{31,24,2166}, 
+{24,31,1083},{23,31,653},{10,31,1},{30,28,2166},{10,31,1},{22,31,5925},{22,31,5925},{22,31,5925},{20,31,3589},{20,31,5209},{18,31,2050},{18,31,2050},{15,30,117},{12,31,3405},{8,31,52},{26,31,1040},{26,31,1040},{26,31,1040},{25,31,353},{31,21,1625},{23,31,653},{23,31,653},{10,31,1},{31,26,1625},{10,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0}, +{0,31,0},{31,31,0},{0,31,0},{20,0,2689},{20,0,2689},{20,0,2689},{20,0,2689},{17,31,625},{17,31,625},{17,31,625},{15,29,8},{8,31,52},{8,31,52},{24,31,7177},{23,31,5845},{23,31,5361},{21,31,3473},{23,31,7033},{20,31,2945},{19,31,1907},{16,31,65},{16,31,5026},{10,31,117},{28,31,1137},{27,31,861},{27,31,740},{26,31,260},{29,29,1601},{26,31,833},{24,31,482},{13,31,1},{31,28,1601}, +{13,31,1},{23,31,5361},{23,31,5361},{23,31,5361},{21,31,3473},{21,31,4661},{19,31,1907},{19,31,1907},{16,31,65},{14,31,2997},{10,31,117},{27,31,740},{27,31,740},{27,31,740},{26,31,260},{31,22,1201},{24,31,482},{24,31,482},{13,31,1},{30,27,1201},{13,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{21,0,2689}, +{21,0,2689},{21,0,2689},{21,0,2689},{18,31,754},{18,31,754},{18,31,754},{16,30,5},{10,31,117},{10,31,117},{24,31,6393},{24,31,5310},{23,31,4865},{22,31,3314},{24,31,6146},{20,31,2737},{20,31,1837},{17,31,5},{18,31,4381},{11,31,212},{28,31,801},{28,31,609},{28,31,545},{27,31,185},{31,26,1121},{26,31,561},{25,31,353},{16,31,0},{30,29,1121},{16,31,0},{23,31,4865},{23,31,4865},{23,31,4865}, +{22,31,3314},{22,31,4181},{20,31,1837},{20,31,1837},{17,31,5},{15,31,2621},{11,31,212},{28,31,545},{28,31,545},{28,31,545},{27,31,185},{31,23,845},{25,31,353},{25,31,353},{16,31,0},{31,27,845},{16,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{22,0,2689},{22,0,2689},{22,0,2689},{22,0,2689},{19,31,850}, 
+{19,31,850},{19,31,850},{17,31,5},{11,31,212},{11,31,212},{25,31,5683},{24,31,4782},{24,31,4421},{23,31,3173},{24,31,5314},{22,31,2563},{21,31,1844},{18,31,10},{18,31,3757},{13,31,338},{29,31,531},{28,31,401},{28,31,337},{27,31,121},{31,27,726},{27,31,363},{26,31,221},{19,31,0},{31,29,726},{19,31,0},{24,31,4421},{24,31,4421},{24,31,4421},{23,31,3173},{23,31,3657},{21,31,1844},{21,31,1844}, +{18,31,10},{16,31,2321},{13,31,338},{28,31,337},{28,31,337},{28,31,337},{27,31,121},{31,25,545},{26,31,221},{26,31,221},{19,31,0},{31,28,545},{19,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{23,0,2689},{23,0,2689},{23,0,2689},{23,0,2689},{20,31,985},{20,31,985},{20,31,985},{18,31,10},{13,31,338}, +{13,31,338},{2,31,10560},{0,30,1586},{0,22,173},{0,18,3826},{0,28,18065},{0,19,12194},{0,17,6081},{0,11,14098},{0,13,19386},{0,11,14998},{3,31,10216},{0,30,1586},{0,22,173},{0,18,3826},{12,4,18065},{0,19,12194},{0,17,6081},{0,11,14098},{28,0,18065},{0,11,14098},{0,17,1},{0,17,1},{0,17,1},{0,9,4},{0,9,1105},{0,7,442},{0,7,442},{0,4,680},{0,4,1230},{0,4,801},{0,17,1}, +{0,17,1},{0,17,1},{0,9,4},{4,1,1105},{0,7,442},{0,7,442},{0,4,680},{9,0,1105},{0,4,680},{21,0,9248},{0,30,1586},{0,22,173},{0,18,3826},{21,0,9248},{30,6,9248},{0,18,3826},{0,14,9248},{30,6,9248},{0,14,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,31,11328},{0,31,1341},{0,23,68}, +{0,19,3626},{0,30,19334},{0,20,12611},{0,18,6099},{0,12,14756},{0,14,20886},{0,12,15845},{3,31,10792},{0,31,1341},{0,23,68},{0,19,3626},{15,0,19334},{0,20,12611},{0,18,6099},{0,12,14756},{30,0,19334},{0,12,14756},{0,20,0},{0,20,0},{0,20,0},{0,10,1},{0,10,1513},{0,8,605},{0,8,605},{0,5,925},{0,5,1682},{0,4,1089},{0,20,0},{0,20,0},{0,20,0},{0,10,1},{5,0,1513}, 
+{0,8,605},{0,8,605},{0,5,925},{10,0,1513},{0,5,925},{22,1,9248},{0,31,1341},{0,23,68},{0,19,3626},{22,1,9248},{31,7,9248},{0,19,3626},{0,15,9248},{31,7,9248},{0,15,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,31,12200},{0,31,1325},{0,24,10},{0,20,3365},{0,31,20705},{0,21,13009},{0,18,6227}, +{0,13,15441},{0,15,22455},{0,13,16666},{4,31,11489},{0,31,1325},{0,24,10},{0,20,3365},{15,2,20689},{0,21,13009},{0,18,6227},{0,13,15441},{30,1,20689},{0,13,15441},{0,23,0},{0,23,0},{0,23,0},{0,11,9},{0,11,1989},{0,9,772},{0,9,772},{0,5,1213},{0,5,2194},{0,5,1382},{0,23,0},{0,23,0},{0,23,0},{0,11,9},{6,0,1985},{0,9,772},{0,9,772},{0,5,1213},{10,1,1985}, +{0,5,1213},{23,2,9248},{0,31,1325},{0,24,10},{0,20,3365},{23,2,9248},{30,9,9248},{0,20,3365},{0,16,9250},{30,9,9248},{0,16,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,31,13288},{0,31,1565},{0,25,10},{0,22,3077},{1,31,22214},{0,23,13555},{0,20,6310},{0,14,16176},{0,16,24130},{0,13,17562},{4,31,12385}, +{0,31,1565},{0,25,10},{0,22,3077},{17,0,22129},{0,23,13555},{0,20,6310},{0,14,16176},{30,2,22129},{0,14,16176},{0,26,0},{0,26,0},{0,26,0},{0,13,0},{0,13,2521},{0,10,1018},{0,10,1018},{0,6,1508},{0,6,2801},{0,6,1764},{0,26,0},{0,26,0},{0,26,0},{0,13,0},{6,1,2521},{0,10,1018},{0,10,1018},{0,6,1508},{13,0,2521},{0,6,1508},{23,5,9248},{0,31,1565},{0,25,10}, +{0,22,3077},{23,5,9248},{31,10,9248},{0,22,3077},{0,17,9250},{31,10,9248},{0,17,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{4,31,14788},{0,31,2141},{0,27,72},{0,22,2825},{1,31,24140},{0,25,14114},{0,21,6323},{0,14,17202},{0,17,26063},{0,14,18646},{5,31,13521},{1,31,2100},{0,27,72},{0,22,2825},{16,4,23851}, 
+{0,25,14114},{0,21,6323},{0,14,17202},{28,4,23851},{0,14,17202},{0,29,1},{0,29,1},{0,29,1},{0,15,4},{0,15,3202},{0,12,1285},{0,12,1285},{0,7,1973},{0,7,3569},{0,6,2241},{0,29,1},{0,29,1},{0,29,1},{0,15,4},{7,1,3200},{0,12,1285},{0,12,1285},{0,7,1973},{11,2,3200},{0,7,1973},{27,0,9248},{3,31,1885},{2,26,8},{0,22,2825},{27,0,9248},{30,12,9248},{0,22,2825}, +{0,18,9248},{30,12,9248},{0,18,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{4,31,16228},{1,31,2836},{0,28,170},{0,24,2612},{2,31,25971},{0,25,14594},{0,22,6473},{0,16,18144},{0,17,27951},{0,14,19830},{7,31,14675},{2,31,2706},{1,27,149},{0,24,2612},{19,0,25472},{0,25,14594},{0,22,6473},{0,16,18144},{30,4,25472}, +{0,16,18144},{0,31,9},{0,31,9},{0,31,9},{0,16,0},{0,16,3872},{0,13,1514},{0,13,1514},{0,7,2405},{0,7,4305},{0,7,2766},{0,31,9},{0,31,9},{0,31,9},{0,16,0},{8,0,3872},{0,13,1514},{0,13,1514},{0,7,2405},{16,0,3872},{0,7,2405},{28,1,9248},{4,31,2210},{3,27,8},{0,24,2612},{28,1,9248},{31,13,9248},{0,24,2612},{0,19,9248},{31,13,9248},{0,19,9248},{0,0,0}, +{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{4,31,17796},{2,31,3702},{1,29,270},{0,25,2361},{3,31,27563},{0,27,14944},{0,23,6470},{0,17,18681},{0,19,29556},{0,16,20628},{7,31,15635},{3,31,3425},{1,29,206},{0,25,2361},{20,0,26744},{0,27,14944},{0,23,6470},{0,17,18681},{24,8,26744},{0,17,18681},{0,31,125},{0,31,125},{0,31,125}, +{0,18,8},{0,18,4418},{0,14,1696},{0,14,1696},{0,8,2664},{0,8,4952},{0,8,3148},{1,31,72},{1,31,72},{1,31,72},{0,18,8},{9,0,4418},{0,14,1696},{0,14,1696},{0,8,2664},{18,0,4418},{0,8,2664},{30,0,9248},{6,31,2557},{3,28,10},{0,25,2357},{30,0,9248},{30,15,9248},{0,25,2357},{0,20,9250},{30,15,9248},{0,20,9250},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,1,0}, 
+{0,1,0},{0,1,0},{0,0,4},{0,0,4},{0,0,4},{5,31,19090},{2,31,4598},{1,30,406},{0,26,2220},{3,31,28187},{0,29,14347},{0,25,5609},{0,18,18144},{0,20,29961},{0,17,20301},{8,31,16082},{4,31,4019},{2,30,206},{0,26,2220},{21,1,26744},{0,29,14347},{0,25,5609},{0,18,18144},{27,8,26744},{0,18,18144},{1,31,264},{1,31,264},{1,31,264},{1,19,72},{0,21,4418},{0,16,1412},{0,16,1412}, +{0,10,2420},{0,9,5122},{0,9,2997},{2,31,117},{2,31,117},{2,31,117},{1,19,8},{10,1,4418},{0,16,1412},{0,16,1412},{0,10,2420},{21,0,4418},{0,10,2420},{31,1,9248},{8,31,2929},{4,29,10},{0,26,2120},{31,1,9248},{27,18,9248},{0,26,2120},{0,21,9250},{27,18,9248},{0,21,9250},{1,0,68},{1,0,68},{1,0,68},{1,0,68},{0,4,1},{0,4,1},{0,4,1},{0,2,0},{0,2,25}, +{0,2,25},{7,31,20669},{3,31,5786},{2,31,625},{1,27,2283},{4,31,29033},{0,30,13795},{0,26,4770},{0,18,17667},{0,21,30425},{0,18,20068},{9,31,16691},{5,31,4841},{3,31,205},{1,27,2219},{23,0,26747},{0,30,13795},{0,26,4770},{0,18,17667},{30,8,26747},{0,18,17667},{2,31,589},{2,31,589},{2,31,589},{1,20,186},{0,24,4418},{0,18,1125},{0,18,1125},{0,10,2186},{0,11,5390},{0,10,2915},{3,31,169}, +{3,31,169},{3,31,169},{2,20,13},{12,0,4418},{0,18,1125},{0,18,1125},{0,10,2186},{24,0,4418},{0,10,2186},{31,4,9248},{10,31,3380},{6,30,8},{0,27,1954},{31,4,9248},{30,18,9248},{0,27,1954},{0,22,9248},{30,18,9248},{0,22,9248},{1,0,185},{1,0,185},{1,0,185},{1,0,185},{0,7,0},{0,7,0},{0,7,0},{0,3,9},{0,3,73},{0,3,73},{7,31,21577},{4,31,6797},{3,31,985}, +{1,28,2226},{5,31,29436},{0,31,12990},{0,27,4027},{0,20,16925},{0,23,30436},{0,18,19620},{11,31,16645},{7,31,5260},{4,31,232},{2,28,2141},{24,1,26259},{0,31,12990},{0,27,4027},{0,20,16925},{31,9,26259},{0,20,16925},{3,31,985},{3,31,985},{3,31,985},{2,21,378},{0,27,4418},{0,20,905},{0,20,905},{0,12,2005},{0,12,5635},{0,11,2950},{4,31,232},{4,31,232},{4,31,232},{3,21,13},{13,1,4418}, 
+{0,20,905},{0,20,905},{0,12,2005},{27,0,4418},{0,12,2005},{31,7,8980},{11,31,3601},{7,31,4},{0,28,1665},{31,7,8980},{31,19,8980},{0,28,1665},{0,23,8980},{31,19,8980},{0,23,8980},{2,0,377},{2,0,377},{2,0,377},{2,0,377},{0,10,0},{0,10,0},{0,10,0},{0,5,0},{0,4,146},{0,4,146},{7,31,21193},{4,31,7053},{3,31,1481},{2,28,2093},{6,31,28313},{0,31,11406},{0,28,2965}, +{0,20,15021},{0,23,28900},{0,19,17995},{11,31,15381},{7,31,4876},{5,31,325},{3,28,1786},{25,1,24371},{0,31,11406},{0,28,2965},{0,20,15021},{27,12,24371},{0,20,15021},{3,31,1481},{3,31,1481},{3,31,1481},{2,23,621},{0,30,4418},{0,21,680},{0,21,680},{0,13,1780},{0,13,5885},{0,12,2950},{5,31,325},{5,31,325},{5,31,325},{4,22,8},{15,0,4418},{0,21,680},{0,21,680},{0,13,1780},{30,0,4418}, +{0,13,1780},{31,8,7938},{12,31,3188},{8,31,0},{0,29,1156},{31,8,7938},{30,20,7938},{0,29,1156},{0,23,7956},{30,20,7938},{0,23,7956},{2,0,617},{2,0,617},{2,0,617},{2,0,617},{0,13,0},{0,13,0},{0,13,0},{0,6,9},{0,5,233},{0,5,233},{8,31,20825},{5,31,7494},{4,31,2089},{2,29,1970},{7,31,27269},{0,31,10078},{0,28,2021},{0,21,13204},{0,25,27384},{0,20,16398},{12,31,14148}, +{8,31,4562},{7,31,392},{4,29,1445},{26,1,22568},{0,31,10078},{0,28,2021},{0,21,13204},{31,11,22568},{0,21,13204},{4,31,2089},{4,31,2089},{4,31,2089},{3,23,946},{0,31,4468},{0,23,521},{0,23,521},{0,14,1573},{0,15,6227},{0,13,2909},{7,31,392},{7,31,392},{7,31,392},{5,23,8},{16,1,4418},{0,23,521},{0,23,521},{0,14,1573},{31,1,4418},{0,14,1573},{31,10,6964},{14,31,2785},{10,31,4}, +{0,29,740},{31,10,6964},{31,20,6964},{0,29,740},{0,24,6970},{31,20,6964},{0,24,6970},{3,0,937},{3,0,937},{3,0,937},{3,0,937},{0,16,1},{0,16,1},{0,16,1},{0,8,1},{0,6,377},{0,6,377},{8,31,20717},{6,31,8012},{5,31,2958},{3,29,2057},{7,31,26243},{0,31,8890},{0,29,1154},{0,22,11325},{0,25,25854},{0,21,14671},{13,31,12926},{10,31,4313},{8,31,485},{5,29,1106},{25,5,20642}, 
+{0,31,8890},{0,29,1154},{0,22,11325},{31,12,20642},{0,22,11325},{5,31,2958},{5,31,2958},{5,31,2958},{3,25,1361},{1,31,4644},{0,25,337},{0,25,337},{0,15,1429},{0,16,6596},{0,14,2981},{8,31,485},{8,31,485},{8,31,485},{6,24,13},{16,4,4418},{0,25,337},{0,25,337},{0,15,1429},{28,4,4418},{0,15,1429},{31,11,5941},{15,31,2377},{11,31,1},{0,30,388},{31,11,5941},{31,21,5941},{0,30,388}, +{0,24,5953},{31,21,5941},{0,24,5953},{3,0,1360},{3,0,1360},{3,0,1360},{3,0,1360},{0,19,0},{0,19,0},{0,19,0},{0,10,4},{0,8,548},{0,8,548},{9,31,20713},{7,31,8575},{5,31,3806},{3,30,2260},{7,31,25603},{0,31,8106},{0,29,642},{0,22,9805},{0,26,24678},{0,21,13359},{15,31,11882},{11,31,4006},{9,31,596},{6,29,818},{28,1,19021},{2,31,8066},{0,29,642},{0,22,9805},{31,13,19021}, +{0,22,9805},{5,31,3806},{5,31,3806},{5,31,3806},{4,26,1819},{2,31,4962},{0,27,232},{0,27,232},{0,16,1268},{0,17,6926},{0,15,3126},{9,31,596},{9,31,596},{9,31,596},{7,25,13},{17,5,4418},{0,27,232},{0,27,232},{0,16,1268},{31,4,4418},{0,16,1268},{31,13,5101},{16,31,2042},{13,31,4},{0,30,164},{31,13,5101},{31,22,5101},{0,30,164},{0,25,5105},{31,22,5101},{0,25,5105},{4,0,1818}, +{4,0,1818},{4,0,1818},{4,0,1818},{0,22,0},{0,22,0},{0,22,0},{0,11,0},{0,9,697},{0,9,697},{10,31,20905},{7,31,9247},{6,31,4787},{4,30,2547},{8,31,25042},{1,31,7537},{0,30,264},{0,23,8449},{0,27,23521},{0,22,12141},{15,31,10794},{12,31,3786},{10,31,725},{8,29,621},{29,1,17485},{3,31,7274},{0,30,264},{0,23,8449},{27,16,17485},{0,23,8449},{6,31,4787},{6,31,4787},{6,31,4787}, +{4,27,2323},{3,31,5386},{0,29,130},{0,29,130},{0,18,1096},{0,19,7364},{0,17,3225},{10,31,725},{10,31,725},{10,31,725},{8,26,8},{21,0,4418},{0,29,130},{0,29,130},{0,18,1096},{30,6,4418},{0,18,1096},{31,14,4325},{18,31,1737},{14,31,1},{0,31,64},{31,14,4325},{30,23,4325},{0,31,64},{0,25,4337},{30,23,4325},{0,25,4337},{4,0,2314},{4,0,2314},{4,0,2314},{4,0,2314},{0,25,0}, 
+{0,25,0},{0,25,0},{0,12,4},{0,10,925},{0,10,925},{11,31,21021},{8,31,10106},{7,31,5819},{4,30,3027},{8,31,24722},{2,31,7042},{0,31,81},{0,24,7169},{0,29,22467},{0,22,11133},{16,31,9869},{12,31,3594},{11,31,821},{8,30,420},{30,1,16034},{4,31,6558},{0,31,81},{0,24,7169},{31,15,16034},{0,24,7169},{7,31,5819},{7,31,5819},{7,31,5819},{5,28,2915},{3,31,5962},{0,30,72},{0,30,72}, +{0,18,968},{0,20,7781},{0,17,3305},{11,31,821},{11,31,821},{11,31,821},{9,27,8},{22,1,4418},{0,30,72},{0,30,72},{0,18,968},{31,7,4418},{0,18,968},{31,15,3617},{19,31,1450},{15,31,9},{0,31,0},{31,15,3617},{31,23,3617},{0,31,0},{0,26,3617},{31,23,3617},{0,26,3617},{5,0,2906},{5,0,2906},{5,0,2906},{5,0,2906},{0,28,1},{0,28,1},{0,28,1},{0,14,0},{0,11,1156}, +{0,11,1156},{11,31,21381},{8,31,11186},{7,31,7169},{5,31,3633},{9,31,24543},{3,31,6762},{0,31,81},{0,24,5819},{0,29,21333},{0,23,10106},{17,31,8955},{14,31,3433},{12,31,980},{10,30,242},{29,5,14504},{6,31,5834},{1,31,74},{0,24,5819},{31,16,14504},{0,24,5819},{7,31,7169},{7,31,7169},{7,31,7169},{5,30,3618},{4,31,6757},{0,31,81},{0,31,81},{0,20,821},{0,21,8245},{0,18,3531},{12,31,980}, +{12,31,980},{12,31,980},{10,28,13},{24,0,4418},{1,31,74},{1,31,74},{0,20,821},{24,12,4418},{0,20,821},{29,21,2888},{20,31,1156},{17,31,1},{3,31,1},{29,21,2888},{31,24,2888},{3,31,1},{0,26,2906},{31,24,2888},{0,26,2906},{5,0,3617},{5,0,3617},{5,0,3617},{5,0,3617},{0,31,0},{0,31,0},{0,31,0},{0,15,9},{0,13,1421},{0,13,1421},{12,31,21949},{9,31,12367},{8,31,8449}, +{6,31,4338},{10,31,24600},{3,31,6650},{1,31,298},{0,25,4698},{0,30,20575},{0,24,9247},{17,31,8219},{15,31,3236},{13,31,1157},{11,30,122},{31,3,13235},{7,31,5260},{3,31,145},{0,25,4698},{31,17,13235},{0,25,4698},{8,31,8449},{8,31,8449},{8,31,8449},{6,31,4338},{5,31,7667},{1,31,298},{1,31,298},{0,21,680},{0,23,8779},{0,19,3786},{13,31,1157},{13,31,1157},{13,31,1157},{11,29,13},{25,1,4418}, 
+{3,31,145},{3,31,145},{0,21,680},{27,12,4418},{0,21,680},{31,19,2314},{22,31,949},{19,31,4},{6,31,1},{31,19,2314},{31,25,2314},{6,31,1},{0,27,2314},{31,25,2314},{0,27,2314},{6,0,4337},{6,0,4337},{6,0,4337},{6,0,4337},{0,31,64},{0,31,64},{0,31,64},{0,17,0},{0,13,1709},{0,13,1709},{12,31,22557},{10,31,13585},{9,31,9926},{6,31,5186},{11,31,24636},{3,31,6794},{2,31,692}, +{0,26,3723},{0,31,19836},{0,25,8442},{19,31,7417},{16,31,3126},{15,31,1268},{12,31,68},{29,9,12051},{8,31,4762},{4,31,232},{0,26,3723},{27,20,12051},{0,26,3723},{9,31,9926},{9,31,9926},{9,31,9926},{6,31,5186},{5,31,8691},{2,31,692},{2,31,692},{0,22,557},{0,25,9284},{0,21,3929},{15,31,1268},{15,31,1268},{15,31,1268},{12,30,8},{27,0,4418},{4,31,232},{4,31,232},{0,22,557},{30,12,4418}, +{0,22,557},{31,20,1800},{22,31,725},{20,31,0},{9,31,1},{31,20,1800},{30,26,1800},{9,31,1},{0,27,1818},{30,26,1800},{0,27,1818},{6,0,5105},{6,0,5105},{6,0,5105},{6,0,5105},{1,31,185},{1,31,185},{1,31,185},{0,18,9},{0,15,2042},{0,15,2042},{12,31,23421},{11,31,14850},{9,31,11462},{7,31,6149},{11,31,24860},{4,31,7053},{2,31,1236},{0,26,2891},{0,31,19260},{0,25,7818},{19,31,6761}, +{17,31,3107},{16,31,1429},{13,31,8},{31,7,10952},{10,31,4300},{6,31,353},{0,26,2891},{31,19,10952},{0,26,2891},{9,31,11462},{9,31,11462},{9,31,11462},{7,31,6149},{7,31,9845},{2,31,1236},{2,31,1236},{0,23,485},{0,25,9764},{0,21,4185},{16,31,1429},{16,31,1429},{16,31,1429},{13,31,8},{28,1,4418},{6,31,353},{6,31,353},{0,23,485},{31,13,4418},{0,23,485},{31,22,1354},{23,31,548},{22,31,4}, +{12,31,0},{31,22,1354},{30,27,1354},{12,31,0},{0,28,1360},{30,27,1354},{0,28,1360},{7,0,5953},{7,0,5953},{7,0,5953},{7,0,5953},{1,31,425},{1,31,425},{1,31,425},{0,20,1},{0,17,2372},{0,17,2372},{13,31,24507},{11,31,16398},{10,31,13349},{8,31,7460},{11,31,25418},{4,31,7647},{3,31,2021},{0,27,2089},{0,31,18918},{0,26,7302},{20,31,6098},{18,31,3037},{17,31,1640},{14,31,25},{29,13,9818}, 
+{12,31,3874},{8,31,521},{0,27,2089},{31,20,9818},{0,27,2089},{10,31,13349},{10,31,13349},{10,31,13349},{8,31,7460},{7,31,11195},{3,31,2021},{3,31,2021},{0,24,392},{0,27,10472},{0,23,4562},{17,31,1640},{17,31,1640},{17,31,1640},{14,31,25},{28,4,4418},{8,31,521},{8,31,521},{0,24,392},{28,16,4418},{0,24,392},{31,23,925},{24,31,386},{23,31,1},{15,31,1},{31,23,925},{31,27,925},{15,31,1}, +{0,28,937},{31,27,925},{0,28,937},{7,0,6970},{7,0,6970},{7,0,6970},{7,0,6970},{2,31,785},{2,31,785},{2,31,785},{0,22,4},{0,17,2741},{0,17,2741},{14,31,25663},{12,31,17995},{11,31,15021},{8,31,8740},{12,31,26003},{6,31,8399},{3,31,2965},{0,28,1481},{0,31,18886},{0,26,7014},{21,31,5634},{19,31,2950},{18,31,1853},{16,31,72},{31,11,8901},{14,31,3578},{10,31,698},{0,28,1481},{31,21,8901}, +{0,28,1481},{11,31,15021},{11,31,15021},{11,31,15021},{8,31,8740},{8,31,12646},{3,31,2965},{3,31,2965},{0,26,292},{0,29,11051},{0,24,4876},{18,31,1853},{18,31,1853},{18,31,1853},{16,31,72},{29,5,4418},{10,31,698},{10,31,698},{0,26,292},{31,16,4418},{0,26,292},{31,25,613},{26,31,245},{25,31,4},{18,31,1},{31,25,613},{27,30,613},{18,31,1},{0,29,617},{27,30,613},{0,29,617},{8,0,7956}, +{8,0,7956},{8,0,7956},{8,0,7956},{3,31,1201},{3,31,1201},{3,31,1201},{0,23,0},{0,18,3185},{0,18,3185},{15,31,26715},{12,31,19659},{11,31,16925},{9,31,10232},{12,31,26835},{6,31,9215},{4,31,4027},{0,28,985},{0,31,19110},{0,27,6797},{22,31,5238},{20,31,2950},{19,31,2005},{17,31,180},{29,17,8069},{15,31,3314},{11,31,905},{0,28,985},{27,24,8069},{0,28,985},{11,31,16925},{11,31,16925},{11,31,16925}, +{9,31,10232},{8,31,14182},{4,31,4027},{4,31,4027},{0,27,232},{0,29,11627},{0,25,5117},{19,31,2005},{19,31,2005},{19,31,2005},{17,31,180},{31,4,4418},{11,31,905},{11,31,905},{0,27,232},{30,18,4418},{0,27,232},{31,26,365},{27,31,146},{26,31,1},{21,31,1},{31,26,365},{30,29,365},{21,31,1},{0,29,377},{30,29,365},{0,29,377},{8,0,8980},{8,0,8980},{8,0,8980},{8,0,8980},{3,31,1665}, 
+{3,31,1665},{3,31,1665},{0,24,4},{0,20,3601},{0,20,3601},{15,31,26555},{13,31,20326},{12,31,17723},{10,31,10897},{13,31,26598},{7,31,9527},{5,31,4934},{0,29,590},{0,31,18606},{0,28,5786},{23,31,4770},{21,31,3041},{20,31,2210},{18,31,325},{31,15,7322},{16,31,3126},{14,31,1145},{0,29,554},{31,23,7322},{0,29,554},{12,31,17723},{12,31,17723},{12,31,17723},{10,31,10897},{9,31,15092},{5,31,4934},{5,31,4934}, +{0,28,205},{0,31,11381},{0,26,4709},{20,31,2210},{20,31,2210},{20,31,2210},{18,31,325},{31,7,4418},{14,31,1145},{14,31,1145},{0,28,169},{31,19,4418},{0,28,169},{31,27,185},{28,31,73},{27,31,9},{24,31,0},{31,27,185},{31,29,185},{24,31,0},{0,30,185},{31,29,185},{0,30,185},{9,0,9248},{9,0,9248},{9,0,9248},{9,0,9248},{4,31,1954},{4,31,1954},{4,31,1954},{1,25,8},{0,21,3330}, +{0,21,3330},{16,31,25958},{15,31,20468},{13,31,18321},{11,31,11371},{15,31,25748},{8,31,9863},{7,31,5684},{1,30,373},{0,31,18111},{0,29,4452},{24,31,4437},{23,31,3084},{22,31,2500},{19,31,554},{29,21,6584},{18,31,2996},{15,31,1412},{0,30,237},{31,24,6584},{0,30,237},{13,31,18321},{13,31,18321},{13,31,18321},{11,31,11371},{11,31,15661},{7,31,5684},{7,31,5684},{1,29,206},{0,31,10886},{0,27,4019},{22,31,2500}, +{22,31,2500},{22,31,2500},{19,31,554},{28,16,4418},{15,31,1412},{15,31,1412},{0,29,100},{24,24,4418},{0,29,100},{31,29,52},{30,31,25},{29,31,1},{27,31,1},{31,29,52},{31,30,52},{27,31,1},{0,30,68},{31,30,52},{0,30,68},{10,0,9250},{10,0,9250},{10,0,9250},{10,0,9250},{5,31,2197},{5,31,2197},{5,31,2197},{2,27,5},{0,23,2929},{0,23,2929},{17,31,25604},{15,31,20628},{15,31,18692}, +{12,31,11876},{16,31,25201},{10,31,10381},{8,31,6470},{2,30,270},{2,31,17924},{0,29,3588},{25,31,4259},{23,31,3148},{23,31,2664},{20,31,820},{31,19,6019},{19,31,2950},{16,31,1717},{0,30,125},{31,25,6019},{0,30,125},{15,31,18692},{15,31,18692},{15,31,18692},{12,31,11876},{12,31,16244},{8,31,6470},{8,31,6470},{2,30,206},{0,31,10854},{0,29,3332},{23,31,2664},{23,31,2664},{23,31,2664},{20,31,820},{29,17,4418}, 
+{16,31,1717},{16,31,1717},{0,30,61},{27,24,4418},{0,30,61},{31,31,4},{31,31,4},{31,31,4},{30,31,1},{31,31,4},{31,31,4},{30,31,1},{0,31,4},{31,31,4},{0,31,4},{11,0,9250},{11,0,9250},{11,0,9250},{11,0,9250},{6,31,2440},{6,31,2440},{6,31,2440},{3,27,10},{0,25,2509},{0,25,2509},{18,31,24418},{16,31,19831},{15,31,18144},{13,31,11876},{16,31,23685},{11,31,10015},{8,31,6638}, +{3,31,170},{3,31,16879},{0,30,2738},{25,31,3699},{24,31,2766},{24,31,2405},{21,31,820},{31,20,5164},{20,31,2584},{18,31,1552},{0,31,9},{30,26,5164},{0,31,9},{15,31,18144},{15,31,18144},{15,31,18144},{13,31,11876},{12,31,15696},{8,31,6638},{8,31,6638},{4,30,132},{0,31,10150},{0,29,2624},{24,31,2405},{24,31,2405},{24,31,2405},{21,31,820},{31,15,3872},{18,31,1552},{18,31,1552},{0,31,9},{31,23,3872}, +{0,31,9},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{12,0,9248},{12,0,9248},{12,0,9248},{12,0,9248},{7,31,2612},{7,31,2612},{7,31,2612},{4,28,8},{0,27,2210},{0,27,2210},{19,31,22762},{17,31,18876},{16,31,17222},{14,31,11585},{17,31,22068},{11,31,9551},{10,31,6509},{5,31,68},{4,31,15612},{0,30,2098},{26,31,3089}, +{24,31,2334},{24,31,1973},{22,31,661},{29,25,4267},{22,31,2150},{19,31,1285},{2,31,0},{27,28,4267},{2,31,0},{16,31,17222},{16,31,17222},{16,31,17222},{14,31,11585},{13,31,14786},{10,31,6509},{10,31,6509},{5,31,68},{0,31,9366},{0,30,2034},{24,31,1973},{24,31,1973},{24,31,1973},{22,31,661},{31,16,3202},{19,31,1285},{19,31,1285},{2,31,0},{30,24,3202},{2,31,0},{31,31,0},{31,31,0},{31,31,0}, +{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{13,0,9248},{13,0,9248},{13,0,9248},{13,0,9248},{8,31,2845},{8,31,2845},{8,31,2845},{5,29,8},{0,29,1856},{0,29,1856},{19,31,21160},{18,31,17776},{17,31,16329},{15,31,11282},{17,31,20358},{12,31,9098},{11,31,6310},{6,31,5},{6,31,14287},{0,31,1565},{27,31,2412},{25,31,1862},{25,31,1573},{23,31,509},{28,28,3361}, 
+{22,31,1691},{20,31,1021},{5,31,1},{28,28,3361},{5,31,1},{17,31,16329},{17,31,16329},{17,31,16329},{15,31,11282},{15,31,13658},{11,31,6310},{11,31,6310},{6,31,5},{2,31,8690},{0,31,1565},{25,31,1573},{25,31,1573},{25,31,1573},{23,31,509},{31,18,2521},{20,31,1021},{20,31,1021},{5,31,1},{30,25,2521},{5,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0}, +{0,31,0},{31,31,0},{0,31,0},{14,0,9250},{14,0,9250},{14,0,9250},{14,0,9250},{9,31,3176},{9,31,3176},{9,31,3176},{6,31,5},{0,30,1556},{0,30,1556},{20,31,19810},{19,31,16741},{18,31,15584},{16,31,11057},{19,31,18721},{14,31,8860},{12,31,6234},{7,31,10},{7,31,13210},{0,31,1325},{27,31,1868},{26,31,1464},{26,31,1268},{24,31,397},{31,23,2649},{23,31,1329},{22,31,794},{8,31,0},{31,27,2649}, +{8,31,0},{18,31,15584},{18,31,15584},{18,31,15584},{16,31,11057},{15,31,12858},{12,31,6234},{12,31,6234},{7,31,10},{3,31,8150},{0,31,1325},{26,31,1268},{26,31,1268},{26,31,1268},{24,31,397},{31,19,1989},{22,31,794},{22,31,794},{8,31,0},{31,25,1989},{8,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{15,0,9250}, +{15,0,9250},{15,0,9250},{15,0,9250},{11,31,3365},{11,31,3365},{11,31,3365},{7,31,10},{0,31,1325},{0,31,1325},{20,31,18626},{19,31,15845},{19,31,14756},{17,31,10897},{19,31,17297},{15,31,8442},{13,31,6285},{8,31,68},{8,31,12227},{0,31,1341},{27,31,1452},{27,31,1089},{27,31,968},{25,31,325},{31,24,2018},{24,31,1011},{23,31,605},{11,31,0},{30,28,2018},{11,31,0},{19,31,14756},{19,31,14756},{19,31,14756}, +{17,31,10897},{16,31,12077},{13,31,6285},{13,31,6285},{8,31,68},{4,31,7686},{0,31,1341},{27,31,968},{27,31,968},{27,31,968},{25,31,325},{31,21,1513},{23,31,605},{23,31,605},{11,31,0},{31,26,1513},{11,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{16,0,9248},{16,0,9248},{16,0,9248},{16,0,9248},{12,31,3626}, 
+{12,31,3626},{12,31,3626},{8,31,68},{0,31,1341},{0,31,1341},{21,31,17476},{20,31,14998},{20,31,14098},{18,31,10672},{20,31,16018},{15,31,8154},{15,31,6218},{9,31,200},{10,31,11338},{0,31,1613},{28,31,1041},{27,31,801},{27,31,680},{26,31,232},{29,29,1473},{26,31,753},{24,31,442},{14,31,0},{31,28,1473},{14,31,0},{20,31,14098},{20,31,14098},{20,31,14098},{18,31,10672},{17,31,11453},{15,31,6218},{15,31,6218}, +{9,31,200},{6,31,7270},{0,31,1613},{27,31,680},{27,31,680},{27,31,680},{26,31,232},{28,28,1105},{24,31,442},{24,31,442},{14,31,0},{28,28,1105},{14,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{17,0,9248},{17,0,9248},{17,0,9248},{17,0,9248},{13,31,3929},{13,31,3929},{13,31,3929},{9,31,200},{0,31,1613}, +{0,31,1613}, diff --git a/thirdparty/basisu/transcoder/basisu_transcoder_tables_dxt1_6.inc b/thirdparty/basisu/transcoder/basisu_transcoder_tables_dxt1_6.inc new file mode 100644 index 000000000..f2d324fcc --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_transcoder_tables_dxt1_6.inc @@ -0,0 +1,494 @@ +// Copyright (C) 2017-2024 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+{0,4,18},{0,3,4},{0,2,0},{0,2,9},{0,3,36},{0,2,22},{0,2,13},{0,1,24},{0,1,41},{0,1,25},{0,4,18},{0,3,4},{0,2,0},{0,2,9},{1,1,36},{0,2,22},{0,2,13},{0,1,24},{3,0,36},{0,1,24},{0,2,0},{0,2,0},{0,2,0},{0,1,0},{0,1,2},{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{0,2,0}, +{0,2,0},{0,2,0},{0,1,0},{0,1,2},{0,1,1},{0,1,1},{0,0,4},{1,0,2},{0,0,4},{2,0,18},{0,3,4},{0,2,0},{0,2,9},{2,0,18},{4,0,18},{0,2,9},{0,1,20},{4,0,18},{0,1,20},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,8,38},{1,6,21},{1,4,24}, +{1,4,24},{0,8,52},{0,5,18},{0,4,1},{0,3,24},{0,4,77},{0,3,40},{2,6,22},{1,6,5},{2,4,4},{1,4,8},{4,0,52},{0,5,18},{0,4,1},{0,3,24},{8,0,52},{0,3,24},{1,6,20},{1,6,20},{1,6,20},{1,3,21},{0,6,8},{0,4,1},{0,4,1},{0,2,5},{0,3,24},{0,2,9},{2,4,4},{2,4,4},{2,4,4},{2,3,4},{3,0,8}, +{1,3,1},{1,3,1},{1,2,4},{6,0,8},{1,2,4},{5,0,18},{1,6,1},{2,4,0},{0,4,0},{5,0,18},{10,0,18},{0,4,0},{0,3,20},{10,0,18},{0,3,20},{1,0,20},{1,0,20},{1,0,20},{1,0,20},{0,5,0},{0,5,0},{0,5,0},{0,2,1},{0,2,5},{0,2,5},{3,10,38},{3,8,21},{3,6,24},{3,6,24},{2,10,52},{2,7,18},{2,6,1}, +{2,5,24},{0,7,53},{1,5,21},{4,8,22},{3,8,5},{4,6,4},{3,6,8},{7,0,52},{2,7,18},{2,6,1},{1,5,20},{14,0,52},{1,5,20},{3,8,20},{3,8,20},{3,8,20},{3,5,21},{2,8,8},{2,6,1},{2,6,1},{2,4,5},{0,6,8},{1,5,5},{4,6,4},{4,6,4},{4,6,4},{4,5,4},{6,0,8},{3,5,1},{3,5,1},{3,4,4},{12,0,8}, +{3,4,4},{8,0,18},{3,8,1},{4,6,0},{2,6,0},{8,0,18},{16,0,18},{2,6,0},{0,5,20},{16,0,18},{0,5,20},{3,0,20},{3,0,20},{3,0,20},{3,0,20},{2,7,0},{2,7,0},{2,7,0},{2,4,1},{1,5,1},{1,5,1},{5,12,38},{5,10,21},{5,8,24},{5,8,24},{4,12,52},{4,9,18},{4,8,1},{4,7,24},{2,9,53},{3,7,21},{6,10,22}, +{5,10,5},{6,8,4},{5,8,8},{2,16,51},{4,9,18},{4,8,1},{3,7,20},{20,0,51},{3,7,20},{5,10,20},{5,10,20},{5,10,20},{5,7,21},{4,10,8},{4,8,1},{4,8,1},{4,6,5},{2,8,8},{3,7,5},{6,8,4},{6,8,4},{6,8,4},{6,7,4},{9,0,8},{5,7,1},{5,7,1},{5,6,4},{18,0,8},{5,6,4},{11,0,18},{5,10,1},{6,8,0}, 
+{4,8,0},{11,0,18},{22,0,18},{4,8,0},{0,7,20},{22,0,18},{0,7,20},{5,0,20},{5,0,20},{5,0,20},{5,0,20},{4,9,0},{4,9,0},{4,9,0},{4,6,1},{3,7,1},{3,7,1},{7,15,36},{7,12,19},{7,10,28},{7,10,20},{6,15,52},{6,11,22},{6,10,7},{6,9,28},{3,12,52},{5,9,27},{8,13,19},{8,11,3},{8,10,3},{8,10,6},{13,1,51}, +{6,11,21},{7,10,3},{5,9,27},{27,0,51},{5,9,27},{7,12,19},{7,12,19},{7,12,19},{7,10,19},{6,13,9},{6,10,6},{6,10,6},{6,9,3},{5,10,9},{5,9,2},{8,10,2},{8,10,2},{8,10,2},{8,9,2},{12,1,8},{7,10,2},{7,10,2},{5,9,2},{25,0,8},{5,9,2},{14,1,18},{7,12,1},{8,10,2},{6,10,2},{14,1,18},{15,7,18},{6,10,2}, +{0,9,26},{15,7,18},{0,9,26},{7,0,18},{7,0,18},{7,0,18},{7,0,18},{6,11,2},{6,11,2},{6,11,2},{6,9,2},{5,9,1},{5,9,1},{9,16,38},{9,14,19},{9,12,28},{9,12,20},{8,17,52},{8,13,22},{8,12,7},{8,11,28},{5,14,52},{7,11,27},{10,15,19},{10,13,3},{10,12,3},{10,12,6},{16,1,51},{8,13,21},{9,12,3},{7,11,27},{33,0,51}, +{7,11,27},{9,14,19},{9,14,19},{9,14,19},{9,12,19},{8,15,9},{8,12,6},{8,12,6},{8,11,3},{7,12,9},{7,11,2},{10,12,2},{10,12,2},{10,12,2},{10,11,2},{15,1,8},{9,12,2},{9,12,2},{7,11,2},{31,0,8},{7,11,2},{17,0,18},{9,14,1},{10,12,2},{8,12,2},{17,0,18},{34,0,18},{8,12,2},{0,11,26},{34,0,18},{0,11,26},{9,0,18}, +{9,0,18},{9,0,18},{9,0,18},{8,13,2},{8,13,2},{8,13,2},{8,11,2},{7,11,1},{7,11,1},{11,18,38},{11,16,19},{11,14,28},{11,14,20},{10,19,52},{10,15,22},{10,14,7},{10,13,28},{7,16,52},{9,13,27},{12,16,21},{12,15,3},{12,14,3},{12,14,6},{19,1,51},{10,15,21},{11,14,3},{9,13,27},{39,0,51},{9,13,27},{11,16,18},{11,16,18},{11,16,18}, +{11,14,19},{10,17,9},{10,14,6},{10,14,6},{10,13,3},{9,14,9},{9,13,2},{12,14,2},{12,14,2},{12,14,2},{12,13,2},{15,7,8},{11,14,2},{11,14,2},{9,13,2},{31,3,8},{9,13,2},{20,0,18},{11,16,1},{12,14,2},{10,14,2},{20,0,18},{40,0,18},{10,14,2},{0,13,26},{40,0,18},{0,13,26},{11,0,18},{11,0,18},{11,0,18},{11,0,18},{10,15,2}, 
+{10,15,2},{10,15,2},{10,13,2},{9,13,1},{9,13,1},{13,20,38},{13,18,19},{13,16,27},{13,16,19},{12,21,52},{12,17,19},{12,16,5},{12,15,28},{10,17,52},{11,15,27},{14,18,21},{14,17,3},{14,16,1},{13,16,10},{22,1,51},{12,17,18},{13,16,2},{11,15,27},{45,0,51},{11,15,27},{13,18,18},{13,18,18},{13,18,18},{13,16,19},{12,19,9},{12,16,5},{12,16,5}, +{12,15,3},{10,16,11},{11,15,2},{14,16,1},{14,16,1},{14,16,1},{14,15,2},{15,13,8},{13,16,2},{13,16,2},{11,15,2},{31,6,8},{11,15,2},{23,0,18},{13,18,1},{14,16,0},{12,16,0},{23,0,18},{46,0,18},{12,16,0},{0,15,26},{46,0,18},{0,15,26},{13,0,18},{13,0,18},{13,0,18},{13,0,18},{12,17,1},{12,17,1},{12,17,1},{12,15,2},{11,15,1}, +{11,15,1},{15,23,38},{15,20,21},{15,18,37},{15,18,21},{15,22,55},{15,19,23},{15,18,5},{14,17,30},{13,19,56},{13,17,28},{16,21,19},{16,19,3},{16,18,3},{16,18,6},{17,17,51},{15,19,19},{15,18,1},{14,17,26},{51,0,51},{14,17,26},{15,21,20},{15,21,20},{15,21,20},{15,18,20},{15,19,14},{15,18,4},{15,18,4},{14,17,5},{13,18,9},{13,17,3},{16,18,2}, +{16,18,2},{16,18,2},{16,17,2},{24,1,8},{15,18,0},{15,18,0},{14,17,1},{49,0,8},{14,17,1},{26,1,18},{15,20,1},{16,18,2},{15,18,1},{26,1,18},{53,0,18},{15,18,1},{0,17,26},{53,0,18},{0,17,26},{15,0,20},{15,0,20},{15,0,20},{15,0,20},{15,18,4},{15,18,4},{15,18,4},{14,17,4},{13,17,2},{13,17,2},{17,25,36},{17,22,19},{17,20,28}, +{17,20,20},{16,25,52},{16,21,22},{16,20,7},{16,19,28},{15,21,56},{15,19,28},{18,23,19},{18,21,3},{18,20,3},{18,20,6},{20,17,51},{16,21,21},{17,20,3},{14,20,26},{57,0,51},{14,20,26},{17,22,19},{17,22,19},{17,22,19},{17,20,19},{16,23,9},{16,20,6},{16,20,6},{16,19,3},{15,20,9},{15,19,3},{18,20,2},{18,20,2},{18,20,2},{18,19,2},{27,1,8}, +{17,20,2},{17,20,2},{15,19,2},{55,0,8},{15,19,2},{29,1,18},{17,22,1},{18,20,2},{16,20,2},{29,1,18},{59,0,18},{16,20,2},{0,19,26},{59,0,18},{0,19,26},{17,0,18},{17,0,18},{17,0,18},{17,0,18},{16,21,2},{16,21,2},{16,21,2},{16,19,2},{15,19,2},{15,19,2},{19,27,36},{19,24,19},{19,22,28},{19,22,20},{18,27,52},{18,23,22},{18,22,7}, 
+{18,21,28},{15,24,56},{17,21,27},{20,25,19},{20,23,3},{20,22,3},{20,22,6},{23,17,51},{18,23,21},{19,22,3},{17,21,27},{63,0,51},{17,21,27},{19,24,19},{19,24,19},{19,24,19},{19,22,19},{18,25,9},{18,22,6},{18,22,6},{18,21,3},{17,22,9},{17,21,2},{20,22,2},{20,22,2},{20,22,2},{20,21,2},{30,1,8},{19,22,2},{19,22,2},{17,21,2},{61,0,8}, +{17,21,2},{31,3,18},{19,24,1},{20,22,2},{18,22,2},{31,3,18},{63,1,18},{18,22,2},{0,21,26},{63,1,18},{0,21,26},{19,0,18},{19,0,18},{19,0,18},{19,0,18},{18,23,2},{18,23,2},{18,23,2},{18,21,2},{17,21,1},{17,21,1},{21,29,36},{21,26,19},{21,24,28},{21,24,20},{20,29,52},{20,25,22},{20,24,7},{20,23,28},{17,26,52},{19,23,27},{22,27,19}, +{22,25,3},{22,24,3},{22,24,6},{34,1,51},{20,25,21},{21,24,3},{19,23,27},{63,3,51},{19,23,27},{21,26,19},{21,26,19},{21,26,19},{21,24,19},{20,27,9},{20,24,6},{20,24,6},{20,23,3},{19,24,9},{19,23,2},{22,24,2},{22,24,2},{22,24,2},{22,23,2},{33,1,8},{21,24,2},{21,24,2},{19,23,2},{63,2,8},{19,23,2},{31,9,18},{21,26,1},{22,24,2}, +{20,24,2},{31,9,18},{63,4,18},{20,24,2},{0,23,26},{63,4,18},{0,23,26},{21,0,18},{21,0,18},{21,0,18},{21,0,18},{20,25,2},{20,25,2},{20,25,2},{20,23,2},{19,23,1},{19,23,1},{23,31,40},{23,29,24},{23,27,33},{23,26,24},{23,30,55},{22,28,24},{23,26,8},{22,26,28},{20,28,51},{21,26,21},{24,29,20},{24,28,1},{24,27,4},{24,26,5},{38,0,51}, +{22,28,20},{23,26,4},{20,26,20},{62,7,51},{20,26,20},{23,29,20},{23,29,20},{23,29,20},{23,26,20},{23,28,12},{23,26,4},{23,26,4},{22,25,4},{20,27,9},{22,25,4},{24,27,0},{24,27,0},{24,27,0},{24,26,1},{31,12,8},{23,26,0},{23,26,0},{22,25,0},{62,6,8},{22,25,0},{38,1,18},{24,28,1},{24,27,4},{23,26,4},{38,1,18},{45,16,18},{23,26,4}, +{0,26,20},{45,16,18},{0,26,20},{23,0,20},{23,0,20},{23,0,20},{23,0,20},{23,26,4},{23,26,4},{23,26,4},{22,25,4},{21,26,1},{21,26,1},{25,33,38},{25,31,24},{25,29,33},{25,28,24},{25,32,55},{24,30,24},{25,28,8},{24,28,28},{22,30,51},{23,28,21},{26,31,20},{26,30,1},{26,29,4},{26,28,5},{41,0,51},{24,30,20},{25,28,4},{22,28,20},{62,10,51}, 
+{22,28,20},{25,31,20},{25,31,20},{25,31,20},{25,28,20},{25,30,12},{25,28,4},{25,28,4},{24,27,4},{22,29,9},{24,27,4},{26,29,0},{26,29,0},{26,29,0},{26,28,1},{31,18,8},{25,28,0},{25,28,0},{24,27,0},{62,9,8},{24,27,0},{41,1,18},{26,30,1},{26,29,4},{25,28,4},{41,1,18},{51,16,18},{25,28,4},{0,28,20},{51,16,18},{0,28,20},{25,0,20}, +{25,0,20},{25,0,20},{25,0,20},{25,28,4},{25,28,4},{25,28,4},{24,27,4},{23,28,1},{23,28,1},{27,35,38},{27,32,21},{27,31,33},{27,30,24},{27,34,55},{26,32,24},{27,30,8},{26,30,28},{25,31,56},{25,30,21},{28,33,18},{28,32,2},{28,31,4},{28,30,5},{44,0,51},{26,32,20},{27,30,4},{24,30,20},{62,13,51},{24,30,20},{27,33,20},{27,33,20},{27,33,20}, +{27,30,20},{27,31,14},{27,30,4},{27,30,4},{26,29,4},{24,31,9},{26,29,4},{28,31,0},{28,31,0},{28,31,0},{28,30,1},{34,17,8},{27,30,0},{27,30,0},{26,29,0},{62,12,8},{26,29,0},{44,1,18},{27,32,1},{28,31,4},{27,30,4},{44,1,18},{57,16,18},{27,30,4},{0,30,20},{57,16,18},{0,30,20},{27,0,20},{27,0,20},{27,0,20},{27,0,20},{27,30,4}, +{27,30,4},{27,30,4},{26,29,4},{25,30,1},{25,30,1},{29,37,38},{29,34,21},{29,32,37},{29,32,21},{29,36,55},{29,33,23},{29,32,5},{28,32,39},{27,33,56},{26,32,30},{30,35,18},{30,34,2},{30,32,2},{30,32,5},{47,0,51},{29,33,19},{29,32,1},{26,32,26},{46,24,51},{26,32,26},{29,35,20},{29,35,20},{29,35,20},{29,32,20},{29,33,14},{29,32,4},{29,32,4}, +{28,31,4},{27,32,9},{28,31,4},{30,33,0},{30,33,0},{30,33,0},{30,31,4},{37,17,8},{29,32,0},{29,32,0},{28,31,0},{62,15,8},{28,31,0},{47,1,18},{29,34,1},{30,32,2},{29,32,1},{47,1,18},{63,16,18},{29,32,1},{0,32,26},{63,16,18},{0,32,26},{29,0,20},{29,0,20},{29,0,20},{29,0,20},{29,32,4},{29,32,4},{29,32,4},{28,31,4},{28,31,4}, +{28,31,4},{31,40,44},{31,37,28},{32,35,40},{31,34,31},{31,38,53},{31,35,21},{31,34,7},{31,34,30},{28,36,51},{29,34,21},{32,37,20},{32,36,1},{32,35,4},{32,34,5},{50,0,51},{30,36,19},{31,34,6},{28,34,21},{62,19,51},{28,34,21},{31,38,26},{31,38,26},{31,38,26},{31,34,27},{31,36,9},{31,34,3},{31,34,3},{31,33,2},{29,34,10},{30,33,2},{32,35,0}, 
+{32,35,0},{32,35,0},{32,34,1},{49,0,8},{31,34,2},{31,34,2},{30,33,1},{62,18,8},{30,33,1},{47,8,18},{32,36,1},{32,35,4},{31,34,5},{47,8,18},{62,20,18},{31,34,5},{0,34,20},{62,20,18},{0,34,20},{31,0,26},{31,0,26},{31,0,26},{31,0,26},{31,35,1},{31,35,1},{31,35,1},{31,33,2},{29,34,1},{29,34,1},{33,41,40},{33,39,24},{33,37,33}, +{33,36,24},{33,40,55},{32,38,24},{33,36,8},{32,36,28},{30,38,51},{31,36,21},{34,39,20},{34,38,1},{34,37,4},{34,36,5},{53,0,51},{32,38,20},{33,36,4},{30,36,21},{62,22,51},{30,36,21},{33,39,20},{33,39,20},{33,39,20},{33,36,20},{33,38,12},{33,36,4},{33,36,4},{32,35,4},{31,36,10},{32,35,4},{34,37,0},{34,37,0},{34,37,0},{34,36,1},{52,0,8}, +{33,36,0},{33,36,0},{32,35,0},{62,21,8},{32,35,0},{47,14,18},{34,38,1},{34,37,4},{33,36,4},{47,14,18},{62,23,18},{33,36,4},{0,36,20},{62,23,18},{0,36,20},{33,0,20},{33,0,20},{33,0,20},{33,0,20},{33,36,4},{33,36,4},{33,36,4},{32,35,4},{31,36,1},{31,36,1},{35,43,40},{35,41,24},{35,39,33},{35,38,24},{35,42,55},{34,40,24},{35,38,8}, +{34,38,28},{32,40,51},{33,38,21},{36,41,20},{36,40,1},{36,39,4},{36,38,5},{56,0,51},{34,40,20},{35,38,4},{32,38,20},{62,25,51},{32,38,20},{35,41,20},{35,41,20},{35,41,20},{35,38,20},{35,40,12},{35,38,4},{35,38,4},{34,37,4},{32,39,9},{34,37,4},{36,39,0},{36,39,0},{36,39,0},{36,38,1},{55,0,8},{35,38,0},{35,38,0},{34,37,0},{62,24,8}, +{34,37,0},{48,17,18},{36,40,1},{36,39,4},{35,38,4},{48,17,18},{62,26,18},{35,38,4},{0,38,20},{62,26,18},{0,38,20},{35,0,20},{35,0,20},{35,0,20},{35,0,20},{35,38,4},{35,38,4},{35,38,4},{34,37,4},{33,38,1},{33,38,1},{37,45,40},{37,43,24},{37,41,33},{37,40,24},{37,44,55},{36,42,24},{37,40,8},{36,40,28},{34,42,51},{35,40,21},{38,43,20}, +{38,42,1},{38,41,4},{38,40,5},{59,0,51},{36,42,20},{37,40,4},{34,40,20},{62,28,51},{34,40,20},{37,43,20},{37,43,20},{37,43,20},{37,40,20},{37,42,12},{37,40,4},{37,40,4},{36,39,4},{34,41,9},{36,39,4},{38,41,0},{38,41,0},{38,41,0},{38,40,1},{58,0,8},{37,40,0},{37,40,0},{36,39,0},{62,27,8},{36,39,0},{51,17,18},{38,42,1},{38,41,4}, 
+{37,40,4},{51,17,18},{62,29,18},{37,40,4},{0,40,20},{62,29,18},{0,40,20},{37,0,20},{37,0,20},{37,0,20},{37,0,20},{37,40,4},{37,40,4},{37,40,4},{36,39,4},{35,40,1},{35,40,1},{40,46,44},{40,44,27},{40,43,28},{39,43,28},{39,47,52},{39,44,22},{39,43,3},{38,42,28},{36,44,53},{37,42,19},{40,46,19},{40,44,2},{40,43,3},{40,42,10},{62,1,51}, +{38,44,19},{39,43,2},{37,42,18},{63,31,51},{37,42,18},{40,44,26},{40,44,26},{40,44,26},{40,42,26},{39,44,11},{39,43,2},{39,43,2},{39,41,2},{37,43,11},{38,41,3},{40,44,1},{40,44,1},{40,44,1},{40,42,1},{53,16,8},{40,42,1},{40,42,1},{39,41,1},{63,30,8},{39,41,1},{63,0,18},{40,44,1},{40,43,2},{38,43,1},{63,0,18},{62,32,18},{38,43,1}, +{0,42,18},{62,32,18},{0,42,18},{39,0,26},{39,0,26},{39,0,26},{39,0,26},{39,43,1},{39,43,1},{39,43,1},{39,41,1},{37,42,1},{37,42,1},{42,48,44},{42,46,27},{42,45,28},{41,45,28},{41,48,53},{41,46,22},{41,45,3},{40,44,28},{38,46,53},{39,44,19},{42,48,19},{42,46,2},{42,45,3},{42,44,10},{63,5,51},{40,46,19},{41,45,2},{39,44,18},{47,42,51}, +{39,44,18},{42,46,26},{42,46,26},{42,46,26},{42,44,26},{41,46,11},{41,45,2},{41,45,2},{41,43,2},{39,45,11},{40,43,3},{42,46,1},{42,46,1},{42,46,1},{42,44,1},{56,16,8},{42,44,1},{42,44,1},{41,43,1},{62,33,8},{41,43,1},{63,6,18},{42,46,1},{42,45,2},{40,45,1},{63,6,18},{62,35,18},{40,45,1},{0,44,18},{62,35,18},{0,44,18},{41,0,26}, +{41,0,26},{41,0,26},{41,0,26},{41,45,1},{41,45,1},{41,45,1},{41,43,1},{39,44,1},{39,44,1},{44,50,44},{44,48,26},{44,47,28},{43,47,28},{43,50,53},{43,47,27},{43,47,3},{42,46,28},{40,48,51},{41,46,19},{44,50,19},{44,48,1},{44,47,3},{44,46,10},{63,11,51},{42,48,19},{43,47,2},{41,46,18},{47,45,51},{41,46,18},{44,48,26},{44,48,26},{44,48,26}, 
+{44,46,26},{43,48,9},{43,47,2},{43,47,2},{43,45,2},{41,47,11},{42,45,3},{44,48,1},{44,48,1},{44,48,1},{44,46,1},{59,16,8},{44,46,1},{44,46,1},{43,45,1},{62,36,8},{43,45,1},{63,12,18},{44,48,0},{44,47,2},{42,47,1},{63,12,18},{62,38,18},{42,47,1},{0,46,18},{62,38,18},{0,46,18},{43,0,26},{43,0,26},{43,0,26},{43,0,26},{43,47,1}, +{43,47,1},{43,47,1},{43,45,1},{41,46,1},{41,46,1},{46,52,44},{46,50,26},{46,49,31},{45,48,31},{45,52,53},{45,49,21},{45,48,7},{45,48,30},{42,50,51},{43,48,21},{46,52,19},{46,50,1},{46,49,6},{46,48,6},{55,32,51},{44,50,19},{45,48,6},{42,48,21},{46,48,51},{42,48,21},{46,50,26},{46,50,26},{46,50,26},{45,48,27},{45,50,9},{45,48,3},{45,48,3}, +{45,47,2},{43,48,10},{44,47,3},{46,50,1},{46,50,1},{46,50,1},{46,48,2},{62,16,8},{45,48,2},{45,48,2},{45,47,1},{62,39,8},{45,47,1},{63,18,18},{46,50,0},{47,48,4},{45,48,5},{63,18,18},{62,41,18},{45,48,5},{0,48,20},{62,41,18},{0,48,20},{45,0,26},{45,0,26},{45,0,26},{45,0,26},{45,49,1},{45,49,1},{45,49,1},{45,47,1},{43,48,1}, +{43,48,1},{48,54,44},{48,52,27},{48,51,28},{48,50,35},{47,55,51},{47,52,21},{47,51,3},{47,50,22},{45,52,52},{45,50,19},{48,54,19},{48,52,2},{48,51,3},{48,50,10},{63,23,51},{47,52,21},{47,51,3},{45,50,18},{63,43,51},{45,50,18},{48,52,26},{48,52,26},{48,52,26},{48,50,26},{47,53,8},{47,51,2},{47,51,2},{47,49,1},{45,51,8},{46,49,5},{48,52,1}, +{48,52,1},{48,52,1},{48,50,1},{63,21,8},{48,50,1},{48,50,1},{47,49,1},{63,42,8},{47,49,1},{63,25,18},{48,52,1},{48,51,2},{46,51,1},{63,25,18},{63,44,18},{46,51,1},{0,50,18},{63,44,18},{0,50,18},{48,0,26},{48,0,26},{48,0,26},{48,0,26},{47,51,1},{47,51,1},{47,51,1},{47,49,0},{45,50,1},{45,50,1},{50,56,44},{50,54,27},{50,53,28}, 
+{49,53,28},{49,57,52},{49,54,22},{49,53,3},{48,52,28},{47,54,52},{47,52,19},{50,56,19},{50,54,2},{50,53,3},{50,52,10},{63,29,51},{48,54,19},{49,53,2},{47,52,18},{63,46,51},{47,52,18},{50,54,26},{50,54,26},{50,54,26},{50,52,26},{49,54,11},{49,53,2},{49,53,2},{49,51,2},{47,53,8},{48,51,3},{50,54,1},{50,54,1},{50,54,1},{50,52,1},{63,27,8}, +{50,52,1},{50,52,1},{49,51,1},{63,45,8},{49,51,1},{63,31,18},{50,54,1},{50,53,2},{48,53,1},{63,31,18},{63,47,18},{48,53,1},{0,52,18},{63,47,18},{0,52,18},{49,0,26},{49,0,26},{49,0,26},{49,0,26},{49,53,1},{49,53,1},{49,53,1},{49,51,1},{47,52,1},{47,52,1},{52,58,44},{52,56,27},{52,55,28},{51,55,28},{51,59,52},{51,56,22},{51,55,3}, +{50,54,28},{48,56,53},{49,54,19},{52,58,19},{52,56,2},{52,55,3},{52,54,10},{63,35,51},{50,56,19},{51,55,2},{49,54,18},{63,49,51},{49,54,18},{52,56,26},{52,56,26},{52,56,26},{52,54,26},{51,56,11},{51,55,2},{51,55,2},{51,53,2},{49,55,11},{50,53,3},{52,56,1},{52,56,1},{52,56,1},{52,54,1},{63,33,8},{52,54,1},{52,54,1},{51,53,1},{47,56,8}, +{51,53,1},{57,48,18},{52,56,1},{52,55,2},{50,55,1},{57,48,18},{62,50,18},{50,55,1},{0,54,18},{62,50,18},{0,54,18},{51,0,26},{51,0,26},{51,0,26},{51,0,26},{51,55,1},{51,55,1},{51,55,1},{51,53,1},{49,54,1},{49,54,1},{54,60,44},{54,58,27},{54,57,28},{53,57,28},{53,61,52},{53,58,22},{53,57,3},{52,56,28},{50,58,53},{51,56,19},{54,60,19}, +{54,58,2},{54,57,3},{54,56,10},{63,41,51},{52,58,19},{53,57,2},{51,56,18},{63,52,51},{51,56,18},{54,58,26},{54,58,26},{54,58,26},{54,56,26},{53,58,11},{53,57,2},{53,57,2},{53,55,2},{51,57,11},{52,55,3},{54,58,1},{54,58,1},{54,58,1},{54,56,1},{63,39,8},{54,56,1},{54,56,1},{53,55,1},{47,59,8},{53,55,1},{60,48,18},{54,58,1},{54,57,2}, 
+{52,57,1},{60,48,18},{62,53,18},{52,57,1},{0,56,18},{62,53,18},{0,56,18},{53,0,26},{53,0,26},{53,0,26},{53,0,26},{53,57,1},{53,57,1},{53,57,1},{53,55,1},{51,56,1},{51,56,1},{56,63,38},{56,61,21},{56,59,24},{56,59,24},{55,63,52},{55,60,18},{55,59,1},{55,58,24},{53,60,53},{54,58,21},{57,61,22},{56,61,5},{57,59,4},{56,59,8},{63,47,52}, +{55,60,18},{55,59,1},{54,58,20},{63,55,52},{54,58,20},{56,61,20},{56,61,20},{56,61,20},{56,58,21},{55,61,8},{55,59,1},{55,59,1},{55,57,5},{53,59,8},{54,58,5},{57,59,4},{57,59,4},{57,59,4},{57,58,4},{61,49,8},{56,58,1},{56,58,1},{56,57,4},{63,54,8},{56,57,4},{63,49,18},{56,61,1},{57,59,0},{55,59,0},{63,49,18},{63,56,18},{55,59,0}, +{0,58,20},{63,56,18},{0,58,20},{56,0,20},{56,0,20},{56,0,20},{56,0,20},{55,60,0},{55,60,0},{55,60,0},{55,57,1},{54,58,1},{54,58,1},{58,63,56},{58,63,21},{58,61,24},{58,61,24},{58,63,68},{57,62,18},{57,61,1},{57,60,24},{55,62,53},{56,60,21},{59,63,22},{58,63,5},{59,61,4},{58,61,8},{63,53,52},{57,62,18},{57,61,1},{56,60,20},{63,58,52}, +{56,60,20},{58,63,20},{58,63,20},{58,63,20},{58,60,21},{57,63,8},{57,61,1},{57,61,1},{57,59,5},{55,61,8},{56,60,5},{59,61,4},{59,61,4},{59,61,4},{59,60,4},{63,51,8},{58,60,1},{58,60,1},{58,59,4},{63,57,8},{58,59,4},{63,55,18},{58,63,1},{59,61,0},{57,61,0},{63,55,18},{63,59,18},{57,61,0},{0,60,20},{63,59,18},{0,60,20},{58,0,20}, +{58,0,20},{58,0,20},{58,0,20},{57,62,0},{57,62,0},{57,62,0},{57,59,1},{56,60,1},{56,60,1},{60,63,88},{60,63,40},{60,63,24},{60,63,24},{60,63,88},{59,63,37},{59,63,1},{59,62,24},{58,63,63},{58,62,21},{61,63,40},{61,63,13},{61,63,4},{60,63,8},{63,59,52},{60,63,24},{59,63,1},{58,62,20},{63,61,52},{58,62,20},{60,63,24},{60,63,24},{60,63,24}, 
+{60,62,21},{60,63,24},{59,63,1},{59,63,1},{59,61,5},{57,63,8},{58,62,5},{61,63,4},{61,63,4},{61,63,4},{61,62,4},{63,57,8},{60,62,1},{60,62,1},{60,61,4},{63,60,8},{60,61,4},{63,61,18},{61,63,9},{61,63,0},{59,63,0},{63,61,18},{63,62,18},{59,63,0},{0,62,20},{63,62,18},{0,62,20},{60,0,20},{60,0,20},{60,0,20},{60,0,20},{59,63,1}, +{59,63,1},{59,63,1},{59,61,1},{58,62,1},{58,62,1},{62,63,38},{62,63,33},{62,63,29},{62,63,24},{62,63,35},{62,63,25},{62,63,21},{61,63,1},{61,63,23},{60,63,4},{63,63,4},{63,63,4},{63,63,4},{63,63,4},{63,63,4},{63,63,4},{63,63,4},{62,63,0},{63,63,4},{62,63,0},{62,63,29},{62,63,29},{62,63,29},{62,63,24},{62,63,26},{62,63,21},{62,63,21}, +{61,63,1},{61,63,14},{60,63,4},{63,63,4},{63,63,4},{63,63,4},{63,63,4},{63,62,4},{63,63,4},{63,63,4},{62,63,0},{62,63,4},{62,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{62,0,20},{62,0,20},{62,0,20},{62,0,20},{61,63,16},{61,63,16},{61,63,16},{61,63,1},{60,63,4}, +{60,63,4},{0,8,74},{0,6,10},{0,4,1},{0,4,26},{0,6,154},{0,4,99},{0,3,50},{0,2,115},{0,3,170},{0,2,119},{0,8,74},{0,6,10},{0,4,1},{0,4,26},{3,0,154},{0,4,99},{0,3,50},{0,2,115},{6,0,154},{0,2,115},{0,4,0},{0,4,0},{0,4,0},{0,2,0},{0,2,13},{0,2,4},{0,2,4},{0,1,5},{0,1,14},{0,1,6},{0,4,0}, +{0,4,0},{0,4,0},{0,2,0},{1,0,13},{0,2,4},{0,2,4},{0,1,5},{2,0,13},{0,1,5},{4,0,74},{0,6,10},{0,4,1},{0,4,26},{4,0,74},{8,0,74},{0,4,26},{0,3,74},{8,0,74},{0,3,74},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,14,83},{0,10,10},{1,6,27}, +{0,6,19},{0,11,243},{0,7,110},{0,5,34},{0,4,139},{0,5,280},{0,4,164},{1,12,75},{1,9,2},{1,6,11},{1,6,18},{5,1,243},{0,7,110},{0,5,34},{0,4,139},{11,0,243},{0,4,139},{0,10,9},{0,10,9},{0,10,9},{0,5,9},{0,6,50},{0,5,9},{0,5,9},{0,3,26},{0,3,66},{0,2,33},{1,8,1},{1,8,1},{1,8,1},{1,4,2},{3,0,50}, 
+{0,5,9},{0,5,9},{0,3,26},{6,0,50},{0,3,26},{7,0,74},{1,9,1},{2,6,1},{0,6,10},{7,0,74},{14,0,74},{0,6,10},{0,5,74},{14,0,74},{0,5,74},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,2,1},{0,2,1},{0,2,1},{0,1,1},{0,1,2},{0,1,2},{1,18,137},{1,12,74},{2,9,98},{1,8,67},{0,16,244},{0,10,78},{0,8,2}, +{0,6,115},{0,8,344},{0,6,179},{3,14,75},{3,11,2},{3,8,11},{3,8,18},{8,1,243},{0,10,78},{0,8,2},{0,6,115},{15,1,243},{0,6,115},{1,14,65},{1,14,65},{1,14,65},{1,7,66},{0,12,50},{0,8,1},{0,8,1},{0,5,5},{0,5,104},{0,5,41},{3,10,1},{3,10,1},{3,10,1},{3,6,2},{6,0,50},{0,8,1},{0,8,1},{0,5,5},{12,0,50}, +{0,5,5},{2,16,72},{3,11,1},{4,8,1},{0,8,1},{2,16,72},{20,0,72},{0,8,1},{0,7,74},{20,0,72},{0,7,74},{1,0,65},{1,0,65},{1,0,65},{1,0,65},{0,8,1},{0,8,1},{0,8,1},{0,4,1},{0,3,25},{0,3,25},{3,20,146},{3,14,83},{4,11,115},{3,10,76},{2,18,245},{2,12,79},{2,10,3},{1,8,108},{0,11,293},{0,8,103},{5,16,73}, +{5,13,2},{5,10,11},{5,10,18},{11,1,243},{1,13,75},{2,10,2},{0,8,94},{15,4,243},{0,8,94},{3,16,74},{3,16,74},{3,16,74},{3,9,75},{2,14,51},{2,10,2},{2,10,2},{2,7,6},{0,9,75},{0,7,6},{5,12,1},{5,12,1},{5,12,1},{5,8,2},{9,0,50},{2,10,1},{2,10,1},{0,7,2},{18,0,50},{0,7,2},{5,16,72},{5,13,1},{6,10,1}, +{2,10,1},{5,16,72},{26,0,72},{2,10,1},{0,9,74},{26,0,72},{0,9,74},{3,0,74},{3,0,74},{3,0,74},{3,0,74},{2,10,2},{2,10,2},{2,10,2},{2,6,2},{0,7,5},{0,7,5},{6,21,152},{6,16,82},{6,13,109},{5,12,84},{4,21,243},{4,15,78},{4,13,8},{4,11,108},{0,14,255},{0,11,77},{7,19,72},{7,15,1},{7,13,8},{7,12,13},{6,17,243}, +{3,15,72},{5,12,5},{0,11,73},{29,0,243},{0,11,73},{6,16,81},{6,16,81},{6,16,81},{5,12,80},{4,17,50},{4,13,4},{4,13,4},{4,9,5},{0,12,52},{1,10,4},{7,15,0},{7,15,0},{7,15,0},{7,11,0},{12,1,50},{5,12,1},{5,12,1},{3,9,4},{25,0,50},{3,9,4},{16,1,72},{7,15,1},{8,13,4},{5,12,4},{16,1,72},{33,0,72},{5,12,4}, 
+{0,11,72},{33,0,72},{0,11,72},{5,0,80},{5,0,80},{5,0,80},{5,0,80},{4,13,0},{4,13,0},{4,13,0},{4,9,1},{1,10,0},{1,10,0},{8,23,152},{8,18,82},{8,15,109},{7,14,84},{6,23,243},{6,16,75},{6,15,8},{6,13,108},{1,17,244},{2,13,77},{9,21,72},{9,17,1},{9,15,8},{9,14,13},{17,1,243},{5,17,72},{7,14,5},{1,13,72},{35,0,243}, +{1,13,72},{8,18,81},{8,18,81},{8,18,81},{7,14,80},{6,19,50},{6,15,4},{6,15,4},{6,11,5},{2,14,52},{3,12,4},{9,16,1},{9,16,1},{9,16,1},{9,13,0},{15,1,50},{7,14,1},{7,14,1},{5,11,4},{31,0,50},{5,11,4},{19,1,72},{9,17,1},{10,15,4},{7,14,4},{19,1,72},{39,0,72},{7,14,4},{0,13,72},{39,0,72},{0,13,72},{7,0,80}, +{7,0,80},{7,0,80},{7,0,80},{6,15,0},{6,15,0},{6,15,0},{6,11,1},{3,12,0},{3,12,0},{10,25,152},{10,20,82},{10,17,114},{9,16,82},{8,25,243},{8,18,75},{8,16,2},{8,15,108},{3,19,244},{4,15,77},{11,23,72},{11,19,1},{11,16,10},{11,16,17},{20,1,243},{7,19,72},{8,16,2},{3,15,72},{41,0,243},{3,15,72},{10,20,81},{10,20,81},{10,20,81}, +{10,15,81},{8,21,50},{8,16,1},{8,16,1},{8,13,5},{4,16,52},{5,14,4},{11,18,1},{11,18,1},{11,18,1},{11,15,0},{15,7,50},{8,16,1},{8,16,1},{7,13,4},{31,3,50},{7,13,4},{22,1,72},{11,19,1},{12,16,2},{8,16,1},{22,1,72},{45,0,72},{8,16,1},{0,15,72},{45,0,72},{0,15,72},{9,0,80},{9,0,80},{9,0,80},{9,0,80},{8,17,0}, +{8,17,0},{8,17,0},{8,13,1},{5,14,0},{5,14,0},{12,27,152},{12,22,82},{12,19,114},{11,18,82},{10,27,243},{10,20,75},{10,18,2},{10,16,106},{5,21,244},{6,17,79},{13,25,72},{13,21,1},{13,18,10},{13,18,17},{23,1,243},{9,21,72},{10,18,2},{5,17,74},{47,0,243},{5,17,74},{12,22,81},{12,22,81},{12,22,81},{12,17,80},{10,23,50},{10,18,1},{10,18,1}, +{10,15,5},{6,18,52},{8,15,9},{13,20,1},{13,20,1},{13,20,1},{13,17,1},{15,13,50},{10,18,1},{10,18,1},{9,15,4},{31,6,50},{9,15,4},{25,1,72},{13,21,1},{14,18,2},{10,18,1},{25,1,72},{47,2,72},{10,18,1},{0,17,74},{47,2,72},{0,17,74},{11,0,80},{11,0,80},{11,0,80},{11,0,80},{10,19,0},{10,19,0},{10,19,0},{10,15,1},{7,16,0}, 
+{7,16,0},{14,30,146},{14,24,78},{14,21,114},{14,20,79},{12,30,244},{12,23,79},{12,21,7},{12,19,109},{7,23,244},{9,19,76},{15,28,73},{15,23,2},{15,21,14},{15,20,14},{27,0,243},{11,23,75},{13,20,4},{7,19,73},{46,4,243},{7,19,73},{14,25,74},{14,25,74},{14,25,74},{14,19,75},{12,25,53},{12,21,3},{12,21,3},{12,17,6},{8,21,52},{9,18,6},{15,23,1}, +{15,23,1},{15,23,1},{15,19,1},{24,1,50},{13,20,0},{13,20,0},{11,17,5},{49,0,50},{11,17,5},{20,17,72},{15,23,1},{16,21,4},{13,20,4},{20,17,72},{57,0,72},{13,20,4},{0,19,72},{57,0,72},{0,19,72},{14,0,74},{14,0,74},{14,0,74},{14,0,74},{12,22,1},{12,22,1},{12,22,1},{12,17,2},{9,18,2},{9,18,2},{16,31,152},{16,26,81},{16,23,109}, +{16,22,88},{14,32,244},{14,25,79},{14,23,7},{14,21,109},{9,25,244},{11,21,76},{17,29,72},{17,25,1},{17,23,8},{17,22,13},{30,0,243},{13,25,75},{15,22,4},{9,21,73},{46,7,243},{9,21,73},{16,27,80},{16,27,80},{16,27,80},{16,21,81},{14,27,53},{14,23,3},{14,23,3},{14,19,6},{10,23,52},{11,20,6},{17,25,0},{17,25,0},{17,25,0},{17,21,0},{27,1,50}, +{15,22,0},{15,22,0},{13,19,5},{55,0,50},{13,19,5},{23,17,72},{17,25,1},{18,23,4},{15,22,4},{23,17,72},{63,0,72},{15,22,4},{0,21,72},{63,0,72},{0,21,72},{16,0,80},{16,0,80},{16,0,80},{16,0,80},{14,24,1},{14,24,1},{14,24,1},{14,19,2},{11,20,2},{11,20,2},{18,33,152},{18,28,81},{18,25,109},{17,24,84},{16,33,243},{16,27,78},{16,25,8}, +{16,23,108},{11,27,244},{13,23,76},{19,31,72},{19,27,1},{19,25,8},{19,24,13},{32,1,243},{15,27,75},{17,24,5},{11,23,73},{46,10,243},{11,23,73},{18,29,80},{18,29,80},{18,29,80},{17,24,80},{16,29,50},{16,25,4},{16,25,4},{16,21,5},{12,25,52},{13,22,6},{19,27,0},{19,27,0},{19,27,0},{19,23,0},{30,1,50},{17,24,1},{17,24,1},{15,21,5},{61,0,50}, 
+{15,21,5},{34,1,72},{19,27,1},{20,25,4},{17,24,4},{34,1,72},{63,3,72},{17,24,4},{0,23,72},{63,3,72},{0,23,72},{17,0,80},{17,0,80},{17,0,80},{17,0,80},{16,25,0},{16,25,0},{16,25,0},{16,21,1},{13,22,2},{13,22,2},{20,35,152},{20,30,81},{20,27,109},{19,26,84},{18,35,243},{18,29,78},{18,27,8},{18,25,108},{13,29,244},{15,25,76},{21,33,72}, +{21,29,1},{21,27,8},{21,26,13},{35,1,243},{17,29,72},{19,26,5},{13,25,73},{46,13,243},{13,25,73},{20,31,80},{20,31,80},{20,31,80},{19,26,80},{18,31,50},{18,27,4},{18,27,4},{18,23,5},{14,27,52},{15,24,6},{21,29,0},{21,29,0},{21,29,0},{21,25,0},{33,1,50},{19,26,1},{19,26,1},{17,23,4},{63,2,50},{17,23,4},{37,1,72},{21,29,1},{22,27,4}, +{19,26,4},{37,1,72},{63,6,72},{19,26,4},{0,25,72},{63,6,72},{0,25,72},{19,0,80},{19,0,80},{19,0,80},{19,0,80},{18,27,0},{18,27,0},{18,27,0},{18,23,1},{15,24,2},{15,24,2},{22,38,146},{22,32,78},{22,29,111},{22,28,84},{20,38,244},{20,31,74},{20,29,4},{20,27,100},{14,32,247},{17,27,75},{23,36,73},{23,32,3},{23,29,11},{23,28,14},{39,0,243}, +{20,31,73},{20,29,3},{16,27,74},{62,8,243},{16,27,74},{22,33,74},{22,33,74},{22,33,74},{22,28,75},{20,33,53},{20,29,3},{20,29,3},{20,25,10},{16,29,50},{18,26,2},{23,31,2},{23,31,2},{23,31,2},{23,27,2},{31,12,50},{21,28,1},{21,28,1},{18,26,1},{62,6,50},{18,26,1},{41,0,72},{23,32,2},{24,29,2},{19,29,2},{41,0,72},{62,10,72},{19,29,2}, +{0,27,74},{62,10,72},{0,27,74},{22,0,74},{22,0,74},{22,0,74},{22,0,74},{20,30,1},{20,30,1},{20,30,1},{20,25,1},{18,26,1},{18,26,1},{24,40,146},{24,34,78},{24,31,111},{24,30,84},{22,40,244},{22,33,79},{22,31,4},{22,29,100},{17,33,244},{19,29,75},{25,38,73},{25,33,2},{25,31,11},{25,30,14},{42,0,243},{21,33,75},{22,31,3},{18,29,74},{62,11,243}, 
+{18,29,74},{24,35,74},{24,35,74},{24,35,74},{24,30,75},{22,35,53},{22,31,3},{22,31,3},{22,27,10},{18,31,50},{20,28,2},{25,33,1},{25,33,1},{25,33,1},{25,29,2},{31,18,50},{23,30,1},{23,30,1},{20,28,1},{62,9,50},{20,28,1},{44,0,72},{25,33,1},{26,31,2},{21,31,2},{44,0,72},{62,13,72},{21,31,2},{0,29,74},{62,13,72},{0,29,74},{24,0,74}, +{24,0,74},{24,0,74},{24,0,74},{22,32,1},{22,32,1},{22,32,1},{22,27,1},{20,28,1},{20,28,1},{26,42,146},{26,36,78},{26,33,114},{26,32,79},{24,42,244},{24,35,79},{24,33,7},{24,31,100},{19,35,244},{21,31,75},{27,40,73},{27,35,2},{28,33,13},{27,32,14},{45,0,243},{23,35,75},{25,32,4},{20,31,74},{62,14,243},{20,31,74},{26,37,74},{26,37,74},{26,37,74}, +{26,32,75},{24,37,53},{24,33,3},{24,33,3},{24,29,10},{20,33,52},{22,30,2},{27,35,1},{27,35,1},{27,35,1},{27,31,2},{34,17,50},{25,32,0},{25,32,0},{22,30,1},{62,12,50},{22,30,1},{47,0,72},{27,35,1},{28,33,4},{25,32,4},{47,0,72},{46,24,72},{25,32,4},{0,31,74},{46,24,72},{0,31,74},{26,0,74},{26,0,74},{26,0,74},{26,0,74},{24,34,1}, +{24,34,1},{24,34,1},{24,29,1},{22,30,1},{22,30,1},{28,44,146},{28,38,78},{28,35,114},{28,34,79},{26,44,244},{26,37,79},{26,35,7},{26,33,109},{21,37,244},{23,33,76},{29,42,73},{29,37,2},{30,35,13},{29,34,14},{47,2,243},{25,37,75},{27,34,4},{21,33,73},{62,17,243},{21,33,73},{28,39,74},{28,39,74},{28,39,74},{28,33,75},{26,39,53},{26,35,3},{26,35,3}, +{26,31,10},{22,35,52},{23,32,6},{29,37,1},{29,37,1},{29,37,1},{29,33,1},{37,17,50},{27,34,0},{27,34,0},{23,32,5},{62,15,50},{23,32,5},{49,1,72},{29,37,1},{30,35,4},{27,34,4},{49,1,72},{46,27,72},{27,34,4},{0,33,72},{46,27,72},{0,33,72},{28,0,74},{28,0,74},{28,0,74},{28,0,74},{26,36,1},{26,36,1},{26,36,1},{26,31,1},{23,32,2}, 
+{23,32,2},{30,46,146},{30,41,77},{31,37,121},{30,36,81},{29,45,247},{28,39,77},{28,37,9},{28,35,103},{24,39,248},{25,35,76},{31,44,78},{31,40,4},{32,37,11},{31,36,17},{51,0,243},{28,39,73},{28,37,5},{24,35,74},{47,28,243},{24,35,74},{30,42,72},{30,42,72},{30,42,72},{30,36,72},{29,40,54},{29,36,6},{29,36,6},{28,34,9},{24,37,51},{26,34,2},{31,40,4}, +{31,40,4},{31,40,4},{31,35,5},{49,0,50},{29,36,2},{29,36,2},{26,34,1},{62,18,50},{26,34,1},{53,0,72},{31,40,0},{32,37,2},{28,37,1},{53,0,72},{62,22,72},{28,37,1},{0,35,74},{62,22,72},{0,35,74},{30,0,72},{30,0,72},{30,0,72},{30,0,72},{29,36,5},{29,36,5},{29,36,5},{28,33,4},{26,34,1},{26,34,1},{32,48,146},{32,42,79},{32,39,111}, +{32,38,84},{31,47,247},{30,41,77},{30,39,9},{30,37,103},{26,41,248},{27,37,76},{33,46,73},{33,42,3},{33,39,11},{33,38,14},{54,0,243},{30,41,73},{30,39,5},{26,37,74},{47,31,243},{26,37,74},{32,43,75},{32,43,75},{32,43,75},{32,38,75},{31,42,54},{31,38,6},{31,38,6},{30,36,9},{26,39,51},{28,36,2},{33,41,2},{33,41,2},{33,41,2},{33,37,2},{52,0,50}, +{31,38,2},{31,38,2},{28,36,1},{62,21,50},{28,36,1},{56,0,72},{33,42,2},{34,39,2},{30,39,1},{56,0,72},{62,25,72},{30,39,1},{0,37,74},{62,25,72},{0,37,74},{32,0,74},{32,0,74},{32,0,74},{32,0,74},{31,38,5},{31,38,5},{31,38,5},{30,35,4},{28,36,1},{28,36,1},{34,50,146},{34,44,79},{34,41,111},{34,40,84},{32,50,244},{32,43,74},{32,41,4}, +{32,39,100},{28,43,248},{29,39,76},{35,48,73},{35,44,3},{35,41,11},{35,40,14},{57,0,243},{32,43,73},{32,41,3},{28,39,74},{50,32,243},{28,39,74},{34,45,75},{34,45,75},{34,45,75},{34,40,75},{32,46,51},{32,41,3},{32,41,3},{32,37,10},{28,41,51},{30,38,2},{35,43,2},{35,43,2},{35,43,2},{35,39,2},{55,0,50},{33,40,1},{33,40,1},{30,38,1},{62,24,50}, 
+{30,38,1},{59,0,72},{35,44,2},{36,41,2},{31,41,2},{59,0,72},{62,28,72},{31,41,2},{0,39,74},{62,28,72},{0,39,74},{34,0,74},{34,0,74},{34,0,74},{34,0,74},{32,42,1},{32,42,1},{32,42,1},{32,37,1},{30,38,1},{30,38,1},{36,52,146},{36,46,79},{36,43,111},{36,42,84},{34,52,244},{34,45,74},{34,43,4},{34,41,100},{30,45,248},{31,41,76},{37,50,73}, +{37,46,3},{37,43,11},{37,42,14},{60,0,243},{34,45,73},{34,43,3},{30,41,74},{56,32,243},{30,41,74},{36,47,75},{36,47,75},{36,47,75},{36,42,75},{34,47,53},{34,43,3},{34,43,3},{34,39,10},{30,43,51},{32,40,2},{37,45,2},{37,45,2},{37,45,2},{37,41,2},{58,0,50},{35,42,1},{35,42,1},{32,40,1},{62,27,50},{32,40,1},{62,0,72},{37,46,2},{38,43,2}, +{33,43,2},{62,0,72},{62,31,72},{33,43,2},{0,41,74},{62,31,72},{0,41,74},{36,0,74},{36,0,74},{36,0,74},{36,0,74},{34,44,1},{34,44,1},{34,44,1},{34,39,1},{32,40,1},{32,40,1},{38,54,146},{38,49,77},{39,45,120},{38,45,76},{37,53,247},{36,47,78},{37,45,5},{36,43,100},{31,48,243},{33,43,81},{40,50,78},{39,48,4},{40,45,8},{39,45,20},{63,1,243}, +{36,47,74},{37,45,1},{33,43,80},{63,32,243},{33,43,80},{38,50,72},{38,50,72},{38,50,72},{38,44,72},{37,48,54},{37,45,5},{37,45,5},{36,42,8},{32,45,53},{34,42,1},{40,46,4},{40,46,4},{40,46,4},{40,43,4},{53,16,50},{37,45,1},{37,45,1},{35,42,0},{63,30,50},{35,42,0},{63,5,72},{39,48,0},{41,45,1},{36,45,0},{63,5,72},{47,42,72},{36,45,0}, +{0,43,80},{47,42,72},{0,43,80},{38,0,72},{38,0,72},{38,0,72},{38,0,72},{37,45,4},{37,45,4},{37,45,4},{37,41,4},{34,42,1},{34,42,1},{40,56,146},{40,51,77},{41,47,120},{40,47,76},{39,55,247},{38,49,77},{39,47,5},{38,45,100},{34,49,248},{35,45,81},{42,52,78},{41,50,4},{42,47,8},{41,47,20},{63,7,243},{38,49,73},{39,47,1},{35,45,80},{63,35,243}, 
+{35,45,80},{40,52,72},{40,52,72},{40,52,72},{40,46,72},{39,50,54},{39,47,5},{39,47,5},{38,44,8},{34,47,53},{36,44,1},{42,48,4},{42,48,4},{42,48,4},{42,45,4},{56,16,50},{39,47,1},{39,47,1},{37,44,0},{62,33,50},{37,44,0},{63,11,72},{41,50,0},{43,47,1},{38,47,0},{63,11,72},{47,45,72},{38,47,0},{0,45,80},{47,45,72},{0,45,80},{40,0,72}, +{40,0,72},{40,0,72},{40,0,72},{39,47,4},{39,47,4},{39,47,4},{39,43,4},{36,44,1},{36,44,1},{42,58,146},{42,53,77},{43,49,121},{42,48,81},{41,57,247},{40,51,77},{40,49,9},{40,47,100},{36,51,248},{37,47,81},{44,54,78},{43,52,4},{44,49,6},{43,48,17},{63,13,243},{40,51,73},{40,49,5},{37,47,80},{63,38,243},{37,47,80},{42,54,72},{42,54,72},{42,54,72}, +{42,48,72},{41,52,54},{41,48,6},{41,48,6},{40,46,8},{36,49,51},{38,46,1},{44,50,4},{44,50,4},{44,50,4},{44,47,4},{59,16,50},{41,48,2},{41,48,2},{39,46,0},{62,36,50},{39,46,0},{55,32,72},{43,52,0},{44,49,2},{40,49,1},{55,32,72},{46,48,72},{40,49,1},{0,47,80},{46,48,72},{0,47,80},{42,0,72},{42,0,72},{42,0,72},{42,0,72},{41,48,5}, +{41,48,5},{41,48,5},{41,45,4},{38,46,1},{38,46,1},{44,60,146},{44,55,77},{45,51,121},{44,50,81},{43,59,247},{42,53,77},{42,51,9},{42,49,103},{38,53,248},{39,49,76},{46,56,78},{45,54,4},{46,51,6},{45,50,17},{63,19,243},{42,53,73},{42,51,5},{38,49,74},{63,41,243},{38,49,74},{44,56,72},{44,56,72},{44,56,72},{44,50,72},{43,54,54},{43,50,6},{43,50,6}, +{42,48,9},{38,51,51},{40,48,2},{46,52,4},{46,52,4},{46,52,4},{46,49,4},{62,16,50},{43,50,2},{43,50,2},{40,48,1},{62,39,50},{40,48,1},{58,32,72},{45,54,0},{46,51,2},{42,51,1},{58,32,72},{52,48,72},{42,51,1},{0,49,74},{52,48,72},{0,49,74},{44,0,72},{44,0,72},{44,0,72},{44,0,72},{43,50,5},{43,50,5},{43,50,5},{43,47,4},{40,48,1}, 
+{40,48,1},{46,63,146},{46,57,79},{47,53,115},{46,53,78},{45,61,245},{45,55,77},{45,53,2},{44,51,105},{39,56,243},{42,51,82},{48,59,76},{48,55,9},{48,53,8},{47,53,18},{59,33,243},{44,55,74},{45,53,1},{41,51,80},{55,48,243},{41,51,80},{46,59,74},{46,59,74},{46,59,74},{46,52,75},{45,57,51},{45,53,2},{45,53,2},{44,50,10},{41,53,51},{42,50,1},{48,54,4}, +{48,54,4},{48,54,4},{48,51,4},{63,21,50},{45,53,1},{45,53,1},{43,50,0},{63,42,50},{43,50,0},{63,29,72},{47,56,2},{49,53,1},{44,53,1},{63,29,72},{63,46,72},{44,53,1},{0,51,80},{63,46,72},{0,51,80},{46,0,74},{46,0,74},{46,0,74},{46,0,74},{45,53,1},{45,53,1},{45,53,1},{45,49,1},{42,50,1},{42,50,1},{48,63,152},{48,59,77},{49,55,120}, +{48,55,76},{47,63,245},{47,57,77},{47,55,2},{46,53,105},{41,58,243},{44,53,82},{50,61,76},{49,58,4},{50,55,8},{49,55,20},{62,33,243},{46,57,74},{47,55,1},{43,53,80},{61,48,243},{43,53,80},{48,60,72},{48,60,72},{48,60,72},{48,54,72},{47,59,51},{47,55,2},{47,55,2},{46,52,10},{43,55,51},{44,52,1},{50,56,4},{50,56,4},{50,56,4},{50,53,4},{63,27,50}, +{47,55,1},{47,55,1},{45,52,0},{63,45,50},{45,52,0},{63,35,72},{49,58,0},{51,55,1},{46,55,1},{63,35,72},{63,49,72},{46,55,1},{0,53,80},{63,49,72},{0,53,80},{48,0,72},{48,0,72},{48,0,72},{48,0,72},{47,55,1},{47,55,1},{47,55,1},{47,51,1},{44,52,1},{44,52,1},{51,63,184},{50,61,77},{51,57,120},{50,57,76},{49,63,268},{48,59,78},{49,57,5}, +{48,55,100},{43,60,243},{46,55,82},{52,63,76},{51,60,4},{52,57,8},{51,57,20},{63,37,243},{48,59,74},{49,57,1},{45,55,80},{63,50,243},{45,55,80},{50,62,72},{50,62,72},{50,62,72},{50,56,72},{49,60,56},{49,57,5},{49,57,5},{48,54,8},{45,57,51},{46,54,1},{52,58,4},{52,58,4},{52,58,4},{52,55,4},{63,33,50},{49,57,1},{49,57,1},{47,54,0},{47,56,50}, 
+{47,54,0},{63,41,72},{51,60,0},{53,57,1},{48,57,0},{63,41,72},{63,52,72},{48,57,0},{0,55,80},{63,52,72},{0,55,80},{50,0,72},{50,0,72},{50,0,72},{50,0,72},{49,57,4},{49,57,4},{49,57,4},{49,53,4},{46,54,1},{46,54,1},{53,63,226},{52,63,77},{53,59,120},{52,59,76},{52,63,300},{50,61,78},{51,59,5},{50,57,100},{45,62,243},{47,57,84},{54,63,84}, +{53,62,4},{54,59,8},{53,59,20},{63,43,243},{49,62,73},{51,59,1},{47,57,80},{63,53,243},{47,57,80},{52,63,76},{52,63,76},{52,63,76},{52,58,72},{51,62,56},{51,59,5},{51,59,5},{50,56,8},{47,59,51},{48,56,1},{54,60,4},{54,60,4},{54,60,4},{54,57,4},{63,39,50},{51,59,1},{51,59,1},{49,56,0},{47,59,50},{49,56,0},{63,47,72},{53,62,0},{55,59,1}, +{50,59,0},{63,47,72},{63,55,72},{50,59,0},{0,57,80},{63,55,72},{0,57,80},{52,0,72},{52,0,72},{52,0,72},{52,0,72},{51,59,4},{51,59,4},{51,59,4},{51,55,4},{48,56,1},{48,56,1},{56,63,314},{55,63,115},{55,62,115},{54,61,76},{54,63,364},{53,63,79},{53,61,3},{52,59,108},{48,63,252},{49,60,79},{57,63,115},{56,63,10},{56,61,11},{56,61,18},{63,50,243}, +{53,63,78},{53,61,2},{47,60,75},{62,57,243},{47,60,75},{54,63,99},{54,63,99},{54,63,99},{54,60,75},{53,63,69},{53,61,2},{53,61,2},{53,58,6},{49,61,51},{50,58,6},{56,63,1},{56,63,1},{56,63,1},{56,59,2},{61,49,50},{53,61,1},{53,61,1},{51,58,2},{63,54,50},{51,58,2},{63,53,74},{56,63,9},{57,61,1},{53,61,1},{63,53,74},{63,58,74},{53,61,1}, +{0,60,74},{63,58,74},{0,60,74},{54,0,74},{54,0,74},{54,0,74},{54,0,74},{53,61,2},{53,61,2},{53,61,2},{53,57,2},{50,59,2},{50,59,2},{57,63,371},{57,63,179},{57,63,115},{56,63,75},{57,63,387},{55,63,123},{55,63,2},{54,61,91},{52,63,286},{51,62,70},{59,63,146},{58,63,59},{58,63,10},{58,63,17},{63,55,221},{57,63,98},{55,63,1},{49,62,66},{63,59,221}, 
+{49,62,66},{57,63,115},{57,63,115},{57,63,115},{56,62,75},{56,63,93},{55,63,2},{55,63,2},{55,60,6},{51,63,51},{52,60,6},{58,63,10},{58,63,10},{58,63,10},{58,61,2},{63,51,50},{55,63,1},{55,63,1},{53,60,2},{63,57,50},{53,60,2},{63,59,61},{60,63,25},{59,63,0},{55,63,0},{63,59,61},{63,61,61},{55,63,0},{0,62,65},{63,61,61},{0,62,65},{56,0,74}, +{56,0,74},{56,0,74},{56,0,74},{55,63,2},{55,63,2},{55,63,2},{55,59,2},{52,61,2},{52,61,2},{60,63,259},{59,63,190},{59,63,154},{58,63,90},{59,63,270},{58,63,91},{57,63,35},{57,62,22},{56,63,194},{54,63,11},{61,63,70},{60,63,42},{60,63,26},{60,63,2},{63,59,94},{60,63,42},{59,63,13},{53,63,10},{63,61,94},{53,63,10},{59,63,154},{59,63,154},{59,63,154}, +{58,63,90},{58,63,147},{57,63,35},{57,63,35},{57,62,6},{55,63,77},{54,62,6},{60,63,26},{60,63,26},{60,63,26},{60,63,2},{63,57,50},{59,63,13},{59,63,13},{55,62,2},{63,60,50},{55,62,2},{63,62,5},{62,63,4},{62,63,0},{61,63,0},{63,62,5},{62,63,5},{61,63,0},{0,63,9},{62,63,5},{0,63,9},{58,0,74},{58,0,74},{58,0,74},{58,0,74},{57,63,10}, +{57,63,10},{57,63,10},{57,61,2},{54,63,2},{54,63,2},{61,63,162},{61,63,135},{61,63,126},{60,63,90},{61,63,154},{60,63,66},{60,63,50},{59,63,2},{59,63,109},{57,63,10},{62,63,19},{62,63,14},{62,63,10},{62,63,5},{63,62,17},{62,63,12},{62,63,8},{59,63,1},{62,63,17},{59,63,1},{61,63,126},{61,63,126},{61,63,126},{60,63,90},{60,63,98},{60,63,50},{60,63,50}, +{59,63,2},{58,63,62},{57,63,10},{62,63,10},{62,63,10},{62,63,10},{62,63,5},{63,61,13},{62,63,8},{62,63,8},{59,63,1},{63,62,13},{59,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{60,0,74},{60,0,74},{60,0,74},{60,0,74},{60,63,34},{60,63,34},{60,63,34},{59,63,2},{57,63,10}, 
+{57,63,10},{0,14,202},{0,10,25},{0,7,1},{0,6,74},{0,10,441},{0,6,282},{0,5,133},{0,4,318},{0,5,477},{0,4,343},{0,14,202},{0,10,25},{0,7,1},{0,6,74},{5,0,441},{0,6,282},{0,5,133},{0,4,318},{10,0,441},{0,4,318},{0,7,0},{0,7,0},{0,7,0},{0,3,1},{0,3,41},{0,3,17},{0,3,17},{0,2,26},{0,2,45},{0,1,30},{0,7,0}, +{0,7,0},{0,7,0},{0,3,1},{2,0,41},{0,3,17},{0,3,17},{0,2,26},{3,0,41},{0,2,26},{7,0,202},{0,10,25},{0,7,1},{0,6,74},{7,0,202},{14,0,202},{0,6,74},{0,5,202},{14,0,202},{0,5,202},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,20,200},{0,14,1},{0,10,25}, +{0,8,41},{0,14,686},{0,9,362},{0,8,141},{0,5,467},{0,6,762},{0,5,503},{0,20,200},{0,14,1},{0,10,25},{0,8,41},{7,0,686},{0,9,362},{0,8,141},{0,5,467},{14,0,686},{0,5,467},{0,13,0},{0,13,0},{0,13,0},{0,6,1},{0,6,145},{0,5,52},{0,5,52},{0,3,89},{0,3,161},{0,3,105},{0,13,0},{0,13,0},{0,13,0},{0,6,1},{3,0,145}, +{0,5,52},{0,5,52},{0,3,89},{6,0,145},{0,3,89},{2,16,200},{0,14,1},{2,9,1},{0,8,41},{2,16,200},{20,0,200},{0,8,41},{0,7,202},{20,0,200},{0,7,202},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,24,225},{1,16,27},{1,12,83},{1,11,51},{0,19,724},{0,12,299},{0,10,62}, +{0,8,414},{0,9,875},{0,7,500},{2,22,201},{2,16,6},{2,12,26},{1,11,35},{10,0,723},{0,12,299},{0,10,62},{0,8,414},{14,3,723},{0,8,414},{1,17,25},{1,17,25},{1,17,25},{1,9,25},{0,12,162},{0,8,25},{0,8,25},{0,5,61},{0,5,216},{0,5,97},{2,15,1},{2,15,1},{2,15,1},{2,8,2},{6,0,162},{0,8,25},{0,8,25},{0,5,61},{12,0,162}, +{0,5,61},{5,16,200},{1,16,2},{4,11,1},{0,11,17},{5,16,200},{26,0,200},{0,11,17},{0,9,202},{26,0,200},{0,9,202},{1,0,25},{1,0,25},{1,0,25},{1,0,25},{0,5,1},{0,5,1},{0,5,1},{0,3,1},{0,2,8},{0,2,8},{2,28,313},{2,19,118},{2,14,203},{2,13,130},{0,25,724},{0,16,236},{0,13,6},{0,10,339},{0,11,984},{0,10,508},{4,24,201}, 
+{4,17,5},{4,14,26},{3,13,35},{13,0,723},{0,16,236},{0,13,6},{0,10,339},{14,6,723},{0,10,339},{2,21,113},{2,21,113},{2,21,113},{2,11,114},{0,18,162},{0,12,2},{0,12,2},{0,7,34},{0,8,280},{0,7,115},{4,17,1},{4,17,1},{4,17,1},{4,10,2},{9,0,162},{0,12,2},{0,12,2},{0,7,34},{18,0,162},{0,7,34},{16,0,200},{3,18,2},{6,13,1}, +{0,13,2},{16,0,200},{32,0,200},{0,13,2},{0,11,202},{32,0,200},{0,11,202},{2,0,113},{2,0,113},{2,0,113},{2,0,113},{0,11,1},{0,11,1},{0,11,1},{0,6,1},{0,5,40},{0,5,40},{4,31,408},{4,22,216},{4,16,317},{3,15,216},{1,30,723},{1,19,216},{1,15,9},{0,12,312},{0,15,1000},{0,12,440},{6,27,200},{6,20,1},{7,16,29},{5,15,36},{16,0,723}, +{0,19,203},{2,15,5},{0,12,296},{32,0,723},{0,12,296},{4,23,209},{4,23,209},{4,23,209},{4,14,208},{1,23,162},{1,16,4},{1,16,4},{1,10,20},{0,11,294},{0,10,89},{6,19,1},{6,19,1},{6,19,1},{6,13,0},{12,1,162},{2,15,1},{2,15,1},{0,10,8},{25,0,162},{0,10,8},{19,1,200},{6,20,1},{8,15,5},{2,15,4},{19,1,200},{39,0,200},{2,15,4}, +{0,13,200},{39,0,200},{0,13,200},{3,0,208},{3,0,208},{3,0,208},{3,0,208},{1,16,0},{1,16,0},{1,16,0},{1,9,1},{0,8,45},{0,8,45},{6,33,408},{6,24,216},{6,18,317},{5,17,213},{3,32,723},{3,21,216},{3,17,2},{2,14,312},{0,18,888},{0,14,293},{8,29,200},{8,22,1},{9,17,26},{8,17,37},{19,0,723},{1,22,200},{3,17,2},{0,14,257},{38,0,723}, +{0,14,257},{6,25,209},{6,25,209},{6,25,209},{5,16,208},{3,25,162},{3,17,1},{3,17,1},{3,12,20},{0,15,228},{0,12,20},{8,21,1},{8,21,1},{8,21,1},{8,15,0},{15,1,162},{3,17,1},{3,17,1},{0,12,4},{31,0,162},{0,12,4},{22,1,200},{8,22,1},{10,17,2},{3,17,1},{22,1,200},{45,0,200},{3,17,1},{0,15,200},{45,0,200},{0,15,200},{5,0,208}, +{5,0,208},{5,0,208},{5,0,208},{3,18,0},{3,18,0},{3,18,0},{3,11,1},{0,11,13},{0,11,13},{8,35,408},{8,26,216},{8,20,317},{7,19,213},{5,34,723},{5,23,216},{5,19,2},{4,16,298},{0,22,804},{0,16,228},{10,31,200},{10,24,1},{11,19,26},{10,19,37},{22,0,723},{3,24,200},{5,19,2},{0,16,227},{44,0,723},{0,16,227},{8,27,209},{8,27,209},{8,27,209}, 
+{7,18,208},{5,27,162},{5,19,1},{5,19,1},{5,14,20},{0,18,180},{0,15,4},{10,23,1},{10,23,1},{10,23,1},{10,17,1},{15,7,162},{5,19,1},{5,19,1},{2,14,4},{31,3,162},{2,14,4},{25,1,200},{10,24,1},{12,19,2},{5,19,1},{25,1,200},{47,2,200},{5,19,1},{0,17,202},{47,2,200},{0,17,202},{7,0,208},{7,0,208},{7,0,208},{7,0,208},{5,20,0}, +{5,20,0},{5,20,0},{5,13,1},{0,15,0},{0,15,0},{10,37,408},{10,28,216},{10,22,317},{9,21,213},{7,36,723},{7,25,216},{7,21,2},{6,18,298},{0,25,748},{1,19,218},{12,33,200},{12,26,1},{13,21,26},{12,21,37},{25,0,723},{5,26,200},{7,21,2},{0,19,211},{50,0,723},{0,19,211},{10,29,209},{10,29,209},{10,29,209},{9,20,208},{7,29,162},{7,21,1},{7,21,1}, +{7,16,17},{0,21,164},{3,16,3},{12,25,1},{12,25,1},{12,25,1},{12,19,1},{15,13,162},{7,21,1},{7,21,1},{4,16,1},{31,6,162},{4,16,1},{28,1,200},{12,26,1},{14,21,2},{7,21,1},{28,1,200},{47,5,200},{7,21,1},{0,19,202},{47,5,200},{0,19,202},{9,0,208},{9,0,208},{9,0,208},{9,0,208},{7,22,0},{7,22,0},{7,22,0},{7,15,1},{2,17,0}, +{2,17,0},{12,39,404},{12,30,212},{13,24,318},{12,23,215},{9,39,724},{9,27,212},{9,24,7},{8,20,306},{0,28,724},{3,21,210},{14,35,203},{14,28,2},{15,24,22},{14,23,30},{28,1,723},{7,28,203},{10,23,4},{0,21,201},{47,5,723},{0,21,201},{12,32,202},{12,32,202},{12,32,202},{12,22,202},{9,31,165},{9,24,3},{9,24,3},{9,18,14},{2,24,164},{4,19,6},{14,28,1}, +{14,28,1},{14,28,1},{14,21,1},{24,1,162},{10,23,0},{10,23,0},{6,18,5},{49,0,162},{6,18,5},{23,17,200},{14,28,1},{16,24,4},{10,23,4},{23,17,200},{63,0,200},{10,23,4},{0,21,200},{63,0,200},{0,21,200},{12,0,202},{12,0,202},{12,0,202},{12,0,202},{9,25,1},{9,25,1},{9,25,1},{9,17,2},{4,19,2},{4,19,2},{14,41,404},{14,32,215},{15,26,318}, 
+{14,25,215},{11,41,724},{11,29,212},{11,26,7},{10,22,306},{2,30,724},{5,23,210},{16,37,200},{16,30,1},{17,26,24},{16,25,40},{31,1,723},{9,30,203},{12,25,4},{2,23,201},{47,8,723},{2,23,201},{14,34,202},{14,34,202},{14,34,202},{14,24,202},{11,33,163},{11,26,3},{11,26,3},{11,20,14},{4,26,164},{6,21,6},{16,30,0},{16,30,0},{16,30,0},{16,23,0},{27,1,162}, +{12,25,0},{12,25,0},{8,20,5},{55,0,162},{8,20,5},{34,1,200},{16,30,1},{18,26,4},{12,25,4},{34,1,200},{63,3,200},{12,25,4},{0,23,200},{63,3,200},{0,23,200},{14,0,202},{14,0,202},{14,0,202},{14,0,202},{11,27,1},{11,27,1},{11,27,1},{11,19,2},{6,21,2},{6,21,2},{16,43,408},{16,34,216},{16,28,312},{16,27,221},{13,43,724},{13,31,212},{13,28,7}, +{12,24,306},{4,32,724},{7,25,210},{18,39,200},{18,32,1},{19,28,24},{17,27,36},{34,0,723},{11,32,201},{14,27,4},{4,25,201},{47,11,723},{4,25,201},{16,35,209},{16,35,209},{16,35,209},{16,26,208},{13,35,163},{13,28,3},{13,28,3},{13,22,14},{6,28,164},{8,23,6},{18,31,1},{18,31,1},{18,31,1},{18,25,0},{30,1,162},{14,27,0},{14,27,0},{10,22,5},{61,0,162}, +{10,22,5},{37,1,200},{18,32,1},{20,28,4},{14,27,4},{37,1,200},{63,6,200},{14,27,4},{0,25,200},{63,6,200},{0,25,200},{16,0,208},{16,0,208},{16,0,208},{16,0,208},{13,29,1},{13,29,1},{13,29,1},{13,21,2},{8,23,2},{8,23,2},{18,45,408},{18,36,216},{18,30,312},{17,29,216},{15,45,724},{15,33,210},{15,30,7},{14,26,306},{6,34,724},{9,27,210},{20,41,200}, +{20,34,1},{21,30,24},{19,29,36},{37,0,723},{13,34,201},{16,29,5},{6,27,201},{47,14,723},{6,27,201},{18,37,209},{18,37,209},{18,37,209},{18,28,208},{15,37,163},{15,30,3},{15,30,3},{15,24,14},{8,30,164},{10,25,6},{20,33,1},{20,33,1},{20,33,1},{20,27,0},{33,1,162},{16,29,1},{16,29,1},{12,24,5},{63,2,162},{12,24,5},{40,1,200},{20,34,1},{22,30,4}, 
+{16,29,4},{40,1,200},{63,9,200},{16,29,4},{0,27,200},{63,9,200},{0,27,200},{17,0,208},{17,0,208},{17,0,208},{17,0,208},{15,31,1},{15,31,1},{15,31,1},{15,23,2},{10,25,2},{10,25,2},{20,47,404},{20,38,212},{21,32,318},{20,31,215},{17,47,724},{17,35,212},{17,32,7},{16,29,308},{8,36,724},{11,29,210},{22,44,201},{22,36,2},{23,32,22},{22,31,35},{40,1,723}, +{15,36,206},{17,32,6},{9,29,202},{63,9,723},{9,29,202},{20,40,202},{20,40,202},{20,40,202},{20,30,203},{17,39,165},{17,32,3},{17,32,3},{17,26,19},{10,32,164},{13,27,2},{22,36,1},{22,36,1},{22,36,1},{22,29,2},{31,12,162},{18,31,1},{18,31,1},{13,27,1},{62,6,162},{13,27,1},{44,0,200},{22,36,1},{24,32,4},{16,32,4},{44,0,200},{62,13,200},{16,32,4}, +{0,29,202},{62,13,200},{0,29,202},{20,0,202},{20,0,202},{20,0,202},{20,0,202},{17,33,1},{17,33,1},{17,33,1},{17,25,1},{13,27,1},{13,27,1},{22,49,404},{22,40,212},{23,34,318},{22,33,215},{19,49,724},{19,37,212},{19,34,7},{18,31,308},{10,38,724},{13,31,210},{24,46,201},{24,38,2},{25,34,22},{24,33,30},{43,1,723},{17,38,203},{20,33,4},{11,31,202},{63,12,723}, +{11,31,202},{22,42,202},{22,42,202},{22,42,202},{22,32,202},{19,41,165},{19,34,3},{19,34,3},{19,28,19},{12,34,164},{15,29,2},{24,38,1},{24,38,1},{24,38,1},{24,31,2},{31,18,162},{20,33,0},{20,33,0},{15,29,1},{62,9,162},{15,29,1},{47,0,200},{24,38,1},{26,34,4},{20,33,4},{47,0,200},{46,24,200},{20,33,4},{0,31,202},{46,24,200},{0,31,202},{22,0,202}, +{22,0,202},{22,0,202},{22,0,202},{19,35,1},{19,35,1},{19,35,1},{19,27,1},{15,29,1},{15,29,1},{24,51,404},{24,42,212},{25,36,318},{24,35,215},{21,51,724},{21,39,212},{21,36,7},{20,32,306},{12,40,724},{15,33,213},{26,47,203},{26,40,2},{27,36,22},{26,35,30},{46,1,723},{19,40,203},{22,35,4},{13,33,200},{63,15,723},{13,33,200},{24,44,202},{24,44,202},{24,44,202}, 
+{24,34,202},{21,43,165},{21,36,3},{21,36,3},{21,30,19},{14,36,164},{17,31,2},{26,40,1},{26,40,1},{26,40,1},{26,33,1},{34,17,162},{22,35,0},{22,35,0},{17,31,1},{62,12,162},{17,31,1},{49,1,200},{26,40,1},{28,36,4},{22,35,4},{49,1,200},{46,27,200},{22,35,4},{0,33,200},{46,27,200},{0,33,200},{24,0,202},{24,0,202},{24,0,202},{24,0,202},{21,37,1}, +{21,37,1},{21,37,1},{21,29,1},{17,31,1},{17,31,1},{26,53,404},{26,44,212},{27,38,318},{26,37,215},{23,53,724},{23,41,212},{23,38,7},{22,34,306},{14,42,724},{17,35,210},{28,49,203},{28,42,2},{29,38,22},{28,37,30},{47,5,723},{21,42,203},{24,37,4},{15,35,200},{63,18,723},{15,35,200},{26,46,202},{26,46,202},{26,46,202},{26,36,202},{23,45,165},{23,38,3},{23,38,3}, +{23,32,14},{16,38,164},{18,33,6},{28,42,1},{28,42,1},{28,42,1},{28,35,1},{37,17,162},{24,37,0},{24,37,0},{20,32,5},{62,15,162},{20,32,5},{52,1,200},{28,42,1},{30,38,4},{24,37,4},{52,1,200},{46,30,200},{24,37,4},{0,35,200},{46,30,200},{0,35,200},{26,0,202},{26,0,202},{26,0,202},{26,0,202},{23,39,1},{23,39,1},{23,39,1},{23,31,1},{18,33,2}, +{18,33,2},{28,56,400},{28,46,216},{29,40,314},{28,39,213},{26,53,728},{25,44,215},{25,40,9},{25,37,306},{17,44,728},{19,37,210},{31,50,204},{30,45,4},{31,40,21},{30,39,29},{52,1,723},{24,44,201},{25,40,5},{17,37,202},{46,30,723},{17,37,202},{28,49,200},{28,49,200},{28,49,200},{28,38,201},{26,46,166},{26,39,6},{26,39,6},{25,34,17},{18,40,163},{21,35,2},{31,43,4}, +{31,43,4},{31,43,4},{31,37,4},{49,0,162},{26,39,2},{26,39,2},{21,35,1},{62,18,162},{21,35,1},{56,0,200},{30,45,0},{32,40,2},{25,40,1},{56,0,200},{62,25,200},{25,40,1},{0,37,202},{62,25,200},{0,37,202},{28,0,200},{28,0,200},{28,0,200},{28,0,200},{26,39,5},{26,39,5},{26,39,5},{25,33,4},{21,35,1},{21,35,1},{30,58,400},{30,48,217},{31,42,314}, 
+{30,41,213},{28,55,728},{27,46,215},{27,42,9},{27,39,306},{19,46,728},{21,39,210},{32,54,201},{32,47,3},{33,42,19},{32,41,35},{55,1,723},{26,46,201},{27,42,5},{19,39,202},{47,32,723},{19,39,202},{30,51,200},{30,51,200},{30,51,200},{30,40,201},{28,48,166},{28,41,6},{28,41,6},{27,36,17},{20,42,163},{23,37,2},{32,46,2},{32,46,2},{32,46,2},{32,39,2},{52,0,162}, +{28,41,2},{28,41,2},{23,37,1},{62,21,162},{23,37,1},{59,0,200},{32,47,2},{34,42,2},{27,42,1},{59,0,200},{62,28,200},{27,42,1},{0,39,202},{62,28,200},{0,39,202},{30,0,200},{30,0,200},{30,0,200},{30,0,200},{28,41,5},{28,41,5},{28,41,5},{27,35,4},{23,37,1},{23,37,1},{32,60,402},{32,50,212},{32,44,324},{32,43,215},{30,57,728},{29,47,216},{29,44,9}, +{29,41,306},{20,48,724},{23,41,210},{34,56,201},{34,48,2},{35,44,19},{34,43,35},{58,1,723},{28,48,201},{29,44,5},{21,41,202},{53,32,723},{21,41,202},{32,52,202},{32,52,202},{32,52,202},{32,42,203},{30,50,166},{30,43,6},{30,43,6},{29,38,17},{22,44,163},{25,39,2},{34,48,1},{34,48,1},{34,48,1},{34,41,2},{55,0,162},{30,43,2},{30,43,2},{25,39,1},{62,24,162}, +{25,39,1},{62,0,200},{34,48,1},{36,44,2},{29,44,1},{62,0,200},{62,31,200},{29,44,1},{0,41,202},{62,31,200},{0,41,202},{32,0,202},{32,0,202},{32,0,202},{32,0,202},{30,43,5},{30,43,5},{30,43,5},{29,37,4},{25,39,1},{25,39,1},{34,62,402},{34,52,212},{34,46,324},{34,45,215},{31,61,728},{31,49,210},{31,46,9},{31,43,306},{22,50,724},{25,43,210},{36,58,201}, +{36,50,2},{37,46,19},{36,45,35},{61,1,723},{30,50,201},{31,46,5},{23,43,202},{59,32,723},{23,43,202},{34,54,202},{34,54,202},{34,54,202},{34,44,203},{31,54,166},{31,46,8},{31,46,8},{31,40,17},{24,46,163},{27,41,2},{36,50,1},{36,50,1},{36,50,1},{36,43,2},{58,0,162},{32,45,1},{32,45,1},{27,41,1},{62,27,162},{27,41,1},{63,4,200},{36,50,1},{38,46,2}, 
+{31,46,1},{63,4,200},{62,34,200},{31,46,1},{0,43,202},{62,34,200},{0,43,202},{34,0,202},{34,0,202},{34,0,202},{34,0,202},{31,47,5},{31,47,5},{31,47,5},{31,39,4},{27,41,1},{27,41,1},{36,63,408},{36,54,216},{37,48,314},{36,47,217},{34,62,727},{33,52,215},{33,48,9},{32,45,308},{24,53,723},{28,45,217},{39,58,206},{38,53,4},{39,48,21},{38,47,36},{63,4,723}, +{32,52,201},{33,48,5},{26,45,208},{62,34,723},{26,45,208},{36,57,200},{36,57,200},{36,57,200},{36,47,201},{34,54,166},{33,48,8},{33,48,8},{33,43,20},{26,48,163},{29,43,1},{39,51,4},{39,51,4},{39,51,4},{39,45,4},{53,16,162},{35,47,4},{35,47,4},{30,43,0},{63,30,162},{30,43,0},{63,11,200},{38,53,0},{40,48,2},{33,48,1},{63,11,200},{47,45,200},{33,48,1}, +{0,45,208},{47,45,200},{0,45,208},{36,0,200},{36,0,200},{36,0,200},{36,0,200},{34,47,5},{34,47,5},{34,47,5},{34,41,4},{29,43,1},{29,43,1},{39,63,440},{38,56,216},{39,50,314},{38,49,213},{36,63,728},{35,54,215},{35,50,9},{34,47,308},{26,55,723},{30,47,217},{41,60,206},{40,55,4},{41,50,21},{40,49,29},{63,10,723},{34,54,201},{35,50,5},{28,47,208},{62,37,723}, +{28,47,208},{38,59,200},{38,59,200},{38,59,200},{38,48,201},{36,56,166},{36,49,6},{36,49,6},{35,45,20},{28,50,163},{31,45,1},{41,53,4},{41,53,4},{41,53,4},{41,47,4},{56,16,162},{36,49,2},{36,49,2},{32,45,0},{62,33,162},{32,45,0},{55,32,200},{40,55,0},{42,50,2},{35,50,1},{55,32,200},{46,48,200},{35,50,1},{0,47,208},{46,48,200},{0,47,208},{38,0,200}, +{38,0,200},{38,0,200},{38,0,200},{36,49,5},{36,49,5},{36,49,5},{36,43,4},{31,45,1},{31,45,1},{41,63,482},{40,58,216},{41,52,314},{40,51,213},{39,63,760},{37,56,215},{37,52,9},{37,49,306},{28,57,723},{31,49,215},{43,62,206},{42,57,4},{43,52,21},{42,51,29},{63,16,723},{36,56,201},{37,52,5},{29,49,203},{62,40,723},{29,49,203},{40,61,200},{40,61,200},{40,61,200}, 
+{40,50,201},{38,58,166},{38,51,6},{38,51,6},{37,47,20},{30,52,163},{33,47,1},{43,55,4},{43,55,4},{43,55,4},{43,49,4},{59,16,162},{38,51,2},{38,51,2},{34,47,0},{62,36,162},{34,47,0},{58,32,200},{42,57,0},{44,52,2},{37,52,1},{58,32,200},{52,48,200},{37,52,1},{0,49,202},{52,48,200},{0,49,202},{40,0,200},{40,0,200},{40,0,200},{40,0,200},{38,51,5}, +{38,51,5},{38,51,5},{38,45,4},{33,47,1},{33,47,1},{43,63,530},{42,60,216},{43,54,314},{42,53,213},{41,63,799},{39,58,215},{39,54,9},{39,51,306},{30,59,723},{33,51,210},{45,63,212},{44,59,4},{45,54,21},{44,53,29},{63,22,723},{38,58,201},{39,54,5},{31,51,203},{62,43,723},{31,51,203},{42,63,200},{42,63,200},{42,63,200},{42,52,201},{40,60,166},{40,53,6},{40,53,6}, +{39,48,17},{32,54,163},{35,49,2},{45,57,4},{45,57,4},{45,57,4},{45,51,4},{62,16,162},{40,53,2},{40,53,2},{35,49,1},{62,39,162},{35,49,1},{61,32,200},{44,59,0},{46,54,2},{39,54,1},{61,32,200},{58,48,200},{39,54,1},{0,51,202},{58,48,200},{0,51,202},{42,0,200},{42,0,200},{42,0,200},{42,0,200},{40,53,5},{40,53,5},{40,53,5},{40,47,4},{35,49,1}, +{35,49,1},{46,63,626},{44,63,222},{45,57,315},{44,56,211},{44,63,869},{41,60,213},{42,56,2},{41,53,298},{32,61,723},{36,53,217},{48,63,244},{47,60,5},{47,56,26},{46,55,34},{61,32,723},{40,60,202},{42,56,1},{34,53,208},{58,48,723},{34,53,208},{44,63,218},{44,63,218},{44,63,218},{44,55,203},{42,63,163},{42,56,2},{42,56,2},{41,51,25},{35,56,163},{37,51,1},{47,60,1}, +{47,60,1},{47,60,1},{47,53,2},{63,21,162},{42,56,1},{42,56,1},{38,51,0},{63,42,162},{38,51,0},{63,35,200},{46,61,2},{49,56,1},{41,56,1},{63,35,200},{63,49,200},{41,56,1},{0,53,208},{63,49,200},{0,53,208},{44,0,202},{44,0,202},{44,0,202},{44,0,202},{42,56,1},{42,56,1},{42,56,1},{42,49,1},{37,51,1},{37,51,1},{48,63,728},{47,63,238},{47,59,315}, 
+{46,58,211},{46,63,937},{43,62,213},{44,58,2},{43,55,298},{34,63,723},{38,55,217},{50,63,286},{48,63,4},{49,58,20},{48,57,36},{63,34,723},{42,62,202},{44,58,1},{36,55,208},{62,49,723},{36,55,208},{47,63,234},{47,63,234},{47,63,234},{46,57,203},{44,63,181},{44,58,2},{44,58,2},{43,53,25},{37,58,163},{39,53,1},{49,61,4},{49,61,4},{49,61,4},{49,55,4},{63,27,162}, +{44,58,1},{44,58,1},{40,53,0},{63,45,162},{40,53,0},{63,41,200},{48,63,0},{51,58,1},{43,58,1},{63,41,200},{63,52,200},{43,58,1},{0,55,208},{63,52,200},{0,55,208},{46,0,202},{46,0,202},{46,0,202},{46,0,202},{44,58,1},{44,58,1},{44,58,1},{44,51,1},{39,53,1},{39,53,1},{50,63,866},{49,63,317},{49,61,312},{48,60,216},{48,63,1048},{46,63,219},{46,60,2}, +{45,57,298},{38,63,748},{40,57,217},{53,63,350},{51,63,20},{51,60,20},{50,59,36},{63,40,723},{46,63,218},{46,60,1},{38,57,208},{62,52,723},{38,57,208},{48,63,264},{48,63,264},{48,63,264},{48,59,201},{47,63,205},{46,60,2},{46,60,2},{45,55,25},{39,60,163},{41,55,1},{51,63,4},{51,63,4},{51,63,4},{51,57,4},{63,33,162},{46,60,1},{46,60,1},{42,55,0},{47,56,162}, +{42,55,0},{63,47,200},{52,63,13},{53,60,1},{45,60,1},{63,47,200},{63,55,200},{45,60,1},{0,57,208},{63,55,200},{0,57,208},{48,0,200},{48,0,200},{48,0,200},{48,0,200},{46,60,1},{46,60,1},{46,60,1},{46,53,1},{41,55,1},{41,55,1},{53,63,1026},{51,63,440},{51,63,312},{50,62,216},{51,63,1144},{48,63,280},{48,62,5},{47,59,298},{43,63,827},{42,59,217},{55,63,428}, +{54,63,100},{53,62,20},{52,61,36},{63,46,723},{49,63,266},{48,62,1},{40,59,208},{62,55,723},{40,59,208},{51,63,296},{51,63,296},{51,63,296},{50,61,201},{49,63,248},{48,62,5},{48,62,5},{47,57,25},{41,62,163},{43,57,1},{53,63,13},{53,63,13},{53,63,13},{53,59,4},{63,39,162},{48,62,1},{48,62,1},{44,57,0},{47,59,162},{44,57,0},{63,53,200},{55,63,53},{55,62,1}, 
+{47,62,1},{63,53,200},{63,58,200},{47,62,1},{0,59,208},{63,58,200},{0,59,208},{50,0,200},{50,0,200},{50,0,200},{50,0,200},{48,62,4},{48,62,4},{48,62,4},{48,55,4},{43,57,1},{43,57,1},{54,63,955},{54,63,523},{53,63,362},{52,63,202},{54,63,1027},{51,63,283},{50,63,14},{49,60,198},{47,63,735},{44,61,118},{57,63,338},{56,63,141},{56,63,41},{55,63,17},{63,51,546}, +{53,63,233},{51,63,2},{43,61,113},{63,57,546},{43,61,113},{53,63,362},{53,63,362},{53,63,362},{52,63,202},{51,63,315},{50,63,14},{50,63,14},{49,59,19},{44,63,171},{46,59,3},{56,63,41},{56,63,41},{56,63,41},{55,61,2},{61,49,162},{51,63,2},{51,63,2},{47,59,1},{63,54,162},{47,59,1},{63,57,113},{58,63,50},{57,63,1},{52,63,0},{63,57,113},{63,60,113},{52,63,0}, +{0,61,113},{63,60,113},{0,61,113},{52,0,202},{52,0,202},{52,0,202},{52,0,202},{50,63,5},{50,63,5},{50,63,5},{50,57,2},{45,60,0},{45,60,0},{57,63,779},{56,63,542},{56,63,442},{54,63,227},{56,63,830},{54,63,251},{53,63,78},{51,62,70},{51,63,587},{47,62,25},{59,63,218},{58,63,123},{58,63,74},{57,63,2},{63,55,333},{56,63,145},{55,63,25},{47,62,25},{63,59,333}, +{47,62,25},{56,63,442},{56,63,442},{56,63,442},{54,63,227},{54,63,371},{53,63,78},{53,63,78},{51,61,19},{48,63,219},{47,61,6},{58,63,74},{58,63,74},{58,63,74},{57,63,2},{63,51,162},{55,63,25},{55,63,25},{48,61,2},{63,57,162},{48,61,2},{63,60,25},{61,63,10},{60,63,1},{58,63,0},{63,60,25},{62,62,25},{58,63,0},{0,62,25},{62,62,25},{0,62,25},{54,0,202}, +{54,0,202},{54,0,202},{54,0,202},{52,63,26},{52,63,26},{52,63,26},{52,59,2},{47,62,0},{47,62,0},{59,63,684},{57,63,538},{57,63,474},{57,63,282},{57,63,682},{56,63,285},{55,63,171},{53,63,18},{54,63,482},{49,63,5},{60,63,153},{60,63,105},{60,63,89},{59,63,37},{63,59,193},{58,63,107},{58,63,58},{50,63,1},{63,61,193},{50,63,1},{57,63,474},{57,63,474},{57,63,474}, 
+{57,63,282},{57,63,426},{55,63,171},{55,63,171},{53,63,18},{52,63,278},{49,63,5},{60,63,89},{60,63,89},{60,63,89},{59,63,37},{63,57,145},{58,63,58},{58,63,58},{50,63,1},{63,60,145},{50,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{56,0,202},{56,0,202},{56,0,202},{56,0,202},{54,63,50}, +{54,63,50},{54,63,50},{54,61,2},{49,63,5},{49,63,5},{60,63,426},{59,63,375},{59,63,339},{58,63,251},{59,63,415},{57,63,202},{57,63,138},{56,63,2},{55,63,295},{53,63,29},{62,63,43},{62,63,38},{62,63,34},{61,63,10},{63,61,54},{61,63,27},{60,63,17},{56,63,1},{63,62,54},{56,63,1},{59,63,339},{59,63,339},{59,63,339},{58,63,251},{59,63,294},{57,63,138},{57,63,138}, +{56,63,2},{55,63,174},{53,63,29},{62,63,34},{62,63,34},{62,63,34},{61,63,10},{63,60,41},{60,63,17},{60,63,17},{56,63,1},{62,62,41},{56,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{58,0,202},{58,0,202},{58,0,202},{58,0,202},{57,63,74},{57,63,74},{57,63,74},{56,63,2},{53,63,29}, +{53,63,29},{0,20,421},{0,14,50},{0,10,4},{0,9,157},{0,14,925},{0,9,589},{0,8,264},{0,6,701},{0,6,1005},{0,5,738},{0,20,421},{0,14,50},{0,10,4},{0,9,157},{7,0,925},{0,9,589},{0,8,264},{0,6,701},{14,0,925},{0,6,701},{0,10,0},{0,10,0},{0,10,0},{0,5,0},{0,5,85},{0,4,34},{0,4,34},{0,2,50},{0,2,93},{0,2,54},{0,10,0}, +{0,10,0},{0,10,0},{0,5,0},{2,1,85},{0,4,34},{0,4,34},{0,2,50},{5,0,85},{0,2,50},{10,1,421},{0,14,50},{0,10,4},{0,9,157},{10,1,421},{20,0,421},{0,9,157},{0,7,421},{20,0,421},{0,7,421},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,26,421},{0,18,13},{0,13,20}, +{0,11,100},{0,18,1261},{0,11,701},{0,10,294},{0,7,857},{0,8,1382},{0,7,938},{0,26,421},{0,18,13},{0,13,20},{0,11,100},{9,0,1261},{0,11,701},{0,10,294},{0,7,857},{18,0,1261},{0,7,857},{0,16,0},{0,16,0},{0,16,0},{0,8,0},{0,8,221},{0,6,89},{0,6,89},{0,4,125},{0,4,246},{0,4,150},{0,16,0},{0,16,0},{0,16,0},{0,8,0},{4,0,221}, 
+{0,6,89},{0,6,89},{0,4,125},{8,0,221},{0,4,125},{13,1,421},{0,18,13},{2,12,4},{0,11,100},{13,1,421},{26,0,421},{0,11,100},{0,9,421},{26,0,421},{0,9,421},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,32,430},{0,22,10},{1,15,70},{0,13,70},{0,22,1517},{0,14,730},{0,13,257}, +{0,9,974},{0,11,1713},{0,8,1109},{1,30,422},{1,21,5},{2,15,45},{0,13,70},{11,1,1514},{0,14,730},{0,13,257},{0,9,974},{15,4,1514},{0,9,974},{0,22,9},{0,22,9},{0,22,9},{0,11,9},{0,12,338},{0,9,106},{0,9,106},{0,5,181},{0,5,392},{0,5,217},{1,20,1},{1,20,1},{1,20,1},{1,10,2},{6,0,338},{0,9,106},{0,9,106},{0,5,181},{12,0,338}, +{0,5,181},{16,0,421},{0,22,1},{4,14,4},{0,13,61},{16,0,421},{32,0,421},{0,13,61},{0,11,421},{32,0,421},{0,11,421},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,2,1},{0,2,1},{0,2,1},{0,1,1},{0,1,2},{0,1,2},{1,36,486},{1,24,69},{2,17,163},{1,16,115},{0,28,1517},{0,18,614},{0,15,126},{0,11,857},{0,14,1841},{0,11,1053},{3,32,422}, +{3,23,5},{3,17,46},{2,15,70},{14,1,1514},{0,18,614},{0,15,126},{0,11,857},{15,7,1514},{0,11,857},{1,26,65},{1,26,65},{1,26,65},{1,13,66},{0,18,338},{0,13,45},{0,13,45},{0,8,125},{0,8,456},{0,7,211},{3,22,1},{3,22,1},{3,22,1},{3,12,2},{9,0,338},{0,13,45},{0,13,45},{0,8,125},{18,0,338},{0,8,125},{19,0,421},{2,24,1},{6,16,1}, +{0,16,37},{19,0,421},{38,0,421},{0,16,37},{0,13,421},{38,0,421},{0,13,421},{1,0,65},{1,0,65},{1,0,65},{1,0,65},{0,8,1},{0,8,1},{0,8,1},{0,4,1},{0,3,25},{0,3,25},{3,39,629},{3,27,213},{3,20,365},{2,18,237},{0,35,1514},{0,22,506},{0,18,24},{0,14,750},{0,16,2003},{0,13,1050},{5,35,421},{5,25,2},{6,19,45},{5,17,72},{17,1,1514}, +{0,22,506},{0,18,24},{0,14,750},{35,0,1514},{0,14,750},{3,28,209},{3,28,209},{3,28,209},{2,16,208},{0,25,338},{0,17,9},{0,17,9},{0,10,72},{0,11,566},{0,10,241},{5,24,1},{5,24,1},{5,24,1},{5,15,0},{12,1,338},{0,17,9},{0,17,9},{0,10,72},{25,0,338},{0,10,72},{22,1,421},{4,26,1},{8,18,5},{0,18,8},{22,1,421},{45,0,421},{0,18,8}, 
+{0,15,421},{45,0,421},{0,15,421},{2,0,208},{2,0,208},{2,0,208},{2,0,208},{0,15,0},{0,15,0},{0,15,0},{0,8,1},{0,6,80},{0,6,80},{4,43,821},{4,30,418},{5,22,621},{3,20,420},{0,41,1514},{0,25,450},{0,20,5},{0,16,670},{0,19,2187},{0,15,1109},{7,37,421},{7,27,2},{8,21,45},{7,19,72},{20,1,1514},{0,25,450},{0,20,5},{0,16,670},{41,0,1514}, +{0,16,670},{4,32,400},{4,32,400},{4,32,400},{4,18,400},{0,31,338},{0,20,1},{0,20,1},{0,13,40},{0,14,694},{0,11,297},{7,26,1},{7,26,1},{7,26,1},{7,17,1},{15,1,338},{0,20,1},{0,20,1},{0,13,40},{31,0,338},{0,13,40},{25,1,421},{6,28,1},{10,20,5},{0,20,4},{25,1,421},{51,0,421},{0,20,4},{0,17,421},{51,0,421},{0,17,421},{3,0,400}, +{3,0,400},{3,0,400},{3,0,400},{0,21,0},{0,21,0},{0,21,0},{0,11,1},{0,8,149},{0,8,149},{5,47,846},{6,32,451},{6,23,662},{5,22,445},{2,43,1515},{1,28,446},{2,22,6},{0,18,638},{0,22,2046},{0,18,878},{9,39,421},{9,29,2},{10,23,45},{9,21,72},{23,1,1514},{0,28,426},{2,22,5},{0,18,589},{47,0,1514},{0,18,589},{5,36,425},{5,36,425},{5,36,425}, +{5,21,426},{2,32,341},{2,22,2},{2,22,2},{1,15,38},{0,16,606},{0,14,158},{9,28,1},{9,28,1},{9,28,1},{9,19,1},{15,7,338},{2,22,1},{2,22,1},{0,15,20},{31,3,338},{0,15,20},{28,1,421},{8,30,1},{12,22,5},{2,22,4},{28,1,421},{57,0,421},{2,22,4},{0,19,421},{57,0,421},{0,19,421},{5,0,425},{5,0,425},{5,0,425},{5,0,425},{2,23,1}, +{2,23,1},{2,23,1},{2,13,2},{0,12,97},{0,12,97},{7,49,846},{8,33,450},{8,25,662},{7,24,445},{4,45,1515},{3,30,446},{4,24,6},{2,20,638},{0,25,1886},{0,19,682},{11,41,421},{11,31,2},{12,25,45},{11,23,72},{26,1,1514},{1,31,421},{4,24,5},{0,20,545},{53,0,1514},{0,20,545},{7,38,425},{7,38,425},{7,38,425},{7,23,426},{4,34,341},{4,24,2},{4,24,2}, +{4,16,42},{0,20,500},{0,16,62},{11,30,1},{11,30,1},{11,30,1},{11,21,1},{15,13,338},{4,24,1},{4,24,1},{0,17,5},{31,6,338},{0,17,5},{31,1,421},{10,32,1},{14,24,5},{4,24,4},{31,1,421},{63,0,421},{4,24,4},{0,21,421},{63,0,421},{0,21,421},{7,0,425},{7,0,425},{7,0,425},{7,0,425},{4,25,1},{4,25,1},{4,25,1},{4,15,2},{0,16,37}, 
+{0,16,37},{10,49,846},{10,36,445},{11,28,650},{10,26,450},{6,47,1517},{5,32,446},{6,27,3},{4,22,646},{0,29,1751},{0,22,522},{13,44,422},{13,34,3},{14,27,46},{13,26,74},{30,0,1514},{3,33,422},{6,27,3},{0,22,497},{46,7,1514},{0,22,497},{10,39,425},{10,39,425},{10,39,425},{10,24,426},{6,37,338},{6,27,2},{6,27,2},{6,19,40},{0,24,410},{0,19,9},{13,33,1}, +{13,33,1},{13,33,1},{13,23,1},{24,1,338},{7,26,0},{7,26,0},{1,19,5},{49,0,338},{1,19,5},{35,0,421},{12,34,1},{16,27,1},{5,27,1},{35,0,421},{63,3,421},{5,27,1},{0,23,421},{63,3,421},{0,23,421},{10,0,425},{10,0,425},{10,0,425},{10,0,425},{6,28,0},{6,28,0},{6,28,0},{6,17,1},{0,19,5},{0,19,5},{12,51,846},{12,38,445},{13,30,650}, +{12,28,450},{8,49,1517},{7,34,446},{8,29,3},{6,24,646},{0,32,1647},{0,25,458},{15,46,422},{15,36,3},{16,29,42},{15,28,74},{32,1,1514},{5,35,422},{8,29,3},{0,25,457},{46,10,1514},{0,25,457},{12,41,425},{12,41,425},{12,41,425},{12,26,426},{8,39,338},{8,29,2},{8,29,2},{8,21,40},{0,27,362},{1,22,5},{15,35,1},{15,35,1},{15,35,1},{15,25,1},{27,1,338}, +{9,28,0},{9,28,0},{3,21,5},{55,0,338},{3,21,5},{38,0,421},{14,36,1},{18,29,1},{7,29,1},{38,0,421},{63,6,421},{7,29,1},{0,25,421},{63,6,421},{0,25,421},{12,0,425},{12,0,425},{12,0,425},{12,0,425},{8,30,0},{8,30,0},{8,30,0},{8,19,1},{1,22,1},{1,22,1},{14,53,846},{14,40,445},{15,32,661},{14,30,450},{10,51,1517},{9,36,446},{10,31,3}, +{8,26,646},{0,35,1575},{0,27,446},{17,47,421},{17,37,2},{18,31,42},{17,30,73},{35,1,1514},{7,37,422},{10,31,3},{0,27,430},{46,13,1514},{0,27,430},{14,43,425},{14,43,425},{14,43,425},{14,28,426},{10,41,338},{10,31,2},{10,31,2},{10,23,40},{0,31,341},{3,24,5},{17,36,1},{17,36,1},{17,36,1},{17,27,0},{30,1,338},{11,30,0},{11,30,0},{5,23,5},{61,0,338}, 
+{5,23,5},{41,0,421},{16,38,1},{20,31,1},{9,31,1},{41,0,421},{63,9,421},{9,31,1},{0,27,421},{63,9,421},{0,27,421},{14,0,425},{14,0,425},{14,0,425},{14,0,425},{10,31,1},{10,31,1},{10,31,1},{10,21,1},{3,24,1},{3,24,1},{16,55,854},{16,42,451},{16,33,662},{15,32,446},{12,53,1517},{11,38,446},{12,32,9},{10,28,646},{0,39,1533},{2,29,446},{19,49,421}, +{19,39,2},{20,33,45},{18,31,81},{38,1,1514},{9,39,422},{13,32,6},{0,29,422},{45,16,1514},{0,29,422},{15,47,433},{15,47,433},{15,47,433},{15,31,433},{12,43,338},{12,32,5},{12,32,5},{12,25,40},{1,33,339},{5,26,5},{19,38,1},{19,38,1},{19,38,1},{19,29,0},{33,1,338},{13,32,2},{13,32,2},{7,25,5},{63,2,338},{7,25,5},{44,0,421},{18,40,1},{22,32,5}, +{12,32,5},{44,0,421},{63,12,421},{12,32,5},{0,29,421},{63,12,421},{0,29,421},{15,0,433},{15,0,433},{15,0,433},{15,0,433},{12,33,1},{12,33,1},{12,33,1},{12,23,1},{5,26,1},{5,26,1},{18,58,846},{18,44,445},{19,36,650},{18,34,450},{14,56,1515},{14,40,447},{14,35,7},{13,31,646},{0,42,1515},{5,31,445},{21,52,422},{21,42,2},{22,35,46},{21,34,74},{42,0,1514}, +{12,41,422},{14,35,6},{2,31,425},{62,11,1514},{2,31,425},{18,47,425},{18,47,425},{18,47,425},{18,32,426},{14,46,339},{14,35,6},{14,35,6},{14,27,35},{4,35,341},{8,28,5},{21,41,1},{21,41,1},{21,41,1},{21,31,2},{31,12,338},{15,34,0},{15,34,0},{8,28,1},{62,6,338},{8,28,1},{47,0,421},{21,42,1},{24,35,1},{13,35,1},{47,0,421},{62,16,421},{13,35,1}, +{0,31,425},{62,16,421},{0,31,425},{18,0,425},{18,0,425},{18,0,425},{18,0,425},{14,36,1},{14,36,1},{14,36,1},{14,25,1},{7,29,1},{7,29,1},{20,60,846},{20,46,445},{21,38,650},{20,36,450},{16,58,1514},{16,41,449},{16,37,3},{15,32,630},{2,44,1515},{7,33,441},{23,54,422},{23,44,2},{24,37,46},{23,36,74},{45,0,1514},{14,43,422},{16,37,3},{4,33,421},{62,14,1514}, 
+{4,33,421},{20,49,425},{20,49,425},{20,49,425},{20,34,426},{16,47,340},{16,37,2},{16,37,2},{16,29,34},{6,37,341},{10,30,5},{23,43,1},{23,43,1},{23,43,1},{23,33,1},{31,18,338},{17,36,0},{17,36,0},{10,30,1},{62,9,338},{10,30,1},{50,0,421},{23,44,1},{26,37,1},{15,37,1},{50,0,421},{62,19,421},{15,37,1},{0,33,421},{62,19,421},{0,33,421},{20,0,425}, +{20,0,425},{20,0,425},{20,0,425},{16,38,0},{16,38,0},{16,38,0},{16,27,0},{9,31,1},{9,31,1},{22,62,846},{22,48,445},{23,40,650},{22,38,450},{18,60,1514},{18,43,449},{18,39,3},{16,34,646},{4,46,1515},{9,35,441},{25,56,422},{25,46,2},{26,39,46},{25,38,74},{47,2,1514},{16,45,425},{18,39,3},{6,35,421},{62,17,1514},{6,35,421},{22,51,425},{22,51,425},{22,51,425}, +{22,36,426},{18,49,338},{18,39,2},{18,39,2},{18,31,34},{8,39,341},{11,32,6},{25,45,1},{25,45,1},{25,45,1},{25,35,1},{34,17,338},{19,38,0},{19,38,0},{12,32,4},{62,12,338},{12,32,4},{53,0,421},{25,46,1},{28,39,1},{17,39,1},{53,0,421},{62,22,421},{17,39,1},{0,35,421},{62,22,421},{0,35,421},{22,0,425},{22,0,425},{22,0,425},{22,0,425},{18,40,0}, +{18,40,0},{18,40,0},{18,29,0},{11,32,2},{11,32,2},{24,63,850},{24,50,445},{25,42,650},{24,40,450},{20,62,1514},{20,45,449},{20,41,3},{18,36,646},{6,48,1517},{11,37,441},{27,58,422},{27,48,3},{28,41,46},{27,40,74},{47,8,1514},{18,47,425},{20,41,3},{8,37,421},{62,20,1514},{8,37,421},{24,53,425},{24,53,425},{24,53,425},{24,38,426},{20,51,338},{20,41,2},{20,41,2}, +{20,33,40},{10,41,341},{13,34,6},{27,47,1},{27,47,1},{27,47,1},{27,37,1},{37,17,338},{21,40,0},{21,40,0},{14,34,4},{62,15,338},{14,34,4},{56,0,421},{26,48,1},{30,41,1},{19,41,1},{56,0,421},{62,25,421},{19,41,1},{0,37,421},{62,25,421},{0,37,421},{24,0,425},{24,0,425},{24,0,425},{24,0,425},{20,42,0},{20,42,0},{20,42,0},{20,31,0},{13,34,2}, 
+{13,34,2},{26,63,882},{26,52,447},{27,44,646},{26,42,446},{23,62,1526},{22,48,447},{22,43,5},{21,39,646},{8,50,1515},{13,39,443},{30,58,425},{29,50,4},{30,44,45},{29,42,69},{54,0,1514},{20,49,422},{22,43,4},{10,39,426},{47,31,1514},{10,39,426},{26,56,421},{26,56,421},{26,56,421},{26,41,421},{22,54,339},{22,43,5},{22,43,5},{22,35,35},{12,43,338},{16,36,5},{30,48,4}, +{30,48,4},{30,48,4},{30,39,4},{49,0,338},{23,42,2},{23,42,2},{16,36,1},{62,18,338},{16,36,1},{59,0,421},{29,50,0},{32,43,1},{22,43,0},{59,0,421},{62,28,421},{22,43,0},{0,39,425},{62,28,421},{0,39,425},{26,0,421},{26,0,421},{26,0,421},{26,0,421},{22,44,2},{22,44,2},{22,44,2},{22,33,1},{15,37,1},{15,37,1},{29,63,922},{28,54,447},{29,46,646}, +{28,44,446},{25,63,1535},{24,50,447},{24,45,5},{23,41,646},{10,52,1515},{15,41,443},{31,62,425},{31,52,4},{32,45,45},{31,44,69},{57,0,1514},{22,51,422},{24,45,4},{12,41,426},{50,32,1514},{12,41,426},{28,58,421},{28,58,421},{28,58,421},{28,43,421},{24,56,339},{24,45,5},{24,45,5},{24,37,35},{14,45,338},{18,38,5},{31,52,4},{31,52,4},{31,52,4},{31,41,5},{52,0,338}, +{25,44,2},{25,44,2},{18,38,1},{62,21,338},{18,38,1},{62,0,421},{31,52,0},{34,45,1},{24,45,0},{62,0,421},{62,31,421},{24,45,0},{0,41,425},{62,31,421},{0,41,425},{28,0,421},{28,0,421},{28,0,421},{28,0,421},{24,46,2},{24,46,2},{24,46,2},{24,35,1},{17,39,1},{17,39,1},{31,63,994},{30,56,447},{31,48,655},{30,46,446},{28,63,1575},{26,52,447},{26,47,5}, +{25,43,646},{12,54,1515},{17,43,445},{33,63,426},{33,54,2},{34,47,45},{33,46,66},{60,0,1514},{24,53,422},{26,47,4},{14,43,426},{56,32,1514},{14,43,426},{30,60,421},{30,60,421},{30,60,421},{30,45,421},{26,58,339},{26,47,5},{26,47,5},{26,39,35},{16,47,338},{20,40,5},{33,53,1},{33,53,1},{33,53,1},{33,43,2},{55,0,338},{27,46,2},{27,46,2},{20,40,1},{62,24,338}, 
+{20,40,1},{63,4,421},{33,54,1},{36,47,1},{26,47,0},{63,4,421},{62,34,421},{26,47,0},{0,43,425},{62,34,421},{0,43,425},{30,0,421},{30,0,421},{30,0,421},{30,0,421},{26,48,1},{26,48,1},{26,48,1},{26,37,1},{19,41,1},{19,41,1},{33,63,1082},{32,58,445},{33,50,650},{32,48,450},{30,63,1638},{28,54,447},{28,49,7},{27,45,646},{14,56,1515},{19,45,445},{36,63,434}, +{35,56,2},{36,49,46},{35,48,74},{63,0,1514},{26,55,422},{28,49,6},{16,45,425},{62,32,1514},{16,45,425},{32,61,425},{32,61,425},{32,61,425},{32,47,426},{28,60,339},{28,49,6},{28,49,6},{28,41,35},{18,49,341},{22,42,5},{35,55,1},{35,55,1},{35,55,1},{35,45,2},{58,0,338},{29,48,0},{29,48,0},{22,42,1},{62,27,338},{22,42,1},{63,10,421},{35,56,1},{38,49,1}, +{27,49,1},{63,10,421},{62,37,421},{27,49,1},{0,45,425},{62,37,421},{0,45,425},{32,0,425},{32,0,425},{32,0,425},{32,0,425},{28,50,1},{28,50,1},{28,50,1},{28,39,1},{21,43,1},{21,43,1},{36,63,1206},{34,61,446},{35,52,646},{34,50,446},{33,63,1710},{30,56,445},{31,51,5},{29,47,638},{17,58,1515},{22,47,450},{39,63,469},{37,58,5},{38,52,45},{37,50,69},{63,7,1514}, +{27,58,422},{31,51,1},{19,47,433},{63,35,1514},{19,47,433},{34,63,422},{34,63,422},{34,63,422},{34,49,421},{31,61,342},{31,51,5},{31,51,5},{30,43,36},{20,51,338},{24,44,2},{38,56,4},{38,56,4},{38,56,4},{38,47,4},{53,16,338},{31,51,1},{31,51,1},{25,44,0},{63,30,338},{25,44,0},{63,17,421},{37,58,1},{40,51,1},{30,51,0},{63,17,421},{63,40,421},{30,51,0}, +{0,47,433},{63,40,421},{0,47,433},{34,0,421},{34,0,421},{34,0,421},{34,0,421},{31,51,4},{31,51,4},{31,51,4},{31,41,4},{23,45,1},{23,45,1},{38,63,1350},{36,63,446},{37,54,646},{36,52,446},{36,63,1814},{32,58,447},{32,53,5},{31,48,638},{19,60,1515},{23,49,443},{41,63,517},{39,60,5},{40,54,45},{39,52,69},{63,13,1514},{29,60,422},{32,53,4},{20,49,426},{63,38,1514}, 
+{20,49,426},{36,63,437},{36,63,437},{36,63,437},{36,51,421},{33,62,347},{32,53,5},{32,53,5},{32,45,33},{22,53,338},{26,46,2},{40,58,4},{40,58,4},{40,58,4},{40,49,4},{56,16,338},{33,52,2},{33,52,2},{27,46,0},{62,33,338},{27,46,0},{63,23,421},{39,60,1},{42,53,1},{32,53,0},{63,23,421},{63,43,421},{32,53,0},{0,49,425},{63,43,421},{0,49,425},{36,0,421}, +{36,0,421},{36,0,421},{36,0,421},{32,54,2},{32,54,2},{32,54,2},{32,43,2},{25,47,1},{25,47,1},{40,63,1466},{38,63,474},{39,56,646},{38,54,446},{38,63,1931},{34,60,447},{34,55,5},{33,51,646},{21,62,1515},{25,51,443},{43,63,569},{41,62,5},{42,56,45},{41,54,69},{63,19,1514},{32,61,422},{34,55,4},{22,51,426},{63,41,1514},{22,51,426},{38,63,470},{38,63,470},{38,63,470}, +{38,53,421},{35,63,355},{34,55,5},{34,55,5},{34,47,33},{24,55,338},{28,48,3},{42,60,4},{42,60,4},{42,60,4},{42,51,4},{59,16,338},{35,54,2},{35,54,2},{28,48,2},{62,36,338},{28,48,2},{63,29,421},{41,62,1},{44,55,1},{34,55,0},{63,29,421},{63,46,421},{34,55,0},{0,51,425},{63,46,421},{0,51,425},{38,0,421},{38,0,421},{38,0,421},{38,0,421},{34,56,2}, +{34,56,2},{34,56,2},{34,45,2},{27,49,1},{27,49,1},{43,63,1634},{41,63,546},{41,58,646},{40,56,446},{40,63,2039},{36,62,447},{36,57,5},{35,53,646},{24,63,1521},{27,53,443},{46,63,633},{44,63,9},{44,58,45},{43,56,69},{63,25,1514},{34,63,422},{36,57,4},{24,53,426},{63,44,1514},{24,53,426},{40,63,502},{40,63,502},{40,63,502},{40,55,421},{37,63,379},{36,57,5},{36,57,5}, +{36,49,35},{26,57,338},{30,50,3},{44,62,4},{44,62,4},{44,62,4},{44,53,4},{62,16,338},{37,56,2},{37,56,2},{30,50,2},{62,39,338},{30,50,2},{63,34,421},{44,63,5},{46,57,1},{36,57,0},{63,34,421},{62,49,421},{36,57,0},{0,53,425},{62,49,421},{0,53,425},{40,0,421},{40,0,421},{40,0,421},{40,0,421},{36,58,2},{36,58,2},{36,58,2},{36,47,2},{29,51,1}, 
+{29,51,1},{45,63,1866},{43,63,689},{43,60,650},{42,59,447},{43,63,2201},{38,63,469},{39,59,6},{37,55,638},{29,63,1590},{30,55,450},{48,63,741},{46,63,69},{46,60,46},{45,58,70},{62,33,1514},{38,63,465},{39,59,2},{25,56,425},{61,48,1514},{25,56,425},{43,63,545},{43,63,545},{43,63,545},{42,57,422},{40,63,424},{39,59,5},{39,59,5},{38,51,36},{29,59,339},{32,52,2},{46,63,5}, +{46,63,5},{46,63,5},{46,55,2},{63,21,338},{39,59,1},{39,59,1},{33,52,0},{63,42,338},{33,52,0},{63,41,421},{48,63,41},{49,59,2},{38,59,2},{63,41,421},{63,52,421},{38,59,2},{0,56,425},{63,52,421},{0,56,425},{42,0,421},{42,0,421},{42,0,421},{42,0,421},{39,59,4},{39,59,4},{39,59,4},{39,49,4},{31,53,1},{31,53,1},{48,63,2070},{46,63,889},{45,62,650}, +{44,61,447},{46,63,2361},{41,63,573},{41,61,6},{39,57,638},{32,63,1710},{32,57,450},{51,63,837},{49,63,184},{48,62,49},{47,60,70},{63,37,1514},{43,63,545},{41,61,2},{27,58,425},{63,50,1514},{27,58,425},{45,63,614},{45,63,614},{45,63,614},{44,59,422},{43,63,488},{41,61,5},{41,61,5},{40,53,36},{31,61,339},{34,54,2},{48,63,20},{48,63,20},{48,63,20},{48,57,4},{63,27,338}, +{41,61,1},{41,61,1},{35,54,0},{63,45,338},{35,54,0},{63,47,421},{51,63,97},{51,61,2},{40,61,2},{63,47,421},{63,55,421},{40,61,2},{0,58,425},{63,55,421},{0,58,425},{44,0,421},{44,0,421},{44,0,421},{44,0,421},{41,61,4},{41,61,4},{41,61,4},{41,51,4},{33,55,1},{33,55,1},{50,63,2239},{48,63,1109},{47,63,701},{46,62,441},{48,63,2469},{44,63,720},{43,63,5}, +{41,59,605},{37,63,1804},{34,59,417},{54,63,916},{51,63,308},{51,63,52},{49,62,56},{63,43,1459},{46,63,618},{43,63,1},{31,59,400},{63,53,1459},{31,59,400},{47,63,701},{47,63,701},{47,63,701},{46,61,422},{45,63,566},{43,63,5},{43,63,5},{42,55,36},{32,63,341},{36,56,2},{51,63,52},{51,63,52},{51,63,52},{50,59,4},{63,33,338},{43,63,1},{43,63,1},{37,56,0},{47,56,338}, 
+{37,56,0},{63,53,392},{55,63,157},{53,63,1},{42,63,1},{63,53,392},{63,58,392},{42,63,1},{0,59,400},{63,58,392},{0,59,400},{46,0,421},{46,0,421},{46,0,421},{46,0,421},{43,63,4},{43,63,4},{43,63,4},{43,53,4},{35,57,1},{35,57,1},{51,63,1901},{50,63,1114},{49,63,785},{48,63,421},{51,63,2093},{46,63,574},{45,63,38},{44,60,356},{40,63,1476},{36,60,213},{55,63,684}, +{54,63,260},{53,63,85},{51,63,20},{63,47,1064},{49,63,426},{47,63,8},{35,60,208},{63,55,1064},{35,60,208},{49,63,785},{49,63,785},{49,63,785},{48,63,421},{48,63,661},{45,63,38},{45,63,38},{44,57,36},{37,63,371},{38,58,2},{53,63,85},{53,63,85},{53,63,85},{52,61,4},{63,39,338},{47,63,8},{47,63,8},{39,58,0},{47,59,338},{39,58,0},{63,56,200},{57,63,80},{56,63,1}, +{48,63,0},{63,56,200},{62,60,200},{48,63,0},{0,60,208},{62,60,200},{0,60,208},{48,0,421},{48,0,421},{48,0,421},{48,0,421},{45,63,13},{45,63,13},{45,63,13},{45,55,4},{37,59,1},{37,59,1},{54,63,1646},{52,63,1119},{51,63,886},{50,63,470},{53,63,1761},{48,63,526},{48,63,126},{46,61,146},{44,63,1218},{39,62,69},{57,63,450},{56,63,245},{56,63,145},{54,63,2},{63,51,722}, +{52,63,290},{51,63,50},{38,62,65},{63,57,722},{38,62,65},{51,63,886},{51,63,886},{51,63,886},{50,63,470},{50,63,776},{48,63,126},{48,63,126},{47,59,42},{41,63,446},{40,60,5},{56,63,145},{56,63,145},{56,63,145},{54,63,2},{61,49,338},{51,63,50},{51,63,50},{42,60,1},{63,54,338},{42,60,1},{63,59,61},{60,63,25},{59,63,0},{55,63,0},{63,59,61},{63,61,61},{55,63,0}, +{0,62,65},{63,61,61},{0,62,65},{50,0,421},{50,0,421},{50,0,421},{50,0,421},{48,63,45},{48,63,45},{48,63,45},{47,57,2},{40,61,1},{40,61,1},{56,63,1518},{54,63,1118},{54,63,974},{52,63,565},{54,63,1526},{51,63,534},{51,63,278},{48,62,57},{47,63,1090},{42,63,10},{59,63,354},{58,63,251},{57,63,194},{56,63,50},{63,55,509},{55,63,234},{54,63,106},{42,63,9},{63,59,509}, 
+{42,63,9},{54,63,974},{54,63,974},{54,63,974},{52,63,565},{53,63,904},{51,63,278},{51,63,278},{48,61,38},{44,63,574},{42,62,5},{57,63,194},{57,63,194},{57,63,194},{56,63,50},{63,51,338},{54,63,106},{54,63,106},{44,62,1},{63,57,338},{44,62,1},{63,62,5},{62,63,4},{62,63,0},{61,63,0},{63,62,5},{62,63,5},{61,63,0},{0,63,9},{62,63,5},{0,63,9},{52,0,421}, +{52,0,421},{52,0,421},{52,0,421},{50,63,72},{50,63,72},{50,63,72},{49,59,5},{42,63,1},{42,63,1},{57,63,1197},{56,63,994},{56,63,894},{54,63,565},{56,63,1210},{54,63,493},{53,63,332},{50,63,13},{51,63,861},{45,63,17},{60,63,209},{60,63,161},{60,63,145},{58,63,50},{63,58,294},{58,63,147},{57,63,89},{47,63,1},{62,61,294},{47,63,1},{56,63,894},{56,63,894},{56,63,894}, +{54,63,565},{54,63,781},{53,63,332},{53,63,332},{50,63,13},{48,63,501},{45,63,17},{60,63,145},{60,63,145},{60,63,145},{58,63,50},{63,55,221},{57,63,89},{57,63,89},{47,63,1},{63,59,221},{47,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{54,0,421},{54,0,421},{54,0,421},{54,0,421},{52,63,117}, +{52,63,117},{52,63,117},{51,61,5},{45,63,17},{45,63,17},{58,63,925},{57,63,765},{57,63,701},{57,63,509},{57,63,845},{55,63,423},{54,63,301},{53,63,5},{52,63,598},{49,63,52},{61,63,97},{61,63,70},{61,63,61},{60,63,17},{63,60,113},{60,63,57},{59,63,40},{53,63,1},{62,62,113},{53,63,1},{57,63,701},{57,63,701},{57,63,701},{57,63,509},{57,63,589},{54,63,301},{54,63,301}, +{53,63,5},{51,63,365},{49,63,52},{61,63,61},{61,63,61},{61,63,61},{60,63,17},{63,58,85},{59,63,40},{59,63,40},{53,63,1},{62,61,85},{53,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{56,0,421},{56,0,421},{56,0,421},{56,0,421},{54,63,157},{54,63,157},{54,63,157},{53,63,5},{49,63,52}, 
+{49,63,52},{0,29,882},{0,21,116},{0,15,4},{0,13,320},{0,19,1899},{0,14,1214},{0,11,573},{0,8,1421},{0,9,2052},{0,8,1521},{0,29,882},{0,21,116},{0,15,4},{0,13,320},{10,0,1896},{0,14,1214},{0,11,573},{0,8,1421},{14,3,1896},{0,8,1421},{0,14,0},{0,14,0},{0,14,0},{0,7,0},{0,7,162},{0,5,61},{0,5,61},{0,3,100},{0,3,180},{0,3,116},{0,14,0}, +{0,14,0},{0,14,0},{0,7,0},{3,1,162},{0,5,61},{0,5,61},{0,3,100},{7,0,162},{0,3,100},{6,17,882},{0,21,116},{0,15,4},{0,13,320},{6,17,882},{29,0,882},{0,13,320},{0,10,884},{29,0,882},{0,10,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,35,882},{0,25,50},{0,17,10}, +{0,15,260},{0,23,2355},{0,16,1355},{0,14,589},{0,10,1656},{0,11,2567},{0,10,1825},{0,35,882},{0,25,50},{0,17,10},{0,15,260},{3,17,2355},{0,16,1355},{0,14,589},{0,10,1656},{23,0,2355},{0,10,1656},{0,19,1},{0,19,1},{0,19,1},{0,10,0},{0,10,338},{0,8,125},{0,8,125},{0,4,200},{0,5,374},{0,4,225},{0,19,1},{0,19,1},{0,19,1},{0,10,0},{5,0,338}, +{0,8,125},{0,8,125},{0,4,200},{10,0,338},{0,4,200},{17,1,882},{0,25,50},{1,17,2},{0,15,260},{17,1,882},{35,0,882},{0,15,260},{0,12,884},{35,0,882},{0,12,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,41,882},{0,28,10},{1,19,69},{0,17,193},{0,28,2899},{0,19,1539},{0,16,643}, +{0,11,1965},{0,13,3209},{0,11,2161},{0,41,882},{0,28,10},{1,19,53},{0,17,193},{14,0,2899},{0,19,1539},{0,16,643},{0,11,1965},{28,0,2899},{0,11,1965},{0,25,1},{0,25,1},{0,25,1},{0,13,0},{0,13,578},{0,11,221},{0,11,221},{0,6,356},{0,6,644},{0,5,401},{0,25,1},{0,25,1},{0,25,1},{0,13,0},{6,1,578},{0,11,221},{0,11,221},{0,6,356},{13,0,578}, +{0,6,356},{20,1,882},{0,28,10},{3,19,2},{0,17,193},{20,1,882},{41,0,882},{0,17,193},{0,14,884},{41,0,882},{0,14,884},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,45,902},{1,31,24},{1,21,142},{0,19,166},{0,33,3048},{0,22,1443},{0,18,481},{0,13,1908},{0,16,3492},{0,13,2197},{2,43,886}, 
+{1,31,8},{3,21,73},{1,19,153},{16,1,3048},{0,22,1443},{0,18,481},{0,13,1908},{33,0,3048},{0,13,1908},{1,29,21},{1,29,21},{1,29,21},{1,15,21},{0,18,648},{0,14,169},{0,14,169},{0,8,325},{0,8,766},{0,7,421},{2,27,5},{2,27,5},{2,27,5},{2,15,4},{9,0,648},{0,14,169},{0,14,169},{0,8,325},{18,0,648},{0,8,325},{23,1,882},{0,31,2},{5,21,2}, +{0,19,130},{23,1,882},{47,0,882},{0,19,130},{0,16,890},{47,0,882},{0,16,890},{1,0,20},{1,0,20},{1,0,20},{1,0,20},{0,5,0},{0,5,0},{0,5,0},{0,2,1},{0,2,5},{0,2,5},{2,49,997},{2,34,114},{3,24,290},{1,22,234},{0,39,3051},{0,25,1256},{0,21,258},{0,16,1764},{0,19,3685},{0,15,2195},{4,46,883},{3,33,10},{5,23,74},{3,21,154},{15,10,3048}, +{0,25,1256},{0,21,258},{0,16,1764},{30,5,3048},{0,16,1764},{2,34,113},{2,34,113},{2,34,113},{2,18,113},{0,25,648},{0,17,89},{0,17,89},{0,10,242},{0,11,876},{0,10,411},{4,30,1},{4,30,1},{4,30,1},{4,17,1},{12,1,648},{0,17,89},{0,17,89},{0,10,242},{25,0,648},{0,10,242},{27,0,882},{3,33,1},{8,23,1},{0,22,80},{27,0,882},{46,4,882},{0,22,80}, +{0,18,884},{46,4,882},{0,18,884},{2,0,113},{2,0,113},{2,0,113},{2,0,113},{0,11,1},{0,11,1},{0,11,1},{0,6,1},{0,5,40},{0,5,40},{3,53,1149},{3,36,274},{4,27,513},{3,24,362},{0,45,3051},{0,28,1128},{0,24,122},{0,18,1605},{0,22,3901},{0,16,2173},{6,47,885},{5,35,10},{7,25,74},{5,23,154},{15,16,3048},{0,28,1128},{0,24,122},{0,18,1605},{30,8,3048}, +{0,18,1605},{3,38,265},{3,38,265},{3,38,265},{3,20,266},{0,31,648},{0,21,34},{0,21,34},{0,13,170},{0,14,1004},{0,11,457},{6,32,1},{6,32,1},{6,32,1},{6,19,1},{15,1,648},{0,21,34},{0,21,34},{0,13,170},{31,0,648},{0,13,170},{30,0,882},{5,35,1},{10,25,1},{0,24,41},{30,0,882},{46,7,882},{0,24,41},{0,20,884},{46,7,882},{0,20,884},{3,0,265}, 
+{3,0,265},{3,0,265},{3,0,265},{0,17,0},{0,17,0},{0,17,0},{0,9,1},{0,8,104},{0,8,104},{4,57,1365},{4,39,498},{5,28,793},{4,26,561},{0,51,3048},{0,31,1032},{0,26,41},{0,20,1509},{0,24,4147},{0,19,2149},{8,49,885},{7,37,10},{9,27,74},{7,25,154},{17,17,3048},{0,31,1032},{0,26,41},{0,20,1509},{51,0,3048},{0,20,1509},{4,42,481},{4,42,481},{4,42,481}, +{4,23,481},{0,36,650},{0,25,4},{0,25,4},{0,15,130},{0,16,1161},{0,14,481},{8,34,1},{8,34,1},{8,34,1},{8,21,1},{15,7,648},{0,25,4},{0,25,4},{0,15,130},{31,3,648},{0,15,130},{32,1,882},{7,37,1},{12,27,1},{0,26,25},{32,1,882},{46,10,882},{0,26,25},{0,22,884},{46,10,882},{0,22,884},{4,0,481},{4,0,481},{4,0,481},{4,0,481},{0,23,0}, +{0,23,0},{0,23,0},{0,12,1},{0,9,193},{0,9,193},{5,61,1645},{5,42,795},{7,30,1177},{4,28,826},{0,57,3048},{0,35,954},{0,29,9},{0,22,1380},{0,27,4419},{0,21,2197},{10,51,885},{9,39,10},{11,29,74},{9,27,154},{20,17,3048},{0,35,954},{0,29,9},{0,22,1380},{57,0,3048},{0,22,1380},{5,46,761},{5,46,761},{5,46,761},{5,25,762},{0,42,650},{0,28,4},{0,28,4}, +{0,17,85},{0,19,1345},{0,16,557},{10,36,1},{10,36,1},{10,36,1},{10,23,1},{15,13,648},{1,27,1},{1,27,1},{0,17,85},{31,6,648},{0,17,85},{35,1,882},{9,39,1},{14,29,1},{0,29,9},{35,1,882},{46,13,882},{0,29,9},{0,24,884},{46,13,882},{0,24,884},{5,0,761},{5,0,761},{5,0,761},{5,0,761},{0,29,0},{0,29,0},{0,29,0},{0,15,1},{0,11,296}, +{0,11,296},{7,63,1774},{7,44,924},{8,33,1320},{6,30,941},{2,60,3052},{1,38,924},{2,31,10},{0,24,1302},{0,31,4300},{0,24,1942},{12,54,882},{12,40,8},{13,32,68},{11,29,161},{32,0,3048},{0,39,898},{2,31,6},{0,24,1266},{62,1,3048},{0,24,1266},{7,48,885},{7,48,885},{7,48,885},{7,28,884},{2,45,652},{2,31,9},{2,31,9},{1,20,62},{0,22,1275},{0,19,419},{12,39,0}, 
+{12,39,0},{12,39,0},{12,25,1},{24,1,648},{3,30,0},{3,30,0},{0,20,45},{49,0,648},{0,20,45},{39,0,882},{11,41,1},{16,31,2},{1,31,1},{39,0,882},{62,8,882},{1,31,1},{0,26,882},{62,8,882},{0,26,882},{7,0,884},{7,0,884},{7,0,884},{7,0,884},{2,32,4},{2,32,4},{2,32,4},{2,17,4},{0,16,250},{0,16,250},{9,63,1798},{9,46,924},{10,35,1320}, +{9,32,936},{4,62,3052},{3,40,924},{4,33,9},{2,26,1302},{0,33,4023},{0,26,1647},{14,56,882},{14,42,8},{15,34,68},{13,31,161},{35,0,3048},{0,42,882},{4,33,5},{0,27,1170},{62,4,3048},{0,27,1170},{9,50,885},{9,50,885},{9,50,885},{9,30,884},{4,47,652},{4,32,8},{4,32,8},{3,22,62},{0,25,1107},{0,22,243},{14,41,0},{14,41,0},{14,41,0},{14,27,1},{27,1,648}, +{5,32,1},{5,32,1},{0,22,18},{55,0,648},{0,22,18},{42,0,882},{13,43,1},{18,33,1},{2,33,0},{42,0,882},{62,11,882},{2,33,0},{0,28,882},{62,11,882},{0,28,882},{9,0,884},{9,0,884},{9,0,884},{9,0,884},{4,34,4},{4,34,4},{4,34,4},{4,19,4},{0,19,146},{0,19,146},{12,63,1846},{11,48,924},{12,37,1320},{11,34,936},{6,63,3055},{5,42,924},{6,35,9}, +{4,28,1302},{0,36,3799},{0,28,1356},{16,58,883},{16,44,9},{17,35,74},{15,33,161},{38,0,3048},{2,44,882},{6,35,5},{0,29,1110},{62,7,3048},{0,29,1110},{11,52,885},{11,52,885},{11,52,885},{11,32,885},{6,49,652},{6,34,8},{6,34,8},{5,24,62},{0,29,969},{0,24,109},{16,42,1},{16,42,1},{16,42,1},{16,29,2},{30,1,648},{7,34,1},{7,34,1},{0,24,9},{61,0,648}, +{0,24,9},{45,0,882},{15,45,1},{20,35,1},{4,35,0},{45,0,882},{62,14,882},{4,35,0},{0,30,882},{62,14,882},{0,30,882},{11,0,884},{11,0,884},{11,0,884},{11,0,884},{6,36,4},{6,36,4},{6,36,4},{6,21,4},{0,22,74},{0,22,74},{14,63,1912},{13,50,924},{14,39,1320},{13,36,936},{9,63,3087},{7,44,924},{8,37,9},{6,30,1302},{0,39,3607},{0,31,1156},{18,60,883}, 
+{18,46,9},{19,37,74},{17,35,154},{41,0,3048},{4,46,882},{8,37,5},{0,31,1035},{62,10,3048},{0,31,1035},{13,54,885},{13,54,885},{13,54,885},{13,34,885},{8,51,652},{8,36,8},{8,36,8},{7,26,62},{0,33,846},{0,27,37},{18,44,1},{18,44,1},{18,44,1},{18,31,2},{33,1,648},{9,36,1},{9,36,1},{0,27,1},{63,2,648},{0,27,1},{47,2,882},{16,48,1},{22,37,1}, +{6,37,0},{47,2,882},{62,17,882},{6,37,0},{0,32,884},{62,17,882},{0,32,884},{13,0,884},{13,0,884},{13,0,884},{13,0,884},{8,38,4},{8,38,4},{8,38,4},{8,23,4},{0,26,29},{0,26,29},{16,63,2014},{15,52,932},{16,40,1341},{15,38,932},{12,63,3141},{9,46,924},{10,39,7},{9,32,1308},{0,43,3436},{0,33,1003},{20,62,884},{20,48,8},{21,40,74},{19,37,161},{36,16,3048}, +{6,48,882},{10,39,6},{0,33,978},{63,13,3048},{0,33,978},{15,57,882},{15,57,882},{15,57,882},{15,36,882},{10,54,649},{10,39,6},{10,39,6},{9,28,61},{0,36,737},{1,29,8},{20,47,0},{20,47,0},{20,47,0},{20,33,1},{31,12,648},{11,38,1},{11,38,1},{2,29,1},{62,6,648},{2,29,1},{51,0,882},{19,49,1},{24,39,1},{9,39,1},{51,0,882},{47,28,882},{9,39,1}, +{0,34,882},{47,28,882},{0,34,882},{15,0,882},{15,0,882},{15,0,882},{15,0,882},{10,40,2},{10,40,2},{10,40,2},{10,25,2},{0,30,2},{0,30,2},{19,63,2126},{17,54,924},{18,42,1341},{16,40,941},{14,63,3204},{11,48,924},{12,41,7},{11,34,1308},{0,46,3300},{0,35,939},{22,63,890},{22,50,8},{23,42,74},{21,39,161},{39,16,3048},{8,50,882},{12,41,6},{0,35,939},{62,16,3048}, +{0,35,939},{17,59,884},{17,59,884},{17,59,884},{17,38,884},{12,56,649},{12,41,6},{12,41,6},{11,30,61},{0,39,681},{3,31,8},{22,49,0},{22,49,0},{22,49,0},{22,35,1},{31,18,648},{13,40,1},{13,40,1},{4,31,1},{62,9,648},{4,31,1},{54,0,882},{21,51,1},{26,41,1},{11,41,1},{54,0,882},{47,31,882},{11,41,1},{0,36,882},{47,31,882},{0,36,882},{17,0,884}, 
+{17,0,884},{17,0,884},{17,0,884},{12,42,2},{12,42,2},{12,42,2},{12,27,2},{2,32,1},{2,32,1},{20,63,2264},{19,56,924},{20,44,1341},{18,42,941},{17,63,3292},{13,50,924},{14,43,7},{13,36,1308},{0,50,3192},{0,38,923},{25,63,906},{24,52,8},{25,44,74},{23,41,161},{50,0,3048},{10,52,882},{14,43,6},{0,38,907},{62,19,3048},{0,38,907},{19,61,884},{19,61,884},{19,61,884}, +{19,40,884},{14,58,649},{14,43,6},{14,43,6},{13,32,67},{0,42,657},{4,33,10},{24,51,0},{24,51,0},{24,51,0},{24,37,1},{34,17,648},{15,42,1},{15,42,1},{6,33,1},{62,12,648},{6,33,1},{57,0,882},{23,53,1},{28,43,1},{13,43,1},{57,0,882},{50,32,882},{13,43,1},{0,38,882},{50,32,882},{0,38,882},{19,0,884},{19,0,884},{19,0,884},{19,0,884},{14,44,2}, +{14,44,2},{14,44,2},{14,29,2},{4,34,1},{4,34,1},{23,63,2376},{21,58,924},{22,46,1341},{20,44,941},{19,63,3391},{15,52,924},{16,45,10},{15,38,1308},{0,53,3112},{2,40,923},{27,63,948},{26,54,8},{27,46,74},{25,43,161},{53,0,3048},{12,54,882},{16,45,6},{0,40,891},{62,22,3048},{0,40,891},{21,63,884},{21,63,884},{21,63,884},{21,42,884},{16,59,652},{16,45,9},{16,45,9}, +{15,34,67},{1,45,650},{6,35,10},{26,53,0},{26,53,0},{26,53,0},{26,39,1},{37,17,648},{17,44,0},{17,44,0},{8,35,1},{62,15,648},{8,35,1},{60,0,882},{25,55,1},{30,45,1},{15,45,1},{60,0,882},{56,32,882},{15,45,1},{0,40,882},{56,32,882},{0,40,882},{21,0,884},{21,0,884},{21,0,884},{21,0,884},{16,46,4},{16,46,4},{16,46,4},{16,31,4},{6,36,1}, +{6,36,1},{26,63,2564},{23,61,927},{24,49,1324},{23,47,935},{22,63,3529},{17,54,924},{18,47,10},{17,41,1302},{0,56,3060},{5,42,924},{30,63,1003},{28,57,6},{29,48,67},{28,46,158},{56,1,3048},{15,56,883},{19,47,8},{1,42,884},{63,25,3048},{1,42,884},{23,63,891},{23,63,891},{23,63,891},{23,44,883},{18,62,649},{18,47,6},{18,47,6},{17,36,61},{4,47,652},{9,37,8},{28,55,2}, 
+{28,55,2},{28,55,2},{28,42,2},{49,0,648},{19,46,2},{19,46,2},{11,37,0},{62,18,648},{11,37,0},{63,1,882},{27,58,2},{32,47,4},{17,47,5},{63,1,882},{63,32,882},{17,47,5},{0,42,884},{63,32,882},{0,42,884},{23,0,882},{23,0,882},{23,0,882},{23,0,882},{18,48,2},{18,48,2},{18,48,2},{18,33,2},{8,38,1},{8,38,1},{28,63,2774},{25,63,927},{26,51,1324}, +{25,48,932},{25,63,3681},{19,56,924},{20,49,7},{19,43,1302},{0,59,3052},{7,44,924},{32,63,1074},{30,59,6},{31,50,67},{30,48,170},{59,1,3048},{16,58,885},{20,49,6},{3,44,884},{63,28,3048},{3,44,884},{25,63,918},{25,63,918},{25,63,918},{25,46,883},{20,63,651},{20,49,6},{20,49,6},{19,38,61},{5,49,651},{11,39,8},{30,57,2},{30,57,2},{30,57,2},{30,44,2},{52,0,648}, +{21,48,1},{21,48,1},{13,39,0},{62,21,648},{13,39,0},{63,7,882},{29,60,2},{34,49,1},{19,49,1},{63,7,882},{63,35,882},{19,49,1},{0,44,884},{63,35,882},{0,44,884},{25,0,882},{25,0,882},{25,0,882},{25,0,882},{20,50,2},{20,50,2},{20,50,2},{20,35,2},{10,40,1},{10,40,1},{31,63,2998},{28,63,951},{28,53,1324},{27,50,932},{26,63,3844},{21,58,924},{22,51,7}, +{21,45,1302},{2,61,3052},{9,46,924},{34,63,1146},{32,61,9},{33,52,74},{32,50,169},{62,1,3048},{18,60,885},{22,51,6},{5,46,884},{63,31,3048},{5,46,884},{28,63,950},{28,63,950},{28,63,950},{27,48,882},{23,63,675},{22,51,6},{22,51,6},{21,40,61},{7,51,651},{13,41,8},{32,59,0},{32,59,0},{32,59,0},{32,46,1},{55,0,648},{23,50,1},{23,50,1},{15,41,0},{62,24,648}, +{15,41,0},{63,13,882},{31,62,2},{36,51,1},{21,51,1},{63,13,882},{63,38,882},{21,51,1},{0,46,884},{63,38,882},{0,46,884},{27,0,882},{27,0,882},{27,0,882},{27,0,882},{22,52,2},{22,52,2},{22,52,2},{22,37,2},{12,42,1},{12,42,1},{33,63,3224},{30,63,1031},{30,55,1324},{29,52,932},{29,63,3996},{23,60,924},{24,53,7},{23,47,1302},{4,63,3052},{11,48,922},{37,63,1226}, 
+{34,63,9},{35,54,74},{33,51,161},{63,5,3048},{20,62,885},{24,53,6},{6,48,882},{47,42,3048},{6,48,882},{30,63,995},{30,63,995},{30,63,995},{29,50,882},{25,63,705},{24,53,6},{24,53,6},{23,42,61},{9,53,651},{15,43,8},{34,61,0},{34,61,0},{34,61,0},{34,47,4},{58,0,648},{25,52,1},{25,52,1},{16,43,1},{62,27,648},{16,43,1},{63,19,882},{33,63,4},{38,53,1}, +{23,53,1},{63,19,882},{63,41,882},{23,53,1},{0,48,882},{63,41,882},{0,48,882},{29,0,882},{29,0,882},{29,0,882},{29,0,882},{24,54,2},{24,54,2},{24,54,2},{24,39,2},{14,44,1},{14,44,1},{34,63,3510},{32,63,1202},{33,57,1335},{31,54,936},{33,63,4252},{25,63,925},{26,55,13},{25,49,1309},{9,63,3091},{13,50,918},{40,63,1349},{37,63,38},{37,56,77},{36,54,158},{60,17,3048}, +{24,63,894},{27,55,8},{9,50,884},{63,37,3048},{9,50,884},{32,63,1058},{32,63,1058},{32,63,1058},{31,52,885},{28,63,762},{26,55,9},{26,55,9},{26,44,61},{11,56,650},{17,45,11},{36,63,2},{36,63,2},{36,63,2},{36,50,2},{53,16,648},{28,54,1},{28,54,1},{19,45,1},{63,30,648},{19,45,1},{59,33,882},{37,63,29},{40,56,4},{26,55,4},{59,33,882},{55,48,882},{26,55,4}, +{0,50,884},{55,48,882},{0,50,884},{31,0,884},{31,0,884},{31,0,884},{31,0,884},{26,57,0},{26,57,0},{26,57,0},{26,42,1},{16,47,2},{16,47,2},{37,63,3734},{34,63,1399},{35,59,1335},{33,57,935},{34,63,4441},{28,63,957},{28,57,13},{27,51,1309},{13,63,3192},{15,52,918},{43,63,1485},{39,63,131},{39,58,77},{38,56,158},{63,17,3048},{28,63,957},{29,57,8},{11,52,884},{63,40,3048}, +{11,52,884},{34,63,1110},{34,63,1110},{34,63,1110},{33,54,883},{30,63,840},{28,57,9},{28,57,9},{28,46,61},{13,58,650},{19,47,11},{39,63,10},{39,63,10},{39,63,10},{38,52,2},{56,16,648},{30,56,1},{30,56,1},{21,47,1},{62,33,648},{21,47,1},{62,33,882},{41,63,80},{42,58,4},{28,57,4},{62,33,882},{61,48,882},{28,57,4},{0,52,884},{61,48,882},{0,52,884},{33,0,882}, 
+{33,0,882},{33,0,882},{33,0,882},{28,59,0},{28,59,0},{28,59,0},{28,44,1},{18,48,1},{18,48,1},{40,63,4022},{37,63,1647},{37,61,1335},{35,59,935},{37,63,4657},{31,63,1085},{30,59,13},{29,53,1309},{18,63,3339},{17,54,924},{45,63,1635},{42,63,275},{41,60,77},{40,58,158},{63,23,3048},{32,63,1044},{31,59,8},{13,54,884},{63,43,3048},{13,54,884},{36,63,1203},{36,63,1203},{36,63,1203}, +{35,56,883},{33,63,915},{30,59,9},{30,59,9},{30,48,65},{15,60,650},{21,49,8},{41,63,25},{41,63,25},{41,63,25},{40,54,2},{59,16,648},{32,57,4},{32,57,4},{23,49,0},{62,36,648},{23,49,0},{63,37,882},{44,63,160},{44,60,4},{30,59,4},{63,37,882},{63,50,882},{30,59,4},{0,54,884},{63,50,882},{0,54,884},{35,0,882},{35,0,882},{35,0,882},{35,0,882},{30,61,0}, +{30,61,0},{30,61,0},{30,46,1},{20,50,1},{20,50,1},{42,63,4364},{40,63,1991},{39,63,1335},{37,61,935},{40,63,4905},{34,63,1287},{32,61,10},{31,55,1309},{21,63,3555},{19,56,924},{48,63,1796},{44,63,465},{43,62,77},{42,60,158},{63,29,3048},{35,63,1188},{33,61,8},{15,56,884},{63,46,3048},{15,56,884},{39,63,1299},{39,63,1299},{39,63,1299},{37,58,883},{34,63,1017},{32,61,6},{32,61,6}, +{32,50,66},{17,62,650},{23,51,8},{43,63,45},{43,63,45},{43,63,45},{42,56,2},{62,16,648},{33,60,2},{33,60,2},{25,51,0},{62,39,648},{25,51,0},{63,43,882},{48,63,260},{46,62,4},{30,62,4},{63,43,882},{63,53,882},{30,62,4},{0,56,884},{63,53,882},{0,56,884},{37,0,882},{37,0,882},{37,0,882},{37,0,882},{32,63,1},{32,63,1},{32,63,1},{32,47,2},{22,52,1}, +{22,52,1},{45,63,4441},{43,63,2286},{41,63,1421},{39,62,916},{43,63,4878},{37,63,1410},{34,63,9},{33,57,1141},{26,63,3535},{21,58,795},{50,63,1770},{47,63,609},{46,63,85},{44,62,120},{63,35,2814},{40,63,1194},{35,63,2},{17,58,762},{63,49,2814},{17,58,762},{41,63,1421},{41,63,1421},{41,63,1421},{39,61,885},{37,63,1130},{34,63,9},{34,63,9},{34,52,61},{20,63,651},{25,53,11},{46,63,85}, 
+{46,63,85},{46,63,85},{44,58,4},{63,21,648},{36,62,1},{36,62,1},{27,53,2},{63,42,648},{27,53,2},{63,48,761},{51,63,305},{48,63,1},{34,63,0},{63,48,761},{62,56,761},{34,63,0},{0,58,761},{62,56,761},{0,58,761},{39,0,884},{39,0,884},{39,0,884},{39,0,884},{34,63,9},{34,63,9},{34,63,9},{34,50,1},{24,55,2},{24,55,2},{46,63,3945},{43,63,2238},{43,63,1509}, +{41,63,885},{45,63,4345},{38,63,1230},{37,63,41},{34,58,781},{30,63,3066},{24,59,494},{51,63,1386},{48,63,530},{48,63,130},{46,62,45},{63,39,2249},{43,63,914},{39,63,8},{21,59,482},{47,59,2249},{21,59,482},{43,63,1509},{43,63,1509},{43,63,1509},{41,63,885},{40,63,1242},{37,63,41},{37,63,41},{36,54,61},{24,63,689},{27,55,11},{48,63,130},{48,63,130},{48,63,130},{46,60,4},{63,27,648}, +{39,63,8},{39,63,8},{29,55,2},{63,45,648},{29,55,2},{63,51,481},{54,63,193},{51,63,1},{40,63,0},{63,51,481},{63,57,481},{40,63,0},{0,59,481},{63,57,481},{0,59,481},{41,0,884},{41,0,884},{41,0,884},{41,0,884},{37,63,25},{37,63,25},{37,63,25},{36,52,1},{26,57,2},{26,57,2},{48,63,3571},{46,63,2182},{46,63,1653},{43,63,900},{46,63,3838},{41,63,1094},{40,63,137}, +{37,59,490},{32,63,2703},{27,60,270},{53,63,1106},{51,63,458},{50,63,193},{48,63,10},{63,43,1769},{46,63,698},{42,63,40},{25,60,266},{63,53,1769},{25,60,266},{46,63,1653},{46,63,1653},{46,63,1653},{43,63,900},{43,63,1386},{40,63,137},{40,63,137},{38,56,61},{29,63,779},{29,57,11},{50,63,193},{50,63,193},{50,63,193},{48,62,1},{63,33,648},{42,63,40},{42,63,40},{31,57,2},{47,56,648}, +{31,57,2},{63,54,265},{57,63,113},{54,63,1},{46,63,0},{63,54,265},{62,59,265},{46,63,0},{0,60,265},{62,59,265},{0,60,265},{43,0,884},{43,0,884},{43,0,884},{43,0,884},{39,63,52},{39,63,52},{39,63,52},{38,54,1},{28,59,2},{28,59,2},{50,63,3307},{48,63,2195},{48,63,1795},{46,63,964},{48,63,3435},{43,63,1026},{42,63,296},{39,60,269},{37,63,2410},{30,61,115},{54,63,882}, 
+{54,63,450},{53,63,265},{50,63,10},{63,47,1374},{48,63,546},{46,63,89},{29,61,114},{63,55,1374},{29,61,114},{48,63,1795},{48,63,1795},{48,63,1795},{46,63,964},{45,63,1560},{42,63,296},{42,63,296},{40,58,61},{32,63,891},{31,59,11},{53,63,265},{53,63,265},{53,63,265},{50,63,10},{63,39,648},{46,63,89},{46,63,89},{33,59,1},{47,59,648},{33,59,1},{63,57,113},{58,63,50},{57,63,1}, +{52,63,0},{63,57,113},{63,60,113},{52,63,0},{0,61,113},{63,60,113},{0,61,113},{45,0,884},{45,0,884},{45,0,884},{45,0,884},{41,63,97},{41,63,97},{41,63,97},{40,56,1},{30,61,2},{30,61,2},{51,63,3032},{51,63,2264},{50,63,1965},{48,63,1096},{51,63,3096},{46,63,1059},{46,63,530},{41,62,126},{40,63,2191},{33,62,25},{57,63,680},{56,63,465},{55,63,356},{53,63,68},{63,51,1032}, +{52,63,450},{49,63,185},{34,62,20},{63,57,1032},{34,62,20},{50,63,1965},{50,63,1965},{50,63,1965},{48,63,1096},{48,63,1736},{46,63,530},{46,63,530},{42,60,62},{37,63,1086},{33,62,9},{55,63,356},{55,63,356},{55,63,356},{53,63,68},{61,49,648},{49,63,185},{49,63,185},{36,61,4},{63,54,648},{36,61,4},{63,61,18},{61,63,9},{61,63,0},{59,63,0},{63,61,18},{63,62,18},{59,63,0}, +{0,62,20},{63,62,18},{0,62,20},{47,0,890},{47,0,890},{47,0,890},{47,0,890},{44,63,149},{44,63,149},{44,63,149},{42,58,1},{32,63,0},{32,63,0},{54,63,2756},{52,63,2249},{51,63,2004},{50,63,1224},{53,63,2803},{48,63,1092},{48,63,692},{44,62,58},{43,63,1991},{35,63,16},{59,63,566},{57,63,420},{57,63,356},{55,63,125},{63,55,771},{55,63,386},{52,63,241},{38,63,0},{63,59,771}, +{38,63,0},{51,63,2004},{51,63,2004},{51,63,2004},{50,63,1224},{50,63,1802},{48,63,692},{48,63,692},{44,62,42},{40,63,1166},{35,63,16},{57,63,356},{57,63,356},{57,63,356},{55,63,125},{63,50,580},{52,63,241},{52,63,241},{38,63,0},{62,57,580},{38,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{49,0,884}, 
+{49,0,884},{49,0,884},{49,0,884},{46,63,193},{46,63,193},{46,63,193},{44,60,1},{35,63,16},{35,63,16},{54,63,2276},{54,63,1844},{54,63,1700},{52,63,1125},{54,63,2180},{51,63,948},{49,63,649},{46,63,10},{46,63,1551},{39,63,58},{60,63,324},{59,63,257},{59,63,221},{57,63,68},{63,57,452},{57,63,228},{55,63,137},{44,63,0},{63,60,452},{44,63,0},{54,63,1700},{54,63,1700},{54,63,1700}, +{52,63,1125},{51,63,1460},{49,63,649},{49,63,649},{46,63,10},{43,63,926},{39,63,58},{59,63,221},{59,63,221},{59,63,221},{57,63,68},{63,53,340},{55,63,137},{55,63,137},{44,63,0},{63,58,340},{44,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{51,0,884},{51,0,884},{51,0,884},{51,0,884},{48,63,260}, +{48,63,260},{48,63,260},{46,62,1},{39,63,58},{39,63,58},{57,63,1844},{55,63,1585},{55,63,1464},{54,63,1044},{56,63,1747},{52,63,850},{51,63,596},{48,63,4},{48,63,1204},{43,63,117},{60,63,164},{60,63,116},{60,63,100},{59,63,40},{63,59,216},{58,63,114},{58,63,65},{50,63,0},{63,61,216},{50,63,0},{55,63,1464},{55,63,1464},{55,63,1464},{54,63,1044},{54,63,1188},{51,63,596},{51,63,596}, +{48,63,4},{46,63,750},{43,63,117},{60,63,100},{60,63,100},{60,63,100},{59,63,40},{63,56,164},{58,63,65},{58,63,65},{50,63,0},{62,60,164},{50,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{53,0,884},{53,0,884},{53,0,884},{53,0,884},{51,63,340},{51,63,340},{51,63,340},{48,63,4},{43,63,117}, +{43,63,117},{0,39,1568},{0,28,194},{0,19,10},{0,16,586},{0,26,3371},{0,17,2169},{0,16,1027},{0,10,2532},{0,12,3648},{0,10,2701},{0,39,1568},{0,28,194},{0,19,10},{0,16,586},{5,16,3371},{0,17,2169},{0,16,1027},{0,10,2532},{26,0,3371},{0,10,2532},{0,18,0},{0,18,0},{0,18,0},{0,9,0},{0,9,288},{0,8,109},{0,8,109},{0,4,164},{0,4,321},{0,4,189},{0,18,0}, 
+{0,18,0},{0,18,0},{0,9,0},{4,1,288},{0,8,109},{0,8,109},{0,4,164},{9,0,288},{0,4,164},{19,1,1568},{0,28,194},{0,19,10},{0,16,586},{19,1,1568},{39,0,1568},{0,16,586},{0,13,1568},{39,0,1568},{0,13,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,45,1568},{0,31,106},{0,22,10}, +{0,18,481},{0,30,3968},{0,19,2355},{0,18,1057},{0,13,2852},{0,14,4319},{0,11,3117},{0,45,1568},{0,31,106},{0,22,10},{0,18,481},{15,0,3968},{0,19,2355},{0,18,1057},{0,13,2852},{30,0,3968},{0,13,2852},{0,24,0},{0,24,0},{0,24,0},{0,12,0},{0,12,512},{0,10,205},{0,10,205},{0,5,313},{0,5,566},{0,5,349},{0,24,0},{0,24,0},{0,24,0},{0,12,0},{6,0,512}, +{0,10,205},{0,10,205},{0,5,313},{12,0,512},{0,5,313},{22,1,1568},{0,31,106},{1,22,1},{0,18,481},{22,1,1568},{45,0,1568},{0,18,481},{0,15,1568},{45,0,1568},{0,15,1568},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,50,1570},{0,34,52},{1,24,58},{0,21,377},{0,34,4652},{0,22,2571},{0,19,1107}, +{0,14,3225},{0,16,5108},{0,13,3525},{0,50,1570},{0,34,52},{1,24,42},{0,21,377},{15,4,4651},{0,22,2571},{0,19,1107},{0,14,3225},{30,2,4651},{0,14,3225},{0,30,0},{0,30,0},{0,30,0},{0,15,0},{0,15,800},{0,11,317},{0,11,317},{0,7,468},{0,7,889},{0,7,549},{0,30,0},{0,30,0},{0,30,0},{0,15,0},{7,1,800},{0,11,317},{0,11,317},{0,7,468},{15,0,800}, +{0,7,468},{25,1,1568},{0,34,52},{3,24,1},{0,21,377},{25,1,1568},{47,2,1568},{0,21,377},{0,17,1570},{47,2,1568},{0,17,1570},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,56,1570},{0,38,13},{1,27,138},{0,24,305},{0,38,5419},{0,25,2819},{0,22,1187},{0,16,3659},{0,17,6013},{0,14,4061},{0,56,1570}, 
+{0,38,13},{1,27,122},{0,24,305},{19,0,5419},{0,25,2819},{0,22,1187},{0,16,3659},{38,0,5419},{0,16,3659},{0,36,0},{0,36,0},{0,36,0},{0,18,0},{0,18,1152},{0,14,445},{0,14,445},{0,8,697},{0,8,1270},{0,8,797},{0,36,0},{0,36,0},{0,36,0},{0,18,0},{9,0,1152},{0,14,445},{0,14,445},{0,8,697},{18,0,1152},{0,8,697},{28,1,1568},{0,38,13},{5,26,1}, +{0,24,305},{28,1,1568},{47,5,1568},{0,24,305},{0,19,1570},{47,5,1568},{0,19,1570},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,61,1609},{1,41,43},{2,29,250},{1,26,301},{0,45,5419},{0,28,2552},{0,25,820},{0,18,3377},{0,21,6243},{0,18,3953},{2,59,1569},{2,41,10},{3,29,125},{1,26,285},{22,1,5419}, +{0,28,2552},{0,25,820},{0,18,3377},{45,0,5419},{0,18,3377},{1,40,42},{1,40,42},{1,40,42},{1,21,42},{0,25,1152},{0,19,292},{0,19,292},{0,11,569},{0,11,1380},{0,10,747},{2,38,2},{2,38,2},{2,38,2},{2,20,2},{12,1,1152},{0,19,292},{0,19,292},{0,11,569},{25,0,1152},{0,11,569},{23,17,1568},{0,42,1},{7,28,1},{0,26,233},{23,17,1568},{63,0,1568},{0,26,233}, +{0,21,1568},{63,0,1568},{0,21,1568},{1,0,41},{1,0,41},{1,0,41},{1,0,41},{0,7,0},{0,7,0},{0,7,0},{0,3,1},{0,3,17},{0,3,17},{2,63,1731},{2,44,146},{3,31,441},{1,28,370},{0,50,5420},{0,33,2329},{0,27,554},{0,21,3217},{0,23,6476},{0,19,3861},{4,61,1569},{4,43,10},{5,31,125},{3,28,285},{25,1,5419},{0,33,2329},{0,27,554},{0,21,3217},{47,2,5419}, +{0,21,3217},{2,44,146},{2,44,146},{2,44,146},{2,23,146},{0,31,1152},{0,22,180},{0,22,180},{0,13,458},{0,14,1508},{0,13,747},{4,40,2},{4,40,2},{4,40,2},{4,22,2},{15,1,1152},{0,22,180},{0,22,180},{0,13,458},{31,0,1152},{0,13,458},{34,1,1568},{2,44,1},{9,30,1},{0,28,164},{34,1,1568},{63,3,1568},{0,28,164},{0,23,1568},{63,3,1568},{0,23,1568},{2,0,145}, 
+{2,0,145},{2,0,145},{2,0,145},{0,13,0},{0,13,0},{0,13,0},{0,6,1},{0,5,52},{0,5,52},{4,63,1977},{3,47,318},{5,33,675},{3,30,498},{0,56,5420},{0,36,2129},{0,30,338},{0,22,3012},{0,25,6733},{0,21,3825},{6,63,1569},{6,45,10},{8,33,123},{5,30,285},{28,1,5419},{0,36,2129},{0,30,338},{0,22,3012},{47,5,5419},{0,22,3012},{3,48,313},{3,48,313},{3,48,313}, +{3,26,314},{0,36,1154},{0,25,100},{0,25,100},{0,16,388},{0,16,1665},{0,14,757},{6,42,2},{6,42,2},{6,42,2},{6,24,2},{15,7,1152},{0,25,100},{0,25,100},{0,16,388},{31,3,1152},{0,16,388},{37,1,1568},{4,46,1},{11,32,2},{0,30,113},{37,1,1568},{63,6,1568},{0,30,113},{0,25,1568},{63,6,1568},{0,25,1568},{3,0,313},{3,0,313},{3,0,313},{3,0,313},{0,19,0}, +{0,19,0},{0,19,0},{0,9,1},{0,8,116},{0,8,116},{6,63,2353},{4,49,562},{6,35,1006},{3,32,715},{0,62,5420},{0,39,1961},{0,32,174},{0,24,2817},{0,28,7013},{0,24,3841},{9,63,1577},{8,47,10},{10,35,123},{7,31,290},{31,1,5419},{0,39,1961},{0,32,174},{0,24,2817},{47,8,5419},{0,24,2817},{4,52,545},{4,52,545},{4,52,545},{4,28,546},{0,42,1154},{0,29,45},{0,29,45}, +{0,18,289},{0,19,1849},{0,16,797},{8,44,2},{8,44,2},{8,44,2},{8,26,2},{15,13,1152},{0,29,45},{0,29,45},{0,18,289},{31,6,1152},{0,18,289},{40,1,1568},{6,48,1},{13,34,2},{0,33,73},{40,1,1568},{63,9,1568},{0,33,73},{0,27,1568},{63,9,1568},{0,27,1568},{4,0,545},{4,0,545},{4,0,545},{4,0,545},{0,25,0},{0,25,0},{0,25,0},{0,12,1},{0,11,212}, +{0,11,212},{8,63,2980},{6,52,920},{7,38,1444},{5,34,1012},{2,63,5504},{0,42,1814},{0,35,57},{0,27,2630},{0,31,7380},{0,25,3860},{11,63,1604},{10,49,13},{12,37,116},{10,33,292},{34,1,5419},{0,42,1814},{0,35,57},{0,27,2630},{63,3,5419},{0,27,2630},{5,57,884},{5,57,884},{5,57,884},{5,31,884},{0,49,1152},{0,33,5},{0,33,5},{0,21,221},{0,22,2091},{0,19,875},{11,45,4}, 
+{11,45,4},{11,45,4},{11,28,4},{24,1,1152},{0,33,5},{0,33,5},{0,21,221},{49,0,1152},{0,21,221},{44,0,1568},{8,50,1},{15,36,4},{0,35,32},{44,0,1568},{62,13,1568},{0,35,32},{0,29,1570},{62,13,1568},{0,29,1570},{5,0,884},{5,0,884},{5,0,884},{5,0,884},{0,31,1},{0,31,1},{0,31,1},{0,16,1},{0,14,349},{0,14,349},{9,63,3638},{7,55,1309},{8,39,1940}, +{6,36,1365},{3,63,5739},{0,45,1718},{0,38,17},{0,29,2514},{0,34,7760},{0,28,3900},{14,63,1636},{12,51,13},{14,39,116},{12,35,292},{37,1,5419},{0,45,1718},{0,38,17},{0,29,2514},{63,6,5419},{0,29,2514},{6,61,1252},{6,61,1252},{6,61,1252},{6,33,1253},{0,55,1152},{0,37,4},{0,37,4},{0,22,162},{0,25,2339},{0,21,989},{13,47,4},{13,47,4},{13,47,4},{13,30,4},{27,1,1152}, +{1,36,1},{1,36,1},{0,22,162},{55,0,1152},{0,22,162},{47,0,1568},{10,52,1},{17,38,1},{0,38,16},{47,0,1568},{46,24,1568},{0,38,16},{0,31,1570},{46,24,1568},{0,31,1570},{6,0,1252},{6,0,1252},{6,0,1252},{6,0,1252},{0,37,0},{0,37,0},{0,37,0},{0,19,1},{0,16,490},{0,16,490},{11,63,4328},{8,57,1644},{10,41,2360},{7,39,1656},{6,63,6079},{0,49,1644},{1,40,17}, +{0,31,2359},{0,36,7943},{0,30,3834},{16,63,1689},{14,53,13},{16,41,129},{14,37,292},{40,1,5419},{0,49,1640},{1,40,13},{0,31,2355},{63,9,5419},{0,31,2355},{8,63,1576},{8,63,1576},{8,63,1576},{7,36,1576},{1,59,1156},{1,39,13},{1,39,13},{0,25,110},{0,28,2475},{0,24,1017},{15,49,4},{15,49,4},{15,49,4},{15,32,4},{30,1,1152},{3,38,1},{3,38,1},{0,25,106},{61,0,1152}, +{0,25,106},{49,1,1568},{12,54,1},{19,40,1},{0,40,4},{49,1,1568},{46,27,1568},{0,40,4},{0,33,1568},{46,27,1568},{0,33,1568},{7,0,1576},{7,0,1576},{7,0,1576},{7,0,1576},{1,41,4},{1,41,4},{1,41,4},{1,21,4},{0,19,578},{0,19,578},{14,63,4584},{10,59,1644},{12,43,2360},{9,41,1656},{8,63,6244},{2,51,1644},{3,42,17},{1,33,2308},{0,42,7575},{0,32,3345},{19,63,1761}, 
+{16,55,10},{17,43,125},{16,40,290},{43,1,5419},{0,52,1592},{3,42,13},{0,33,2225},{63,12,5419},{0,33,2225},{10,63,1585},{10,63,1585},{10,63,1585},{9,38,1576},{3,61,1156},{3,41,13},{3,41,13},{2,27,110},{0,31,2211},{0,25,699},{16,52,2},{16,52,2},{16,52,2},{16,34,2},{33,1,1152},{5,40,1},{5,40,1},{0,27,61},{63,2,1152},{0,27,61},{52,1,1568},{14,56,1},{21,42,1}, +{1,42,0},{52,1,1568},{46,30,1568},{1,42,0},{0,35,1568},{46,30,1568},{0,35,1568},{9,0,1576},{9,0,1576},{9,0,1576},{9,0,1576},{3,43,4},{3,43,4},{3,43,4},{3,23,4},{0,22,410},{0,22,410},{15,63,4826},{12,62,1650},{14,46,2355},{11,43,1660},{11,63,6452},{4,54,1641},{5,44,15},{3,35,2308},{0,45,7165},{0,35,2875},{22,63,1862},{18,57,9},{20,45,126},{18,42,281},{47,0,5419}, +{0,56,1568},{6,44,10},{0,35,2091},{46,24,5419},{0,35,2091},{12,63,1606},{12,63,1606},{12,63,1606},{12,40,1571},{5,63,1155},{5,44,14},{5,44,14},{4,29,109},{0,33,1977},{0,28,424},{19,53,4},{19,53,4},{19,53,4},{19,36,4},{31,12,1152},{7,42,1},{7,42,1},{0,30,29},{62,6,1152},{0,30,29},{56,0,1568},{17,58,1},{24,44,2},{3,44,2},{56,0,1568},{62,25,1568},{3,44,2}, +{0,37,1570},{62,25,1568},{0,37,1570},{12,0,1570},{12,0,1570},{12,0,1570},{12,0,1570},{5,46,1},{5,46,1},{5,46,1},{5,25,2},{0,25,260},{0,25,260},{17,63,5096},{14,63,1652},{15,48,2358},{13,45,1660},{14,63,6668},{6,56,1641},{7,46,15},{5,37,2308},{0,47,6891},{0,36,2543},{23,63,1956},{20,59,9},{22,47,126},{20,44,281},{49,1,5419},{2,58,1568},{8,46,10},{0,38,1979},{46,27,5419}, +{0,38,1979},{14,63,1651},{14,63,1651},{14,63,1651},{14,42,1571},{8,63,1179},{7,46,14},{7,46,14},{6,31,109},{0,38,1778},{0,31,232},{21,55,4},{21,55,4},{21,55,4},{21,38,4},{31,18,1152},{9,44,1},{9,44,1},{0,32,10},{62,9,1152},{0,32,10},{59,0,1568},{19,60,1},{26,46,2},{5,46,2},{59,0,1568},{62,28,1568},{5,46,2},{0,39,1570},{62,28,1568},{0,39,1570},{14,0,1570}, 
+{14,0,1570},{14,0,1570},{14,0,1570},{7,48,1},{7,48,1},{7,48,1},{7,27,2},{0,28,164},{0,28,164},{20,63,5352},{17,63,1708},{18,49,2360},{15,47,1660},{15,63,6877},{8,58,1641},{9,48,14},{7,39,2308},{0,50,6576},{0,39,2215},{26,63,2052},{22,61,9},{24,49,116},{22,46,281},{52,1,5419},{4,60,1568},{9,48,13},{0,40,1907},{46,30,5419},{0,40,1907},{16,63,1697},{16,63,1697},{16,63,1697}, +{16,44,1577},{10,63,1209},{9,48,14},{9,48,14},{8,33,115},{0,42,1601},{0,33,110},{23,57,4},{23,57,4},{23,57,4},{23,40,4},{34,17,1152},{11,46,1},{11,46,1},{0,34,2},{62,12,1152},{0,34,2},{62,0,1568},{21,62,1},{28,48,1},{7,48,0},{62,0,1568},{62,31,1568},{7,48,0},{0,41,1570},{62,31,1568},{0,41,1570},{16,0,1576},{16,0,1576},{16,0,1576},{16,0,1576},{9,50,1}, +{9,50,1},{9,50,1},{9,29,2},{0,33,85},{0,33,85},{23,63,5672},{19,63,1825},{20,51,2360},{17,49,1656},{19,63,7135},{10,60,1641},{11,50,14},{9,41,2308},{0,53,6336},{0,42,1983},{29,63,2180},{24,63,9},{26,51,116},{24,48,293},{55,1,5419},{6,62,1568},{11,50,13},{0,42,1814},{47,32,5419},{0,42,1814},{19,63,1761},{19,63,1761},{19,63,1761},{17,46,1576},{12,63,1249},{11,50,14},{11,50,14}, +{10,35,115},{0,45,1449},{0,36,30},{25,59,4},{25,59,4},{25,59,4},{25,42,4},{37,17,1152},{13,48,0},{13,48,0},{1,36,2},{62,15,1152},{1,36,2},{63,4,1568},{23,63,4},{30,50,1},{9,50,0},{63,4,1568},{62,34,1568},{9,50,0},{0,43,1570},{62,34,1568},{0,43,1570},{17,0,1576},{17,0,1576},{17,0,1576},{17,0,1576},{11,52,1},{11,52,1},{11,52,1},{11,31,2},{0,36,29}, +{0,36,29},{25,63,6066},{21,63,2039},{22,54,2355},{19,51,1660},{20,63,7420},{12,62,1635},{13,52,14},{11,43,2316},{0,57,6109},{0,44,1789},{31,63,2369},{27,63,38},{28,53,115},{26,50,286},{59,0,5419},{9,63,1577},{14,52,11},{0,44,1740},{62,28,5419},{0,44,1740},{21,63,1843},{21,63,1843},{21,63,1843},{20,48,1571},{15,63,1314},{13,52,13},{13,52,13},{12,37,116},{0,48,1329},{0,39,13},{27,62,1}, 
+{27,62,1},{27,62,1},{27,44,2},{49,0,1152},{15,50,2},{15,50,2},{4,38,4},{62,18,1152},{4,38,4},{63,11,1568},{27,63,37},{32,52,2},{12,52,1},{63,11,1568},{47,45,1568},{12,52,1},{0,45,1576},{47,45,1568},{0,45,1576},{20,0,1570},{20,0,1570},{20,0,1570},{20,0,1570},{13,54,1},{13,54,1},{13,54,1},{13,34,1},{0,40,4},{0,40,4},{28,63,6434},{23,63,2268},{24,56,2355}, +{21,53,1660},{23,63,7668},{14,63,1652},{15,54,14},{13,45,2316},{0,61,5924},{0,47,1685},{34,63,2502},{29,63,123},{30,55,115},{28,52,286},{62,0,5419},{13,63,1627},{16,54,10},{0,47,1676},{62,31,5419},{0,47,1676},{23,63,1907},{23,63,1907},{23,63,1907},{22,50,1571},{17,63,1395},{15,54,13},{15,54,13},{14,39,116},{0,51,1241},{2,41,13},{29,63,2},{29,63,2},{29,63,2},{29,46,2},{52,0,1152}, +{17,52,1},{17,52,1},{6,40,4},{62,21,1152},{6,40,4},{55,32,1568},{31,63,97},{34,54,2},{14,54,1},{55,32,1568},{46,48,1568},{14,54,1},{0,47,1576},{46,48,1568},{0,47,1576},{22,0,1570},{22,0,1570},{22,0,1570},{22,0,1570},{15,56,1},{15,56,1},{15,56,1},{15,36,1},{1,43,1},{1,43,1},{29,63,6756},{26,63,2548},{26,58,2355},{23,55,1660},{26,63,7948},{17,63,1716},{17,56,15}, +{15,47,2316},{0,63,5773},{0,49,1638},{37,63,2694},{32,63,262},{32,57,126},{30,54,286},{63,4,5419},{17,63,1715},{18,56,10},{0,49,1634},{62,34,5419},{0,49,1634},{25,63,2018},{25,63,2018},{25,63,2018},{24,52,1571},{20,63,1483},{17,56,14},{17,56,14},{16,41,109},{0,54,1185},{4,43,13},{31,63,17},{31,63,17},{31,63,17},{31,48,2},{55,0,1152},{19,54,1},{19,54,1},{8,42,4},{62,24,1152}, +{8,42,4},{58,32,1568},{34,63,169},{36,56,2},{15,56,2},{58,32,1568},{52,48,1568},{15,56,2},{0,49,1570},{52,48,1568},{0,49,1570},{24,0,1570},{24,0,1570},{24,0,1570},{24,0,1570},{17,58,1},{17,58,1},{17,58,1},{17,37,2},{3,45,1},{3,45,1},{31,63,7218},{29,63,2924},{28,60,2355},{25,57,1660},{29,63,8260},{20,63,1884},{19,58,15},{17,49,2308},{3,63,5933},{2,51,1638},{39,63,2892}, 
+{34,63,445},{34,59,126},{32,56,281},{63,10,5419},{21,63,1849},{20,58,10},{0,51,1606},{62,37,5419},{0,51,1606},{28,63,2130},{28,63,2130},{28,63,2130},{26,54,1571},{23,63,1603},{19,58,14},{19,58,14},{18,43,109},{0,58,1156},{6,45,13},{33,63,40},{33,63,40},{33,63,40},{33,50,4},{58,0,1152},{21,56,1},{21,56,1},{10,44,4},{62,27,1152},{10,44,4},{61,32,1568},{38,63,274},{38,58,2}, +{17,58,2},{61,32,1568},{58,48,1568},{17,58,2},{0,51,1570},{58,48,1568},{0,51,1570},{26,0,1570},{26,0,1570},{26,0,1570},{26,0,1570},{19,60,1},{19,60,1},{19,60,1},{19,39,2},{5,47,1},{5,47,1},{34,63,7586},{31,63,3453},{30,62,2357},{28,59,1668},{31,63,8699},{23,63,2180},{21,60,21},{19,51,2316},{7,63,6224},{4,53,1634},{42,63,3131},{37,63,722},{36,62,125},{34,58,278},{55,32,5419}, +{26,63,2052},{22,60,9},{0,53,1580},{46,48,5419},{0,53,1580},{30,63,2272},{30,63,2272},{30,63,2272},{28,56,1568},{25,63,1746},{21,60,17},{21,60,17},{21,45,113},{1,61,1154},{9,47,14},{36,63,74},{36,63,74},{36,63,74},{35,52,2},{53,16,1152},{23,59,1},{23,59,1},{11,47,1},{63,30,1152},{11,47,1},{63,35,1568},{43,63,433},{40,60,4},{20,60,4},{63,35,1568},{63,49,1568},{20,60,4}, +{0,53,1576},{63,49,1568},{0,53,1576},{28,0,1568},{28,0,1568},{28,0,1568},{28,0,1568},{21,63,0},{21,63,0},{21,63,0},{21,42,0},{7,49,1},{7,49,1},{37,63,8018},{34,63,3915},{32,63,2410},{30,61,1668},{34,63,8985},{26,63,2548},{23,62,21},{21,53,2316},{12,63,6555},{6,55,1634},{43,63,3345},{40,63,1026},{38,63,129},{36,60,278},{58,32,5419},{30,63,2274},{24,62,9},{0,55,1577},{52,48,5419}, +{0,55,1577},{32,63,2406},{32,63,2406},{32,63,2406},{30,58,1568},{28,63,1890},{23,62,17},{23,62,17},{23,47,113},{3,63,1154},{11,49,12},{38,63,125},{38,63,125},{38,63,125},{37,54,2},{56,16,1152},{25,61,1},{25,61,1},{14,48,5},{62,33,1152},{14,48,5},{63,41,1568},{46,63,585},{42,62,4},{22,62,4},{63,41,1568},{63,52,1568},{22,62,4},{0,55,1576},{63,52,1568},{0,55,1576},{30,0,1568}, 
+{30,0,1568},{30,0,1568},{30,0,1568},{23,63,4},{23,63,4},{23,63,4},{23,44,0},{9,51,1},{9,51,1},{39,63,7700},{35,63,4026},{34,63,2514},{32,62,1611},{37,63,8485},{29,63,2424},{26,63,20},{23,55,1896},{15,63,6115},{9,56,1308},{46,63,2973},{43,63,1034},{40,63,169},{37,61,194},{63,27,4803},{32,63,2024},{27,63,1},{2,57,1253},{63,45,4803},{2,57,1253},{34,63,2514},{34,63,2514},{34,63,2514}, +{32,60,1570},{29,63,2056},{26,63,20},{26,63,20},{24,49,116},{7,63,1164},{13,51,12},{40,63,169},{40,63,169},{40,63,169},{39,56,2},{59,16,1152},{27,63,1},{27,63,1},{16,50,4},{62,36,1152},{16,50,4},{63,45,1250},{48,63,500},{45,63,1},{26,63,0},{63,45,1250},{47,62,1250},{26,63,0},{0,57,1252},{47,62,1250},{0,57,1252},{32,0,1570},{32,0,1570},{32,0,1570},{32,0,1570},{26,63,20}, +{26,63,20},{26,63,20},{25,46,0},{11,53,1},{11,53,1},{40,63,7062},{37,63,3915},{37,63,2690},{34,63,1579},{37,63,7765},{30,63,2178},{28,63,77},{26,56,1437},{18,63,5499},{11,57,918},{48,63,2504},{44,63,945},{43,63,225},{40,62,89},{63,31,4056},{35,63,1656},{30,63,9},{6,58,885},{63,47,4056},{6,58,885},{37,63,2690},{37,63,2690},{37,63,2690},{34,62,1570},{33,63,2227},{28,63,77},{28,63,77}, +{26,51,116},{11,63,1227},{15,53,12},{43,63,225},{43,63,225},{43,63,225},{41,58,2},{62,16,1152},{30,63,9},{30,63,9},{18,52,4},{62,39,1152},{18,52,4},{63,47,884},{51,63,356},{47,63,4},{32,63,0},{63,47,884},{63,55,884},{32,63,0},{0,58,884},{63,55,884},{0,58,884},{34,0,1570},{34,0,1570},{34,0,1570},{34,0,1570},{28,63,41},{28,63,41},{28,63,41},{27,48,1},{13,55,1}, +{13,55,1},{43,63,6493},{40,63,3882},{39,63,2880},{36,63,1584},{40,63,6982},{34,63,1966},{31,63,206},{28,57,971},{23,63,4927},{14,59,562},{50,63,2070},{46,63,842},{46,63,313},{43,62,25},{63,35,3318},{38,63,1326},{34,63,45},{11,59,545},{63,49,3318},{11,59,545},{39,63,2880},{39,63,2880},{39,63,2880},{36,63,1584},{34,63,2434},{31,63,206},{31,63,206},{29,53,114},{15,63,1329},{17,55,14},{46,63,313}, 
+{46,63,313},{46,63,313},{43,61,0},{63,21,1152},{34,63,45},{34,63,45},{19,55,1},{63,42,1152},{19,55,1},{63,51,545},{54,63,225},{51,63,1},{39,63,0},{63,51,545},{63,57,545},{39,63,0},{0,59,545},{63,57,545},{0,59,545},{36,0,1568},{36,0,1568},{36,0,1568},{36,0,1568},{31,63,85},{31,63,85},{31,63,85},{29,50,1},{15,57,1},{15,57,1},{45,63,6113},{43,63,3938},{40,63,3065}, +{38,63,1649},{43,63,6422},{35,63,1878},{34,63,365},{30,59,651},{26,63,4495},{16,60,318},{51,63,1698},{48,63,794},{48,63,394},{45,63,0},{63,39,2753},{41,63,1094},{38,63,106},{15,60,313},{47,59,2753},{15,60,313},{40,63,3065},{40,63,3065},{40,63,3065},{38,63,1649},{37,63,2610},{34,63,365},{34,63,365},{31,55,114},{20,63,1483},{19,57,14},{48,63,394},{48,63,394},{48,63,394},{45,63,0},{63,27,1152}, +{38,63,106},{38,63,106},{21,57,1},{63,45,1152},{21,57,1},{63,54,313},{55,63,130},{54,63,1},{45,63,0},{63,54,313},{62,59,313},{45,63,0},{0,60,313},{62,59,313},{0,60,313},{38,0,1568},{38,0,1568},{38,0,1568},{38,0,1568},{33,63,128},{33,63,128},{33,63,128},{31,52,1},{17,59,1},{17,59,1},{46,63,5677},{43,63,3954},{43,63,3225},{40,63,1764},{45,63,5985},{37,63,1830},{37,63,605}, +{32,60,410},{29,63,4159},{19,61,146},{53,63,1454},{51,63,770},{50,63,493},{47,63,25},{63,43,2273},{44,63,926},{41,63,194},{19,61,145},{63,53,2273},{19,61,145},{43,63,3225},{43,63,3225},{43,63,3225},{40,63,1764},{40,63,2818},{37,63,605},{37,63,605},{33,57,113},{23,63,1659},{21,59,14},{50,63,493},{50,63,493},{50,63,493},{47,63,25},{63,33,1152},{41,63,194},{41,63,194},{23,59,1},{47,56,1152}, +{23,59,1},{63,57,145},{58,63,58},{57,63,1},{50,63,1},{63,57,145},{63,60,145},{50,63,1},{0,61,145},{63,60,145},{0,61,145},{40,0,1568},{40,0,1568},{40,0,1568},{40,0,1568},{34,63,185},{34,63,185},{34,63,185},{33,54,0},{19,61,1},{19,61,1},{48,63,5379},{46,63,3978},{46,63,3449},{43,63,1924},{46,63,5542},{40,63,1870},{38,63,890},{34,61,225},{32,63,3919},{22,62,43},{54,63,1242}, 
+{53,63,809},{51,63,610},{49,63,101},{63,47,1878},{48,63,810},{44,63,314},{23,62,41},{63,55,1878},{23,62,41},{46,63,3449},{46,63,3449},{46,63,3449},{43,63,1924},{43,63,3058},{38,63,890},{38,63,890},{35,59,113},{27,63,1889},{23,61,14},{51,63,610},{51,63,610},{51,63,610},{49,63,101},{63,39,1152},{44,63,314},{44,63,314},{25,61,1},{47,59,1152},{25,61,1},{63,60,41},{60,63,17},{60,63,1}, +{56,63,1},{63,60,41},{62,62,41},{56,63,1},{0,62,41},{62,62,41},{0,62,41},{42,0,1568},{42,0,1568},{42,0,1568},{42,0,1568},{37,63,233},{37,63,233},{37,63,233},{35,56,0},{21,63,1},{21,63,1},{50,63,5170},{48,63,4080},{48,63,3680},{46,63,2195},{48,63,5200},{43,63,2023},{41,63,1269},{37,62,133},{35,63,3772},{25,63,17},{57,63,1088},{55,63,861},{54,63,720},{52,63,241},{63,51,1536}, +{51,63,768},{49,63,461},{28,63,0},{63,57,1536},{28,63,0},{48,63,3680},{48,63,3680},{48,63,3680},{46,63,2195},{46,63,3345},{41,63,1269},{41,63,1269},{37,61,114},{32,63,2200},{25,63,17},{54,63,720},{54,63,720},{54,63,720},{52,63,241},{61,49,1152},{49,63,461},{49,63,461},{28,63,0},{63,54,1152},{28,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0}, +{0,63,0},{63,63,0},{0,63,0},{44,0,1570},{44,0,1570},{44,0,1570},{44,0,1570},{40,63,317},{40,63,317},{40,63,317},{37,58,2},{25,63,17},{25,63,17},{51,63,4416},{50,63,3629},{48,63,3296},{47,63,2070},{50,63,4411},{46,63,1823},{43,63,1150},{39,63,50},{38,63,3132},{29,63,52},{57,63,768},{57,63,576},{56,63,505},{54,63,160},{63,53,1068},{54,63,544},{51,63,320},{33,63,1},{63,58,1068}, +{33,63,1},{48,63,3296},{48,63,3296},{48,63,3296},{47,63,2070},{46,63,2881},{43,63,1150},{43,63,1150},{39,62,42},{34,63,1846},{29,63,52},{56,63,505},{56,63,505},{56,63,505},{54,63,160},{63,48,800},{51,63,320},{51,63,320},{33,63,1},{62,56,800},{33,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{46,0,1570}, 
+{46,0,1570},{46,0,1570},{46,0,1570},{42,63,410},{42,63,410},{42,63,410},{39,60,2},{29,63,52},{29,63,52},{53,63,3826},{51,63,3136},{51,63,2880},{49,63,1961},{51,63,3648},{46,63,1615},{46,63,1086},{41,63,5},{41,63,2588},{32,63,116},{59,63,498},{57,63,384},{57,63,320},{56,63,116},{63,55,683},{55,63,342},{54,63,208},{39,63,1},{63,59,683},{39,63,1},{51,63,2880},{51,63,2880},{51,63,2880}, +{49,63,1961},{48,63,2448},{46,63,1086},{46,63,1086},{41,63,5},{37,63,1558},{32,63,116},{57,63,320},{57,63,320},{57,63,320},{56,63,116},{63,51,512},{54,63,208},{54,63,208},{39,63,1},{63,57,512},{39,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{48,0,1568},{48,0,1568},{48,0,1568},{48,0,1568},{45,63,514}, +{45,63,514},{45,63,514},{41,62,2},{32,63,116},{32,63,116},{54,63,3232},{53,63,2781},{53,63,2585},{51,63,1856},{53,63,3067},{48,63,1456},{48,63,1056},{43,63,10},{44,63,2140},{35,63,212},{60,63,272},{59,63,221},{59,63,185},{57,63,64},{63,57,384},{57,63,192},{55,63,125},{45,63,1},{63,60,384},{45,63,1},{53,63,2585},{53,63,2585},{53,63,2585},{51,63,1856},{51,63,2112},{48,63,1056},{48,63,1056}, +{43,63,10},{40,63,1334},{35,63,212},{59,63,185},{59,63,185},{59,63,185},{57,63,64},{63,54,288},{55,63,125},{55,63,125},{45,63,1},{62,59,288},{45,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{50,0,1568},{50,0,1568},{50,0,1568},{50,0,1568},{46,63,605},{46,63,605},{46,63,605},{43,63,10},{35,63,212}, +{35,63,212},{0,51,2665},{0,36,306},{0,26,5},{0,22,965},{0,34,5885},{0,22,3726},{0,21,1754},{0,14,4398},{0,16,6359},{0,13,4722},{0,51,2665},{0,36,306},{0,26,5},{0,22,965},{17,0,5885},{0,22,3726},{0,21,1754},{0,14,4398},{34,0,5885},{0,14,4398},{0,25,0},{0,25,0},{0,25,0},{0,12,1},{0,12,545},{0,11,212},{0,11,212},{0,6,337},{0,5,605},{0,5,374},{0,25,0}, 
+{0,25,0},{0,25,0},{0,12,1},{6,0,545},{0,11,212},{0,11,212},{0,6,337},{12,0,545},{0,6,337},{26,0,2665},{0,36,306},{0,26,5},{0,22,965},{26,0,2665},{51,0,2665},{0,22,965},{0,17,2665},{51,0,2665},{0,17,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,57,2665},{0,39,202},{0,29,13}, +{0,24,818},{0,39,6669},{0,25,3974},{0,22,1790},{0,16,4826},{0,18,7285},{0,16,5267},{0,57,2665},{0,39,202},{0,29,13},{0,24,818},{19,1,6669},{0,25,3974},{0,22,1790},{0,16,4826},{39,0,6669},{0,16,4826},{0,31,0},{0,31,0},{0,31,0},{0,15,1},{0,15,841},{0,12,337},{0,12,337},{0,7,493},{0,7,934},{0,7,574},{0,31,0},{0,31,0},{0,31,0},{0,15,1},{8,0,841}, +{0,12,337},{0,12,337},{0,7,493},{15,0,841},{0,7,493},{29,0,2665},{0,39,202},{1,28,2},{0,24,818},{29,0,2665},{57,0,2665},{0,24,818},{0,19,2665},{57,0,2665},{0,19,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,63,2665},{0,44,116},{1,31,66},{0,27,698},{0,42,7541},{0,28,4254},{0,25,1854}, +{0,18,5281},{0,19,8271},{0,16,5795},{0,63,2665},{0,44,116},{1,31,50},{0,27,698},{15,13,7538},{0,28,4254},{0,25,1854},{0,18,5281},{31,6,7538},{0,18,5281},{0,36,1},{0,36,1},{0,36,1},{0,18,1},{0,18,1201},{0,14,468},{0,14,468},{0,8,730},{0,8,1325},{0,8,830},{0,36,1},{0,36,1},{0,36,1},{0,18,1},{9,0,1201},{0,14,468},{0,14,468},{0,8,730},{18,0,1201}, +{0,8,730},{32,0,2665},{0,44,116},{3,30,2},{0,27,698},{32,0,2665},{63,0,2665},{0,27,698},{0,21,2665},{63,0,2665},{0,21,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,63,2781},{0,47,52},{1,33,148},{0,29,610},{0,46,8493},{0,31,4566},{0,27,1962},{0,18,5809},{0,22,9367},{0,18,6385},{2,63,2753}, 
+{0,47,52},{2,33,129},{0,29,610},{23,0,8493},{0,31,4566},{0,27,1962},{0,18,5809},{46,0,8493},{0,18,5809},{0,42,1},{0,42,1},{0,42,1},{0,21,1},{0,21,1625},{0,16,637},{0,16,637},{0,10,965},{0,10,1806},{0,10,1134},{0,42,1},{0,42,1},{0,42,1},{0,21,1},{11,0,1625},{0,16,637},{0,16,637},{0,10,965},{21,0,1625},{0,10,965},{35,0,2665},{0,47,52},{5,32,1}, +{0,29,610},{35,0,2665},{63,3,2665},{0,29,610},{0,23,2665},{63,3,2665},{0,23,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,63,3105},{0,51,10},{1,35,297},{0,32,481},{0,51,9670},{0,33,4965},{0,30,2120},{0,21,6413},{0,24,10749},{0,19,7191},{3,63,2989},{0,51,10},{2,36,257},{0,32,481},{25,1,9669}, +{0,33,4965},{0,30,2120},{0,21,6413},{47,2,9669},{0,21,6413},{0,49,0},{0,49,0},{0,49,0},{0,25,1},{0,25,2178},{0,19,850},{0,19,850},{0,11,1325},{0,11,2406},{0,10,1521},{0,49,0},{0,49,0},{0,49,0},{0,25,1},{12,1,2178},{0,19,850},{0,19,850},{0,11,1325},{25,0,2178},{0,11,1325},{38,0,2665},{0,51,10},{8,34,2},{0,32,481},{38,0,2665},{62,7,2665},{0,32,481}, +{0,25,2669},{62,7,2665},{0,25,2669},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,63,3437},{1,54,33},{3,37,425},{1,33,457},{0,57,9670},{0,36,4629},{0,31,1654},{0,24,6165},{0,26,11014},{0,22,7031},{6,63,3101},{2,53,10},{4,38,257},{1,33,441},{28,1,9669},{0,36,4629},{0,31,1654},{0,24,6165},{47,5,9669}, +{0,24,6165},{1,53,32},{1,53,32},{1,53,32},{1,27,32},{0,31,2178},{0,22,666},{0,22,666},{0,13,1160},{0,14,2534},{0,13,1449},{2,51,0},{2,51,0},{2,51,0},{2,27,1},{15,1,2178},{0,22,666},{0,22,666},{0,13,1160},{31,0,2178},{0,13,1160},{41,0,2665},{1,54,1},{10,36,2},{0,33,394},{41,0,2665},{62,10,2665},{0,33,394},{0,27,2669},{62,10,2665},{0,27,2669},{1,0,32}, 
+{1,0,32},{1,0,32},{1,0,32},{0,6,0},{0,6,0},{0,6,0},{0,3,0},{0,2,13},{0,2,13},{6,63,3917},{2,57,129},{3,40,609},{1,36,497},{0,63,9670},{0,39,4325},{0,33,1274},{0,25,5878},{0,28,11299},{0,24,6917},{8,63,3233},{4,55,10},{6,40,257},{3,35,441},{31,1,9669},{0,39,4325},{0,33,1274},{0,25,5878},{47,8,9669},{0,25,5878},{2,57,128},{2,57,128},{2,57,128}, +{2,30,129},{0,36,2180},{0,28,490},{0,28,490},{0,16,1018},{0,16,2691},{0,14,1441},{4,53,0},{4,53,0},{4,53,0},{4,29,1},{15,7,2178},{0,28,490},{0,28,490},{0,16,1018},{31,3,2178},{0,16,1018},{44,0,2665},{3,56,1},{12,38,2},{0,36,306},{44,0,2665},{62,13,2665},{0,36,306},{0,29,2669},{62,13,2665},{0,29,2669},{2,0,128},{2,0,128},{2,0,128},{2,0,128},{0,12,0}, +{0,12,0},{0,12,0},{0,6,0},{0,5,45},{0,5,45},{6,63,4541},{3,59,297},{5,42,865},{3,38,625},{2,63,9749},{0,45,4021},{0,36,914},{0,27,5581},{0,31,11611},{0,27,6877},{11,63,3377},{6,57,10},{8,42,257},{5,37,441},{34,0,9669},{0,45,4021},{0,36,914},{0,27,5581},{47,11,9669},{0,27,5581},{3,61,288},{3,61,288},{3,61,288},{3,32,288},{0,42,2180},{0,31,338},{0,31,338}, +{0,18,865},{0,19,2875},{0,16,1427},{6,55,0},{6,55,0},{6,55,0},{6,31,1},{15,13,2178},{0,31,338},{0,31,338},{0,18,865},{31,6,2178},{0,18,865},{47,0,2665},{5,58,1},{14,40,2},{0,38,225},{47,0,2665},{62,16,2665},{0,38,225},{0,31,2669},{62,16,2665},{0,31,2669},{3,0,288},{3,0,288},{3,0,288},{3,0,288},{0,18,0},{0,18,0},{0,18,0},{0,9,0},{0,8,109}, +{0,8,109},{9,63,5374},{4,62,570},{6,44,1269},{4,40,841},{3,63,10021},{0,47,3745},{0,39,593},{0,30,5294},{0,36,12029},{0,28,6810},{14,63,3558},{8,59,9},{10,44,254},{7,39,446},{37,1,9669},{0,47,3745},{0,39,593},{0,30,5294},{63,6,9669},{0,30,5294},{4,63,561},{4,63,561},{4,63,561},{4,35,546},{0,49,2178},{0,34,218},{0,34,218},{0,21,725},{0,22,3117},{0,19,1433},{8,58,1}, 
+{8,58,1},{8,58,1},{8,33,2},{24,1,2178},{0,34,218},{0,34,218},{0,21,725},{49,0,2178},{0,21,725},{50,0,2665},{7,60,1},{16,42,5},{0,41,157},{50,0,2665},{62,19,2665},{0,41,157},{0,33,2669},{62,19,2665},{0,33,2669},{4,0,545},{4,0,545},{4,0,545},{4,0,545},{0,25,0},{0,25,0},{0,25,0},{0,12,1},{0,11,212},{0,11,212},{11,63,6350},{6,63,905},{7,47,1678}, +{4,42,1102},{6,63,10453},{0,50,3485},{0,42,361},{0,32,5054},{0,36,12429},{0,31,6794},{15,63,3710},{10,61,9},{12,46,254},{9,41,446},{40,1,9669},{0,50,3485},{0,42,361},{0,32,5054},{63,9,9669},{0,32,5054},{6,63,901},{6,63,901},{6,63,901},{5,37,842},{0,55,2178},{0,38,125},{0,38,125},{0,24,629},{0,25,3365},{0,22,1489},{10,60,1},{10,60,1},{10,60,1},{10,35,2},{27,1,2178}, +{0,38,125},{0,38,125},{0,24,629},{55,0,2178},{0,24,629},{53,0,2665},{9,62,1},{18,44,5},{0,43,117},{53,0,2665},{62,22,2665},{0,43,117},{0,35,2669},{62,22,2665},{0,35,2669},{5,0,841},{5,0,841},{5,0,841},{5,0,841},{0,31,0},{0,31,0},{0,31,0},{0,15,1},{0,12,337},{0,12,337},{12,63,7350},{7,63,1450},{9,48,2190},{6,44,1422},{6,63,11045},{0,53,3293},{0,45,193}, +{0,35,4870},{0,39,12829},{0,32,6807},{17,63,3905},{12,63,9},{14,48,267},{11,43,446},{43,1,9669},{0,53,3293},{0,45,193},{0,35,4870},{63,12,9669},{0,35,4870},{7,63,1369},{7,63,1369},{7,63,1369},{6,40,1202},{0,61,2178},{0,42,53},{0,42,53},{0,25,520},{0,28,3645},{0,24,1573},{12,62,1},{12,62,1},{12,62,1},{12,37,2},{30,1,2178},{0,42,53},{0,42,53},{0,25,520},{61,0,2178}, +{0,25,520},{56,0,2665},{12,63,8},{20,46,5},{0,45,72},{56,0,2665},{62,25,2665},{0,45,72},{0,37,2669},{62,25,2665},{0,37,2669},{6,0,1201},{6,0,1201},{6,0,1201},{6,0,1201},{0,36,1},{0,36,1},{0,36,1},{0,18,1},{0,14,468},{0,14,468},{14,63,8614},{9,63,2129},{9,51,2758},{6,47,1822},{9,63,11765},{0,57,3125},{0,47,81},{0,36,4629},{0,42,13261},{0,35,6855},{20,63,4081}, 
+{15,63,49},{16,50,257},{13,45,446},{46,1,9669},{0,57,3125},{0,47,81},{0,36,4629},{63,15,9669},{0,36,4629},{9,63,1933},{9,63,1933},{9,63,1933},{7,42,1626},{1,63,2212},{0,45,13},{0,45,13},{0,27,421},{0,31,3957},{0,25,1673},{14,63,2},{14,63,2},{14,63,2},{14,39,2},{33,1,2178},{0,45,13},{0,45,13},{0,27,421},{63,2,2178},{0,27,421},{59,0,2665},{15,63,40},{22,48,2}, +{0,47,45},{59,0,2665},{62,28,2665},{0,47,45},{0,39,2669},{62,28,2665},{0,39,2669},{7,0,1625},{7,0,1625},{7,0,1625},{7,0,1625},{0,42,1},{0,42,1},{0,42,1},{0,21,1},{0,16,637},{0,16,637},{15,63,10085},{11,63,3185},{11,53,3481},{8,49,2337},{11,63,12845},{0,62,2958},{0,50,14},{0,38,4381},{0,45,13802},{0,36,6942},{23,63,4318},{17,63,154},{18,52,254},{14,48,456},{49,1,9669}, +{0,62,2958},{0,50,14},{0,38,4381},{46,27,9669},{0,38,4381},{10,63,2717},{10,63,2717},{10,63,2717},{8,45,2180},{3,63,2394},{0,49,0},{0,49,0},{0,30,317},{0,33,4314},{0,28,1811},{17,63,10},{17,63,10},{17,63,10},{16,41,1},{31,12,2178},{0,49,0},{0,49,0},{0,30,317},{62,6,2178},{0,30,317},{62,1,2665},{20,63,113},{24,50,4},{0,50,13},{62,1,2665},{63,31,2665},{0,50,13}, +{0,42,2669},{63,31,2665},{0,42,2669},{8,0,2180},{8,0,2180},{8,0,2180},{8,0,2180},{0,49,0},{0,49,0},{0,49,0},{0,25,1},{0,19,850},{0,19,850},{17,63,11454},{12,63,4143},{12,55,4141},{9,51,2805},{12,63,13803},{0,63,2871},{1,52,18},{0,41,4182},{0,50,14186},{0,39,6911},{26,63,4550},{20,63,306},{20,54,254},{17,49,446},{52,1,9669},{0,63,2870},{1,52,9},{0,41,4181},{46,30,9669}, +{0,41,4181},{12,63,3414},{12,63,3414},{12,63,3414},{9,47,2673},{3,63,2691},{1,52,14},{1,52,14},{0,32,245},{0,36,4587},{0,31,1906},{19,63,25},{19,63,25},{19,63,25},{18,43,1},{31,18,2178},{2,51,0},{2,51,0},{0,32,244},{62,9,2178},{0,32,244},{63,5,2665},{23,63,193},{26,52,4},{0,52,4},{63,5,2665},{63,34,2665},{0,52,4},{0,44,2669},{63,34,2665},{0,44,2669},{9,0,2669}, 
+{9,0,2669},{9,0,2669},{9,0,2669},{0,55,1},{0,55,1},{0,55,1},{0,28,2},{0,22,1009},{0,22,1009},{20,63,11990},{15,63,4575},{14,57,4141},{11,53,2805},{15,63,14195},{3,63,3015},{3,54,18},{0,43,4122},{0,53,13674},{0,41,6249},{29,63,4814},{23,63,522},{22,56,254},{19,51,446},{55,1,9669},{3,63,3006},{3,54,9},{0,43,4041},{47,32,9669},{0,43,4041},{14,63,3561},{14,63,3561},{14,63,3561}, +{11,49,2670},{6,63,2795},{3,54,14},{3,54,14},{2,34,245},{0,39,4227},{0,33,1470},{21,63,58},{21,63,58},{21,63,58},{20,45,1},{34,17,2178},{4,53,0},{4,53,0},{0,35,180},{62,12,2178},{0,35,180},{63,11,2665},{26,63,305},{28,54,4},{2,54,4},{63,11,2665},{63,37,2665},{2,54,4},{0,46,2669},{63,37,2665},{0,46,2669},{11,0,2669},{11,0,2669},{11,0,2669},{11,0,2669},{2,57,1}, +{2,57,1},{2,57,1},{2,30,2},{0,25,801},{0,25,801},{22,63,12554},{17,63,5066},{16,59,4118},{13,55,2805},{17,63,14614},{6,63,3255},{5,56,18},{1,45,4078},{0,56,13194},{0,44,5633},{31,63,5090},{26,63,802},{24,58,254},{21,53,446},{58,1,9669},{7,63,3198},{5,56,9},{0,45,3846},{53,32,9669},{0,45,3846},{15,63,3710},{15,63,3710},{15,63,3710},{13,51,2670},{9,63,2931},{5,56,14},{5,56,14}, +{4,36,245},{0,42,3899},{0,36,1110},{23,63,90},{23,63,90},{23,63,90},{22,47,1},{37,17,2178},{6,55,0},{6,55,0},{0,37,136},{62,15,2178},{0,37,136},{63,17,2665},{30,63,442},{30,56,4},{4,56,4},{63,17,2665},{63,40,2665},{4,56,4},{0,47,2677},{63,40,2665},{0,47,2677},{13,0,2669},{13,0,2669},{13,0,2669},{13,0,2669},{4,59,1},{4,59,1},{4,59,1},{4,31,5},{0,28,625}, +{0,28,625},{23,63,13130},{20,63,5706},{18,61,4122},{15,57,2807},{20,63,15102},{9,63,3625},{7,59,15},{3,47,4086},{0,59,12686},{0,47,5027},{34,63,5386},{29,63,1169},{26,60,257},{23,56,446},{62,0,9669},{12,63,3469},{7,59,11},{0,47,3658},{62,31,9669},{0,47,3658},{17,63,3905},{17,63,3905},{17,63,3905},{16,53,2677},{12,63,3112},{7,58,9},{7,58,9},{6,38,246},{0,45,3576},{0,38,755},{26,63,136}, 
+{26,63,136},{26,63,136},{24,49,4},{49,0,2178},{8,57,2},{8,57,2},{0,39,85},{62,18,2178},{0,39,85},{63,23,2665},{34,63,628},{32,59,2},{4,59,2},{63,23,2665},{63,43,2665},{4,59,2},{0,50,2669},{63,43,2665},{0,50,2669},{15,0,2677},{15,0,2677},{15,0,2677},{15,0,2677},{7,60,4},{7,60,4},{7,60,4},{7,33,5},{0,33,424},{0,33,424},{26,63,13650},{23,63,6378},{20,63,4122}, +{17,59,2799},{23,63,15558},{12,63,4065},{9,61,15},{6,49,4074},{0,62,12278},{0,49,4534},{36,63,5698},{30,63,1556},{28,62,257},{25,58,446},{63,4,9669},{15,63,3749},{9,61,11},{0,49,3510},{62,34,9669},{0,49,3510},{20,63,4041},{20,63,4041},{20,63,4041},{17,56,2670},{15,63,3304},{9,60,9},{9,60,9},{8,40,246},{0,50,3317},{0,41,499},{29,63,200},{29,63,200},{29,63,200},{26,51,4},{52,0,2178}, +{10,59,2},{10,59,2},{0,42,45},{62,21,2178},{0,42,45},{63,29,2665},{37,63,820},{34,61,2},{6,61,2},{63,29,2665},{63,46,2665},{6,61,2},{0,52,2669},{63,46,2665},{0,52,2669},{17,0,2669},{17,0,2669},{17,0,2669},{17,0,2669},{9,62,4},{9,62,4},{9,62,4},{9,35,5},{0,36,296},{0,36,296},{29,63,14234},{23,63,7050},{23,63,4242},{19,61,2799},{26,63,16046},{15,63,4601},{11,63,15}, +{8,51,4074},{0,63,12051},{0,50,4110},{37,63,6002},{34,63,1989},{31,63,270},{27,60,446},{63,10,9669},{20,63,4081},{11,63,11},{0,52,3374},{62,37,9669},{0,52,3374},{23,63,4241},{23,63,4241},{23,63,4241},{19,58,2670},{17,63,3485},{11,62,9},{11,62,9},{10,42,246},{0,53,3069},{0,43,306},{31,63,269},{31,63,269},{31,63,269},{28,53,4},{55,0,2178},{12,61,2},{12,61,2},{0,44,18},{62,24,2178}, +{0,44,18},{63,35,2665},{41,63,1037},{36,63,2},{8,63,2},{63,35,2665},{63,49,2665},{8,63,2},{0,54,2669},{63,49,2665},{0,54,2669},{19,0,2669},{19,0,2669},{19,0,2669},{19,0,2669},{11,63,5},{11,63,5},{11,63,5},{11,37,5},{0,40,193},{0,40,193},{31,63,13639},{26,63,7005},{25,63,4454},{21,62,2721},{28,63,15204},{17,63,4285},{13,63,24},{9,52,3457},{0,63,11020},{0,52,3109},{40,63,5381}, 
+{34,63,1890},{33,63,346},{29,61,296},{62,16,8712},{23,63,3636},{14,63,2},{0,53,2676},{62,39,8712},{0,53,2676},{25,63,4454},{25,63,4454},{25,63,4454},{21,60,2670},{19,63,3707},{13,63,24},{13,63,24},{12,44,246},{0,56,2853},{0,45,153},{33,63,346},{33,63,346},{33,63,346},{30,55,4},{58,0,2178},{14,63,2},{14,63,2},{0,46,10},{62,27,2178},{0,46,10},{63,39,2178},{44,63,872},{39,63,1}, +{14,63,1},{63,39,2178},{47,59,2178},{14,63,1},{0,55,2180},{47,59,2178},{0,55,2180},{21,0,2669},{21,0,2669},{21,0,2669},{21,0,2669},{13,63,20},{13,63,20},{13,63,20},{13,39,5},{0,45,104},{0,45,104},{33,63,12766},{29,63,6930},{26,63,4694},{24,63,2685},{29,63,14014},{20,63,3898},{16,63,101},{13,53,2722},{3,63,10057},{0,54,2129},{43,63,4689},{37,63,1718},{36,63,452},{32,62,164},{63,19,7578}, +{26,63,3078},{18,63,17},{0,55,1905},{63,41,7578},{0,55,1905},{26,63,4694},{26,63,4694},{26,63,4694},{24,61,2670},{22,63,3960},{16,63,101},{16,63,101},{14,47,242},{0,59,2650},{0,48,49},{36,63,452},{36,63,452},{36,63,452},{33,57,4},{53,16,2178},{18,63,17},{18,63,17},{0,49,1},{63,30,2178},{0,49,1},{63,42,1625},{46,63,650},{42,63,0},{21,63,0},{63,42,1625},{62,53,1625},{21,63,0}, +{0,56,1625},{62,53,1625},{0,56,1625},{24,0,2669},{24,0,2669},{24,0,2669},{24,0,2669},{15,63,50},{15,63,50},{15,63,50},{15,42,2},{0,48,40},{0,48,40},{34,63,11970},{31,63,6969},{29,63,4878},{26,63,2670},{31,63,13161},{21,63,3638},{18,63,229},{15,55,2146},{6,63,9313},{0,56,1410},{43,63,4097},{40,63,1614},{37,63,541},{34,62,69},{58,32,6661},{29,63,2654},{21,63,65},{0,56,1346},{52,48,6661}, +{0,56,1346},{29,63,4878},{29,63,4878},{29,63,4878},{26,63,2670},{23,63,4206},{18,63,229},{18,63,229},{15,49,242},{0,62,2506},{0,51,9},{37,63,541},{37,63,541},{37,63,541},{35,59,4},{56,16,2178},{21,63,65},{21,63,65},{2,51,1},{62,33,2178},{2,51,1},{63,45,1201},{48,63,481},{45,63,0},{27,63,0},{63,45,1201},{63,54,1201},{27,63,0},{0,57,1201},{63,54,1201},{0,57,1201},{26,0,2669}, 
+{26,0,2669},{26,0,2669},{26,0,2669},{17,63,85},{17,63,85},{17,63,85},{16,44,4},{0,51,8},{0,51,8},{37,63,11370},{33,63,6958},{31,63,5145},{28,63,2718},{34,63,12263},{23,63,3410},{21,63,405},{17,56,1665},{9,63,8665},{0,57,905},{46,63,3585},{43,63,1574},{40,63,637},{36,63,20},{63,27,5829},{32,63,2294},{26,63,130},{0,57,901},{63,45,5829},{0,57,901},{31,63,5145},{31,63,5145},{31,63,5145}, +{28,63,2718},{26,63,4430},{21,63,405},{21,63,405},{18,50,246},{0,63,2520},{2,53,9},{40,63,637},{40,63,637},{40,63,637},{37,61,4},{59,16,2178},{26,63,130},{26,63,130},{4,53,1},{62,36,2178},{4,53,1},{63,48,841},{51,63,337},{48,63,1},{33,63,0},{63,48,841},{62,56,841},{33,63,0},{0,58,841},{62,56,841},{0,58,841},{28,0,2669},{28,0,2669},{28,0,2669},{28,0,2669},{20,63,117}, +{20,63,117},{20,63,117},{18,46,4},{1,54,1},{1,54,1},{37,63,10794},{34,63,6895},{34,63,5374},{30,63,2813},{36,63,11574},{26,63,3274},{23,63,622},{19,57,1222},{13,63,8106},{1,59,562},{48,63,3170},{43,63,1494},{43,63,765},{39,63,4},{63,31,5082},{34,63,1998},{29,63,218},{0,59,554},{63,47,5082},{0,59,554},{34,63,5374},{34,63,5374},{34,63,5374},{30,63,2813},{29,63,4686},{23,63,622},{23,63,622}, +{20,52,246},{4,63,2714},{4,55,9},{43,63,765},{43,63,765},{43,63,765},{39,63,4},{62,16,2178},{29,63,218},{29,63,218},{6,55,1},{62,39,2178},{6,55,1},{63,51,545},{54,63,225},{51,63,1},{39,63,0},{63,51,545},{63,57,545},{39,63,0},{0,59,545},{63,57,545},{0,59,545},{30,0,2669},{30,0,2669},{30,0,2669},{30,0,2669},{22,63,180},{22,63,180},{22,63,180},{21,47,5},{3,56,1}, +{3,56,1},{40,63,10197},{37,63,6930},{36,63,5678},{32,63,2993},{37,63,10780},{29,63,3229},{26,63,945},{21,59,853},{17,63,7593},{4,60,293},{48,63,2756},{46,63,1454},{45,63,914},{41,63,37},{63,35,4344},{38,63,1740},{32,63,360},{2,60,289},{63,49,4344},{2,60,289},{36,63,5678},{36,63,5678},{36,63,5678},{32,63,2993},{31,63,5067},{26,63,945},{26,63,945},{22,55,242},{9,63,2979},{6,57,10},{45,63,914}, 
+{45,63,914},{45,63,914},{41,63,37},{63,21,2178},{32,63,360},{32,63,360},{8,57,1},{63,42,2178},{8,57,1},{63,54,288},{55,63,125},{54,63,0},{45,63,1},{63,54,288},{62,59,288},{45,63,1},{0,60,288},{62,59,288},{0,60,288},{32,0,2669},{32,0,2669},{32,0,2669},{32,0,2669},{25,63,250},{25,63,250},{25,63,250},{23,50,2},{5,59,2},{5,59,2},{43,63,9837},{40,63,7042},{37,63,5945}, +{34,63,3198},{40,63,10204},{32,63,3344},{29,63,1289},{24,59,589},{20,63,7225},{7,61,130},{51,63,2436},{48,63,1460},{48,63,1060},{43,63,122},{63,39,3779},{41,63,1580},{37,63,505},{6,61,129},{47,59,3779},{6,61,129},{37,63,5945},{37,63,5945},{37,63,5945},{34,63,3198},{34,63,5304},{29,63,1289},{29,63,1289},{24,57,242},{13,63,3261},{8,59,10},{48,63,1060},{48,63,1060},{48,63,1060},{43,63,122},{63,27,2178}, +{37,63,505},{37,63,505},{10,59,1},{63,45,2178},{10,59,1},{63,57,128},{58,63,53},{57,63,0},{51,63,0},{63,57,128},{63,60,128},{51,63,0},{0,61,128},{63,60,128},{0,61,128},{34,0,2669},{34,0,2669},{34,0,2669},{34,0,2669},{27,63,337},{27,63,337},{27,63,337},{25,52,2},{7,61,2},{7,61,2},{43,63,9437},{40,63,7154},{40,63,6193},{37,63,3454},{42,63,9783},{34,63,3398},{32,63,1740}, +{26,61,397},{23,63,6953},{9,62,37},{53,63,2246},{51,63,1508},{50,63,1213},{46,63,250},{63,43,3299},{44,63,1484},{40,63,673},{10,62,33},{63,53,3299},{10,62,33},{40,63,6193},{40,63,6193},{40,63,6193},{37,63,3454},{37,63,5624},{32,63,1740},{32,63,1740},{26,59,242},{17,63,3589},{10,61,10},{50,63,1213},{50,63,1213},{50,63,1213},{46,63,250},{63,33,2178},{40,63,673},{40,63,673},{12,61,1},{47,56,2178}, +{12,61,1},{63,60,32},{61,63,13},{60,63,0},{57,63,0},{63,60,32},{62,62,32},{57,63,0},{0,62,32},{62,62,32},{0,62,32},{36,0,2669},{36,0,2669},{36,0,2669},{36,0,2669},{29,63,405},{29,63,405},{29,63,405},{27,54,2},{9,63,2},{9,63,2},{46,63,9141},{43,63,7234},{43,63,6505},{40,63,3806},{43,63,9340},{37,63,3622},{34,63,2149},{27,62,277},{27,63,6772},{12,63,10},{54,63,2052}, 
+{53,63,1601},{51,63,1348},{48,63,436},{63,47,2904},{47,63,1452},{44,63,872},{14,63,1},{63,55,2904},{14,63,1},{43,63,6505},{43,63,6505},{43,63,6505},{40,63,3806},{40,63,5976},{34,63,2149},{34,63,2149},{28,61,242},{21,63,3955},{12,63,10},{51,63,1348},{51,63,1348},{51,63,1348},{48,63,436},{63,39,2178},{44,63,872},{44,63,872},{14,63,1},{47,59,2178},{14,63,1},{63,63,0},{63,63,0},{63,63,0}, +{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{38,0,2669},{38,0,2669},{38,0,2669},{38,0,2669},{32,63,520},{32,63,520},{32,63,520},{29,56,2},{12,63,10},{12,63,10},{46,63,8097},{46,63,6510},{43,63,5893},{41,63,3605},{46,63,8074},{37,63,3244},{37,63,2019},{30,62,129},{30,63,5794},{16,63,58},{56,63,1563},{54,63,1161},{54,63,1017},{51,63,337},{63,49,2166}, +{49,63,1083},{46,63,650},{21,63,0},{63,56,2166},{21,63,0},{43,63,5893},{43,63,5893},{43,63,5893},{41,63,3605},{40,63,5238},{37,63,2019},{37,63,2019},{30,62,113},{24,63,3401},{16,63,58},{54,63,1017},{54,63,1017},{54,63,1017},{51,63,337},{63,42,1625},{46,63,650},{46,63,650},{21,63,0},{62,53,1625},{21,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0}, +{0,63,0},{63,63,0},{0,63,0},{40,0,2665},{40,0,2665},{40,0,2665},{40,0,2665},{34,63,610},{34,63,610},{34,63,610},{31,58,0},{16,63,58},{16,63,58},{48,63,7165},{46,63,5854},{46,63,5325},{43,63,3434},{46,63,7050},{40,63,2932},{37,63,1955},{32,62,57},{32,63,5021},{20,63,117},{57,63,1137},{54,63,889},{54,63,745},{52,63,250},{63,51,1601},{51,63,801},{48,63,481},{27,63,0},{63,57,1601}, +{27,63,0},{46,63,5325},{46,63,5325},{46,63,5325},{43,63,3434},{43,63,4622},{37,63,1955},{37,63,1955},{32,62,41},{27,63,2977},{20,63,117},{54,63,745},{54,63,745},{54,63,745},{52,63,250},{63,45,1201},{48,63,481},{48,63,481},{27,63,0},{63,54,1201},{27,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{42,0,2665}, 
+{42,0,2665},{42,0,2665},{42,0,2665},{37,63,730},{37,63,730},{37,63,730},{33,60,1},{20,63,117},{20,63,117},{50,63,6415},{48,63,5277},{48,63,4877},{45,63,3330},{48,63,6117},{43,63,2716},{40,63,1843},{35,63,10},{35,63,4341},{23,63,205},{57,63,801},{57,63,609},{56,63,530},{54,63,169},{63,53,1121},{52,63,571},{51,63,337},{33,63,0},{63,58,1121},{33,63,0},{48,63,4877},{48,63,4877},{48,63,4877}, +{45,63,3330},{45,63,4146},{40,63,1843},{40,63,1843},{35,63,10},{30,63,2617},{23,63,205},{56,63,530},{56,63,530},{56,63,530},{54,63,169},{63,48,841},{51,63,337},{51,63,337},{33,63,0},{62,56,841},{33,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{44,0,2665},{44,0,2665},{44,0,2665},{44,0,2665},{39,63,865}, +{39,63,865},{39,63,865},{35,62,1},{23,63,205},{23,63,205},{51,63,5637},{50,63,4826},{48,63,4445},{47,63,3189},{48,63,5365},{43,63,2524},{43,63,1795},{37,63,5},{37,63,3750},{27,63,320},{59,63,531},{57,63,401},{57,63,337},{55,63,122},{63,55,726},{55,63,363},{54,63,225},{39,63,0},{63,59,726},{39,63,0},{48,63,4445},{48,63,4445},{48,63,4445},{47,63,3189},{46,63,3654},{43,63,1795},{43,63,1795}, +{37,63,5},{32,63,2329},{27,63,320},{57,63,337},{57,63,337},{57,63,337},{55,63,122},{63,51,545},{54,63,225},{54,63,225},{39,63,0},{63,57,545},{39,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{46,0,2665},{46,0,2665},{46,0,2665},{46,0,2665},{40,63,1010},{40,63,1010},{40,63,1010},{37,63,5},{27,63,320}, +{27,63,320},{3,63,10504},{0,62,1552},{0,44,169},{0,38,3866},{0,57,18065},{0,39,12152},{0,35,6099},{0,24,13992},{0,26,19423},{0,22,14922},{6,63,10216},{0,62,1552},{0,44,169},{0,38,3866},{20,17,18065},{0,39,12152},{0,35,6099},{0,24,13992},{57,0,18065},{0,24,13992},{0,35,0},{0,35,0},{0,35,0},{0,17,1},{0,17,1105},{0,14,424},{0,14,424},{0,8,666},{0,8,1217},{0,7,766},{0,35,0}, 
+{0,35,0},{0,35,0},{0,17,1},{9,0,1105},{0,14,424},{0,14,424},{0,8,666},{17,0,1105},{0,8,666},{34,17,9248},{0,62,1552},{0,44,169},{0,38,3866},{34,17,9248},{62,12,9248},{0,38,3866},{0,28,9256},{62,12,9248},{0,28,9256},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{5,63,11298},{0,63,1341},{0,47,65}, +{0,38,3578},{0,61,19334},{0,42,12584},{0,36,6093},{0,24,14696},{0,28,20850},{0,24,15720},{6,63,10792},{0,63,1341},{0,47,65},{0,38,3578},{30,1,19334},{0,42,12584},{0,36,6093},{0,24,14696},{61,0,19334},{0,24,14696},{0,41,0},{0,41,0},{0,41,0},{0,20,1},{0,20,1513},{0,16,585},{0,16,585},{0,10,901},{0,9,1681},{0,8,1070},{0,41,0},{0,41,0},{0,41,0},{0,20,1},{10,1,1513}, +{0,16,585},{0,16,585},{0,10,901},{20,0,1513},{0,10,901},{37,17,9248},{0,63,1341},{0,47,65},{0,38,3578},{37,17,9248},{62,15,9248},{0,38,3578},{0,30,9256},{62,15,9248},{0,30,9256},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{6,63,12200},{0,63,1325},{0,49,9},{0,41,3298},{0,63,20705},{0,42,13032},{0,38,6147}, +{0,27,15400},{0,31,22426},{0,25,16626},{9,63,11512},{0,63,1325},{0,49,9},{0,41,3298},{31,3,20689},{0,42,13032},{0,38,6147},{0,27,15400},{63,1,20689},{0,27,15400},{0,47,0},{0,47,0},{0,47,0},{0,23,1},{0,23,1985},{0,19,769},{0,19,769},{0,11,1202},{0,11,2193},{0,10,1374},{0,47,0},{0,47,0},{0,47,0},{0,23,1},{12,0,1985},{0,19,769},{0,19,769},{0,11,1202},{23,0,1985}, +{0,11,1202},{48,1,9248},{0,63,1325},{0,49,9},{0,41,3298},{48,1,9248},{63,17,9248},{0,41,3298},{0,32,9250},{63,17,9248},{0,32,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{6,63,13288},{0,63,1565},{0,51,4},{0,44,3050},{2,63,22214},{0,45,13496},{0,41,6227},{0,28,16225},{0,31,24090},{0,27,17528},{9,63,12344}, 
+{0,63,1565},{0,51,4},{0,44,3050},{34,1,22129},{0,45,13496},{0,41,6227},{0,28,16225},{63,3,22129},{0,28,16225},{0,53,0},{0,53,0},{0,53,0},{0,26,1},{0,26,2521},{0,22,985},{0,22,985},{0,13,1517},{0,11,2801},{0,11,1766},{0,53,0},{0,53,0},{0,53,0},{0,26,1},{13,1,2521},{0,22,985},{0,22,985},{0,13,1517},{26,0,2521},{0,13,1517},{51,1,9248},{1,63,1552},{1,51,0}, +{0,44,3050},{51,1,9248},{63,20,9248},{0,44,3050},{0,34,9250},{63,20,9248},{0,34,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{6,63,14818},{1,63,2106},{0,54,58},{0,47,2792},{3,63,24091},{0,50,14075},{0,42,6341},{0,30,17106},{0,33,26067},{0,28,18692},{12,63,13474},{3,63,2077},{1,54,50},{0,47,2792},{31,12,23851}, +{0,50,14075},{0,42,6341},{0,30,17106},{62,6,23851},{0,30,17106},{0,59,1},{0,59,1},{0,59,1},{0,30,0},{0,30,3200},{0,25,1258},{0,25,1258},{0,13,1940},{0,14,3542},{0,13,2229},{0,59,1},{0,59,1},{0,59,1},{0,30,0},{15,0,3200},{0,25,1258},{0,25,1258},{0,13,1940},{30,0,3200},{0,13,1940},{55,0,9248},{6,63,1885},{3,53,2},{0,47,2792},{55,0,9248},{62,24,9248},{0,47,2792}, +{0,36,9256},{62,24,9248},{0,36,9256},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{9,63,16258},{3,63,2813},{1,56,141},{0,49,2561},{4,63,25971},{0,50,14619},{0,45,6453},{0,32,17996},{0,36,27923},{0,30,19698},{12,63,14594},{3,63,2669},{2,56,122},{0,49,2561},{38,1,25472},{0,50,14619},{0,45,6453},{0,32,17996},{45,16,25472}, +{0,32,17996},{0,63,9},{0,63,9},{0,63,9},{0,33,1},{0,33,3872},{0,25,1530},{0,25,1530},{0,16,2378},{0,14,4294},{0,14,2717},{0,63,9},{0,63,9},{0,63,9},{0,33,1},{16,1,3872},{0,25,1530},{0,25,1530},{0,16,2378},{33,0,3872},{0,16,2378},{58,0,9248},{9,63,2205},{5,55,2},{0,49,2561},{58,0,9248},{62,27,9248},{0,49,2561},{0,38,9256},{62,27,9248},{0,38,9256},{0,0,0}, 
+{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{9,63,17750},{3,63,3617},{1,58,254},{0,50,2366},{6,63,27563},{0,53,14891},{0,47,6385},{0,32,18616},{0,39,29523},{0,32,20465},{15,63,15654},{6,63,3425},{2,59,206},{0,50,2366},{41,0,26744},{0,53,14891},{0,47,6385},{0,32,18616},{62,10,26744},{0,32,18616},{1,63,101},{1,63,101},{1,63,101}, +{0,36,5},{0,36,4420},{0,28,1666},{0,28,1666},{0,16,2642},{0,16,4931},{0,16,3083},{2,63,72},{2,63,72},{2,63,72},{1,35,4},{15,7,4418},{0,28,1666},{0,28,1666},{0,16,2642},{31,3,4418},{0,16,2642},{61,0,9248},{13,63,2554},{7,57,2},{0,50,2362},{61,0,9248},{62,30,9248},{0,50,2362},{0,40,9256},{62,30,9248},{0,40,9256},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,2,0}, +{0,2,0},{0,2,0},{0,1,0},{0,1,1},{0,1,1},{12,63,19046},{4,63,4598},{3,60,382},{0,52,2237},{6,63,28187},{0,59,14347},{0,50,5579},{0,35,18104},{0,39,29955},{0,33,20313},{17,63,16091},{7,63,4046},{4,61,206},{0,52,2237},{44,0,26744},{0,59,14347},{0,50,5579},{0,35,18104},{62,13,26744},{0,35,18104},{2,63,264},{2,63,264},{2,63,264},{1,38,52},{0,42,4420},{0,33,1381},{0,33,1381}, +{0,18,2405},{0,19,5115},{0,18,2981},{4,63,117},{4,63,117},{4,63,117},{3,37,4},{15,13,4418},{0,33,1381},{0,33,1381},{0,18,2405},{31,6,4418},{0,18,2405},{63,2,9248},{17,63,2938},{9,59,2},{0,52,2137},{63,2,9248},{46,41,9248},{0,52,2137},{0,42,9256},{46,41,9248},{0,42,9256},{1,0,52},{1,0,52},{1,0,52},{1,0,52},{0,8,0},{0,8,0},{0,8,0},{0,4,0},{0,3,20}, +{0,3,20},{12,63,20585},{6,63,5786},{4,63,625},{1,56,2246},{9,63,29012},{0,62,13736},{0,53,4760},{0,38,17595},{0,45,30452},{0,36,20086},{20,63,16620},{9,63,4794},{6,63,205},{1,56,2230},{39,16,26744},{0,62,13736},{0,53,4760},{0,38,17595},{62,16,26744},{0,38,17595},{4,63,589},{4,63,589},{4,63,589},{2,41,185},{0,49,4418},{0,36,1097},{0,36,1097},{0,21,2153},{0,22,5357},{0,21,2937},{6,63,169}, 
+{6,63,169},{6,63,169},{5,39,2},{24,1,4418},{0,36,1097},{0,36,1097},{0,21,2153},{49,0,4418},{0,21,2153},{59,16,9248},{20,63,3380},{11,62,4},{0,55,1901},{59,16,9248},{62,36,9248},{0,55,1901},{0,45,9250},{62,36,9248},{0,45,9250},{2,0,185},{2,0,185},{2,0,185},{2,0,185},{0,14,1},{0,14,1},{0,14,1},{0,7,1},{0,5,72},{0,5,72},{15,63,21605},{9,63,6850},{5,63,978}, +{2,57,2226},{11,63,29435},{0,63,12990},{0,55,3962},{0,41,16835},{0,45,30392},{0,38,19516},{22,63,16694},{12,63,5218},{9,63,225},{4,56,2130},{49,1,26259},{0,63,12990},{0,55,3962},{0,41,16835},{46,27,26259},{0,41,16835},{5,63,978},{5,63,978},{5,63,978},{3,43,370},{0,55,4418},{0,39,881},{0,39,881},{0,24,1945},{0,25,5605},{0,22,2889},{9,63,225},{9,63,225},{9,63,225},{7,41,2},{27,1,4418}, +{0,39,881},{0,39,881},{0,24,1945},{55,0,4418},{0,24,1945},{63,14,8978},{23,63,3592},{14,63,1},{0,58,1625},{63,14,8978},{46,47,8978},{0,58,1625},{0,46,8986},{46,47,8978},{0,46,8986},{3,0,369},{3,0,369},{3,0,369},{3,0,369},{0,20,0},{0,20,0},{0,20,0},{0,10,1},{0,8,136},{0,8,136},{15,63,21141},{9,63,7026},{6,63,1481},{3,58,2034},{12,63,28216},{0,63,11406},{0,56,2868}, +{0,41,14915},{0,48,28876},{0,39,17854},{23,63,15352},{15,63,4866},{11,63,306},{6,58,1746},{52,0,24371},{0,63,11406},{0,56,2868},{0,41,14915},{62,21,24371},{0,41,14915},{6,63,1481},{6,63,1481},{6,63,1481},{4,46,617},{0,61,4418},{0,45,689},{0,45,689},{0,27,1769},{0,28,5885},{0,24,2889},{11,63,306},{11,63,306},{11,63,306},{9,43,2},{30,1,4418},{0,45,689},{0,45,689},{0,27,1769},{61,0,4418}, +{0,27,1769},{55,32,7938},{26,63,3176},{16,63,1},{0,58,1129},{55,32,7938},{46,48,7938},{0,58,1129},{0,47,7946},{46,48,7938},{0,47,7946},{4,0,617},{4,0,617},{4,0,617},{4,0,617},{0,26,0},{0,26,0},{0,26,0},{0,13,1},{0,11,232},{0,11,232},{17,63,20849},{9,63,7458},{8,63,2106},{4,59,1970},{12,63,27224},{0,63,10078},{0,58,1986},{0,42,13214},{0,50,27357},{0,41,16276},{26,63,14168}, 
+{17,63,4556},{14,63,394},{9,58,1410},{47,14,22568},{0,63,10078},{0,58,1986},{0,42,13214},{62,23,22568},{0,42,13214},{8,63,2106},{8,63,2106},{8,63,2106},{5,48,930},{1,63,4452},{0,47,521},{0,47,521},{0,28,1600},{0,31,6197},{0,27,2921},{14,63,394},{14,63,394},{14,63,394},{11,45,2},{33,1,4418},{0,47,521},{0,47,521},{0,28,1600},{63,2,4418},{0,28,1600},{56,33,6962},{29,63,2792},{19,63,1}, +{0,59,740},{56,33,6962},{49,48,6962},{0,59,740},{0,48,6964},{49,48,6962},{0,48,6964},{5,0,929},{5,0,929},{5,0,929},{5,0,929},{0,32,0},{0,32,0},{0,32,0},{0,16,0},{0,14,360},{0,14,360},{17,63,20651},{12,63,7922},{9,63,2921},{5,60,2052},{15,63,26270},{0,63,8890},{0,59,1154},{0,44,11309},{0,53,25875},{0,42,14658},{26,63,12890},{20,63,4254},{16,63,493},{10,59,1076},{56,0,20642}, +{0,63,8890},{0,59,1154},{0,44,11309},{62,25,20642},{0,44,11309},{9,63,2921},{9,63,2921},{9,63,2921},{6,51,1360},{3,63,4634},{0,50,346},{0,50,346},{0,31,1402},{0,33,6554},{0,28,2987},{16,63,493},{16,63,493},{16,63,493},{13,48,1},{31,12,4418},{0,50,346},{0,50,346},{0,31,1402},{62,6,4418},{0,31,1402},{63,23,5941},{30,63,2386},{23,63,1},{0,61,388},{63,23,5941},{63,43,5941},{0,61,388}, +{0,49,5945},{63,43,5941},{0,49,5945},{6,0,1360},{6,0,1360},{6,0,1360},{6,0,1360},{0,39,0},{0,39,0},{0,39,0},{0,19,1},{0,16,522},{0,16,522},{20,63,20683},{12,63,8578},{11,63,3792},{7,60,2241},{15,63,25566},{1,63,8090},{0,59,642},{0,45,9834},{0,53,24595},{0,44,13350},{29,63,11794},{20,63,4030},{17,63,610},{13,59,804},{58,0,19021},{3,63,8050},{0,59,642},{0,45,9834},{62,27,19021}, +{0,45,9834},{11,63,3792},{11,63,3792},{11,63,3792},{8,53,1808},{3,63,4954},{0,55,232},{0,55,232},{0,33,1241},{0,36,6922},{0,31,3051},{17,63,610},{17,63,610},{17,63,610},{15,50,1},{31,18,4418},{0,55,232},{0,55,232},{0,33,1241},{62,9,4418},{0,33,1241},{63,26,5101},{34,63,2050},{26,63,1},{0,61,164},{63,26,5101},{62,45,5101},{0,61,164},{0,50,5105},{62,45,5101},{0,50,5105},{7,0,1808}, 
+{7,0,1808},{7,0,1808},{7,0,1808},{0,45,0},{0,45,0},{0,45,0},{0,22,1},{0,19,706},{0,19,706},{20,63,20715},{15,63,9258},{12,63,4729},{8,61,2553},{17,63,25067},{3,63,7474},{0,61,264},{0,47,8373},{0,56,23451},{0,45,12138},{31,63,10854},{23,63,3766},{20,63,698},{16,60,594},{60,0,17485},{6,63,7274},{0,61,264},{0,47,8373},{56,32,17485},{0,47,8373},{12,63,4729},{12,63,4729},{12,63,4729}, +{8,56,2320},{6,63,5386},{0,59,130},{0,59,130},{0,35,1076},{0,39,7322},{0,33,3161},{20,63,698},{20,63,698},{20,63,698},{17,51,2},{34,17,4418},{0,59,130},{0,59,130},{0,35,1076},{62,12,4418},{0,35,1076},{63,29,4325},{35,63,1733},{29,63,1},{0,62,41},{63,29,4325},{63,46,4325},{0,62,41},{0,51,4329},{63,46,4325},{0,51,4329},{8,0,2320},{8,0,2320},{8,0,2320},{8,0,2320},{0,50,1}, +{0,50,1},{0,50,1},{0,25,1},{0,19,914},{0,19,914},{20,63,21003},{15,63,10106},{14,63,5840},{9,62,2993},{17,63,24683},{3,63,7010},{0,62,74},{0,49,7113},{0,56,22427},{0,47,11094},{33,63,9941},{26,63,3566},{23,63,818},{17,61,402},{62,0,16034},{9,63,6562},{0,62,74},{0,49,7113},{62,31,16034},{0,49,7113},{14,63,5840},{14,63,5840},{14,63,5840},{10,58,2896},{6,63,5962},{0,62,58},{0,62,58}, +{0,38,932},{0,42,7754},{0,35,3317},{23,63,818},{23,63,818},{23,63,818},{19,53,2},{37,17,4418},{0,62,58},{0,62,58},{0,38,932},{62,15,4418},{0,38,932},{63,32,3613},{38,63,1445},{32,63,1},{0,63,0},{63,32,3613},{61,48,3613},{0,63,0},{0,52,3617},{61,48,3613},{0,52,3617},{9,0,2896},{9,0,2896},{9,0,2896},{9,0,2896},{0,56,1},{0,56,1},{0,56,1},{0,28,1},{0,22,1130}, +{0,22,1130},{23,63,21401},{17,63,11165},{15,63,7141},{10,63,3641},{20,63,24533},{6,63,6762},{0,63,81},{0,49,5745},{0,59,21333},{0,47,9996},{34,63,8897},{29,63,3396},{26,63,976},{20,61,224},{56,16,14504},{12,63,5834},{1,63,68},{0,49,5745},{62,33,14504},{0,49,5745},{15,63,7141},{15,63,7141},{15,63,7141},{11,61,3617},{9,63,6772},{0,63,81},{0,63,81},{0,41,794},{0,45,8260},{0,38,3515},{26,63,976}, 
+{26,63,976},{26,63,976},{21,56,0},{49,0,4418},{1,63,68},{1,63,68},{0,41,794},{62,18,4418},{0,41,794},{63,35,2888},{41,63,1156},{35,63,0},{7,63,0},{63,35,2888},{63,49,2888},{7,63,0},{0,53,2896},{63,49,2888},{0,53,2896},{11,0,3617},{11,0,3617},{11,0,3617},{11,0,3617},{0,63,0},{0,63,0},{0,63,0},{0,32,1},{0,25,1413},{0,25,1413},{23,63,21913},{17,63,12317},{17,63,8473}, +{12,63,4330},{20,63,24437},{6,63,6650},{2,63,298},{0,52,4721},{0,62,20509},{0,49,9157},{37,63,8153},{29,63,3268},{28,63,1129},{23,61,128},{63,6,13235},{15,63,5258},{6,63,145},{0,52,4721},{62,35,13235},{0,52,4721},{17,63,8473},{17,63,8473},{17,63,8473},{12,63,4330},{9,63,7636},{2,63,298},{2,63,298},{0,42,689},{0,45,8740},{0,39,3689},{28,63,1129},{28,63,1129},{28,63,1129},{23,58,0},{52,0,4418}, +{6,63,145},{6,63,145},{0,42,689},{62,21,4418},{0,42,689},{63,38,2312},{43,63,925},{38,63,0},{13,63,0},{63,38,2312},{62,51,2312},{13,63,0},{0,54,2320},{62,51,2312},{0,54,2320},{12,0,4329},{12,0,4329},{12,0,4329},{12,0,4329},{1,63,52},{1,63,52},{1,63,52},{0,34,1},{0,28,1693},{0,28,1693},{26,63,22641},{20,63,13461},{17,63,9881},{13,63,5169},{20,63,24597},{6,63,6794},{3,63,649}, +{0,52,3713},{0,62,19773},{0,50,8413},{37,63,7401},{32,63,3181},{29,63,1280},{24,62,48},{63,11,12051},{18,63,4746},{9,63,233},{0,52,3713},{47,45,12051},{0,52,3713},{17,63,9881},{17,63,9881},{17,63,9881},{13,63,5169},{12,63,8644},{3,63,649},{3,63,649},{0,45,569},{0,50,9245},{0,42,3905},{29,63,1280},{29,63,1280},{29,63,1280},{25,60,0},{55,0,4418},{9,63,233},{9,63,233},{0,45,569},{62,24,4418}, +{0,45,569},{63,41,1800},{46,63,725},{41,63,0},{19,63,0},{63,41,1800},{63,52,1800},{19,63,0},{0,55,1808},{63,52,1800},{0,55,1808},{13,0,5105},{13,0,5105},{13,0,5105},{13,0,5105},{2,63,185},{2,63,185},{2,63,185},{0,37,1},{0,31,2005},{0,31,2005},{26,63,23345},{20,63,14805},{20,63,11441},{14,63,6170},{23,63,24893},{9,63,7066},{4,63,1236},{0,53,2900},{0,63,19260},{0,52,7861},{40,63,6753}, 
+{34,63,3038},{32,63,1465},{27,63,9},{62,16,10952},{21,63,4298},{12,63,353},{0,53,2900},{62,39,10952},{0,53,2900},{20,63,11441},{20,63,11441},{20,63,11441},{14,63,6170},{12,63,9764},{4,63,1236},{4,63,1236},{0,47,458},{0,50,9789},{0,45,4185},{32,63,1465},{32,63,1465},{32,63,1465},{27,62,0},{58,0,4418},{12,63,353},{12,63,353},{0,47,458},{62,27,4418},{0,47,458},{63,44,1352},{48,63,544},{44,63,0}, +{25,63,0},{63,44,1352},{62,54,1352},{25,63,0},{0,56,1360},{62,54,1352},{0,56,1360},{14,0,5945},{14,0,5945},{14,0,5945},{14,0,5945},{3,63,400},{3,63,400},{3,63,400},{0,40,1},{0,33,2336},{0,33,2336},{26,63,24443},{23,63,16415},{20,63,13259},{15,63,7448},{23,63,25379},{9,63,7606},{6,63,2021},{0,55,2045},{0,63,18918},{0,53,7275},{40,63,6141},{37,63,2978},{34,63,1625},{29,63,10},{63,19,9818}, +{24,63,3870},{17,63,530},{0,55,2045},{63,41,9818},{0,55,2045},{20,63,13259},{20,63,13259},{20,63,13259},{15,63,7448},{15,63,11218},{6,63,2021},{6,63,2021},{0,49,365},{0,53,10427},{0,45,4509},{34,63,1625},{34,63,1625},{34,63,1625},{29,63,10},{53,16,4418},{17,63,530},{17,63,530},{0,49,365},{63,30,4418},{0,49,365},{63,47,925},{51,63,377},{47,63,1},{31,63,1},{63,47,925},{63,55,925},{31,63,1}, +{0,58,929},{63,55,925},{0,58,929},{15,0,6964},{15,0,6964},{15,0,6964},{15,0,6964},{3,63,769},{3,63,769},{3,63,769},{0,44,0},{0,36,2745},{0,36,2745},{29,63,25483},{23,63,17983},{22,63,15066},{17,63,8739},{23,63,26083},{12,63,8302},{6,63,2965},{0,56,1458},{0,63,18886},{0,53,6955},{43,63,5581},{40,63,2986},{37,63,1801},{32,63,65},{58,32,8901},{27,63,3558},{20,63,698},{0,56,1458},{52,48,8901}, +{0,56,1458},{22,63,15066},{22,63,15066},{22,63,15066},{17,63,8739},{15,63,12626},{6,63,2965},{6,63,2965},{0,52,277},{0,56,11011},{0,49,4833},{37,63,1801},{37,63,1801},{37,63,1801},{32,63,65},{56,16,4418},{20,63,698},{20,63,698},{0,52,277},{62,33,4418},{0,52,277},{63,50,613},{52,63,250},{50,63,0},{37,63,0},{63,50,613},{62,57,613},{37,63,0},{0,59,617},{62,57,613},{0,59,617},{16,0,7946}, 
+{16,0,7946},{16,0,7946},{16,0,7946},{5,63,1184},{5,63,1184},{5,63,1184},{0,47,0},{0,36,3145},{0,36,3145},{29,63,26667},{26,63,19695},{23,63,16891},{18,63,10206},{26,63,26795},{12,63,9118},{9,63,4037},{0,58,933},{0,63,19110},{0,56,6699},{46,63,5181},{40,63,2970},{40,63,2009},{34,63,160},{63,27,8069},{30,63,3310},{23,63,898},{0,58,933},{63,45,8069},{0,58,933},{23,63,16891},{23,63,16891},{23,63,16891}, +{18,63,10206},{17,63,14179},{9,63,4037},{9,63,4037},{0,55,221},{0,59,11627},{0,50,5115},{40,63,2009},{40,63,2009},{40,63,2009},{34,63,160},{59,16,4418},{23,63,898},{23,63,898},{0,55,221},{62,36,4418},{0,55,221},{63,53,365},{55,63,146},{53,63,0},{43,63,0},{63,53,365},{63,58,365},{43,63,0},{0,60,369},{63,58,365},{0,60,369},{16,0,8986},{16,0,8986},{16,0,8986},{16,0,8986},{6,63,1665}, +{6,63,1665},{6,63,1665},{0,50,1},{0,39,3545},{0,39,3545},{31,63,26643},{26,63,20231},{26,63,17731},{20,63,10867},{26,63,26531},{15,63,9546},{12,63,4889},{0,59,590},{0,63,18606},{0,56,5707},{46,63,4781},{43,63,2978},{42,63,2228},{37,63,320},{63,31,7322},{32,63,3134},{27,63,1125},{0,59,554},{63,47,7322},{0,59,554},{26,63,17731},{26,63,17731},{26,63,17731},{20,63,10867},{20,63,15043},{12,63,4889},{12,63,4889}, +{1,56,185},{0,62,11315},{0,53,4667},{42,63,2228},{42,63,2228},{42,63,2228},{37,63,320},{62,16,4418},{27,63,1125},{27,63,1125},{0,56,162},{62,39,4418},{0,56,162},{63,56,181},{57,63,73},{56,63,0},{49,63,0},{63,56,181},{62,60,181},{49,63,0},{0,61,185},{62,60,181},{0,61,185},{18,0,9250},{18,0,9250},{18,0,9250},{18,0,9250},{8,63,1972},{8,63,1972},{8,63,1972},{1,52,4},{0,42,3341}, +{0,42,3341},{34,63,26006},{29,63,20400},{28,63,18273},{23,63,11384},{29,63,25736},{17,63,9864},{12,63,5726},{2,60,366},{0,63,18111},{0,59,4452},{48,63,4436},{46,63,3050},{43,63,2465},{40,63,562},{63,35,6584},{37,63,3006},{30,63,1421},{0,61,237},{63,49,6584},{0,61,237},{28,63,18273},{28,63,18273},{28,63,18273},{23,63,11384},{23,63,15704},{12,63,5726},{12,63,5726},{3,59,189},{0,63,10886},{0,56,3924},{43,63,2465}, 
+{43,63,2465},{43,63,2465},{40,63,562},{63,21,4418},{30,63,1421},{30,63,1421},{0,59,100},{63,42,4418},{0,59,100},{63,59,52},{60,63,20},{59,63,1},{56,63,0},{63,59,52},{63,61,52},{56,63,0},{0,62,52},{63,61,52},{0,62,52},{20,0,9256},{20,0,9256},{20,0,9256},{20,0,9256},{9,63,2205},{9,63,2205},{9,63,2205},{4,54,1},{0,45,2925},{0,45,2925},{34,63,25526},{31,63,20721},{29,63,18700}, +{24,63,11921},{31,63,25279},{20,63,10264},{15,63,6494},{4,62,238},{4,63,17924},{0,59,3588},{51,63,4228},{48,63,3140},{46,63,2665},{41,63,833},{63,39,6019},{40,63,2958},{34,63,1693},{0,62,84},{47,59,6019},{0,62,84},{29,63,18700},{29,63,18700},{29,63,18700},{24,63,11921},{23,63,16280},{15,63,6494},{15,63,6494},{5,61,189},{0,63,10854},{0,59,3332},{46,63,2665},{46,63,2665},{46,63,2665},{41,63,833},{63,27,4418}, +{34,63,1693},{34,63,1693},{0,61,61},{63,45,4418},{0,61,61},{63,62,4},{63,63,4},{62,63,1},{62,63,0},{63,62,4},{62,63,4},{62,63,0},{0,63,4},{62,63,4},{0,63,4},{22,0,9256},{22,0,9256},{22,0,9256},{22,0,9256},{12,63,2389},{12,63,2389},{12,63,2389},{6,56,1},{0,50,2512},{0,50,2512},{37,63,24250},{34,63,19895},{31,63,18169},{26,63,11820},{34,63,23717},{20,63,10012},{17,63,6584}, +{7,62,122},{6,63,16879},{0,62,2736},{51,63,3648},{48,63,2784},{48,63,2384},{43,63,778},{63,42,5163},{41,63,2584},{37,63,1549},{0,63,9},{46,61,5163},{0,63,9},{31,63,18169},{31,63,18169},{31,63,18169},{26,63,11820},{26,63,15620},{17,63,6584},{17,63,6584},{7,62,106},{0,63,10150},{0,59,2624},{48,63,2384},{48,63,2384},{48,63,2384},{43,63,778},{63,31,3872},{37,63,1549},{37,63,1549},{0,63,9},{63,47,3872}, +{0,63,9},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{24,0,9256},{24,0,9256},{24,0,9256},{24,0,9256},{15,63,2605},{15,63,2605},{15,63,2605},{8,58,1},{0,53,2176},{0,53,2176},{37,63,22746},{34,63,18775},{34,63,17254},{29,63,11564},{34,63,21973},{23,63,9532},{20,63,6424},{9,62,57},{9,63,15607},{0,62,2032},{53,63,3058}, 
+{51,63,2304},{48,63,2000},{46,63,650},{63,43,4267},{43,63,2134},{40,63,1285},{4,63,0},{63,53,4267},{4,63,0},{34,63,17254},{34,63,17254},{34,63,17254},{29,63,11564},{29,63,14692},{20,63,6424},{20,63,6424},{9,62,41},{0,63,9366},{0,62,2016},{48,63,2000},{48,63,2000},{48,63,2000},{46,63,650},{55,49,3200},{40,63,1285},{40,63,1285},{4,63,0},{63,48,3200},{4,63,0},{63,63,0},{63,63,0},{63,63,0}, +{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{26,0,9256},{26,0,9256},{26,0,9256},{26,0,9256},{17,63,2836},{17,63,2836},{17,63,2836},{10,60,1},{0,59,1856},{0,59,1856},{40,63,21168},{37,63,17685},{34,63,16300},{30,63,11323},{37,63,20205},{26,63,9090},{23,63,6310},{12,63,4},{12,63,14287},{0,62,1546},{54,63,2377},{51,63,1809},{51,63,1553},{47,63,520},{61,49,3361}, +{46,63,1683},{43,63,1018},{11,63,0},{63,54,3361},{11,63,0},{34,63,16300},{34,63,16300},{34,63,16300},{30,63,11323},{29,63,13666},{23,63,6310},{23,63,6310},{12,63,4},{3,63,8686},{0,62,1530},{51,63,1553},{51,63,1553},{51,63,1553},{47,63,520},{63,37,2521},{43,63,1018},{43,63,1018},{11,63,0},{62,50,2521},{11,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0}, +{0,63,0},{63,63,0},{0,63,0},{29,0,9250},{29,0,9250},{29,0,9250},{29,0,9250},{20,63,3114},{20,63,3114},{20,63,3114},{12,62,1},{0,62,1514},{0,62,1514},{40,63,19824},{37,63,16725},{37,63,15500},{32,63,11084},{37,63,18685},{26,63,8770},{26,63,6270},{14,63,16},{15,63,13215},{0,63,1325},{54,63,1881},{54,63,1449},{51,63,1249},{48,63,409},{63,47,2649},{47,63,1329},{44,63,797},{16,63,1},{63,55,2649}, +{16,63,1},{37,63,15500},{37,63,15500},{37,63,15500},{32,63,11084},{31,63,12906},{26,63,6270},{26,63,6270},{14,63,16},{6,63,8150},{0,63,1325},{51,63,1249},{51,63,1249},{51,63,1249},{48,63,409},{63,40,1985},{44,63,797},{44,63,797},{16,63,1},{63,51,1985},{16,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{31,0,9250}, 
+{31,0,9250},{31,0,9250},{31,0,9250},{22,63,3393},{22,63,3393},{22,63,3393},{14,63,16},{0,63,1325},{0,63,1325},{43,63,18608},{40,63,15853},{37,63,14796},{34,63,10841},{40,63,17341},{29,63,8410},{26,63,6206},{17,63,74},{17,63,12226},{0,63,1341},{56,63,1451},{54,63,1081},{54,63,937},{51,63,305},{63,50,2017},{49,63,1011},{47,63,605},{22,63,1},{62,57,2017},{22,63,1},{37,63,14796},{37,63,14796},{37,63,14796}, +{34,63,10841},{34,63,12089},{26,63,6206},{26,63,6206},{17,63,74},{9,63,7678},{0,63,1341},{54,63,937},{54,63,937},{54,63,937},{51,63,305},{63,43,1513},{47,63,605},{47,63,605},{22,63,1},{62,53,1513},{22,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{32,0,9256},{32,0,9256},{32,0,9256},{32,0,9256},{23,63,3650}, +{23,63,3650},{23,63,3650},{17,63,74},{0,63,1341},{0,63,1341},{43,63,17392},{40,63,15021},{40,63,14060},{37,63,10673},{40,63,16013},{32,63,8261},{29,63,6166},{19,63,194},{20,63,11338},{1,63,1594},{57,63,1041},{56,63,822},{54,63,697},{52,63,234},{63,51,1473},{51,63,737},{49,63,442},{28,63,1},{63,57,1473},{28,63,1},{40,63,14060},{40,63,14060},{40,63,14060},{37,63,10673},{34,63,11401},{29,63,6166},{29,63,6166}, +{19,63,194},{12,63,7270},{1,63,1594},{54,63,697},{54,63,697},{54,63,697},{52,63,234},{63,46,1105},{49,63,442},{49,63,442},{28,63,1},{63,54,1105},{28,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{34,0,9256},{34,0,9256},{34,0,9256},{34,0,9256},{26,63,3898},{26,63,3898},{26,63,3898},{19,63,194},{1,63,1594}, +{1,63,1594}, diff --git a/thirdparty/basisu/transcoder/basisu_transcoder_tables_pvrtc2_45.inc b/thirdparty/basisu/transcoder/basisu_transcoder_tables_pvrtc2_45.inc new file mode 100644 index 000000000..fbaf988d7 --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_transcoder_tables_pvrtc2_45.inc @@ -0,0 +1,481 @@ 
+{0,2,20},{0,1,10},{0,1,1},{0,1,9},{0,1,35},{0,1,27},{0,1,18},{0,1,61},{0,1,52},{0,0,68},{0,2,20},{0,1,10},{0,1,1},{0,1,9},{0,1,35},{0,1,27},{0,1,18},{0,1,61},{0,1,43},{0,1,61},{0,1,1},{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,1,1}, +{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,2,20},{0,1,10},{0,1,1},{0,1,9},{0,2,20},{0,1,18},{0,1,9},{0,1,36},{0,1,18},{0,1,36},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,4,56},{0,3,38},{0,2,52}, +{0,2,36},{0,4,56},{0,3,35},{0,2,0},{0,2,52},{0,2,88},{0,1,78},{0,4,56},{0,3,38},{0,2,52},{0,2,36},{1,0,52},{0,3,35},{0,2,0},{0,2,52},{1,1,51},{0,2,52},{0,3,37},{0,3,37},{0,3,37},{0,2,36},{0,3,10},{0,2,0},{0,2,0},{0,1,5},{0,1,35},{0,1,14},{0,3,37},{0,3,37},{0,3,37},{0,2,36},{0,3,10}, +{0,2,0},{0,2,0},{0,1,5},{1,0,16},{0,1,5},{1,1,18},{0,3,2},{0,2,16},{0,2,0},{1,1,18},{2,0,20},{0,2,0},{0,2,36},{2,0,20},{0,2,36},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,2,0},{0,2,0},{0,2,0},{0,1,1},{0,1,10},{0,1,10},{1,4,88},{1,3,78},{1,3,69},{1,3,77},{1,3,115},{0,4,88},{0,3,98}, +{0,3,101},{0,4,72},{0,3,38},{1,4,24},{1,3,14},{1,3,5},{1,3,13},{1,3,51},{0,4,24},{0,3,34},{0,3,37},{3,0,52},{0,3,37},{1,3,69},{1,3,69},{1,3,69},{1,2,72},{1,2,72},{1,2,72},{1,2,72},{1,2,72},{0,3,11},{0,2,24},{1,3,5},{1,3,5},{1,3,5},{1,2,8},{1,2,8},{1,2,8},{1,2,8},{1,2,8},{1,2,8}, +{1,2,8},{0,7,18},{1,3,10},{1,3,1},{0,3,9},{0,7,18},{1,3,18},{0,3,9},{0,3,36},{1,3,18},{0,3,36},{1,0,68},{1,0,68},{1,0,68},{1,0,68},{1,1,65},{1,1,65},{1,1,65},{0,3,65},{0,3,2},{0,3,2},{1,6,56},{1,5,38},{1,4,53},{1,4,37},{1,6,56},{1,5,35},{1,4,1},{1,4,66},{0,5,60},{0,4,70},{1,6,56}, +{1,5,38},{1,4,53},{1,4,37},{0,9,51},{1,5,35},{1,4,1},{0,4,54},{2,3,51},{0,4,54},{1,5,37},{1,5,37},{1,5,37},{1,4,36},{1,5,10},{1,4,0},{1,4,0},{1,3,5},{0,5,11},{1,3,14},{1,5,37},{1,5,37},{1,5,37},{1,4,36},{0,8,8},{1,4,0},{1,4,0},{1,3,5},{4,0,8},{1,3,5},{2,3,18},{1,5,2},{1,4,17}, 
+{1,4,1},{2,3,18},{4,1,18},{1,4,1},{0,4,50},{4,1,18},{0,4,50},{1,0,36},{1,0,36},{1,0,36},{1,0,36},{1,4,0},{1,4,0},{1,4,0},{1,3,1},{1,3,10},{1,3,10},{2,6,88},{2,5,78},{2,5,69},{2,5,77},{2,5,115},{2,5,107},{2,5,98},{1,5,117},{1,6,60},{1,5,36},{2,6,24},{2,5,14},{2,5,5},{2,5,13},{3,2,51}, +{1,6,35},{2,5,34},{1,5,36},{1,6,51},{1,5,36},{2,5,69},{2,5,69},{2,5,69},{2,4,72},{2,4,72},{2,4,72},{2,4,72},{2,4,72},{1,5,16},{1,5,36},{2,5,5},{2,5,5},{2,5,5},{2,4,8},{3,1,8},{2,4,8},{2,4,8},{2,4,8},{3,3,8},{2,4,8},{3,3,20},{2,5,10},{2,5,1},{2,5,9},{3,3,20},{2,5,18},{2,5,9}, +{0,5,36},{2,5,18},{0,5,36},{2,0,68},{2,0,68},{2,0,68},{2,0,68},{2,3,68},{2,3,68},{2,3,68},{2,4,68},{1,5,0},{1,5,0},{2,8,56},{2,7,38},{2,6,52},{2,6,36},{2,8,56},{2,7,35},{2,6,0},{2,6,52},{1,7,76},{1,6,70},{2,8,56},{2,7,38},{2,6,52},{2,6,36},{4,1,51},{2,7,35},{2,6,0},{1,6,45},{3,5,51}, +{1,6,45},{2,7,37},{2,7,37},{2,7,37},{2,6,36},{2,7,10},{2,6,0},{2,6,0},{2,5,5},{1,7,12},{2,5,14},{2,7,37},{2,7,37},{2,7,37},{2,6,36},{4,0,8},{2,6,0},{2,6,0},{2,5,5},{1,7,8},{2,5,5},{3,5,18},{2,7,2},{2,6,16},{2,6,0},{3,5,18},{0,9,18},{2,6,0},{0,6,36},{0,9,18},{0,6,36},{2,0,36}, +{2,0,36},{2,0,36},{2,0,36},{2,6,0},{2,6,0},{2,6,0},{2,5,1},{1,7,8},{1,7,8},{3,8,88},{3,7,78},{3,7,69},{3,7,77},{3,7,115},{2,8,88},{2,7,98},{2,7,101},{1,9,67},{2,7,38},{3,8,24},{3,7,14},{3,7,5},{3,7,13},{3,7,51},{2,8,24},{2,7,34},{2,7,37},{8,0,51},{2,7,37},{3,7,69},{3,7,69},{3,7,69}, +{3,6,72},{3,6,72},{3,6,72},{3,6,72},{3,6,72},{2,7,11},{2,6,24},{3,7,5},{3,7,5},{3,7,5},{3,6,8},{3,6,8},{3,6,8},{3,6,8},{3,6,8},{3,6,8},{3,6,8},{5,1,18},{3,7,10},{3,7,1},{2,7,9},{5,1,18},{3,7,18},{2,7,9},{0,7,36},{3,7,18},{0,7,36},{3,0,68},{3,0,68},{3,0,68},{3,0,68},{3,5,65}, +{3,5,65},{3,5,65},{2,7,65},{2,7,2},{2,7,2},{3,10,56},{3,9,38},{3,8,53},{3,8,37},{3,10,56},{3,9,35},{3,8,1},{3,8,66},{2,9,60},{2,8,70},{3,10,56},{3,9,38},{3,8,53},{3,8,37},{5,3,51},{3,9,35},{3,8,1},{2,8,54},{4,7,51},{2,8,54},{3,9,37},{3,9,37},{3,9,37},{3,8,36},{3,9,10},{3,8,0},{3,8,0}, 
+{3,7,5},{2,9,11},{3,7,14},{3,9,37},{3,9,37},{3,9,37},{3,8,36},{5,2,8},{3,8,0},{3,8,0},{3,7,5},{8,1,8},{3,7,5},{4,7,18},{3,9,2},{3,8,17},{3,8,1},{4,7,18},{8,2,18},{3,8,1},{0,8,50},{8,2,18},{0,8,50},{3,0,36},{3,0,36},{3,0,36},{3,0,36},{3,8,0},{3,8,0},{3,8,0},{3,7,1},{3,7,10}, +{3,7,10},{4,10,88},{4,9,78},{4,9,69},{4,9,77},{4,9,115},{4,9,107},{4,9,98},{3,9,117},{3,10,60},{3,9,36},{4,10,24},{4,9,14},{4,9,5},{4,9,13},{5,6,51},{3,10,35},{4,9,34},{3,9,36},{10,1,51},{3,9,36},{4,9,69},{4,9,69},{4,9,69},{4,8,72},{4,8,72},{4,8,72},{4,8,72},{4,8,72},{3,9,16},{3,9,36},{4,9,5}, +{4,9,5},{4,9,5},{4,8,8},{5,5,8},{4,8,8},{4,8,8},{4,8,8},{5,7,8},{4,8,8},{7,0,18},{4,9,10},{4,9,1},{4,9,9},{7,0,18},{4,9,18},{4,9,9},{0,9,36},{4,9,18},{0,9,36},{4,0,68},{4,0,68},{4,0,68},{4,0,68},{4,7,68},{4,7,68},{4,7,68},{4,8,68},{3,9,0},{3,9,0},{4,12,56},{4,11,38},{4,10,52}, +{4,10,36},{4,12,56},{4,11,35},{4,10,0},{4,10,52},{3,11,76},{3,10,70},{4,12,56},{4,11,38},{4,10,52},{4,10,36},{7,2,51},{4,11,35},{4,10,0},{3,10,45},{12,0,51},{3,10,45},{4,11,37},{4,11,37},{4,11,37},{4,10,36},{4,11,10},{4,10,0},{4,10,0},{4,9,5},{3,11,12},{4,9,14},{4,11,37},{4,11,37},{4,11,37},{4,10,36},{7,1,8}, +{4,10,0},{4,10,0},{4,9,5},{10,2,8},{4,9,5},{5,9,18},{4,11,2},{4,10,16},{4,10,0},{5,9,18},{10,3,18},{4,10,0},{0,10,36},{10,3,18},{0,10,36},{4,0,36},{4,0,36},{4,0,36},{4,0,36},{4,10,0},{4,10,0},{4,10,0},{4,9,1},{3,11,8},{3,11,8},{5,12,88},{5,11,78},{5,11,69},{5,11,77},{5,11,115},{4,12,88},{4,11,98}, +{4,11,101},{3,13,67},{4,11,38},{5,12,24},{5,11,14},{5,11,5},{5,11,13},{5,11,51},{4,12,24},{4,11,34},{4,11,37},{11,3,51},{4,11,37},{5,11,69},{5,11,69},{5,11,69},{5,10,72},{5,10,72},{5,10,72},{5,10,72},{5,10,72},{4,11,11},{4,10,24},{5,11,5},{5,11,5},{5,11,5},{5,10,8},{5,10,8},{5,10,8},{5,10,8},{5,10,8},{12,1,8}, 
+{5,10,8},{8,0,18},{5,11,10},{5,11,1},{4,11,9},{8,0,18},{12,2,18},{4,11,9},{0,11,36},{12,2,18},{0,11,36},{5,0,68},{5,0,68},{5,0,68},{5,0,68},{5,9,65},{5,9,65},{5,9,65},{4,11,65},{4,11,2},{4,11,2},{5,14,56},{5,13,38},{5,12,53},{5,12,37},{5,14,56},{5,13,35},{5,12,1},{5,12,66},{4,13,60},{4,12,70},{5,14,56}, +{5,13,38},{5,12,53},{5,12,37},{8,2,51},{5,13,35},{5,12,1},{4,12,54},{13,2,51},{4,12,54},{5,13,37},{5,13,37},{5,13,37},{5,12,36},{5,13,10},{5,12,0},{5,12,0},{5,11,5},{4,13,11},{5,11,14},{5,13,37},{5,13,37},{5,13,37},{5,12,36},{8,1,8},{5,12,0},{5,12,0},{5,11,5},{10,5,8},{5,11,5},{6,11,18},{5,13,2},{5,12,17}, +{5,12,1},{6,11,18},{15,0,18},{5,12,1},{0,12,50},{15,0,18},{0,12,50},{5,0,36},{5,0,36},{5,0,36},{5,0,36},{5,12,0},{5,12,0},{5,12,0},{5,11,1},{5,11,10},{5,11,10},{6,14,88},{6,13,78},{6,13,69},{6,13,77},{6,13,115},{6,13,107},{6,13,98},{5,13,117},{5,14,60},{5,13,36},{6,14,24},{6,13,14},{6,13,5},{6,13,13},{8,5,51}, +{5,14,35},{6,13,34},{5,13,36},{12,5,51},{5,13,36},{6,13,69},{6,13,69},{6,13,69},{6,12,72},{6,12,72},{6,12,72},{6,12,72},{6,12,72},{5,13,16},{5,13,36},{6,13,5},{6,13,5},{6,13,5},{6,12,8},{8,4,8},{6,12,8},{6,12,8},{6,12,8},{14,2,8},{6,12,8},{3,24,18},{6,13,10},{6,13,1},{6,13,9},{3,24,18},{14,3,18},{6,13,9}, +{0,13,36},{14,3,18},{0,13,36},{6,0,68},{6,0,68},{6,0,68},{6,0,68},{6,11,68},{6,11,68},{6,11,68},{6,12,68},{5,13,0},{5,13,0},{6,16,56},{6,15,38},{6,14,52},{6,14,36},{6,16,56},{6,15,35},{6,14,0},{6,14,52},{5,15,76},{5,14,70},{6,16,56},{6,15,38},{6,14,52},{6,14,36},{3,26,51},{6,15,35},{6,14,0},{5,14,45},{15,3,51}, +{5,14,45},{6,15,37},{6,15,37},{6,15,37},{6,14,36},{6,15,10},{6,14,0},{6,14,0},{6,13,5},{5,15,12},{6,13,14},{6,15,37},{6,15,37},{6,15,37},{6,14,36},{3,25,8},{6,14,0},{6,14,0},{6,13,5},{12,6,8},{6,13,5},{9,5,18},{6,15,2},{6,14,16},{6,14,0},{9,5,18},{12,7,18},{6,14,0},{0,14,36},{12,7,18},{0,14,36},{6,0,36}, 
+{6,0,36},{6,0,36},{6,0,36},{6,14,0},{6,14,0},{6,14,0},{6,13,1},{5,15,8},{5,15,8},{7,16,88},{7,15,78},{7,15,69},{7,15,77},{7,15,115},{6,16,88},{6,15,98},{6,15,101},{5,17,67},{6,15,38},{7,16,24},{7,15,14},{7,15,5},{7,15,13},{11,0,51},{6,16,24},{6,15,34},{6,15,37},{13,7,51},{6,15,37},{7,15,69},{7,15,69},{7,15,69}, +{7,14,72},{7,14,72},{7,14,72},{7,14,72},{7,14,72},{6,15,11},{6,14,24},{7,15,5},{7,15,5},{7,15,5},{7,14,8},{9,6,8},{7,14,8},{7,14,8},{7,14,8},{14,5,8},{7,14,8},{10,4,18},{7,15,10},{7,15,1},{6,15,9},{10,4,18},{14,6,18},{6,15,9},{0,15,36},{14,6,18},{0,15,36},{7,0,68},{7,0,68},{7,0,68},{7,0,68},{7,13,65}, +{7,13,65},{7,13,65},{6,15,65},{6,15,2},{6,15,2},{7,18,56},{7,17,38},{7,16,53},{7,16,37},{7,18,56},{7,17,35},{7,16,1},{7,16,66},{6,17,60},{6,16,70},{7,18,56},{7,17,38},{7,16,53},{7,16,37},{10,6,51},{7,17,35},{7,16,1},{6,16,54},{15,6,51},{6,16,54},{7,17,37},{7,17,37},{7,17,37},{7,16,36},{7,17,10},{7,16,0},{7,16,0}, +{7,15,5},{6,17,11},{7,15,14},{7,17,37},{7,17,37},{7,17,37},{7,16,36},{10,5,8},{7,16,0},{7,16,0},{7,15,5},{12,9,8},{7,15,5},{12,0,18},{7,17,2},{7,16,17},{7,16,1},{12,0,18},{12,10,18},{7,16,1},{0,16,50},{12,10,18},{0,16,50},{7,0,36},{7,0,36},{7,0,36},{7,0,36},{7,16,0},{7,16,0},{7,16,0},{7,15,1},{7,15,10}, +{7,15,10},{7,21,326},{7,19,322},{8,17,392},{7,17,322},{7,21,137},{7,18,116},{7,17,133},{7,17,117},{7,18,60},{7,17,36},{8,16,118},{8,16,134},{8,17,136},{8,17,136},{10,9,51},{7,18,35},{7,17,52},{7,17,36},{14,9,51},{7,17,36},{7,20,307},{7,20,307},{7,20,307},{7,18,307},{7,20,91},{7,18,91},{7,18,91},{7,16,110},{7,17,16},{7,17,36},{8,15,101}, +{8,15,101},{8,15,101},{8,16,101},{10,8,8},{7,18,10},{7,18,10},{7,16,29},{11,12,8},{7,16,29},{12,3,18},{7,19,16},{8,17,36},{7,17,16},{12,3,18},{11,13,18},{7,17,16},{0,17,36},{11,13,18},{0,17,36},{7,0,306},{7,0,306},{7,0,306},{7,0,306},{7,19,81},{7,19,81},{7,19,81},{7,17,81},{7,17,0},{7,17,0},{8,19,88},{8,18,78},{8,18,69}, 
+{8,18,77},{8,18,115},{8,18,107},{8,18,98},{8,17,136},{7,19,76},{7,18,70},{8,19,24},{8,18,14},{8,18,5},{8,18,13},{9,15,51},{8,18,43},{8,18,34},{7,18,45},{5,22,51},{7,18,45},{8,18,69},{8,18,69},{8,18,69},{8,17,72},{8,17,72},{8,17,72},{8,17,72},{8,17,72},{7,19,12},{7,18,70},{8,18,5},{8,18,5},{8,18,5},{8,17,8},{9,14,8}, +{8,17,8},{8,17,8},{8,17,8},{14,10,8},{8,17,8},{11,9,18},{8,18,10},{8,18,1},{8,18,9},{11,9,18},{14,11,18},{8,18,9},{0,18,36},{14,11,18},{0,18,36},{8,0,68},{8,0,68},{8,0,68},{8,0,68},{8,16,65},{8,16,65},{8,16,65},{8,17,68},{7,19,8},{7,19,8},{8,21,56},{8,20,38},{8,19,52},{8,19,36},{8,21,56},{8,20,35},{8,19,0}, +{8,19,52},{7,21,67},{8,18,78},{8,21,56},{8,20,38},{8,19,52},{8,19,36},{13,4,51},{8,20,35},{8,19,0},{8,19,52},{15,11,51},{8,19,52},{8,20,37},{8,20,37},{8,20,37},{8,19,36},{8,20,10},{8,19,0},{8,19,0},{8,18,5},{8,18,35},{8,18,14},{8,20,37},{8,20,37},{8,20,37},{8,19,36},{11,10,8},{8,19,0},{8,19,0},{8,18,5},{5,23,8}, +{8,18,5},{12,8,18},{8,20,2},{8,19,16},{8,19,0},{12,8,18},{11,16,18},{8,19,0},{0,19,36},{11,16,18},{0,19,36},{8,0,36},{8,0,36},{8,0,36},{8,0,36},{8,19,0},{8,19,0},{8,19,0},{8,18,1},{8,18,10},{8,18,10},{9,21,88},{9,20,78},{9,20,70},{9,20,78},{9,20,115},{8,21,88},{8,20,99},{8,20,115},{8,21,72},{8,20,52},{9,21,24}, +{9,20,14},{9,20,6},{9,20,14},{15,0,51},{8,21,24},{8,20,35},{8,20,51},{12,16,51},{8,20,51},{9,20,69},{9,20,69},{9,20,69},{9,19,72},{9,19,72},{9,19,72},{9,19,72},{9,19,72},{8,20,11},{8,19,24},{9,20,5},{9,20,5},{9,20,5},{9,19,8},{12,9,8},{9,19,8},{9,19,8},{9,19,8},{14,13,8},{9,19,8},{14,4,18},{9,20,10},{9,20,2}, +{8,20,10},{14,4,18},{14,14,18},{8,20,10},{0,20,50},{14,14,18},{0,20,50},{9,0,68},{9,0,68},{9,0,68},{9,0,68},{9,18,65},{9,18,65},{9,18,65},{8,20,65},{8,20,2},{8,20,2},{9,24,70},{9,22,58},{9,21,75},{9,21,51},{9,23,52},{9,22,25},{9,21,3},{9,21,46},{8,23,68},{8,21,70},{9,24,69},{9,22,57},{9,21,74},{9,21,50},{15,3,51}, 
+{9,22,24},{9,21,2},{8,21,45},{11,19,51},{8,21,45},{9,23,51},{9,23,51},{9,23,51},{9,21,51},{9,22,9},{9,21,3},{9,21,3},{9,20,9},{8,22,12},{9,20,12},{9,23,50},{9,23,50},{9,23,50},{9,21,50},{15,2,8},{9,21,2},{9,21,2},{9,20,8},{13,16,8},{9,20,8},{14,7,18},{9,22,8},{9,21,25},{9,21,1},{14,7,18},{13,17,18},{9,21,1}, +{0,21,36},{13,17,18},{0,21,36},{9,0,50},{9,0,50},{9,0,50},{9,0,50},{9,21,2},{9,21,2},{9,21,2},{9,20,5},{9,20,8},{9,20,8},{10,23,88},{10,22,78},{10,22,69},{10,22,77},{10,22,115},{10,22,107},{10,22,98},{9,22,117},{9,23,60},{9,22,36},{10,23,24},{10,22,14},{10,22,5},{10,22,13},{11,19,51},{9,23,35},{10,22,34},{9,22,36},{9,23,51}, +{9,22,36},{10,22,69},{10,22,69},{10,22,69},{10,21,72},{10,21,72},{10,21,72},{10,21,72},{10,21,72},{9,22,16},{9,22,36},{10,22,5},{10,22,5},{10,22,5},{10,21,8},{11,18,8},{10,21,8},{10,21,8},{10,21,8},{10,21,8},{10,21,8},{13,13,18},{10,22,10},{10,22,1},{10,22,9},{13,13,18},{15,16,18},{10,22,9},{0,22,36},{15,16,18},{0,22,36},{10,0,68}, +{10,0,68},{10,0,68},{10,0,68},{10,20,65},{10,20,65},{10,20,65},{10,21,68},{9,22,0},{9,22,0},{10,25,56},{10,24,38},{10,23,52},{10,23,36},{10,25,56},{10,24,35},{10,23,0},{10,23,52},{9,24,63},{9,23,70},{10,25,56},{10,24,38},{10,23,52},{10,23,36},{15,8,51},{10,24,35},{10,23,0},{9,23,45},{11,22,51},{9,23,45},{10,24,37},{10,24,37},{10,24,37}, +{10,23,36},{10,24,10},{10,23,0},{10,23,0},{10,22,5},{9,24,14},{10,22,14},{10,24,37},{10,24,37},{10,24,37},{10,23,36},{13,14,8},{10,23,0},{10,23,0},{10,22,5},{8,25,8},{10,22,5},{14,12,18},{10,24,2},{10,23,16},{10,23,0},{14,12,18},{13,20,18},{10,23,0},{0,23,36},{13,20,18},{0,23,36},{10,0,36},{10,0,36},{10,0,36},{10,0,36},{10,23,0}, +{10,23,0},{10,23,0},{10,22,1},{10,22,10},{10,22,10},{11,25,88},{11,24,78},{11,24,70},{11,24,78},{11,24,115},{10,25,88},{10,24,99},{10,24,115},{9,26,67},{10,24,52},{11,25,24},{11,24,14},{11,24,6},{11,24,14},{14,14,51},{10,25,24},{10,24,35},{10,24,51},{14,20,51},{10,24,51},{11,24,69},{11,24,69},{11,24,69},{11,23,72},{11,23,72},{11,23,72},{11,23,72}, 
+{11,23,72},{10,24,11},{10,23,24},{11,24,5},{11,24,5},{11,24,5},{11,23,8},{14,13,8},{11,23,8},{11,23,8},{11,23,8},{11,23,8},{11,23,8},{13,18,18},{11,24,10},{11,24,2},{10,24,10},{13,18,18},{11,24,18},{10,24,10},{0,24,50},{11,24,18},{0,24,50},{11,0,68},{11,0,68},{11,0,68},{11,0,68},{11,22,65},{11,22,65},{11,22,65},{10,24,65},{10,24,2}, +{10,24,2},{11,28,70},{11,26,58},{11,25,75},{11,25,51},{11,27,52},{11,26,25},{11,25,3},{11,25,46},{10,27,68},{10,25,70},{11,28,69},{11,26,57},{11,25,74},{11,25,50},{14,17,51},{11,26,24},{11,25,2},{10,25,45},{13,23,51},{10,25,45},{11,27,51},{11,27,51},{11,27,51},{11,25,51},{11,26,9},{11,25,3},{11,25,3},{11,24,9},{10,26,12},{11,24,12},{11,27,50}, +{11,27,50},{11,27,50},{11,25,50},{14,16,8},{11,25,2},{11,25,2},{11,24,8},{15,20,8},{11,24,8},{13,21,18},{11,26,8},{11,25,25},{11,25,1},{13,21,18},{15,21,18},{11,25,1},{0,25,36},{15,21,18},{0,25,36},{11,0,50},{11,0,50},{11,0,50},{11,0,50},{11,25,2},{11,25,2},{11,25,2},{11,24,5},{11,24,8},{11,24,8},{12,27,88},{12,26,78},{12,26,69}, +{12,26,77},{12,26,115},{12,26,107},{12,26,98},{11,26,117},{11,27,60},{11,26,36},{12,27,24},{12,26,14},{12,26,5},{12,26,13},{13,23,51},{11,27,35},{12,26,34},{11,26,36},{11,27,51},{11,26,36},{12,26,69},{12,26,69},{12,26,69},{12,25,72},{12,25,72},{12,25,72},{12,25,72},{12,25,72},{11,26,16},{11,26,36},{12,26,5},{12,26,5},{12,26,5},{12,25,8},{13,22,8}, +{12,25,8},{12,25,8},{12,25,8},{12,25,8},{12,25,8},{15,17,18},{12,26,10},{12,26,1},{12,26,9},{15,17,18},{12,26,18},{12,26,9},{0,26,36},{12,26,18},{0,26,36},{12,0,68},{12,0,68},{12,0,68},{12,0,68},{12,24,65},{12,24,65},{12,24,65},{12,25,68},{11,26,0},{11,26,0},{12,29,56},{12,28,38},{12,27,52},{12,27,36},{12,29,56},{12,28,35},{12,27,0}, 
+{12,27,52},{11,28,63},{11,27,70},{12,29,56},{12,28,38},{12,27,52},{12,27,36},{15,19,51},{12,28,35},{12,27,0},{11,27,45},{13,26,51},{11,27,45},{12,28,37},{12,28,37},{12,28,37},{12,27,36},{12,28,10},{12,27,0},{12,27,0},{12,26,5},{11,28,14},{12,26,14},{12,28,37},{12,28,37},{12,28,37},{12,27,36},{15,18,8},{12,27,0},{12,27,0},{12,26,5},{10,29,8}, +{12,26,5},{13,26,18},{12,28,2},{12,27,16},{12,27,0},{13,26,18},{15,24,18},{12,27,0},{0,27,36},{15,24,18},{0,27,36},{12,0,36},{12,0,36},{12,0,36},{12,0,36},{12,27,0},{12,27,0},{12,27,0},{12,26,1},{12,26,10},{12,26,10},{13,29,88},{13,28,78},{13,28,70},{13,28,78},{13,28,115},{12,29,88},{12,28,99},{12,28,115},{11,30,67},{12,28,52},{13,29,24}, +{13,28,14},{13,28,6},{13,28,14},{13,28,51},{12,29,24},{12,28,35},{12,28,51},{11,30,51},{12,28,51},{13,28,69},{13,28,69},{13,28,69},{13,27,72},{13,27,72},{13,27,72},{13,27,72},{13,27,72},{12,28,11},{12,27,24},{13,28,5},{13,28,5},{13,28,5},{13,27,8},{13,27,8},{13,27,8},{13,27,8},{13,27,8},{13,27,8},{13,27,8},{15,22,18},{13,28,10},{13,28,2}, +{12,28,10},{15,22,18},{13,28,18},{12,28,10},{0,28,50},{13,28,18},{0,28,50},{13,0,68},{13,0,68},{13,0,68},{13,0,68},{13,26,65},{13,26,65},{13,26,65},{12,28,65},{12,28,2},{12,28,2},{13,31,76},{13,30,58},{13,29,75},{13,29,51},{13,31,52},{13,30,25},{13,29,3},{13,29,46},{12,31,68},{12,29,70},{13,31,75},{13,30,57},{13,29,74},{13,29,50},{13,31,51}, +{13,30,24},{13,29,2},{12,29,45},{15,27,51},{12,29,45},{13,31,51},{13,31,51},{13,31,51},{13,29,51},{13,30,9},{13,29,3},{13,29,3},{13,28,9},{12,30,12},{13,28,12},{13,31,50},{13,31,50},{13,31,50},{13,29,50},{13,30,8},{13,29,2},{13,29,2},{13,28,8},{12,30,8},{13,28,8},{15,25,18},{13,30,8},{13,29,25},{13,29,1},{15,25,18},{12,31,18},{13,29,1}, 
+{0,29,36},{12,31,18},{0,29,36},{13,0,50},{13,0,50},{13,0,50},{13,0,50},{13,29,2},{13,29,2},{13,29,2},{13,28,5},{13,28,8},{13,28,8},{14,31,88},{14,30,78},{14,30,69},{14,30,77},{14,30,115},{14,30,107},{14,30,98},{13,30,117},{13,31,60},{13,30,36},{14,31,24},{14,30,14},{14,30,5},{14,30,13},{15,27,51},{13,31,35},{14,30,34},{13,30,36},{13,31,51}, +{13,30,36},{14,30,69},{14,30,69},{14,30,69},{14,29,72},{14,29,72},{14,29,72},{14,29,72},{14,29,72},{13,30,16},{13,30,36},{14,30,5},{14,30,5},{14,30,5},{14,29,8},{15,26,8},{14,29,8},{14,29,8},{14,29,8},{14,29,8},{14,29,8},{14,31,20},{14,30,10},{14,30,1},{14,30,9},{14,31,20},{14,30,18},{14,30,9},{0,30,36},{14,30,18},{0,30,36},{14,0,68}, +{14,0,68},{14,0,68},{14,0,68},{14,28,65},{14,28,65},{14,28,65},{14,29,68},{13,30,0},{13,30,0},{14,31,152},{14,31,88},{14,31,52},{14,31,36},{14,31,116},{14,31,36},{14,31,0},{14,31,52},{14,31,88},{13,31,70},{15,30,118},{14,31,88},{14,31,52},{14,31,36},{15,29,52},{14,31,36},{14,31,0},{13,31,45},{15,30,51},{13,31,45},{14,31,52},{14,31,52},{14,31,52}, +{14,31,36},{14,31,16},{14,31,0},{14,31,0},{14,30,5},{14,30,35},{14,30,14},{14,31,52},{14,31,52},{14,31,52},{14,31,36},{15,28,10},{14,31,0},{14,31,0},{14,30,5},{15,29,16},{14,30,5},{15,30,18},{15,30,34},{14,31,16},{14,31,0},{15,30,18},{15,30,26},{14,31,0},{0,31,36},{15,30,26},{0,31,36},{14,0,36},{14,0,36},{14,0,36},{14,0,36},{14,31,0}, +{14,31,0},{14,31,0},{14,30,1},{14,30,10},{14,30,10},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{14,31,20},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68},{15,31,68}, 
+{15,31,68},{14,31,56},{14,31,20},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{15,0,68},{15,0,68},{15,0,68},{15,0,68},{15,30,65},{15,30,65},{15,30,65},{15,31,68},{14,31,20}, +{14,31,20},{0,4,74},{0,3,20},{0,2,2},{0,2,26},{0,2,158},{0,2,110},{0,2,62},{0,1,115},{0,1,178},{0,1,124},{0,4,74},{0,3,20},{0,2,2},{0,2,26},{0,2,158},{0,2,110},{0,2,62},{0,1,115},{1,0,158},{0,1,115},{0,2,1},{0,2,1},{0,2,1},{0,1,0},{0,1,13},{0,1,9},{0,1,9},{0,0,25},{0,0,25},{0,0,25},{0,2,1}, +{0,2,1},{0,2,1},{0,1,0},{0,1,13},{0,1,9},{0,1,9},{0,0,25},{0,0,25},{0,0,25},{1,0,74},{0,3,20},{0,2,2},{0,2,26},{1,0,74},{1,1,72},{0,2,26},{0,1,90},{1,1,72},{0,1,90},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,6,83},{0,5,13},{0,3,26}, +{0,3,14},{0,5,248},{0,3,140},{0,3,41},{0,2,139},{0,2,319},{0,2,175},{0,6,83},{0,5,13},{0,3,26},{0,3,14},{1,1,244},{0,3,140},{0,3,41},{0,2,139},{0,3,248},{0,2,139},{0,4,10},{0,4,10},{0,4,10},{0,3,13},{0,3,52},{0,2,18},{0,2,18},{0,1,29},{0,1,77},{0,1,38},{0,4,10},{0,4,10},{0,4,10},{0,3,13},{0,3,52}, +{0,2,18},{0,2,18},{0,1,29},{1,0,58},{0,1,29},{1,3,72},{0,5,4},{0,3,17},{0,3,5},{1,3,72},{3,0,74},{0,3,5},{0,2,90},{3,0,74},{0,2,90},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,1,0},{0,1,0},{0,1,0},{0,1,4},{0,0,9},{0,0,9},{0,9,193},{0,7,125},{0,4,202},{0,4,122},{0,7,244},{0,5,96},{0,4,2}, +{0,3,106},{0,4,395},{0,3,187},{1,6,99},{1,5,45},{1,4,26},{1,4,50},{1,4,243},{0,5,96},{0,4,2},{0,3,106},{2,2,243},{0,3,106},{0,7,121},{0,7,121},{0,7,121},{0,4,122},{0,5,52},{0,4,2},{0,4,2},{0,3,25},{0,3,133},{0,2,62},{1,4,26},{1,4,26},{1,4,26},{1,3,25},{1,2,50},{0,4,2},{0,4,2},{0,3,25},{1,2,50}, 
+{0,3,25},{0,9,72},{0,7,4},{1,4,1},{0,4,1},{0,9,72},{2,3,72},{0,4,1},{0,3,90},{2,3,72},{0,3,90},{0,0,121},{0,0,121},{0,0,121},{0,0,121},{0,4,1},{0,4,1},{0,4,1},{0,2,1},{0,2,37},{0,2,37},{1,8,164},{1,7,94},{1,5,106},{1,5,94},{0,10,292},{0,7,125},{0,5,81},{0,4,130},{0,6,364},{0,4,106},{1,8,83}, +{1,7,13},{1,5,25},{1,5,13},{3,0,243},{0,7,76},{0,5,32},{0,4,81},{5,0,243},{0,4,81},{1,6,91},{1,6,91},{1,6,91},{1,5,94},{0,8,99},{0,6,51},{0,6,51},{0,4,66},{0,4,107},{0,4,42},{1,6,10},{1,6,10},{1,6,10},{1,5,13},{0,8,50},{0,6,2},{0,6,2},{0,4,17},{4,0,50},{0,4,17},{3,2,74},{1,7,4},{1,5,16}, +{1,5,4},{3,2,74},{5,1,72},{1,5,4},{0,4,80},{5,1,72},{0,4,80},{1,0,90},{1,0,90},{1,0,90},{1,0,90},{0,6,50},{0,6,50},{0,6,50},{0,4,50},{0,3,20},{0,3,20},{1,11,218},{1,9,149},{1,7,242},{1,6,149},{1,10,248},{1,7,99},{1,6,5},{1,5,99},{0,7,308},{0,5,100},{2,8,99},{2,7,45},{2,6,27},{2,6,51},{3,3,243}, +{0,9,81},{1,6,5},{0,5,99},{4,3,243},{0,5,99},{1,9,145},{1,9,145},{1,9,145},{1,6,148},{1,8,52},{1,6,4},{1,6,4},{1,5,18},{0,6,72},{0,5,19},{2,6,26},{2,6,26},{2,6,26},{2,5,25},{3,1,50},{1,6,4},{1,6,4},{0,5,18},{3,3,50},{0,5,18},{4,1,72},{1,9,5},{2,6,2},{1,6,5},{4,1,72},{3,5,72},{1,6,5}, +{0,5,90},{3,5,72},{0,5,90},{1,0,144},{1,0,144},{1,0,144},{1,0,144},{1,6,0},{1,6,0},{1,6,0},{1,4,4},{0,6,8},{0,6,8},{2,10,164},{2,9,94},{2,7,107},{2,7,95},{1,12,307},{1,9,137},{1,7,115},{1,6,154},{0,9,253},{1,6,106},{2,10,83},{2,9,13},{2,7,26},{2,7,14},{4,2,243},{1,9,73},{2,7,41},{1,6,90},{6,2,243}, +{1,6,90},{2,8,91},{2,8,91},{2,8,91},{2,7,94},{1,10,116},{1,8,69},{1,8,69},{1,6,73},{0,8,50},{1,6,25},{2,8,10},{2,8,10},{2,8,10},{2,7,13},{4,0,50},{1,8,5},{1,8,5},{1,6,9},{1,7,50},{1,6,9},{3,7,72},{2,9,4},{2,7,17},{2,7,5},{3,7,72},{8,0,72},{2,7,5},{0,6,90},{8,0,72},{0,6,90},{2,0,90}, 
+{2,0,90},{2,0,90},{2,0,90},{1,9,65},{1,9,65},{1,9,65},{1,6,64},{0,7,10},{0,7,10},{2,13,194},{2,11,126},{2,8,203},{2,8,123},{2,11,245},{2,9,97},{2,8,3},{2,7,107},{0,11,249},{1,7,100},{3,10,99},{3,9,45},{3,8,26},{3,8,50},{3,8,243},{1,11,81},{2,8,2},{1,7,99},{4,6,243},{1,7,99},{2,11,122},{2,11,122},{2,11,122}, +{2,8,123},{2,9,53},{2,8,3},{2,8,3},{2,7,26},{1,8,62},{1,7,19},{3,8,26},{3,8,26},{3,8,26},{3,7,25},{3,6,50},{2,8,2},{2,8,2},{1,7,18},{3,6,50},{1,7,18},{5,3,72},{2,11,4},{3,8,1},{2,8,1},{5,3,72},{4,7,72},{2,8,1},{0,7,90},{4,7,72},{0,7,90},{2,0,122},{2,0,122},{2,0,122},{2,0,122},{2,8,2}, +{2,8,2},{2,8,2},{2,6,2},{1,7,10},{1,7,10},{3,12,164},{3,11,94},{3,9,106},{3,9,94},{2,14,292},{2,11,125},{2,9,81},{2,8,130},{1,11,253},{2,8,106},{3,12,83},{3,11,13},{3,9,25},{3,9,13},{5,4,243},{2,11,76},{2,9,32},{2,8,81},{9,1,243},{2,8,81},{3,10,91},{3,10,91},{3,10,91},{3,9,94},{2,12,99},{2,10,51},{2,10,51}, +{2,8,66},{1,10,50},{2,8,42},{3,10,10},{3,10,10},{3,10,10},{3,9,13},{5,2,50},{2,10,2},{2,10,2},{2,8,17},{8,1,50},{2,8,17},{1,19,72},{3,11,4},{3,9,16},{3,9,4},{1,19,72},{9,2,72},{3,9,4},{0,8,80},{9,2,72},{0,8,80},{3,0,90},{3,0,90},{3,0,90},{3,0,90},{2,10,50},{2,10,50},{2,10,50},{2,8,50},{1,9,9}, +{1,9,9},{3,15,218},{3,13,149},{3,11,242},{3,10,149},{3,14,248},{3,11,99},{3,10,5},{3,9,99},{1,13,244},{2,9,100},{4,12,99},{4,11,45},{4,10,27},{4,10,51},{5,7,243},{2,13,81},{3,10,5},{2,9,99},{6,7,243},{2,9,99},{3,13,145},{3,13,145},{3,13,145},{3,10,148},{3,12,52},{3,10,4},{3,10,4},{3,9,18},{1,12,56},{2,9,19},{4,10,26}, +{4,10,26},{4,10,26},{4,9,25},{5,5,50},{3,10,4},{3,10,4},{2,9,18},{5,7,50},{2,9,18},{7,2,72},{3,13,5},{4,10,2},{3,10,5},{7,2,72},{12,0,72},{3,10,5},{0,9,90},{12,0,72},{0,9,90},{3,0,144},{3,0,144},{3,0,144},{3,0,144},{3,10,0},{3,10,0},{3,10,0},{3,8,4},{2,10,8},{2,10,8},{4,14,164},{4,13,94},{4,11,107}, 
+{4,11,95},{3,16,307},{3,13,137},{3,11,115},{3,10,154},{2,13,253},{3,10,106},{4,14,83},{4,13,13},{4,11,26},{4,11,14},{7,3,243},{3,13,73},{4,11,41},{3,10,90},{11,2,243},{3,10,90},{4,12,91},{4,12,91},{4,12,91},{4,11,94},{3,14,116},{3,12,69},{3,12,69},{3,10,73},{2,12,50},{3,10,25},{4,12,10},{4,12,10},{4,12,10},{4,11,13},{7,1,50}, +{3,12,5},{3,12,5},{3,10,9},{10,2,50},{3,10,9},{5,11,72},{4,13,4},{4,11,17},{4,11,5},{5,11,72},{11,3,72},{4,11,5},{0,10,90},{11,3,72},{0,10,90},{4,0,90},{4,0,90},{4,0,90},{4,0,90},{3,13,65},{3,13,65},{3,13,65},{3,10,64},{2,11,10},{2,11,10},{4,17,194},{4,15,126},{4,12,203},{4,12,123},{4,15,245},{4,13,97},{4,12,3}, +{4,11,107},{2,15,249},{3,11,100},{5,14,99},{5,13,45},{5,12,26},{5,12,50},{5,12,243},{3,15,81},{4,12,2},{3,11,99},{13,1,243},{3,11,99},{4,15,122},{4,15,122},{4,15,122},{4,12,123},{4,13,53},{4,12,3},{4,12,3},{4,11,26},{3,12,62},{3,11,19},{5,12,26},{5,12,26},{5,12,26},{5,11,25},{5,10,50},{4,12,2},{4,12,2},{3,11,18},{12,1,50}, +{3,11,18},{8,2,72},{4,15,4},{5,12,1},{4,12,1},{8,2,72},{13,2,72},{4,12,1},{0,11,90},{13,2,72},{0,11,90},{4,0,122},{4,0,122},{4,0,122},{4,0,122},{4,12,2},{4,12,2},{4,12,2},{4,10,2},{3,11,10},{3,11,10},{5,16,164},{5,15,94},{5,13,106},{5,13,94},{4,18,292},{4,15,125},{4,13,81},{4,12,130},{3,15,253},{4,12,106},{5,16,83}, +{5,15,13},{5,13,25},{5,13,13},{8,3,243},{4,15,76},{4,13,32},{4,12,81},{11,5,243},{4,12,81},{5,14,91},{5,14,91},{5,14,91},{5,13,94},{4,16,99},{4,14,51},{4,14,51},{4,12,66},{3,14,50},{4,12,42},{5,14,10},{5,14,10},{5,14,10},{5,13,13},{8,1,50},{4,14,2},{4,14,2},{4,12,17},{10,5,50},{4,12,17},{3,23,72},{5,15,4},{5,13,16}, +{5,13,4},{3,23,72},{11,6,72},{5,13,4},{0,12,80},{11,6,72},{0,12,80},{5,0,90},{5,0,90},{5,0,90},{5,0,90},{4,14,50},{4,14,50},{4,14,50},{4,12,50},{3,13,9},{3,13,9},{5,19,218},{5,17,149},{5,15,242},{5,14,149},{5,18,248},{5,15,99},{5,14,5},{5,13,99},{3,17,244},{4,13,100},{6,16,99},{6,15,45},{6,14,27},{6,14,51},{8,6,243}, 
+{4,17,81},{5,14,5},{4,13,99},{15,2,243},{4,13,99},{5,17,145},{5,17,145},{5,17,145},{5,14,148},{5,16,52},{5,14,4},{5,14,4},{5,13,18},{3,16,56},{4,13,19},{6,14,26},{6,14,26},{6,14,26},{6,13,25},{8,4,50},{5,14,4},{5,14,4},{4,13,18},{14,2,50},{4,13,18},{3,26,72},{5,17,5},{6,14,2},{5,14,5},{3,26,72},{15,3,72},{5,14,5}, +{0,13,90},{15,3,72},{0,13,90},{5,0,144},{5,0,144},{5,0,144},{5,0,144},{5,14,0},{5,14,0},{5,14,0},{5,12,4},{4,14,8},{4,14,8},{6,18,164},{6,17,94},{6,15,107},{6,15,95},{5,20,307},{5,17,137},{5,15,115},{5,14,154},{4,17,253},{5,14,106},{6,18,83},{6,17,13},{6,15,26},{6,15,14},{5,20,243},{5,17,73},{6,15,41},{5,14,90},{13,6,243}, +{5,14,90},{6,16,91},{6,16,91},{6,16,91},{6,15,94},{5,18,116},{5,16,69},{5,16,69},{5,14,73},{4,16,50},{5,14,25},{6,16,10},{6,16,10},{6,16,10},{6,15,13},{3,25,50},{5,16,5},{5,16,5},{5,14,9},{12,6,50},{5,14,9},{11,0,72},{6,17,4},{6,15,17},{6,15,5},{11,0,72},{13,7,72},{6,15,5},{0,14,90},{13,7,72},{0,14,90},{6,0,90}, +{6,0,90},{6,0,90},{6,0,90},{5,17,65},{5,17,65},{5,17,65},{5,14,64},{4,15,10},{4,15,10},{6,21,194},{6,19,126},{6,16,203},{6,16,123},{6,19,245},{6,17,97},{6,16,3},{6,15,107},{4,19,249},{5,15,100},{7,18,99},{7,17,45},{7,16,26},{7,16,50},{11,1,243},{5,19,81},{6,16,2},{5,15,99},{15,5,243},{5,15,99},{6,19,122},{6,19,122},{6,19,122}, +{6,16,123},{6,17,53},{6,16,3},{6,16,3},{6,15,26},{5,16,62},{5,15,19},{7,16,26},{7,16,26},{7,16,26},{7,15,25},{9,6,50},{6,16,2},{6,16,2},{5,15,18},{14,5,50},{5,15,18},{10,6,72},{6,19,4},{7,16,1},{6,16,1},{10,6,72},{15,6,72},{6,16,1},{0,15,90},{15,6,72},{0,15,90},{6,0,122},{6,0,122},{6,0,122},{6,0,122},{6,16,2}, +{6,16,2},{6,16,2},{6,14,2},{5,15,10},{5,15,10},{7,20,164},{7,19,94},{7,17,106},{7,17,94},{6,22,292},{6,19,125},{6,17,81},{6,16,130},{5,19,253},{6,16,106},{7,20,83},{7,19,13},{7,17,25},{7,17,13},{10,7,243},{6,19,76},{6,17,32},{6,16,81},{13,9,243},{6,16,81},{7,18,91},{7,18,91},{7,18,91},{7,17,94},{6,20,99},{6,18,51},{6,18,51}, 
+{6,16,66},{5,18,50},{6,16,42},{7,18,10},{7,18,10},{7,18,10},{7,17,13},{10,5,50},{6,18,2},{6,18,2},{6,16,17},{12,9,50},{6,16,17},{12,2,72},{7,19,4},{7,17,16},{7,17,4},{12,2,72},{13,10,72},{7,17,4},{0,16,80},{13,10,72},{0,16,80},{7,0,90},{7,0,90},{7,0,90},{7,0,90},{6,18,50},{6,18,50},{6,18,50},{6,16,50},{5,17,9}, +{5,17,9},{7,23,218},{7,21,149},{7,19,242},{7,18,149},{7,22,248},{7,19,99},{7,18,5},{7,17,99},{5,21,244},{6,17,100},{7,23,218},{7,21,149},{8,18,206},{7,18,149},{13,0,243},{6,21,81},{7,18,5},{6,17,99},{5,21,243},{6,17,99},{7,21,145},{7,21,145},{7,21,145},{7,18,148},{7,20,52},{7,18,4},{7,18,4},{7,17,18},{5,20,56},{6,17,19},{7,21,145}, +{7,21,145},{7,21,145},{7,18,148},{10,8,50},{7,18,4},{7,18,4},{6,17,18},{11,12,50},{6,17,18},{9,15,72},{7,21,5},{8,18,37},{7,18,5},{9,15,72},{5,22,72},{7,18,5},{0,17,90},{5,22,72},{0,17,90},{7,0,144},{7,0,144},{7,0,144},{7,0,144},{7,18,0},{7,18,0},{7,18,0},{7,16,4},{6,18,8},{6,18,8},{8,21,388},{8,20,334},{8,19,316}, +{8,19,340},{7,24,307},{7,21,137},{7,19,115},{7,18,154},{6,21,253},{7,18,106},{8,21,99},{8,20,45},{8,19,27},{8,19,51},{8,19,243},{7,21,73},{7,19,51},{7,18,90},{15,10,243},{7,18,90},{8,19,315},{8,19,315},{8,19,315},{8,18,314},{7,22,116},{7,20,69},{7,20,69},{7,18,73},{6,20,50},{7,18,25},{8,19,26},{8,19,26},{8,19,26},{8,18,25},{9,14,50}, +{7,20,5},{7,20,5},{7,18,9},{14,10,50},{7,18,9},{13,4,72},{8,20,20},{8,19,2},{7,19,26},{13,4,72},{15,11,72},{7,19,26},{0,18,90},{15,11,72},{0,18,90},{8,0,314},{8,0,314},{8,0,314},{8,0,314},{7,21,65},{7,21,65},{7,21,65},{7,18,64},{6,19,10},{6,19,10},{8,23,164},{8,22,94},{8,20,106},{8,20,94},{8,22,329},{8,20,221},{8,20,121}, +{8,19,220},{6,23,249},{7,19,100},{8,23,83},{8,22,13},{8,20,25},{8,20,13},{13,5,243},{7,23,81},{8,20,40},{7,19,99},{12,15,243},{7,19,99},{8,21,91},{8,21,91},{8,21,91},{8,20,94},{8,20,133},{8,19,99},{8,19,99},{8,18,110},{7,20,62},{7,19,19},{8,21,10},{8,21,10},{8,21,10},{8,20,13},{11,10,50},{8,19,18},{8,19,18},{7,19,18},{5,23,50}, 
+{7,19,18},{15,0,72},{8,22,4},{8,20,16},{8,20,4},{15,0,72},{12,16,72},{8,20,4},{0,19,90},{12,16,72},{0,19,90},{8,0,90},{8,0,90},{8,0,90},{8,0,90},{8,18,81},{8,18,81},{8,18,81},{8,18,85},{7,19,10},{7,19,10},{8,26,194},{8,24,131},{8,21,203},{8,21,123},{8,24,245},{8,22,97},{8,21,3},{8,20,97},{7,23,253},{7,20,141},{9,23,99}, +{9,22,45},{9,21,26},{9,21,50},{15,1,243},{8,22,96},{8,21,2},{8,20,96},{15,13,243},{8,20,96},{8,24,122},{8,24,122},{8,24,122},{8,21,123},{8,22,53},{8,21,3},{8,21,3},{8,19,27},{7,22,50},{8,19,75},{9,21,26},{9,21,26},{9,21,26},{9,20,25},{12,9,50},{8,21,2},{8,21,2},{8,19,26},{14,13,50},{8,19,26},{14,6,72},{8,24,9},{9,21,1}, +{8,21,1},{14,6,72},{15,14,72},{8,21,1},{0,20,80},{15,14,72},{0,20,80},{8,0,122},{8,0,122},{8,0,122},{8,0,122},{8,21,2},{8,21,2},{8,21,2},{8,19,2},{7,21,9},{7,21,9},{9,26,154},{9,24,81},{9,22,106},{9,22,82},{9,24,307},{8,24,137},{9,22,91},{8,21,154},{7,25,244},{8,21,106},{9,26,90},{9,24,17},{9,22,42},{9,22,18},{15,4,243}, +{8,24,73},{9,22,27},{8,21,90},{9,22,243},{8,21,90},{9,24,81},{9,24,81},{9,24,81},{9,22,81},{9,22,114},{8,23,68},{8,23,68},{8,21,73},{7,24,56},{8,21,25},{9,24,17},{9,24,17},{9,24,17},{9,22,17},{15,2,50},{8,23,4},{8,23,4},{8,21,9},{13,16,50},{8,21,9},{11,19,72},{9,24,1},{9,22,26},{9,22,2},{11,19,72},{9,23,72},{9,22,2}, +{0,21,90},{9,23,72},{0,21,90},{9,0,80},{9,0,80},{9,0,80},{9,0,80},{9,20,65},{9,20,65},{9,20,65},{8,21,64},{8,21,16},{8,21,16},{9,28,216},{9,26,149},{9,23,245},{9,23,149},{9,27,248},{9,24,89},{9,23,5},{9,22,99},{7,27,260},{8,22,100},{10,25,99},{10,24,45},{10,23,27},{10,23,51},{10,23,243},{8,26,81},{9,23,5},{8,22,99},{11,21,243}, +{8,22,99},{9,26,145},{9,26,145},{9,26,145},{9,23,148},{9,25,52},{9,23,4},{9,23,4},{9,22,18},{8,23,72},{8,22,19},{10,23,26},{10,23,26},{10,23,26},{10,22,25},{11,18,50},{9,23,4},{9,23,4},{8,22,18},{10,21,50},{8,22,18},{15,8,72},{9,26,5},{10,23,2},{9,23,5},{15,8,72},{11,22,72},{9,23,5},{0,22,90},{11,22,72},{0,22,90},{9,0,144}, 
+{9,0,144},{9,0,144},{9,0,144},{9,23,0},{9,23,0},{9,23,0},{9,21,4},{8,23,8},{8,23,8},{10,27,164},{10,26,94},{10,24,106},{10,24,94},{9,29,307},{9,26,137},{9,24,105},{9,23,154},{8,26,253},{9,23,106},{10,27,83},{10,26,13},{10,24,25},{10,24,13},{15,9,243},{9,26,73},{10,24,40},{9,23,90},{14,19,243},{9,23,90},{10,25,91},{10,25,91},{10,25,91}, +{10,24,94},{9,27,116},{9,25,69},{9,25,69},{9,23,73},{8,25,50},{9,23,25},{10,25,10},{10,25,10},{10,25,10},{10,24,13},{13,14,50},{9,25,5},{9,25,5},{9,23,9},{8,25,50},{9,23,9},{14,14,72},{10,26,4},{10,24,16},{10,24,4},{14,14,72},{14,20,72},{10,24,4},{0,23,90},{14,20,72},{0,23,90},{10,0,90},{10,0,90},{10,0,90},{10,0,90},{9,26,65}, +{9,26,65},{9,26,65},{9,23,64},{8,24,9},{8,24,9},{10,30,194},{10,28,131},{10,25,203},{10,25,123},{10,28,245},{10,26,97},{10,25,3},{10,24,97},{8,28,252},{9,24,85},{11,27,99},{11,26,45},{11,25,26},{11,25,50},{14,15,243},{9,28,80},{10,25,2},{9,24,84},{12,23,243},{9,24,84},{10,28,122},{10,28,122},{10,28,122},{10,25,123},{10,26,53},{10,25,3},{10,25,3}, +{10,23,27},{9,25,62},{9,24,21},{11,25,26},{11,25,26},{11,25,26},{11,24,25},{14,13,50},{10,25,2},{10,25,2},{9,24,20},{11,23,50},{9,24,20},{13,20,72},{10,28,9},{11,25,1},{10,25,1},{13,20,72},{13,23,74},{10,25,1},{0,24,80},{13,23,74},{0,24,80},{10,0,122},{10,0,122},{10,0,122},{10,0,122},{10,25,2},{10,25,2},{10,25,2},{10,23,2},{9,24,5}, +{9,24,5},{11,30,154},{11,28,81},{11,26,106},{11,26,82},{11,28,307},{10,28,137},{11,26,91},{10,25,154},{9,28,260},{10,25,106},{11,30,90},{11,28,17},{11,26,42},{11,26,18},{14,18,243},{10,28,73},{11,26,27},{10,25,90},{11,26,243},{10,25,90},{11,28,81},{11,28,81},{11,28,81},{11,26,81},{11,26,114},{10,27,68},{10,27,68},{10,25,73},{9,27,53},{10,25,25},{11,28,17}, 
+{11,28,17},{11,28,17},{11,26,17},{14,16,50},{10,27,4},{10,27,4},{10,25,9},{15,20,50},{10,25,9},{13,23,72},{11,28,1},{11,26,26},{11,26,2},{13,23,72},{11,27,72},{11,26,2},{0,25,90},{11,27,72},{0,25,90},{11,0,80},{11,0,80},{11,0,80},{11,0,80},{11,24,65},{11,24,65},{11,24,65},{10,25,64},{10,25,16},{10,25,16},{11,31,248},{11,30,149},{11,27,245}, +{11,27,149},{11,31,248},{11,28,89},{11,27,5},{11,26,99},{9,30,244},{10,26,100},{12,29,99},{12,28,45},{12,27,27},{12,27,51},{12,27,243},{10,30,81},{11,27,5},{10,26,99},{13,25,243},{10,26,99},{11,30,145},{11,30,145},{11,30,145},{11,27,148},{11,29,52},{11,27,4},{11,27,4},{11,26,18},{9,29,56},{10,26,19},{12,27,26},{12,27,26},{12,27,26},{12,26,25},{13,22,50}, +{11,27,4},{11,27,4},{10,26,18},{12,25,50},{10,26,18},{15,19,72},{11,30,5},{12,27,2},{11,27,5},{15,19,72},{13,26,72},{11,27,5},{0,26,90},{13,26,72},{0,26,90},{11,0,144},{11,0,144},{11,0,144},{11,0,144},{11,27,0},{11,27,0},{11,27,0},{11,25,4},{10,27,8},{10,27,8},{12,31,164},{12,30,94},{12,28,106},{12,28,94},{12,30,329},{11,30,137},{11,28,105}, +{11,27,154},{10,30,253},{11,27,106},{12,31,83},{12,30,13},{12,28,25},{12,28,13},{14,23,243},{11,30,73},{12,28,40},{11,27,90},{11,29,243},{11,27,90},{12,29,91},{12,29,91},{12,29,91},{12,28,94},{11,31,116},{11,29,69},{11,29,69},{11,27,73},{10,29,50},{11,27,25},{12,29,10},{12,29,10},{12,29,10},{12,28,13},{15,18,50},{11,29,5},{11,29,5},{11,27,9},{10,29,50}, +{11,27,9},{13,28,72},{12,30,4},{12,28,16},{12,28,4},{13,28,72},{11,30,72},{12,28,4},{0,27,90},{11,30,72},{0,27,90},{12,0,90},{12,0,90},{12,0,90},{12,0,90},{11,30,65},{11,30,65},{11,30,65},{11,27,64},{10,28,9},{10,28,9},{12,31,356},{12,31,140},{12,29,203},{12,29,123},{12,31,284},{12,30,97},{12,29,3},{12,28,97},{10,31,287},{11,28,85},{13,31,99}, 
+{13,30,45},{13,29,26},{13,29,50},{13,29,243},{12,30,96},{12,29,2},{11,28,84},{14,27,243},{11,28,84},{12,31,131},{12,31,131},{12,31,131},{12,29,123},{12,30,53},{12,29,3},{12,29,3},{12,27,27},{11,29,62},{11,28,21},{13,29,26},{13,29,26},{13,29,26},{13,28,25},{13,27,50},{12,29,2},{12,29,2},{11,28,20},{13,27,50},{11,28,20},{15,24,72},{12,31,18},{13,29,1}, +{12,29,1},{15,24,72},{15,27,74},{12,29,1},{0,28,80},{15,27,74},{0,28,80},{12,0,122},{12,0,122},{12,0,122},{12,0,122},{12,29,2},{12,29,2},{12,29,2},{12,27,2},{11,28,5},{11,28,5},{13,31,280},{13,31,120},{13,30,106},{13,30,82},{13,31,328},{13,31,200},{13,30,91},{12,29,154},{12,31,344},{12,29,106},{13,31,216},{13,31,56},{13,30,42},{13,30,18},{15,25,244}, +{13,31,136},{13,30,27},{12,29,90},{13,30,243},{12,29,90},{13,31,84},{13,31,84},{13,31,84},{13,30,81},{13,30,114},{12,31,68},{12,31,68},{12,29,73},{11,31,53},{12,29,25},{13,31,20},{13,31,20},{13,31,20},{13,30,17},{13,30,50},{12,31,4},{12,31,4},{12,29,9},{12,30,50},{12,29,9},{15,27,72},{13,31,40},{13,30,26},{13,30,2},{15,27,72},{13,31,72},{13,30,2}, +{0,29,90},{13,31,72},{0,29,90},{13,0,80},{13,0,80},{13,0,80},{13,0,80},{13,28,65},{13,28,65},{13,28,65},{12,29,64},{12,29,16},{12,29,16},{14,31,415},{14,31,351},{13,31,244},{13,31,148},{14,31,511},{13,31,173},{13,31,4},{13,30,82},{13,31,381},{12,30,83},{14,31,126},{14,31,62},{14,31,26},{14,31,50},{14,31,222},{13,31,173},{13,31,4},{12,30,82},{14,30,221}, +{12,30,82},{13,31,244},{13,31,244},{13,31,244},{13,31,148},{13,31,100},{13,31,4},{13,31,4},{13,30,18},{12,31,72},{12,30,19},{14,31,26},{14,31,26},{14,31,26},{14,30,25},{15,26,50},{13,31,4},{13,31,4},{12,30,18},{14,29,50},{12,30,18},{15,29,61},{14,31,37},{14,31,1},{13,31,4},{15,29,61},{15,30,61},{13,31,4},{0,30,73},{15,30,61},{0,30,73},{13,0,144}, 
+{13,0,144},{13,0,144},{13,0,144},{13,31,0},{13,31,0},{13,31,0},{13,29,4},{12,31,8},{12,31,8},{14,31,239},{14,31,175},{14,31,139},{14,31,99},{14,31,239},{14,31,135},{14,31,99},{13,31,73},{13,31,285},{13,31,25},{14,31,158},{14,31,94},{14,31,58},{14,31,18},{15,29,94},{14,31,54},{14,31,18},{13,31,9},{15,30,93},{13,31,9},{14,31,139},{14,31,139},{14,31,139}, +{14,31,99},{14,31,139},{14,31,99},{14,31,99},{13,31,73},{13,31,116},{13,31,25},{14,31,58},{14,31,58},{14,31,58},{14,31,18},{15,28,52},{14,31,18},{14,31,18},{13,31,9},{15,29,58},{13,31,9},{15,30,9},{15,31,9},{15,31,9},{14,31,9},{15,30,9},{15,31,9},{14,31,9},{0,31,9},{15,31,9},{0,31,9},{14,0,90},{14,0,90},{14,0,90},{14,0,90},{14,30,81}, +{14,30,81},{14,30,81},{13,31,64},{13,31,16},{13,31,16},{15,31,314},{14,31,258},{14,31,222},{14,31,158},{14,31,226},{14,31,98},{14,31,62},{14,31,2},{14,31,122},{14,31,50},{15,31,25},{15,31,25},{15,31,25},{15,31,25},{15,30,22},{15,31,25},{15,31,25},{14,31,1},{15,31,25},{14,31,1},{14,31,222},{14,31,222},{14,31,222},{14,31,158},{14,31,126},{14,31,62},{14,31,62}, +{14,31,2},{14,31,86},{14,31,50},{15,31,25},{15,31,25},{15,31,25},{15,31,25},{15,30,13},{15,31,25},{15,31,25},{14,31,1},{15,30,25},{14,31,1},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{14,0,122},{14,0,122},{14,0,122},{14,0,122},{14,31,26},{14,31,26},{14,31,26},{14,31,2},{14,31,50}, +{14,31,50},{0,6,202},{0,5,52},{0,3,25},{0,3,61},{0,4,442},{0,3,313},{0,3,142},{0,2,318},{0,2,498},{0,2,354},{0,6,202},{0,5,52},{0,3,25},{0,3,61},{1,1,441},{0,3,313},{0,3,142},{0,2,318},{2,0,442},{0,2,318},{0,3,0},{0,3,0},{0,3,0},{0,2,1},{0,1,45},{0,1,25},{0,1,25},{0,1,26},{0,1,50},{0,1,35},{0,3,0}, +{0,3,0},{0,3,0},{0,2,1},{0,1,45},{0,1,25},{0,1,25},{0,1,26},{0,1,41},{0,1,26},{1,3,200},{0,5,52},{0,3,25},{0,3,61},{1,3,200},{3,0,202},{0,3,61},{0,2,218},{3,0,202},{0,2,218},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,9,200},{0,7,20},{0,5,20}, 
+{0,4,25},{0,6,686},{0,5,433},{0,4,169},{0,3,443},{0,3,794},{0,3,524},{0,9,200},{0,7,20},{0,5,20},{0,4,25},{0,6,686},{0,5,433},{0,4,169},{0,3,443},{3,0,686},{0,3,443},{0,6,1},{0,6,1},{0,6,1},{0,3,4},{0,3,145},{0,2,85},{0,2,85},{0,2,101},{0,1,178},{0,1,115},{0,6,1},{0,6,1},{0,6,1},{0,3,4},{0,3,145}, +{0,2,85},{0,2,85},{0,2,101},{0,2,149},{0,2,101},{0,9,200},{0,7,20},{0,5,20},{0,4,25},{0,9,200},{2,3,200},{0,4,25},{0,3,218},{2,3,200},{0,3,218},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,11,257},{0,9,54},{0,6,114},{0,5,65},{0,9,728},{0,6,371},{0,5,80}, +{0,4,377},{0,5,949},{0,4,521},{0,11,257},{0,9,54},{1,5,97},{0,5,65},{2,2,723},{0,6,371},{0,5,80},{0,4,377},{1,4,723},{0,4,377},{0,8,50},{0,8,50},{0,8,50},{0,5,49},{0,5,164},{0,4,50},{0,4,50},{0,3,65},{0,3,245},{0,2,126},{0,8,50},{0,8,50},{0,8,50},{0,5,49},{1,2,162},{0,4,50},{0,4,50},{0,3,65},{1,2,162}, +{0,3,65},{3,2,202},{0,9,5},{1,5,16},{0,5,16},{3,2,202},{5,1,200},{0,5,16},{0,4,208},{5,1,200},{0,4,208},{0,0,49},{0,0,49},{0,0,49},{0,0,49},{0,2,1},{0,2,1},{0,2,1},{0,1,4},{0,1,13},{0,1,13},{0,14,425},{0,11,234},{1,7,277},{0,7,245},{0,11,724},{0,8,289},{0,6,34},{0,5,308},{0,6,1087},{0,5,533},{1,11,201}, +{1,9,18},{1,7,21},{1,6,26},{1,8,723},{0,8,289},{0,6,34},{0,5,308},{4,2,723},{0,5,308},{0,11,225},{0,11,225},{0,11,225},{0,7,229},{0,8,162},{0,6,18},{0,6,18},{0,4,25},{0,4,338},{0,3,162},{1,8,2},{1,8,2},{1,8,2},{1,5,5},{0,8,162},{0,6,18},{0,6,18},{0,4,25},{4,0,162},{0,4,25},{3,4,200},{0,11,9},{1,7,20}, +{0,7,20},{3,4,200},{7,0,200},{0,7,20},{0,5,208},{7,0,200},{0,5,208},{0,0,225},{0,0,225},{0,0,225},{0,0,225},{0,5,0},{0,5,0},{0,5,0},{0,3,0},{0,2,61},{0,2,61},{1,14,410},{1,11,209},{1,8,288},{1,7,234},{0,14,739},{0,10,254},{0,8,33},{0,6,270},{0,8,1131},{0,6,450},{1,14,266},{1,11,65},{2,7,106},{1,7,90},{3,4,723}, 
+{0,10,238},{0,8,17},{0,6,254},{7,0,723},{0,6,254},{1,11,209},{1,11,209},{1,11,209},{1,7,209},{0,11,178},{0,8,17},{0,8,17},{0,5,18},{0,6,376},{0,5,123},{1,11,65},{1,11,65},{1,11,65},{1,7,65},{3,1,162},{0,8,1},{0,8,1},{0,5,2},{3,3,162},{0,5,2},{3,7,200},{1,11,1},{2,7,25},{0,8,17},{3,7,200},{8,0,200},{0,8,17}, +{0,6,218},{8,0,200},{0,6,218},{1,0,208},{1,0,208},{1,0,208},{1,0,208},{0,8,16},{0,8,16},{0,8,16},{0,5,17},{0,4,80},{0,4,80},{1,16,474},{1,13,276},{1,9,457},{1,9,292},{1,13,740},{1,10,298},{1,8,45},{1,7,315},{0,9,1013},{0,7,308},{2,13,201},{2,11,21},{2,9,21},{2,8,26},{5,0,723},{0,12,227},{1,8,29},{0,7,227},{5,4,723}, +{0,7,227},{1,13,272},{1,13,272},{1,13,272},{1,9,276},{1,10,180},{1,8,29},{1,8,29},{1,6,33},{0,8,306},{0,6,41},{2,10,2},{2,10,2},{2,10,2},{2,7,5},{4,0,162},{0,10,5},{0,10,5},{0,6,5},{1,7,162},{0,6,5},{5,3,200},{1,13,4},{2,9,20},{1,9,20},{5,3,200},{4,7,200},{1,9,20},{0,7,218},{4,7,200},{0,7,218},{1,0,272}, +{1,0,272},{1,0,272},{1,0,272},{1,7,17},{1,7,17},{1,7,17},{1,5,17},{0,6,40},{0,6,40},{2,15,426},{2,13,223},{2,10,283},{2,9,234},{1,16,739},{1,12,267},{1,10,33},{1,8,273},{0,11,913},{0,8,225},{2,15,257},{2,13,54},{3,9,97},{2,9,65},{4,6,723},{0,13,208},{1,10,17},{0,8,209},{3,8,723},{0,8,209},{2,12,219},{2,12,219},{2,12,219}, +{2,9,218},{1,13,180},{1,10,17},{1,10,17},{1,7,18},{0,9,229},{0,7,27},{2,12,50},{2,12,50},{2,12,50},{2,9,49},{3,6,162},{1,10,1},{1,10,1},{1,7,2},{3,6,162},{1,7,2},{1,19,200},{2,13,5},{3,9,16},{2,9,16},{1,19,200},{9,2,200},{2,9,16},{0,8,208},{9,2,200},{0,8,208},{2,0,218},{2,0,218},{2,0,218},{2,0,218},{1,10,16}, +{1,10,16},{1,10,16},{1,7,17},{0,8,17},{0,8,17},{2,18,450},{2,15,259},{2,11,410},{2,11,270},{2,15,749},{2,12,314},{2,10,59},{2,9,333},{0,13,868},{0,9,213},{3,15,201},{3,13,18},{3,11,21},{3,10,26},{6,2,723},{0,15,204},{2,10,34},{0,9,212},{8,3,723},{0,9,212},{2,15,250},{2,15,250},{2,15,250},{2,11,254},{2,12,187},{2,10,43},{2,10,43}, 
+{2,8,50},{0,11,189},{1,8,37},{3,12,2},{3,12,2},{3,12,2},{3,9,5},{5,2,162},{1,12,2},{1,12,2},{1,8,1},{8,1,162},{1,8,1},{5,8,200},{2,15,9},{3,11,20},{2,11,20},{5,8,200},{4,10,200},{2,11,20},{0,9,208},{4,10,200},{0,9,208},{2,0,250},{2,0,250},{2,0,250},{2,0,250},{2,9,25},{2,9,25},{2,9,25},{2,7,25},{0,9,5}, +{0,9,5},{3,18,410},{3,15,209},{3,12,288},{3,11,234},{2,18,739},{2,14,254},{2,12,33},{2,10,270},{0,15,804},{1,10,227},{3,18,266},{3,15,65},{4,11,106},{3,11,90},{5,8,723},{1,15,219},{2,12,17},{1,10,218},{4,10,723},{1,10,218},{3,15,209},{3,15,209},{3,15,209},{3,11,209},{2,15,178},{2,12,17},{2,12,17},{2,9,18},{0,13,171},{1,9,26},{3,15,65}, +{3,15,65},{3,15,65},{3,11,65},{5,5,162},{2,12,1},{2,12,1},{2,9,2},{5,7,162},{2,9,2},{5,11,200},{3,15,1},{4,11,25},{2,12,17},{5,11,200},{11,3,200},{2,12,17},{0,10,218},{11,3,200},{0,10,218},{3,0,208},{3,0,208},{3,0,208},{3,0,208},{2,12,16},{2,12,16},{2,12,16},{2,9,17},{1,10,9},{1,10,9},{3,20,474},{3,17,276},{3,13,457}, +{3,13,292},{3,17,740},{3,14,298},{3,12,45},{3,11,315},{0,16,747},{1,11,231},{4,17,201},{4,15,21},{4,13,21},{4,12,26},{7,4,723},{1,17,209},{3,12,29},{1,11,227},{9,5,723},{1,11,227},{3,17,272},{3,17,272},{3,17,272},{3,13,276},{3,14,180},{3,12,29},{3,12,29},{3,10,33},{0,15,171},{2,10,41},{4,14,2},{4,14,2},{4,14,2},{4,11,5},{7,1,162}, +{2,14,5},{2,14,5},{2,10,5},{10,2,162},{2,10,5},{8,2,200},{3,17,4},{4,13,20},{3,13,20},{8,2,200},{13,2,200},{3,13,20},{0,11,218},{13,2,200},{0,11,218},{3,0,272},{3,0,272},{3,0,272},{3,0,272},{3,11,17},{3,11,17},{3,11,17},{3,9,17},{1,12,8},{1,12,8},{4,19,426},{4,17,223},{4,14,283},{4,13,234},{3,20,739},{3,16,267},{3,14,33}, +{3,12,273},{0,18,727},{2,12,225},{4,19,257},{4,17,54},{5,13,97},{4,13,65},{6,10,723},{2,17,208},{3,14,17},{2,12,209},{12,3,723},{2,12,209},{4,16,219},{4,16,219},{4,16,219},{4,13,218},{3,17,180},{3,14,17},{3,14,17},{3,11,18},{1,15,171},{2,11,27},{4,16,50},{4,16,50},{4,16,50},{4,13,49},{5,10,162},{3,14,1},{3,14,1},{3,11,2},{12,1,162}, 
+{3,11,2},{3,23,200},{4,17,5},{5,13,16},{4,13,16},{3,23,200},{11,6,200},{4,13,16},{0,12,208},{11,6,200},{0,12,208},{4,0,218},{4,0,218},{4,0,218},{4,0,218},{3,14,16},{3,14,16},{3,14,16},{3,11,17},{1,13,10},{1,13,10},{4,22,450},{4,19,259},{4,15,410},{4,15,270},{4,19,749},{4,16,314},{4,14,59},{4,13,333},{1,18,747},{2,13,213},{5,19,201}, +{5,17,18},{5,15,21},{5,14,26},{9,1,723},{2,19,204},{4,14,34},{2,13,212},{15,1,723},{2,13,212},{4,19,250},{4,19,250},{4,19,250},{4,15,254},{4,16,187},{4,14,43},{4,14,43},{4,12,50},{1,16,174},{3,12,37},{5,16,2},{5,16,2},{5,16,2},{5,13,5},{8,1,162},{3,16,2},{3,16,2},{3,12,1},{10,5,162},{3,12,1},{9,4,200},{4,19,9},{5,15,20}, +{4,15,20},{9,4,200},{9,10,200},{4,15,20},{0,13,208},{9,10,200},{0,13,208},{4,0,250},{4,0,250},{4,0,250},{4,0,250},{4,13,25},{4,13,25},{4,13,25},{4,11,25},{2,13,5},{2,13,5},{5,22,410},{5,19,209},{5,16,288},{5,15,234},{4,22,739},{4,18,254},{4,16,33},{4,14,270},{1,20,724},{3,14,227},{5,22,266},{5,19,65},{6,15,106},{5,15,90},{9,4,723}, +{3,19,219},{4,16,17},{3,14,218},{9,10,723},{3,14,218},{5,19,209},{5,19,209},{5,19,209},{5,15,209},{4,19,178},{4,16,17},{4,16,17},{4,13,18},{2,17,171},{3,13,26},{5,19,65},{5,19,65},{5,19,65},{5,15,65},{8,4,162},{4,16,1},{4,16,1},{4,13,2},{14,2,162},{4,13,2},{11,0,200},{5,19,1},{6,15,25},{4,16,17},{11,0,200},{13,7,200},{4,16,17}, +{0,14,218},{13,7,200},{0,14,218},{5,0,208},{5,0,208},{5,0,208},{5,0,208},{4,16,16},{4,16,16},{4,16,16},{4,13,17},{3,14,9},{3,14,9},{5,24,474},{5,21,276},{5,17,457},{5,17,292},{5,21,740},{5,18,298},{5,16,45},{5,15,315},{1,22,740},{3,15,231},{6,21,201},{6,19,21},{6,17,21},{6,16,26},{10,3,723},{3,21,209},{5,16,29},{3,15,227},{11,9,723}, +{3,15,227},{5,21,272},{5,21,272},{5,21,272},{5,17,276},{5,18,180},{5,16,29},{5,16,29},{5,14,33},{2,19,171},{4,14,41},{6,18,2},{6,18,2},{6,18,2},{6,15,5},{3,25,162},{4,18,5},{4,18,5},{4,14,5},{12,6,162},{4,14,5},{10,6,200},{5,21,4},{6,17,20},{5,17,20},{10,6,200},{15,6,200},{5,17,20},{0,15,218},{15,6,200},{0,15,218},{5,0,272}, 
+{5,0,272},{5,0,272},{5,0,272},{5,15,17},{5,15,17},{5,15,17},{5,13,17},{3,16,8},{3,16,8},{6,23,426},{6,21,223},{6,18,283},{6,17,234},{5,24,739},{5,20,267},{5,18,33},{5,16,273},{2,22,727},{4,16,225},{6,23,257},{6,21,54},{7,17,97},{6,17,65},{9,9,723},{4,21,208},{5,18,17},{4,16,209},{14,7,723},{4,16,209},{6,20,219},{6,20,219},{6,20,219}, +{6,17,218},{5,21,180},{5,18,17},{5,18,17},{5,15,18},{3,19,171},{4,15,27},{6,20,50},{6,20,50},{6,20,50},{6,17,49},{9,6,162},{5,18,1},{5,18,1},{5,15,2},{14,5,162},{5,15,2},{12,2,200},{6,21,5},{7,17,16},{6,17,16},{12,2,200},{13,10,200},{6,17,16},{0,16,208},{13,10,200},{0,16,208},{6,0,218},{6,0,218},{6,0,218},{6,0,218},{5,18,16}, +{5,18,16},{5,18,16},{5,15,17},{3,17,10},{3,17,10},{6,26,450},{6,23,259},{6,19,410},{6,19,270},{6,23,749},{6,20,314},{6,18,59},{6,17,333},{3,22,747},{4,17,213},{7,23,201},{7,21,18},{7,19,21},{7,18,26},{11,5,723},{4,23,204},{6,18,34},{4,17,212},{12,11,723},{4,17,212},{6,23,250},{6,23,250},{6,23,250},{6,19,254},{6,20,187},{6,18,43},{6,18,43}, +{6,16,50},{3,20,174},{5,16,37},{7,20,2},{7,20,2},{7,20,2},{7,17,5},{10,5,162},{5,20,2},{5,20,2},{5,16,1},{12,9,162},{5,16,1},{11,8,200},{6,23,9},{7,19,20},{6,19,20},{11,8,200},{11,14,200},{6,19,20},{0,17,208},{11,14,200},{0,17,208},{6,0,250},{6,0,250},{6,0,250},{6,0,250},{6,17,25},{6,17,25},{6,17,25},{6,15,25},{4,17,5}, +{4,17,5},{7,26,410},{7,23,209},{7,20,288},{7,19,234},{6,26,739},{6,22,254},{6,20,33},{6,18,270},{3,24,724},{5,18,227},{7,26,266},{7,23,65},{7,20,144},{7,19,90},{11,8,723},{5,23,219},{6,20,17},{5,18,218},{11,14,723},{5,18,218},{7,23,209},{7,23,209},{7,23,209},{7,19,209},{6,23,178},{6,20,17},{6,20,17},{6,17,18},{4,21,171},{5,17,26},{7,23,65}, 
+{7,23,65},{7,23,65},{7,19,65},{10,8,162},{6,20,1},{6,20,1},{6,17,2},{11,12,162},{6,17,2},{13,4,200},{7,23,1},{8,19,50},{6,20,17},{13,4,200},{15,11,200},{6,20,17},{0,18,218},{15,11,200},{0,18,218},{7,0,208},{7,0,208},{7,0,208},{7,0,208},{6,20,16},{6,20,16},{6,20,16},{6,17,17},{5,18,9},{5,18,9},{7,28,474},{7,25,276},{7,21,457}, +{7,21,292},{7,25,740},{7,22,298},{7,20,45},{7,19,315},{3,26,740},{5,19,231},{8,23,283},{8,22,133},{8,20,97},{8,20,133},{12,7,723},{5,25,209},{7,20,29},{5,19,227},{13,13,723},{5,19,227},{7,25,272},{7,25,272},{7,25,272},{7,21,276},{7,22,180},{7,20,29},{7,20,29},{7,18,33},{4,23,171},{6,18,41},{8,20,81},{8,20,81},{8,20,81},{8,19,82},{9,14,162}, +{6,22,5},{6,22,5},{6,18,5},{14,10,162},{6,18,5},{15,0,200},{7,25,4},{8,20,16},{7,21,20},{15,0,200},{12,16,200},{7,21,20},{0,19,218},{12,16,200},{0,19,218},{7,0,272},{7,0,272},{7,0,272},{7,0,272},{7,19,17},{7,19,17},{7,19,17},{7,17,17},{5,20,8},{5,20,8},{8,26,642},{8,24,459},{8,22,462},{8,21,467},{7,28,739},{7,24,267},{7,22,33}, +{7,20,273},{4,26,727},{6,20,225},{8,26,201},{8,24,18},{8,22,21},{8,21,26},{14,3,723},{6,25,208},{7,22,17},{6,20,209},{11,17,723},{6,20,209},{8,23,443},{8,23,443},{8,23,443},{8,20,446},{7,25,180},{7,22,17},{7,22,17},{7,19,18},{5,23,171},{6,19,27},{8,23,2},{8,23,2},{8,23,2},{8,20,5},{11,10,162},{7,22,1},{7,22,1},{7,19,2},{5,23,162}, +{7,19,2},{14,6,200},{8,24,17},{8,22,20},{7,22,17},{14,6,200},{15,14,200},{7,22,17},{0,20,208},{15,14,200},{0,20,208},{8,0,442},{8,0,442},{8,0,442},{8,0,442},{7,22,16},{7,22,16},{7,22,16},{7,19,17},{5,21,10},{5,21,10},{8,28,420},{8,26,223},{8,23,283},{8,22,234},{7,31,872},{7,26,371},{7,23,201},{7,21,368},{5,26,747},{6,21,213},{8,28,251}, 
+{8,26,54},{9,22,97},{8,22,65},{13,9,723},{6,27,204},{7,23,57},{6,21,212},{14,15,723},{6,21,212},{8,25,219},{8,25,219},{8,25,219},{8,22,218},{7,27,308},{7,24,146},{7,24,146},{7,20,145},{5,24,174},{7,20,37},{8,25,50},{8,25,50},{8,25,50},{8,22,49},{12,9,162},{7,24,2},{7,24,2},{7,20,1},{14,13,162},{7,20,1},{13,12,200},{8,26,5},{9,22,16}, +{8,22,16},{13,12,200},{13,18,200},{8,22,16},{0,21,208},{13,18,200},{0,21,208},{8,0,218},{8,0,218},{8,0,218},{8,0,218},{7,24,145},{7,24,145},{7,24,145},{7,20,145},{6,21,5},{6,21,5},{8,31,474},{8,28,276},{9,24,425},{8,24,292},{8,28,740},{8,25,298},{8,23,61},{8,22,315},{5,28,724},{7,22,227},{9,28,200},{9,26,13},{9,24,25},{9,23,29},{13,12,723}, +{7,27,219},{8,23,45},{7,22,218},{13,18,723},{7,22,218},{8,28,272},{8,28,272},{8,28,272},{8,24,276},{8,25,180},{8,23,36},{8,23,36},{8,21,33},{6,25,171},{7,21,26},{9,25,0},{9,25,0},{9,25,0},{9,23,4},{15,2,162},{8,23,20},{8,23,20},{8,21,17},{13,16,162},{8,21,17},{15,8,200},{8,28,4},{9,24,25},{8,24,20},{15,8,200},{11,22,200},{8,24,20}, +{0,22,218},{11,22,200},{0,22,218},{8,0,272},{8,0,272},{8,0,272},{8,0,272},{8,22,17},{8,22,17},{8,22,17},{8,20,17},{7,22,9},{7,22,9},{9,31,410},{9,28,212},{9,25,288},{9,24,224},{8,31,739},{8,27,254},{8,25,33},{8,23,270},{5,30,740},{7,23,231},{9,31,266},{9,28,68},{10,24,97},{9,24,80},{14,11,723},{7,29,209},{8,25,17},{7,23,227},{15,17,723}, +{7,23,227},{9,27,212},{9,27,212},{9,27,212},{9,24,208},{8,28,180},{8,25,17},{8,25,17},{8,22,18},{6,27,171},{7,23,62},{9,27,68},{9,27,68},{9,27,68},{9,24,64},{11,18,162},{8,25,1},{8,25,1},{8,22,2},{10,21,162},{8,22,2},{14,14,200},{9,28,4},{10,24,16},{9,24,16},{14,14,200},{14,20,200},{9,24,16},{0,23,218},{14,20,200},{0,23,218},{9,0,208}, 
+{9,0,208},{9,0,208},{9,0,208},{8,25,16},{8,25,16},{8,25,16},{8,22,17},{7,24,8},{7,24,8},{9,31,570},{9,30,276},{9,26,457},{9,26,292},{9,30,740},{9,27,298},{9,25,45},{9,24,324},{6,30,727},{7,24,280},{10,30,201},{10,28,18},{10,26,21},{10,25,26},{13,17,723},{8,29,227},{9,25,29},{8,24,224},{13,21,723},{8,24,224},{9,30,272},{9,30,272},{9,30,272}, +{9,26,276},{9,27,180},{9,25,29},{9,25,29},{9,23,33},{7,27,171},{8,23,41},{10,27,2},{10,27,2},{10,27,2},{10,24,5},{13,14,162},{8,27,5},{8,27,5},{8,23,5},{8,25,162},{8,23,5},{13,20,200},{9,30,4},{10,26,20},{9,26,20},{13,20,200},{13,23,202},{9,26,20},{0,24,208},{13,23,202},{0,24,208},{9,0,272},{9,0,272},{9,0,272},{9,0,272},{9,24,17}, +{9,24,17},{9,24,17},{9,22,17},{7,25,10},{7,25,10},{10,31,468},{10,30,223},{10,27,283},{10,26,234},{9,31,835},{9,29,267},{9,27,33},{9,25,273},{7,30,747},{8,25,225},{11,29,283},{10,30,54},{11,26,97},{10,26,65},{15,13,723},{8,30,208},{9,27,17},{8,25,209},{11,25,723},{8,25,209},{10,29,219},{10,29,219},{10,29,219},{10,26,218},{9,30,180},{9,27,17},{9,27,17}, +{9,24,20},{7,28,174},{8,24,17},{10,29,50},{10,29,50},{10,29,50},{10,26,49},{14,13,162},{9,27,1},{9,27,1},{9,24,4},{11,23,162},{9,24,4},{15,16,200},{10,30,5},{11,26,16},{10,26,16},{15,16,200},{15,22,200},{10,26,16},{0,25,208},{15,22,200},{0,25,208},{10,0,218},{10,0,218},{10,0,218},{10,0,218},{9,27,16},{9,27,16},{9,27,16},{9,24,20},{8,24,17}, +{8,24,17},{11,31,632},{10,31,297},{11,28,425},{10,28,292},{10,31,804},{10,29,298},{10,27,61},{10,26,315},{7,31,823},{8,26,231},{11,31,232},{11,30,13},{11,28,25},{11,27,29},{15,16,723},{9,31,216},{10,27,45},{8,26,227},{15,22,723},{8,26,227},{10,31,288},{10,31,288},{10,31,288},{10,28,276},{10,29,180},{10,27,36},{10,27,36},{10,25,33},{7,30,189},{9,25,30},{11,29,0}, 
+{11,29,0},{11,29,0},{11,27,4},{14,16,162},{9,29,4},{9,29,4},{9,25,5},{15,20,162},{9,25,5},{15,19,200},{11,30,13},{11,28,25},{10,28,20},{15,19,200},{13,26,200},{10,28,20},{0,26,218},{13,26,200},{0,26,218},{10,0,272},{10,0,272},{10,0,272},{10,0,272},{10,26,17},{10,26,17},{10,26,17},{10,24,17},{8,27,5},{8,27,5},{11,31,696},{11,31,237},{11,29,288}, +{11,28,224},{11,31,888},{10,31,254},{10,29,33},{10,27,270},{8,31,824},{9,27,227},{12,31,283},{11,31,93},{12,28,97},{11,28,80},{13,25,723},{10,31,238},{10,29,17},{9,27,218},{12,27,723},{9,27,218},{11,31,212},{11,31,212},{11,31,212},{11,28,208},{10,31,196},{10,29,17},{10,29,17},{10,26,18},{8,30,171},{9,26,26},{11,31,68},{11,31,68},{11,31,68},{11,28,64},{13,22,162}, +{10,29,1},{10,29,1},{10,26,2},{12,25,162},{10,26,2},{13,28,200},{11,31,29},{12,28,16},{11,28,16},{13,28,200},{11,30,200},{11,28,16},{0,27,218},{11,30,200},{0,27,218},{11,0,208},{11,0,208},{11,0,208},{11,0,208},{10,29,16},{10,29,16},{10,29,16},{10,26,17},{9,27,9},{9,27,9},{12,31,804},{12,31,492},{11,30,457},{11,30,292},{11,31,1080},{11,31,298},{11,29,45}, +{11,28,324},{9,31,920},{9,28,218},{12,31,363},{12,31,51},{12,30,21},{12,29,26},{15,21,723},{11,31,282},{11,29,29},{9,28,217},{15,25,723},{9,28,217},{11,31,372},{11,31,372},{11,31,372},{11,30,276},{11,31,180},{11,29,29},{11,29,29},{11,27,33},{8,31,184},{10,27,41},{12,31,2},{12,31,2},{12,31,2},{12,28,5},{15,18,162},{10,31,5},{10,31,5},{10,27,5},{10,29,162}, +{10,27,5},{15,24,200},{12,31,50},{12,30,20},{11,30,20},{15,24,200},{15,27,202},{11,30,20},{0,28,208},{15,27,202},{0,28,208},{11,0,272},{11,0,272},{11,0,272},{11,0,272},{11,28,17},{11,28,17},{11,28,17},{11,26,17},{9,29,8},{9,29,8},{12,31,996},{12,31,492},{12,31,283},{12,30,234},{12,31,1068},{11,31,458},{11,31,33},{11,29,273},{10,31,999},{10,29,225},{13,31,379}, 
+{13,31,171},{13,30,97},{12,30,65},{14,27,723},{12,31,371},{11,31,17},{10,29,209},{13,29,723},{10,29,209},{12,31,267},{12,31,267},{12,31,267},{12,30,218},{12,30,333},{11,31,17},{11,31,17},{11,28,20},{9,31,212},{10,28,17},{13,30,81},{13,30,81},{13,30,81},{12,30,49},{13,27,162},{11,31,1},{11,31,1},{11,28,4},{13,27,162},{11,28,4},{15,27,202},{13,31,90},{13,30,16}, +{12,30,16},{15,27,202},{15,28,202},{12,30,16},{0,29,208},{15,28,202},{0,29,208},{12,0,218},{12,0,218},{12,0,218},{12,0,218},{11,31,16},{11,31,16},{11,31,16},{11,28,20},{9,30,10},{9,30,10},{13,31,877},{13,31,605},{13,31,436},{12,31,288},{13,31,1021},{12,31,397},{12,31,36},{12,30,210},{11,31,910},{10,30,126},{14,31,414},{13,31,205},{13,31,36},{13,31,4},{15,26,546}, +{13,31,317},{12,31,20},{10,30,122},{14,29,546},{10,30,122},{13,31,436},{13,31,436},{13,31,436},{12,31,288},{12,31,276},{12,31,36},{12,31,36},{12,29,33},{10,31,261},{11,29,30},{13,31,36},{13,31,36},{13,31,36},{13,31,4},{13,30,162},{12,31,20},{12,31,20},{11,29,5},{12,30,162},{11,29,5},{15,28,117},{14,31,61},{14,31,25},{13,31,4},{15,28,117},{14,31,117},{13,31,4}, +{0,30,113},{14,31,117},{0,30,113},{12,0,272},{12,0,272},{12,0,272},{12,0,272},{12,30,17},{12,30,17},{12,30,17},{12,28,17},{10,31,5},{10,31,5},{13,31,845},{13,31,573},{13,31,404},{13,31,244},{13,31,797},{13,31,365},{12,31,196},{12,30,82},{12,31,737},{11,31,58},{14,31,190},{14,31,126},{14,31,90},{14,31,82},{14,31,334},{13,31,221},{13,31,52},{11,31,49},{14,30,333}, +{11,31,49},{13,31,404},{13,31,404},{13,31,404},{13,31,244},{13,31,356},{12,31,196},{12,31,196},{12,30,18},{11,31,333},{11,30,26},{14,31,90},{14,31,90},{14,31,90},{14,31,82},{15,26,162},{13,31,52},{13,31,52},{12,30,2},{14,29,162},{12,30,2},{15,30,25},{15,30,41},{14,31,9},{14,31,1},{15,30,25},{15,30,29},{14,31,1},{0,31,49},{15,30,29},{0,31,49},{13,0,208}, 
+{13,0,208},{13,0,208},{13,0,208},{12,31,52},{12,31,52},{12,31,52},{12,30,17},{11,31,9},{11,31,9},{14,31,642},{14,31,578},{14,31,542},{13,31,441},{14,31,690},{13,31,370},{13,31,201},{13,31,32},{13,31,610},{12,31,40},{14,31,201},{14,31,137},{14,31,101},{14,31,37},{15,29,193},{14,31,121},{14,31,85},{12,31,4},{14,31,193},{12,31,4},{14,31,542},{14,31,542},{14,31,542}, +{13,31,441},{13,31,473},{13,31,201},{13,31,201},{13,31,32},{12,31,401},{12,31,40},{14,31,101},{14,31,101},{14,31,101},{14,31,37},{15,28,145},{14,31,85},{14,31,85},{12,31,4},{15,29,149},{12,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{13,0,272},{13,0,272},{13,0,272},{13,0,272},{13,31,32}, +{13,31,32},{13,31,32},{13,30,17},{12,31,40},{12,31,40},{14,31,418},{14,31,354},{14,31,318},{14,31,254},{14,31,370},{14,31,242},{14,31,206},{13,31,32},{13,31,418},{13,31,80},{15,31,81},{15,31,81},{15,31,81},{15,31,81},{15,30,54},{14,31,73},{14,31,37},{14,31,9},{15,30,66},{14,31,9},{14,31,318},{14,31,318},{14,31,318},{14,31,254},{14,31,270},{14,31,206},{14,31,206}, +{13,31,32},{13,31,249},{13,31,80},{15,31,81},{15,31,81},{15,31,81},{15,31,81},{15,29,45},{14,31,37},{14,31,37},{14,31,9},{15,30,41},{14,31,9},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{14,0,218},{14,0,218},{14,0,218},{14,0,218},{13,31,160},{13,31,160},{13,31,160},{13,31,32},{13,31,80}, +{13,31,80},{0,9,421},{0,7,113},{0,5,5},{0,4,130},{0,6,925},{0,5,658},{0,4,274},{0,3,670},{0,3,1039},{0,3,751},{0,9,421},{0,7,113},{0,5,5},{0,4,130},{0,6,925},{0,5,658},{0,4,274},{0,3,670},{3,0,925},{0,3,670},{0,4,1},{0,4,1},{0,4,1},{0,3,4},{0,2,85},{0,2,45},{0,2,45},{0,1,50},{0,1,98},{0,1,59},{0,4,1}, 
+{0,4,1},{0,4,1},{0,3,4},{0,2,85},{0,2,45},{0,2,45},{0,1,50},{1,0,85},{0,1,50},{1,6,421},{0,7,113},{0,5,5},{0,4,130},{1,6,421},{3,2,421},{0,4,130},{0,3,445},{3,2,421},{0,3,445},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,12,425},{0,9,52},{0,6,10}, +{0,6,82},{0,8,1261},{0,6,805},{0,5,322},{0,4,833},{0,4,1445},{0,4,977},{0,12,425},{0,9,52},{0,6,10},{0,6,82},{0,8,1261},{0,6,805},{0,5,322},{0,4,833},{4,0,1261},{0,4,833},{0,7,0},{0,7,0},{0,7,0},{0,4,1},{0,3,225},{0,3,117},{0,3,117},{0,2,125},{0,2,257},{0,2,161},{0,7,0},{0,7,0},{0,7,0},{0,4,1},{1,0,221}, +{0,3,117},{0,3,117},{0,2,125},{1,1,221},{0,2,125},{3,2,421},{0,9,52},{0,6,10},{0,6,82},{3,2,421},{5,1,421},{0,6,82},{0,4,433},{5,1,421},{0,4,433},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,14,430},{0,11,29},{0,7,74},{0,7,46},{0,10,1514},{0,8,874},{0,6,307}, +{0,5,917},{0,5,1814},{0,4,1074},{0,14,430},{0,11,29},{0,7,74},{0,7,46},{3,0,1514},{0,8,874},{0,6,307},{0,5,917},{5,0,1514},{0,5,917},{0,10,10},{0,10,10},{0,10,10},{0,6,10},{0,5,340},{0,5,160},{0,5,160},{0,3,169},{0,3,421},{0,3,250},{0,10,10},{0,10,10},{0,10,10},{0,6,10},{1,2,338},{0,5,160},{0,5,160},{0,3,169},{1,2,338}, +{0,3,169},{4,1,421},{0,11,20},{1,7,5},{0,7,37},{4,1,421},{7,0,421},{0,7,37},{0,5,433},{7,0,421},{0,5,433},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,1,0},{0,1,0},{0,1,0},{0,1,4},{0,0,9},{0,0,9},{0,17,542},{0,13,130},{0,9,285},{0,8,137},{0,12,1517},{0,9,737},{0,7,185},{0,6,794},{0,7,1982},{0,5,1062},{1,13,450}, +{1,11,77},{1,8,34},{1,7,122},{3,3,1517},{0,9,737},{0,7,185},{0,6,794},{2,5,1514},{0,6,794},{0,12,121},{0,12,121},{0,12,121},{0,7,125},{0,8,338},{0,6,98},{0,6,98},{0,4,97},{0,4,514},{0,4,241},{1,9,25},{1,9,25},{1,9,25},{1,6,26},{0,8,338},{0,6,98},{0,6,98},{0,4,97},{4,0,338},{0,4,97},{5,0,421},{0,13,9},{1,8,9}, 
+{0,8,16},{5,0,421},{8,0,421},{0,8,16},{0,6,433},{8,0,421},{0,6,433},{0,0,121},{0,0,121},{0,0,121},{0,0,121},{0,4,1},{0,4,1},{0,4,1},{0,2,1},{0,2,37},{0,2,37},{1,16,697},{1,13,297},{1,9,354},{1,9,309},{0,15,1517},{0,11,630},{0,9,50},{0,7,670},{0,8,2198},{0,6,1109},{1,16,441},{1,13,41},{1,9,98},{1,9,53},{4,2,1514}, +{0,11,630},{0,9,50},{0,7,670},{6,2,1514},{0,7,670},{1,12,273},{1,12,273},{1,12,273},{1,8,273},{0,11,338},{0,8,41},{0,8,41},{0,5,50},{0,6,680},{0,5,275},{1,12,17},{1,12,17},{1,12,17},{1,8,17},{3,1,338},{0,8,41},{0,8,41},{0,5,50},{3,3,338},{0,5,50},{6,0,421},{0,15,1},{2,9,5},{0,9,1},{6,0,421},{5,6,421},{0,9,1}, +{0,7,445},{5,6,421},{0,7,445},{1,0,272},{1,0,272},{1,0,272},{1,0,272},{0,7,1},{0,7,1},{0,7,1},{0,4,0},{0,3,106},{0,3,106},{1,19,821},{1,15,405},{1,11,570},{1,10,410},{0,18,1514},{0,13,577},{0,10,14},{0,8,602},{0,10,2462},{0,7,1175},{2,16,450},{2,13,77},{2,10,35},{2,10,107},{3,8,1514},{0,13,577},{0,10,14},{0,8,602},{4,6,1514}, +{0,8,602},{1,14,401},{1,14,401},{1,14,401},{1,9,404},{0,13,340},{0,10,13},{0,10,13},{0,6,29},{0,7,851},{0,6,353},{2,11,25},{2,11,25},{2,11,25},{2,8,26},{4,0,338},{0,10,13},{0,10,13},{0,6,29},{1,7,338},{0,6,29},{5,6,421},{1,15,5},{2,10,10},{0,10,10},{5,6,421},{10,1,421},{0,10,10},{0,8,433},{10,1,421},{0,8,433},{1,0,400}, +{1,0,400},{1,0,400},{1,0,400},{0,9,1},{0,9,1},{0,9,1},{0,6,4},{0,4,208},{0,4,208},{1,22,902},{1,17,485},{2,11,750},{1,11,482},{0,21,1566},{0,15,570},{0,11,95},{0,9,582},{0,11,2337},{0,9,933},{2,18,430},{2,15,29},{2,11,74},{2,11,46},{5,4,1514},{0,15,521},{0,11,46},{0,9,533},{9,1,1514},{0,9,533},{1,17,481},{1,17,481},{1,17,481}, +{1,11,481},{0,16,387},{0,12,51},{0,12,51},{0,7,75},{0,8,755},{0,7,222},{2,14,10},{2,14,10},{2,14,10},{2,10,10},{3,6,338},{0,12,2},{0,12,2},{0,7,26},{3,6,338},{0,7,26},{7,2,421},{1,17,4},{3,11,5},{1,11,1},{7,2,421},{12,0,421},{1,11,1},{0,9,433},{12,0,421},{0,9,433},{1,0,481},{1,0,481},{1,0,481},{1,0,481},{0,12,50}, 
+{0,12,50},{0,12,50},{0,7,50},{0,6,157},{0,6,157},{2,21,866},{2,17,454},{2,13,609},{2,12,461},{1,20,1515},{1,15,578},{1,12,19},{1,10,603},{0,13,2214},{0,10,707},{3,17,450},{3,15,77},{3,12,34},{3,11,122},{7,0,1514},{0,16,458},{1,12,18},{0,10,482},{4,9,1514},{0,10,482},{2,16,445},{2,16,445},{2,16,445},{2,11,449},{1,15,341},{1,12,19},{1,12,19}, +{1,8,26},{0,10,635},{0,8,106},{3,13,25},{3,13,25},{3,13,25},{3,10,26},{5,2,338},{0,14,2},{0,14,2},{0,9,20},{8,1,338},{0,9,20},{7,4,421},{2,17,9},{3,12,9},{1,12,9},{7,4,421},{11,3,421},{1,12,9},{0,10,433},{11,3,421},{0,10,433},{2,0,445},{2,0,445},{2,0,445},{2,0,445},{1,11,2},{1,11,2},{1,11,2},{1,7,10},{0,8,90}, +{0,8,90},{2,24,902},{2,19,482},{3,13,723},{2,13,482},{1,23,1578},{1,17,566},{1,14,117},{1,11,590},{0,15,2046},{0,11,535},{3,20,441},{3,17,41},{3,13,98},{3,13,53},{7,3,1514},{0,18,429},{2,13,50},{0,11,454},{11,2,1514},{0,11,454},{2,19,481},{2,19,481},{2,19,481},{2,13,481},{1,18,404},{1,14,68},{1,14,68},{1,10,89},{0,12,557},{0,10,49},{3,16,17}, +{3,16,17},{3,16,17},{3,12,17},{5,5,338},{1,14,4},{1,14,4},{0,10,13},{5,7,338},{0,10,13},{8,2,421},{2,19,1},{4,13,5},{2,13,1},{8,2,421},{14,1,421},{2,13,1},{0,11,445},{14,1,421},{0,11,445},{2,0,481},{2,0,481},{2,0,481},{2,0,481},{1,14,64},{1,14,64},{1,14,64},{1,9,65},{0,10,40},{0,10,40},{3,23,854},{3,19,438},{3,15,603}, +{3,14,443},{2,22,1515},{2,17,578},{2,14,15},{2,12,603},{0,16,1911},{0,12,458},{4,20,450},{4,17,77},{4,14,35},{4,14,107},{5,12,1514},{0,20,425},{2,14,14},{0,12,433},{13,1,1514},{0,12,433},{3,18,434},{3,18,434},{3,18,434},{3,13,437},{2,17,341},{2,14,14},{2,14,14},{2,10,30},{0,14,477},{0,11,35},{4,15,25},{4,15,25},{4,15,25},{4,12,26},{7,1,338}, +{1,16,5},{1,16,5},{1,11,10},{10,2,338},{1,11,10},{8,5,421},{3,19,5},{4,14,10},{2,14,10},{8,5,421},{12,5,421},{2,14,10},{0,12,433},{12,5,421},{0,12,433},{3,0,433},{3,0,433},{3,0,433},{3,0,433},{2,13,2},{2,13,2},{2,13,2},{2,10,5},{0,12,25},{0,12,25},{3,26,902},{3,21,485},{4,15,750},{3,15,482},{2,25,1566},{2,19,570},{2,15,95}, 
+{2,13,582},{0,18,1787},{0,13,442},{4,22,430},{4,19,29},{4,15,74},{4,15,46},{8,3,1514},{1,20,425},{2,15,46},{0,13,442},{11,5,1514},{0,13,442},{3,21,481},{3,21,481},{3,21,481},{3,15,481},{2,20,387},{2,16,51},{2,16,51},{2,11,75},{0,16,419},{1,12,69},{4,18,10},{4,18,10},{4,18,10},{4,14,10},{5,10,338},{2,16,2},{2,16,2},{1,12,20},{12,1,338}, +{1,12,20},{9,4,421},{3,21,4},{5,15,5},{3,15,1},{9,4,421},{15,3,421},{3,15,1},{0,13,433},{15,3,421},{0,13,433},{3,0,481},{3,0,481},{3,0,481},{3,0,481},{2,16,50},{2,16,50},{2,16,50},{2,11,50},{0,13,9},{0,13,9},{4,25,866},{4,21,454},{4,17,609},{4,16,461},{3,24,1515},{3,19,578},{3,16,19},{3,14,603},{0,20,1686},{1,14,458},{5,21,450}, +{5,19,77},{5,16,34},{5,15,122},{3,24,1514},{1,22,425},{3,16,18},{1,14,433},{14,3,1514},{1,14,433},{4,20,445},{4,20,445},{4,20,445},{4,15,449},{3,19,341},{3,16,19},{3,16,19},{3,12,26},{0,17,372},{1,13,45},{5,17,25},{5,17,25},{5,17,25},{5,14,26},{8,1,338},{2,18,2},{2,18,2},{2,13,20},{10,5,338},{2,13,20},{11,0,421},{4,21,9},{5,16,9}, +{3,16,9},{11,0,421},{13,7,421},{3,16,9},{0,14,433},{13,7,421},{0,14,433},{4,0,445},{4,0,445},{4,0,445},{4,0,445},{3,15,2},{3,15,2},{3,15,2},{3,11,10},{0,15,5},{0,15,5},{4,28,902},{4,23,482},{5,17,723},{4,17,482},{3,27,1578},{3,21,566},{3,18,117},{3,15,590},{0,22,1614},{1,15,462},{5,24,441},{5,21,41},{5,17,98},{5,17,53},{5,20,1514}, +{2,22,429},{4,17,50},{2,15,454},{13,6,1514},{2,15,454},{4,23,481},{4,23,481},{4,23,481},{4,17,481},{3,22,404},{3,18,68},{3,18,68},{3,14,89},{0,19,347},{2,14,49},{5,20,17},{5,20,17},{5,20,17},{5,16,17},{8,4,338},{3,18,4},{3,18,4},{2,14,13},{14,2,338},{2,14,13},{11,3,421},{4,23,1},{6,17,5},{4,17,1},{11,3,421},{15,6,421},{4,17,1}, 
+{0,15,445},{15,6,421},{0,15,445},{4,0,481},{4,0,481},{4,0,481},{4,0,481},{3,18,64},{3,18,64},{3,18,64},{3,13,65},{1,16,8},{1,16,8},{5,27,854},{5,23,438},{5,19,603},{5,18,443},{4,26,1515},{4,21,578},{4,18,15},{4,16,603},{0,23,1566},{2,16,458},{6,24,450},{6,21,77},{6,18,35},{6,18,107},{11,1,1514},{2,24,425},{4,18,14},{2,16,433},{15,5,1514}, +{2,16,433},{5,22,434},{5,22,434},{5,22,434},{5,17,437},{4,21,341},{4,18,14},{4,18,14},{4,14,30},{0,21,341},{2,15,35},{6,19,25},{6,19,25},{6,19,25},{6,16,26},{3,25,338},{3,20,5},{3,20,5},{3,15,10},{12,6,338},{3,15,10},{12,2,421},{5,23,5},{6,18,10},{4,18,10},{12,2,421},{14,9,421},{4,18,10},{0,16,433},{14,9,421},{0,16,433},{5,0,433}, +{5,0,433},{5,0,433},{5,0,433},{4,17,2},{4,17,2},{4,17,2},{4,14,5},{1,17,10},{1,17,10},{5,30,902},{5,25,485},{6,19,750},{5,19,482},{4,29,1566},{4,23,570},{4,19,95},{4,17,582},{0,25,1533},{2,17,442},{6,26,430},{6,23,29},{6,19,74},{6,19,46},{10,7,1514},{3,24,425},{4,19,46},{2,17,442},{13,9,1514},{2,17,442},{5,25,481},{5,25,481},{5,25,481}, +{5,19,481},{4,24,387},{4,20,51},{4,20,51},{4,15,75},{1,21,341},{3,16,69},{6,22,10},{6,22,10},{6,22,10},{6,18,10},{9,6,338},{4,20,2},{4,20,2},{3,16,20},{14,5,338},{3,16,20},{11,8,421},{5,25,4},{7,19,5},{5,19,1},{11,8,421},{11,14,421},{5,19,1},{0,17,433},{11,14,421},{0,17,433},{5,0,481},{5,0,481},{5,0,481},{5,0,481},{4,20,50}, +{4,20,50},{4,20,50},{4,15,50},{2,17,9},{2,17,9},{6,29,866},{6,25,454},{6,21,609},{6,20,461},{5,28,1515},{5,23,578},{5,20,19},{5,18,603},{0,27,1521},{3,18,458},{7,25,450},{7,23,77},{7,20,34},{7,19,122},{12,3,1514},{3,26,425},{5,20,18},{3,18,433},{11,13,1514},{3,18,433},{6,24,445},{6,24,445},{6,24,445},{6,19,449},{5,23,341},{5,20,19},{5,20,19}, 
+{5,16,26},{1,23,341},{3,17,45},{7,21,25},{7,21,25},{7,21,25},{7,18,26},{10,5,338},{4,22,2},{4,22,2},{4,17,20},{12,9,338},{4,17,20},{13,4,421},{6,25,9},{7,20,9},{5,20,9},{13,4,421},{15,11,421},{5,20,9},{0,18,433},{15,11,421},{0,18,433},{6,0,445},{6,0,445},{6,0,445},{6,0,445},{5,19,2},{5,19,2},{5,19,2},{5,15,10},{2,19,5}, +{2,19,5},{6,31,914},{6,27,482},{7,21,723},{6,21,482},{5,31,1578},{5,25,566},{5,22,117},{5,19,590},{1,27,1535},{3,19,462},{7,28,441},{7,25,41},{7,21,98},{7,21,53},{8,19,1514},{4,26,429},{6,21,50},{4,19,454},{15,10,1514},{4,19,454},{6,27,481},{6,27,481},{6,27,481},{6,21,481},{5,26,404},{5,22,68},{5,22,68},{5,18,89},{2,23,347},{4,18,49},{7,24,17}, +{7,24,17},{7,24,17},{7,20,17},{10,8,338},{5,22,4},{5,22,4},{4,18,13},{11,12,338},{4,18,13},{15,0,421},{6,27,1},{8,21,50},{6,21,1},{15,0,421},{13,15,421},{6,21,1},{0,19,445},{13,15,421},{0,19,445},{6,0,481},{6,0,481},{6,0,481},{6,0,481},{5,22,64},{5,22,64},{5,22,64},{5,17,65},{3,20,8},{3,20,8},{7,31,854},{7,27,438},{7,23,603}, +{7,22,443},{6,30,1515},{6,25,578},{6,22,15},{6,20,603},{1,29,1518},{4,20,458},{7,31,565},{7,27,149},{8,22,174},{7,22,154},{13,5,1514},{4,28,425},{6,22,14},{4,20,433},{12,15,1514},{4,20,433},{7,26,434},{7,26,434},{7,26,434},{7,21,437},{6,25,341},{6,22,14},{6,22,14},{6,18,30},{2,25,341},{4,19,35},{7,26,145},{7,26,145},{7,26,145},{7,21,148},{9,14,338}, +{5,24,5},{5,24,5},{5,19,10},{14,10,338},{5,19,10},{15,3,421},{7,27,5},{8,22,5},{6,22,10},{15,3,421},{15,14,421},{6,22,10},{0,20,433},{15,14,421},{0,20,433},{7,0,433},{7,0,433},{7,0,433},{7,0,433},{6,21,2},{6,21,2},{6,21,2},{6,18,5},{3,21,10},{3,21,10},{7,31,1046},{7,29,485},{7,24,770},{7,23,482},{7,29,1598},{6,27,570},{6,23,95}, 
+{6,21,582},{2,29,1533},{4,21,442},{8,28,450},{8,26,77},{8,23,35},{8,23,107},{15,1,1514},{5,28,425},{6,23,46},{4,21,442},{15,13,1514},{4,21,442},{7,29,481},{7,29,481},{7,29,481},{7,23,481},{6,28,387},{6,24,51},{6,24,51},{6,19,75},{3,25,341},{5,20,69},{8,24,25},{8,24,25},{8,24,25},{8,21,26},{11,10,338},{6,24,2},{6,24,2},{5,20,20},{5,23,338}, +{5,20,20},{13,12,421},{7,29,4},{8,23,10},{7,23,1},{13,12,421},{13,18,421},{7,23,1},{0,21,433},{13,18,421},{0,21,433},{7,0,481},{7,0,481},{7,0,481},{7,0,481},{6,24,50},{6,24,50},{6,24,50},{6,19,50},{4,21,9},{4,21,9},{8,31,1106},{8,28,714},{8,24,749},{8,24,734},{7,31,1542},{7,27,578},{7,24,19},{7,22,603},{2,31,1521},{5,22,458},{8,31,430}, +{8,28,38},{8,24,73},{8,24,58},{14,7,1514},{5,30,425},{7,24,18},{5,22,433},{13,17,1514},{5,22,433},{8,27,686},{8,27,686},{8,27,686},{8,23,686},{7,27,341},{7,24,19},{7,24,19},{7,20,26},{3,27,341},{5,21,45},{8,27,10},{8,27,10},{8,27,10},{8,23,10},{12,9,338},{6,26,2},{6,26,2},{6,21,20},{14,13,338},{6,21,20},{15,8,421},{7,31,20},{9,24,4}, +{7,24,9},{15,8,421},{15,17,421},{7,24,9},{0,22,433},{15,17,421},{0,22,433},{8,0,685},{8,0,685},{8,0,685},{8,0,685},{7,23,2},{7,23,2},{7,23,2},{7,19,10},{4,23,5},{4,23,5},{8,31,1034},{8,30,438},{8,26,603},{8,25,443},{8,30,1806},{7,29,566},{7,26,117},{7,23,590},{3,31,1535},{5,23,462},{9,31,437},{9,28,77},{9,25,33},{9,25,98},{10,23,1514}, +{6,30,429},{7,26,53},{6,23,454},{11,21,1514},{6,23,454},{8,29,434},{8,29,434},{8,29,434},{8,24,437},{7,30,404},{7,26,68},{7,26,68},{7,22,89},{4,27,347},{6,22,49},{9,26,17},{9,26,17},{9,26,17},{9,23,20},{15,2,338},{7,26,4},{7,26,4},{6,22,13},{13,16,338},{6,22,13},{15,11,421},{8,30,5},{9,25,17},{8,25,10},{15,11,421},{15,19,421},{8,25,10}, 
+{0,23,445},{15,19,421},{0,23,445},{8,0,433},{8,0,433},{8,0,433},{8,0,433},{7,26,64},{7,26,64},{7,26,64},{7,21,65},{5,24,8},{5,24,8},{9,31,1174},{8,31,506},{9,26,723},{8,26,482},{8,31,1643},{7,30,717},{8,26,131},{7,24,725},{4,31,1566},{6,24,458},{9,31,549},{9,30,41},{9,26,98},{9,26,53},{15,9,1514},{7,30,461},{8,26,50},{6,24,433},{14,19,1514}, +{6,24,433},{8,31,490},{8,31,490},{8,31,490},{8,26,481},{8,28,421},{8,25,122},{8,25,122},{8,22,131},{4,29,341},{6,23,35},{9,29,17},{9,29,17},{9,29,17},{9,25,17},{11,18,338},{7,28,5},{7,28,5},{7,23,10},{10,21,338},{7,23,10},{14,17,421},{9,30,25},{10,26,5},{8,26,1},{14,17,421},{13,23,421},{8,26,1},{0,24,433},{13,23,421},{0,24,433},{8,0,481}, +{8,0,481},{8,0,481},{8,0,481},{8,24,82},{8,24,82},{8,24,82},{8,21,81},{5,25,10},{5,25,10},{9,31,1334},{9,31,470},{9,28,597},{9,27,443},{9,31,1815},{8,30,578},{8,27,15},{8,25,603},{5,31,1638},{6,25,442},{10,31,506},{10,30,77},{10,27,35},{10,27,107},{14,15,1514},{7,31,506},{8,27,14},{6,25,442},{12,23,1514},{6,25,442},{9,31,434},{9,31,434},{9,31,434}, +{9,26,437},{8,30,341},{8,27,14},{8,27,14},{8,23,30},{5,29,341},{7,24,69},{10,28,25},{10,28,25},{10,28,25},{10,25,26},{13,14,338},{8,27,13},{8,27,13},{7,24,20},{8,25,338},{7,24,20},{15,16,421},{9,31,37},{10,27,10},{8,27,10},{15,16,421},{15,22,421},{8,27,10},{0,25,433},{15,22,421},{0,25,433},{9,0,433},{9,0,433},{9,0,433},{9,0,433},{8,26,2}, +{8,26,2},{8,26,2},{8,23,5},{6,25,9},{6,25,9},{10,31,1470},{10,31,735},{10,28,749},{9,28,481},{9,31,1895},{8,31,579},{8,28,89},{8,26,582},{6,31,1761},{7,26,458},{11,31,614},{10,31,59},{10,28,73},{10,28,58},{13,21,1514},{8,31,530},{8,28,40},{7,26,433},{15,21,1514},{7,26,433},{9,31,562},{9,31,562},{9,31,562},{9,28,481},{9,30,421},{8,29,51},{8,29,51}, 
+{8,24,65},{5,31,341},{7,25,45},{10,31,10},{10,31,10},{10,31,10},{10,27,10},{14,13,338},{8,29,2},{8,29,2},{8,24,16},{11,23,338},{8,24,16},{15,19,421},{10,31,50},{11,28,4},{9,28,0},{15,19,421},{13,26,421},{9,28,0},{0,26,433},{13,26,421},{0,26,433},{9,0,481},{9,0,481},{9,0,481},{9,0,481},{8,29,50},{8,29,50},{8,29,50},{8,24,49},{6,27,5}, +{6,27,5},{11,31,1838},{10,31,753},{10,30,603},{10,29,443},{10,31,2046},{9,31,629},{9,29,21},{9,27,589},{7,31,1935},{7,27,462},{11,31,749},{11,31,120},{11,29,33},{11,29,98},{12,27,1514},{9,31,629},{9,29,21},{7,27,461},{13,25,1514},{7,27,461},{10,31,497},{10,31,497},{10,31,497},{10,28,437},{9,31,388},{9,29,20},{9,29,20},{9,25,29},{6,31,347},{8,25,105},{11,30,17}, +{11,30,17},{11,30,17},{11,27,20},{14,16,338},{8,31,4},{8,31,4},{8,26,10},{15,20,338},{8,26,10},{14,25,421},{11,31,104},{11,29,17},{10,29,10},{14,25,421},{12,29,421},{10,29,10},{0,27,445},{12,29,421},{0,27,445},{10,0,433},{10,0,433},{10,0,433},{10,0,433},{9,28,1},{9,28,1},{9,28,1},{9,25,4},{7,28,8},{7,28,8},{11,31,1902},{11,31,1001},{11,30,723}, +{10,30,482},{11,31,2286},{10,31,782},{9,31,117},{9,28,578},{8,31,2118},{7,29,491},{12,31,770},{12,31,338},{11,30,98},{11,30,53},{14,23,1514},{10,31,701},{10,30,50},{8,28,442},{11,29,1514},{8,28,442},{11,31,677},{11,31,677},{11,31,677},{10,30,481},{10,31,437},{9,31,68},{9,31,68},{9,27,89},{7,31,379},{8,27,49},{11,31,52},{11,31,52},{11,31,52},{11,29,17},{13,22,338}, +{9,31,4},{9,31,4},{8,27,13},{12,25,338},{8,27,13},{15,24,421},{12,31,169},{12,30,5},{10,30,1},{15,24,421},{15,27,421},{10,30,1},{0,28,433},{15,27,421},{0,28,433},{10,0,481},{10,0,481},{10,0,481},{10,0,481},{9,31,64},{9,31,64},{9,31,64},{9,26,65},{7,29,10},{7,29,10},{12,31,2151},{11,31,1254},{11,31,629},{11,31,442},{11,31,2393},{10,31,975},{10,31,14}, 
+{10,29,570},{9,31,2241},{8,29,425},{13,31,931},{12,31,395},{12,31,34},{12,30,105},{13,29,1459},{11,31,778},{10,31,13},{8,29,400},{14,27,1459},{8,29,400},{11,31,629},{11,31,629},{11,31,629},{11,30,437},{10,31,581},{10,31,14},{10,31,14},{10,27,30},{8,31,477},{8,28,45},{12,31,34},{12,31,34},{12,31,34},{12,29,26},{15,18,338},{10,31,13},{10,31,13},{9,28,17},{10,29,338}, +{9,28,17},{15,27,394},{13,31,218},{12,31,9},{10,31,9},{15,27,394},{15,28,394},{10,31,9},{0,29,400},{15,28,394},{0,29,400},{11,0,433},{11,0,433},{11,0,433},{11,0,433},{10,30,2},{10,30,2},{10,30,2},{10,27,5},{8,29,25},{8,29,25},{12,31,1767},{12,31,1167},{12,31,806},{11,31,506},{12,31,1983},{11,31,747},{11,31,122},{10,29,346},{10,31,1836},{8,30,217},{13,31,611}, +{13,31,339},{12,31,130},{12,31,10},{15,24,1064},{12,31,587},{11,31,41},{8,30,217},{15,27,1067},{8,30,217},{12,31,806},{12,31,806},{12,31,806},{11,31,506},{11,31,581},{11,31,122},{11,31,122},{10,28,65},{9,31,557},{9,29,69},{12,31,130},{12,31,130},{12,31,130},{12,31,10},{13,27,338},{11,31,41},{11,31,41},{10,28,16},{13,27,338},{10,28,16},{14,31,200},{14,31,136},{13,31,1}, +{12,31,1},{14,31,200},{15,29,200},{12,31,1},{0,30,208},{15,29,200},{0,30,208},{11,0,481},{11,0,481},{11,0,481},{11,0,481},{11,30,82},{11,30,82},{11,30,82},{10,28,49},{8,30,9},{8,30,9},{13,31,1646},{12,31,1194},{12,31,833},{12,31,497},{12,31,1686},{12,31,750},{11,31,221},{11,30,122},{11,31,1541},{9,30,110},{14,31,542},{13,31,285},{13,31,116},{13,31,20},{15,26,722}, +{13,31,429},{12,31,100},{10,30,74},{14,29,722},{10,30,74},{12,31,833},{12,31,833},{12,31,833},{12,31,497},{12,31,725},{11,31,221},{11,31,221},{11,29,29},{10,31,632},{9,30,46},{13,31,116},{13,31,116},{13,31,116},{13,31,20},{13,30,338},{12,31,100},{12,31,100},{10,30,10},{12,30,338},{10,30,10},{15,29,61},{14,31,37},{14,31,1},{13,31,4},{15,29,61},{15,30,61},{13,31,4}, 
+{0,30,73},{15,30,61},{0,30,73},{12,0,433},{12,0,433},{12,0,433},{12,0,433},{11,31,25},{11,31,25},{11,31,25},{11,29,4},{9,31,17},{9,31,17},{13,31,1406},{13,31,1134},{13,31,965},{12,31,737},{13,31,1454},{12,31,702},{12,31,341},{11,31,89},{11,31,1381},{10,31,49},{14,31,318},{14,31,254},{14,31,218},{13,31,116},{14,31,510},{13,31,333},{13,31,164},{10,31,13},{14,30,509}, +{10,31,13},{13,31,965},{13,31,965},{13,31,965},{12,31,737},{12,31,869},{12,31,341},{12,31,341},{11,31,89},{11,31,756},{10,31,49},{14,31,218},{14,31,218},{14,31,218},{13,31,116},{15,26,338},{13,31,164},{13,31,164},{10,31,13},{14,29,338},{10,31,13},{15,30,9},{15,31,9},{15,31,9},{14,31,9},{15,30,9},{15,31,9},{14,31,9},{0,31,9},{15,31,9},{0,31,9},{12,0,481}, +{12,0,481},{12,0,481},{12,0,481},{12,31,85},{12,31,85},{12,31,85},{11,30,65},{10,31,40},{10,31,40},{13,31,1315},{13,31,1043},{13,31,874},{13,31,602},{13,31,1171},{13,31,627},{13,31,458},{12,31,5},{12,31,1087},{11,31,80},{14,31,225},{14,31,161},{14,31,125},{14,31,61},{15,29,297},{14,31,193},{14,31,157},{12,31,4},{14,31,297},{12,31,4},{13,31,874},{13,31,874},{13,31,874}, +{13,31,602},{13,31,730},{13,31,458},{13,31,458},{12,31,5},{11,31,705},{11,31,80},{14,31,125},{14,31,125},{14,31,125},{14,31,61},{14,31,221},{14,31,157},{14,31,157},{12,31,4},{15,29,221},{12,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{13,0,433},{13,0,433},{13,0,433},{13,0,433},{12,31,101}, +{12,31,101},{12,31,101},{12,31,5},{11,31,80},{11,31,80},{14,31,885},{14,31,821},{14,31,785},{13,31,650},{14,31,885},{13,31,483},{13,31,314},{13,31,81},{13,31,779},{11,31,144},{15,31,169},{14,31,145},{14,31,109},{14,31,45},{15,30,118},{14,31,81},{14,31,45},{13,31,0},{15,30,114},{13,31,0},{14,31,785},{14,31,785},{14,31,785},{13,31,650},{13,31,586},{13,31,314},{13,31,314}, 
+{13,31,81},{12,31,518},{11,31,144},{14,31,109},{14,31,109},{14,31,109},{14,31,45},{15,29,85},{14,31,45},{14,31,45},{13,31,0},{14,31,85},{13,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{13,0,481},{13,0,481},{13,0,481},{13,0,481},{13,31,145},{13,31,145},{13,31,145},{13,31,81},{11,31,144}, +{11,31,144},{0,13,884},{0,10,225},{0,7,18},{0,6,265},{0,9,1899},{0,7,1355},{0,6,589},{0,4,1354},{0,5,2124},{0,4,1498},{0,13,884},{0,10,225},{0,7,18},{0,6,265},{2,2,1896},{0,7,1355},{0,6,589},{0,4,1354},{1,4,1896},{0,4,1354},{0,6,0},{0,6,0},{0,6,0},{0,4,4},{0,3,162},{0,3,90},{0,3,90},{0,2,104},{0,2,200},{0,1,134},{0,6,0}, +{0,6,0},{0,6,0},{0,4,4},{0,3,162},{0,3,90},{0,3,90},{0,2,104},{0,2,164},{0,2,104},{3,3,882},{0,10,225},{0,7,18},{0,6,265},{3,3,882},{4,3,882},{0,6,265},{0,5,890},{4,3,882},{0,5,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,15,884},{0,12,170},{0,8,8}, +{0,7,202},{0,10,2360},{0,8,1530},{0,7,643},{0,5,1579},{0,6,2684},{0,5,1804},{0,15,884},{0,12,170},{0,8,8},{0,7,202},{1,7,2355},{0,8,1530},{0,7,643},{0,5,1579},{1,5,2355},{0,5,1579},{0,9,1},{0,9,1},{0,9,1},{0,5,1},{0,4,340},{0,4,180},{0,4,180},{0,2,200},{0,2,392},{0,2,236},{0,9,1},{0,9,1},{0,9,1},{0,5,1},{1,1,338}, +{0,4,180},{0,4,180},{0,2,200},{2,0,340},{0,2,200},{4,2,882},{0,12,170},{0,8,8},{0,7,202},{4,2,882},{6,2,882},{0,7,202},{0,6,890},{6,2,882},{0,6,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,18,882},{0,14,106},{0,10,52},{0,9,148},{0,12,2899},{0,9,1773},{0,8,725}, +{0,6,1854},{0,7,3348},{0,5,2124},{0,18,882},{0,14,106},{0,10,52},{0,9,148},{1,9,2899},{0,9,1773},{0,8,725},{0,6,1854},{6,0,2899},{0,6,1854},{0,11,1},{0,11,1},{0,11,1},{0,7,1},{0,6,580},{0,5,306},{0,5,306},{0,3,325},{0,3,667},{0,3,406},{0,11,1},{0,11,1},{0,11,1},{0,7,1},{1,2,580},{0,5,306},{0,5,306},{0,3,325},{2,1,578}, 
+{0,3,325},{3,8,882},{0,14,106},{1,9,13},{0,9,148},{3,8,882},{4,6,882},{0,9,148},{0,7,890},{4,6,882},{0,7,890},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,21,920},{0,16,89},{0,11,153},{0,10,121},{0,14,3051},{0,11,1709},{0,9,557},{0,7,1795},{0,8,3651},{0,6,2174},{0,21,920}, +{0,16,89},{1,10,108},{0,10,121},{4,1,3048},{0,11,1709},{0,9,557},{0,7,1795},{3,5,3048},{0,7,1795},{0,14,37},{0,14,37},{0,14,37},{0,8,37},{0,8,648},{0,7,274},{0,7,274},{0,4,277},{0,4,824},{0,4,421},{0,14,37},{0,14,37},{0,14,37},{0,8,37},{0,8,648},{0,7,274},{0,7,274},{0,4,277},{4,0,648},{0,4,277},{5,4,882},{0,16,53},{1,10,8}, +{0,10,85},{5,4,882},{9,1,882},{0,10,85},{0,8,900},{9,1,882},{0,8,900},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,2,0},{0,2,0},{0,2,0},{0,1,1},{0,1,10},{0,1,10},{0,23,1115},{0,18,242},{1,12,309},{0,11,259},{0,17,3051},{0,13,1579},{0,10,346},{0,8,1630},{0,9,3924},{0,7,2173},{1,20,885},{1,16,90},{1,12,53},{1,11,131},{4,4,3048}, +{0,13,1579},{0,10,346},{0,8,1630},{7,2,3048},{0,8,1630},{0,17,226},{0,17,226},{0,17,226},{0,10,225},{0,11,648},{0,9,169},{0,9,169},{0,5,200},{0,6,990},{0,5,425},{1,13,2},{1,13,2},{1,13,2},{1,9,2},{3,1,648},{0,9,169},{0,9,169},{0,5,200},{3,3,648},{0,5,200},{5,7,882},{0,18,17},{2,11,18},{0,11,34},{5,7,882},{6,7,882},{0,11,34}, +{0,9,890},{6,7,882},{0,9,890},{0,0,225},{0,0,225},{0,0,225},{0,0,225},{0,5,0},{0,5,0},{0,5,0},{0,3,0},{0,2,61},{0,2,61},{1,23,1187},{1,18,350},{1,13,422},{1,12,373},{0,20,3048},{0,15,1443},{0,12,204},{0,9,1483},{0,11,4212},{0,8,2174},{1,23,931},{1,18,94},{2,12,108},{1,12,117},{6,0,3048},{0,15,1443},{0,12,204},{0,9,1483},{5,6,3048}, +{0,9,1483},{1,16,305},{1,16,305},{1,16,305},{1,10,309},{0,13,650},{0,11,109},{0,11,109},{0,7,148},{0,7,1161},{0,6,473},{1,16,49},{1,16,49},{1,16,49},{1,10,53},{4,0,648},{0,11,109},{0,11,109},{0,7,148},{1,7,648},{0,7,148},{7,3,882},{0,20,8},{2,12,8},{0,12,8},{7,3,882},{11,2,882},{0,12,8},{0,10,890},{11,2,882},{0,10,890},{1,0,305}, 
+{1,0,305},{1,0,305},{1,0,305},{0,8,1},{0,8,1},{0,8,1},{0,5,4},{0,3,145},{0,3,145},{1,25,1365},{1,20,497},{1,14,713},{1,13,510},{0,23,3051},{0,16,1278},{0,13,86},{0,10,1354},{0,12,4609},{0,9,2228},{2,22,886},{2,18,110},{2,14,56},{2,13,152},{5,6,3048},{0,16,1278},{0,13,86},{0,10,1354},{10,1,3048},{0,10,1354},{1,19,482},{1,19,482},{1,19,482}, +{1,12,481},{0,16,648},{0,12,72},{0,12,72},{0,8,101},{0,8,1352},{0,7,557},{2,15,5},{2,15,5},{2,15,5},{2,11,5},{3,6,648},{0,12,72},{0,12,72},{0,8,101},{3,6,648},{0,8,101},{5,12,882},{0,21,10},{3,13,13},{0,13,5},{5,12,882},{13,1,882},{0,13,5},{0,11,890},{13,1,882},{0,11,890},{1,0,481},{1,0,481},{1,0,481},{1,0,481},{0,10,1}, +{0,10,1},{0,10,1},{0,6,1},{0,5,261},{0,5,261},{1,28,1667},{1,22,793},{1,15,1182},{1,14,793},{0,25,3048},{0,18,1170},{0,14,36},{0,11,1243},{0,14,5005},{0,10,2318},{2,25,920},{2,20,89},{3,14,108},{2,14,121},{7,2,3048},{0,18,1170},{0,14,36},{0,11,1243},{12,0,3048},{0,11,1243},{1,21,786},{1,21,786},{1,21,786},{1,14,789},{0,19,650},{0,14,32},{0,14,32}, +{0,9,50},{0,9,1619},{0,8,661},{2,18,37},{2,18,37},{2,18,37},{2,12,37},{5,2,648},{0,14,32},{0,14,32},{0,9,50},{8,1,648},{0,9,50},{8,3,882},{1,22,8},{3,14,8},{1,14,8},{8,3,882},{11,5,882},{1,14,8},{0,12,900},{11,5,882},{0,12,900},{1,0,785},{1,0,785},{1,0,785},{1,0,785},{0,13,1},{0,13,1},{0,13,1},{0,8,4},{0,6,405}, +{0,6,405},{2,27,1844},{2,22,971},{2,16,1186},{2,15,988},{0,28,3084},{0,20,1095},{0,16,77},{0,12,1159},{0,16,4945},{0,12,2084},{3,24,885},{3,20,90},{3,16,53},{3,15,131},{6,8,3048},{0,20,1059},{0,16,41},{0,12,1123},{4,12,3048},{0,12,1123},{2,21,955},{2,21,955},{2,21,955},{2,14,954},{0,22,686},{0,16,41},{0,16,41},{0,10,49},{0,11,1577},{0,9,545},{3,17,2}, 
+{3,17,2},{3,17,2},{3,13,2},{5,5,648},{0,16,5},{0,16,5},{0,10,13},{5,7,648},{0,10,13},{8,6,882},{1,24,9},{4,15,18},{1,15,13},{8,6,882},{15,2,882},{1,15,13},{0,13,890},{15,2,882},{0,13,890},{2,0,954},{2,0,954},{2,0,954},{2,0,954},{0,16,37},{0,16,37},{0,16,37},{0,9,40},{0,8,373},{0,8,373},{2,30,1772},{2,24,898},{2,17,1287}, +{2,16,898},{1,27,3055},{1,20,1143},{1,16,33},{1,13,1214},{0,17,4639},{0,13,1730},{3,27,931},{3,22,94},{4,16,108},{3,16,117},{5,14,3048},{0,21,996},{1,16,29},{0,13,1054},{14,1,3048},{0,13,1054},{2,23,891},{2,23,891},{2,23,891},{2,16,894},{1,21,652},{1,16,29},{1,16,29},{1,11,44},{0,13,1452},{0,11,365},{3,20,49},{3,20,49},{3,20,49},{3,14,53},{7,1,648}, +{0,18,1},{0,18,1},{0,11,4},{10,2,648},{0,11,4},{5,20,882},{2,24,8},{4,16,8},{2,16,8},{5,20,882},{13,6,882},{2,16,8},{0,14,890},{13,6,882},{0,14,890},{2,0,890},{2,0,890},{2,0,890},{2,0,890},{1,15,4},{1,15,4},{1,15,4},{1,10,5},{0,9,269},{0,9,269},{3,29,1838},{3,24,970},{3,18,1186},{3,17,983},{1,30,3084},{1,22,1095},{1,18,77}, +{1,14,1159},{0,19,4419},{0,14,1444},{4,26,886},{4,22,110},{4,18,56},{4,17,152},{8,5,3048},{0,23,936},{1,18,41},{0,14,1003},{12,5,3048},{0,14,1003},{3,23,955},{3,23,955},{3,23,955},{3,16,954},{1,23,692},{1,18,41},{1,18,41},{1,12,46},{0,15,1296},{0,12,235},{4,19,5},{4,19,5},{4,19,5},{4,15,5},{5,10,648},{1,18,5},{1,18,5},{0,12,10},{12,1,648}, +{0,12,10},{11,1,882},{2,25,10},{5,17,13},{2,17,5},{11,1,882},{15,5,882},{2,17,5},{0,15,890},{15,5,882},{0,15,890},{3,0,954},{3,0,954},{3,0,954},{3,0,954},{1,18,37},{1,18,37},{1,18,37},{1,11,40},{0,11,185},{0,11,185},{3,31,1790},{3,26,898},{3,19,1287},{3,18,898},{2,29,3057},{2,22,1179},{2,18,45},{1,15,1250},{0,20,4156},{0,15,1226},{4,29,920}, 
+{4,24,89},{5,18,108},{4,18,121},{3,26,3048},{0,25,909},{2,18,36},{0,15,970},{15,3,3048},{0,15,970},{3,25,891},{3,25,891},{3,25,891},{3,18,894},{2,23,659},{2,18,41},{2,18,41},{2,13,59},{0,16,1137},{0,13,137},{4,22,37},{4,22,37},{4,22,37},{4,16,37},{8,1,648},{1,20,2},{1,20,2},{1,13,1},{10,5,648},{1,13,1},{10,7,882},{3,26,8},{5,18,8}, +{3,18,8},{10,7,882},{13,9,882},{3,18,8},{0,16,900},{13,9,882},{0,16,900},{3,0,890},{3,0,890},{3,0,890},{3,0,890},{2,17,10},{2,17,10},{2,17,10},{2,12,13},{0,13,136},{0,13,136},{4,31,1844},{4,26,971},{4,20,1186},{4,19,988},{2,31,3111},{2,24,1095},{2,20,77},{2,16,1159},{0,22,3940},{0,16,1055},{5,28,885},{5,24,90},{5,20,53},{5,19,131},{5,22,3048}, +{0,27,886},{2,20,41},{0,17,926},{8,13,3048},{0,17,926},{4,25,955},{4,25,955},{4,25,955},{4,18,954},{2,26,686},{2,20,41},{2,20,41},{2,14,49},{0,18,1002},{0,15,110},{5,21,2},{5,21,2},{5,21,2},{5,17,2},{8,4,648},{2,20,5},{2,20,5},{2,14,13},{14,2,648},{2,14,13},{13,0,882},{3,28,9},{6,19,18},{3,19,13},{13,0,882},{5,21,882},{3,19,13}, +{0,17,890},{5,21,882},{0,17,890},{4,0,954},{4,0,954},{4,0,954},{4,0,954},{2,20,37},{2,20,37},{2,20,37},{2,13,40},{0,15,74},{0,15,74},{4,31,1972},{4,28,898},{4,21,1287},{4,20,898},{3,31,3055},{3,24,1143},{3,20,33},{3,17,1214},{0,23,3820},{0,18,963},{5,31,931},{5,26,94},{6,20,108},{5,20,117},{11,3,3048},{0,28,899},{3,20,29},{0,18,899},{11,11,3048}, +{0,18,899},{4,27,891},{4,27,891},{4,27,891},{4,20,894},{3,25,652},{3,20,29},{3,20,29},{3,15,44},{0,20,876},{0,16,102},{5,24,49},{5,24,49},{5,24,49},{5,18,53},{3,25,648},{2,22,1},{2,22,1},{2,15,4},{12,6,648},{2,15,4},{8,19,882},{4,28,8},{6,20,8},{4,20,8},{8,19,882},{15,10,882},{4,20,8},{0,18,890},{15,10,882},{0,18,890},{4,0,890}, 
+{4,0,890},{4,0,890},{4,0,890},{3,19,4},{3,19,4},{3,19,4},{3,14,5},{0,17,29},{0,17,29},{5,31,1964},{5,28,970},{5,22,1186},{5,21,983},{4,31,3172},{3,26,1095},{3,22,77},{3,18,1159},{0,25,3679},{0,19,899},{6,30,886},{6,26,110},{6,22,56},{6,21,152},{10,9,3048},{1,29,888},{3,22,41},{0,19,890},{14,9,3048},{0,19,890},{5,27,955},{5,27,955},{5,27,955}, +{5,20,954},{3,27,692},{3,22,41},{3,22,41},{3,16,46},{0,22,800},{1,16,98},{6,23,5},{6,23,5},{6,23,5},{6,19,5},{9,6,648},{3,22,5},{3,22,5},{2,16,10},{14,5,648},{2,16,10},{13,5,882},{4,29,10},{7,21,13},{4,21,5},{13,5,882},{12,15,882},{4,21,5},{0,19,890},{12,15,882},{0,19,890},{5,0,954},{5,0,954},{5,0,954},{5,0,954},{3,22,37}, +{3,22,37},{3,22,37},{3,15,40},{0,19,9},{0,19,9},{6,31,2264},{5,30,898},{5,23,1287},{5,22,898},{4,31,3204},{4,26,1179},{4,22,45},{3,19,1250},{0,27,3523},{0,20,908},{6,31,968},{6,28,89},{7,22,108},{6,22,121},{9,15,3048},{1,30,899},{4,22,36},{0,20,904},{5,22,3048},{0,20,904},{5,29,891},{5,29,891},{5,29,891},{5,22,894},{4,27,659},{4,22,41},{4,22,41}, +{4,17,59},{0,23,747},{1,18,102},{6,26,37},{6,26,37},{6,26,37},{6,20,37},{10,5,648},{3,24,2},{3,24,2},{3,17,1},{12,9,648},{3,17,1},{15,1,882},{5,30,8},{7,22,8},{5,22,8},{15,1,882},{15,13,882},{5,22,8},{0,20,900},{15,13,882},{0,20,900},{5,0,890},{5,0,890},{5,0,890},{5,0,890},{4,21,10},{4,21,10},{4,21,10},{4,16,13},{0,20,8}, +{0,20,8},{6,31,2228},{6,30,971},{6,24,1186},{6,23,988},{5,31,3256},{4,28,1095},{4,24,77},{4,20,1159},{0,29,3364},{1,21,894},{7,31,915},{7,28,90},{7,24,53},{7,23,131},{14,1,3048},{2,31,886},{4,24,41},{1,21,890},{10,17,3048},{1,21,890},{6,29,955},{6,29,955},{6,29,955},{6,22,954},{4,30,686},{4,24,41},{4,24,41},{4,18,49},{0,25,705},{2,19,110},{7,25,2}, 
+{7,25,2},{7,25,2},{7,21,2},{10,8,648},{4,24,5},{4,24,5},{4,18,13},{11,12,648},{4,18,13},{15,4,882},{6,30,17},{7,24,52},{5,23,13},{15,4,882},{9,22,882},{5,23,13},{0,21,890},{9,22,882},{0,21,890},{6,0,954},{6,0,954},{6,0,954},{6,0,954},{4,24,37},{4,24,37},{4,24,37},{4,17,40},{1,21,4},{1,21,4},{7,31,2444},{6,31,907},{6,25,1287}, +{6,24,898},{6,31,3436},{5,28,1143},{5,24,33},{5,21,1214},{0,31,3276},{1,22,908},{8,30,1208},{7,30,94},{7,25,166},{7,24,117},{13,7,3048},{3,31,906},{5,24,29},{1,22,899},{13,15,3048},{1,22,899},{6,31,891},{6,31,891},{6,31,891},{6,24,894},{5,29,652},{5,24,29},{5,24,29},{5,19,44},{0,27,665},{2,20,102},{7,28,49},{7,28,49},{7,28,49},{7,22,53},{9,14,648}, +{4,26,1},{4,26,1},{4,19,4},{14,10,648},{4,19,4},{10,23,882},{6,31,17},{8,24,13},{6,24,8},{10,23,882},{11,21,882},{6,24,8},{0,22,890},{11,21,882},{0,22,890},{6,0,890},{6,0,890},{6,0,890},{6,0,890},{5,23,4},{5,23,4},{5,23,4},{5,18,5},{1,23,4},{1,23,4},{7,31,2636},{7,31,991},{7,26,1186},{7,25,983},{6,31,3532},{5,30,1095},{5,26,77}, +{5,22,1159},{0,31,3340},{2,23,899},{8,31,1014},{7,31,262},{8,25,108},{7,25,254},{15,3,3048},{4,31,936},{5,26,41},{2,23,890},{11,19,3048},{2,23,890},{7,31,955},{7,31,955},{7,31,955},{7,24,954},{5,31,692},{5,26,41},{5,26,41},{5,20,46},{0,29,651},{3,20,98},{8,26,101},{8,26,101},{8,26,101},{8,22,101},{11,10,648},{5,26,5},{5,26,5},{4,20,10},{5,23,648}, +{4,20,10},{15,9,882},{7,31,37},{8,25,8},{6,25,5},{15,9,882},{14,19,882},{6,25,5},{0,23,890},{14,19,882},{0,23,890},{7,0,954},{7,0,954},{7,0,954},{7,0,954},{5,26,37},{5,26,37},{5,26,37},{5,19,40},{2,23,9},{2,23,9},{8,31,3110},{7,31,1135},{7,27,1287},{7,26,898},{7,31,3652},{6,30,1179},{6,26,45},{5,23,1250},{1,31,3492},{2,24,908},{8,31,1174}, 
+{8,31,110},{8,27,56},{8,26,152},{11,19,3048},{5,31,996},{6,26,36},{2,24,904},{9,23,3048},{2,24,904},{7,31,939},{7,31,939},{7,31,939},{7,26,894},{6,31,659},{6,26,41},{6,26,41},{6,21,59},{1,29,659},{3,22,102},{8,28,4},{8,28,4},{8,28,4},{8,24,8},{12,9,648},{5,28,2},{5,28,2},{5,21,1},{14,13,648},{5,21,1},{14,15,882},{8,31,106},{9,26,13}, +{7,26,8},{14,15,882},{12,23,882},{7,26,8},{0,24,900},{12,23,882},{0,24,900},{7,0,890},{7,0,890},{7,0,890},{7,0,890},{6,25,10},{6,25,10},{6,25,10},{6,20,13},{2,24,8},{2,24,8},{8,31,3038},{8,31,1470},{8,28,1391},{7,27,1137},{7,31,4120},{6,31,1146},{6,28,77},{6,24,1159},{3,31,3681},{3,25,894},{9,31,1205},{8,31,245},{9,27,99},{8,27,122},{10,25,3048}, +{6,31,1110},{6,28,41},{3,25,890},{12,21,3048},{3,25,890},{8,31,1274},{8,31,1274},{8,31,1274},{7,27,1128},{7,30,750},{6,28,41},{6,28,41},{6,22,49},{1,31,648},{4,23,110},{8,31,49},{8,31,49},{8,31,49},{8,25,53},{15,2,648},{6,28,5},{6,28,5},{6,22,13},{13,16,648},{6,22,13},{14,18,882},{9,31,164},{9,27,18},{7,27,13},{14,18,882},{11,26,882},{7,27,13}, +{0,25,890},{11,26,882},{0,25,890},{7,0,1124},{7,0,1124},{7,0,1124},{7,0,1124},{6,28,37},{6,28,37},{6,28,37},{6,21,40},{3,25,4},{3,25,4},{9,31,3454},{8,31,1502},{8,29,1186},{8,28,983},{8,31,4077},{7,31,1230},{7,28,33},{7,25,1214},{4,31,3820},{3,26,908},{10,31,1368},{9,31,261},{9,29,53},{9,28,126},{15,11,3048},{7,31,1226},{7,28,29},{3,26,899},{15,19,3048}, +{3,26,899},{8,31,1018},{8,31,1018},{8,31,1018},{8,27,954},{7,31,724},{7,28,29},{7,28,29},{7,23,44},{2,31,665},{4,24,102},{9,30,2},{9,30,2},{9,30,2},{9,26,2},{11,18,648},{6,30,1},{6,30,1},{6,23,4},{10,21,648},{6,23,4},{12,27,882},{10,31,225},{10,28,13},{7,28,20},{12,27,882},{13,25,882},{7,28,20},{0,26,890},{13,25,882},{0,26,890},{8,0,954}, 
+{8,0,954},{8,0,954},{8,0,954},{7,27,4},{7,27,4},{7,27,4},{7,22,5},{3,27,4},{3,27,4},{9,31,3614},{9,31,1886},{8,30,1287},{8,29,898},{8,31,4381},{7,31,1582},{7,30,77},{7,26,1159},{5,31,4036},{4,27,899},{10,31,1560},{10,31,405},{10,29,108},{9,29,117},{14,17,3048},{8,31,1444},{7,30,41},{4,27,890},{13,23,3048},{4,27,890},{8,31,1146},{8,31,1146},{8,31,1146}, +{8,29,894},{8,30,1011},{7,30,41},{7,30,41},{7,24,46},{3,31,705},{5,24,98},{9,31,85},{9,31,85},{9,31,85},{9,27,53},{13,14,648},{7,30,5},{7,30,5},{6,24,10},{8,25,648},{6,24,10},{14,23,882},{10,31,305},{10,29,8},{8,29,8},{14,23,882},{11,29,882},{8,29,8},{0,27,890},{11,29,882},{0,27,890},{8,0,890},{8,0,890},{8,0,890},{8,0,890},{7,30,37}, +{7,30,37},{7,30,37},{7,23,40},{4,27,9},{4,27,9},{10,31,4072},{9,31,2174},{9,31,1186},{9,30,983},{9,31,4545},{8,31,1725},{8,30,207},{7,27,1250},{6,31,4339},{4,28,908},{11,31,1656},{10,31,645},{10,31,56},{10,30,152},{13,23,3048},{8,31,1604},{7,31,77},{4,28,904},{11,27,3048},{4,28,904},{9,31,1150},{9,31,1150},{9,31,1150},{9,29,954},{8,31,841},{8,29,193},{8,29,193}, +{7,25,197},{4,31,747},{5,26,102},{10,31,20},{10,31,20},{10,31,20},{10,28,8},{14,13,648},{7,31,41},{7,31,41},{7,25,1},{11,23,648},{7,25,1},{13,29,882},{11,31,397},{11,30,13},{8,30,5},{13,29,882},{14,27,882},{8,30,5},{0,28,900},{14,27,882},{0,28,900},{9,0,954},{9,0,954},{9,0,954},{9,0,954},{8,27,122},{8,27,122},{8,27,122},{8,23,122},{4,28,8}, +{4,28,8},{10,31,4147},{10,31,2404},{9,31,1429},{9,31,901},{10,31,4627},{9,31,1938},{8,31,38},{8,28,1061},{7,31,4330},{5,29,789},{12,31,1701},{11,31,715},{11,31,90},{10,31,113},{15,19,2814},{10,31,1554},{8,31,34},{5,29,785},{13,26,2814},{5,29,785},{9,31,1429},{9,31,1429},{9,31,1429},{9,31,901},{9,31,1022},{8,31,38},{8,31,38},{8,26,44},{5,31,840},{6,27,110},{11,31,90}, 
+{11,31,90},{11,31,90},{10,29,53},{14,16,648},{8,31,34},{8,31,34},{7,26,20},{15,20,648},{7,26,20},{15,25,761},{12,31,425},{11,31,9},{9,31,1},{15,25,761},{12,31,761},{9,31,1},{0,29,785},{12,31,761},{0,29,785},{9,0,900},{9,0,900},{9,0,900},{9,0,900},{8,30,4},{8,30,4},{8,30,4},{8,25,5},{5,29,4},{5,29,4},{11,31,3735},{10,31,2356},{10,31,1395}, +{10,31,954},{10,31,4099},{9,31,1618},{9,31,174},{8,29,686},{7,31,3930},{6,29,510},{12,31,1285},{12,31,685},{11,31,122},{11,31,37},{13,27,2249},{10,31,1186},{9,31,74},{6,29,485},{13,27,2249},{6,29,485},{10,31,1395},{10,31,1395},{10,31,1395},{10,31,954},{9,31,1086},{9,31,174},{9,31,174},{8,27,49},{6,31,969},{6,28,102},{11,31,122},{11,31,122},{11,31,122},{11,30,2},{13,22,648}, +{9,31,74},{9,31,74},{8,27,13},{12,25,648},{8,27,13},{15,26,481},{13,31,269},{12,31,0},{10,31,0},{15,26,481},{15,28,481},{10,31,0},{0,29,481},{15,28,481},{0,29,481},{10,0,954},{10,0,954},{10,0,954},{10,0,954},{8,31,61},{8,31,61},{8,31,61},{8,26,40},{5,31,4},{5,31,4},{11,31,3399},{11,31,2260},{11,31,1635},{10,31,954},{11,31,3639},{10,31,1435},{9,31,238}, +{9,29,430},{8,31,3443},{6,30,314},{13,31,1121},{12,31,525},{12,31,164},{11,31,53},{13,29,1769},{11,31,918},{10,31,113},{7,30,290},{14,27,1769},{7,30,290},{11,31,1635},{11,31,1635},{11,31,1635},{10,31,954},{10,31,1251},{9,31,238},{9,31,238},{9,28,41},{7,31,1105},{7,28,98},{12,31,164},{12,31,164},{12,31,164},{11,31,53},{15,18,648},{10,31,113},{10,31,113},{8,28,1},{10,29,648}, +{8,28,1},{15,27,269},{13,31,173},{13,31,4},{11,31,4},{15,27,269},{14,30,265},{11,31,4},{0,30,289},{14,30,265},{0,30,289},{10,0,890},{10,0,890},{10,0,890},{10,0,890},{9,31,13},{9,31,13},{9,31,13},{9,27,5},{6,31,9},{6,31,9},{12,31,3157},{11,31,2308},{11,31,1683},{11,31,1054},{11,31,3303},{10,31,1339},{10,31,378},{9,30,213},{9,31,3103},{7,30,166},{13,31,801}, 
+{13,31,529},{12,31,260},{12,31,20},{15,24,1374},{12,31,777},{11,31,181},{8,30,117},{15,27,1377},{8,30,117},{11,31,1683},{11,31,1683},{11,31,1683},{11,31,1054},{10,31,1491},{10,31,378},{10,31,378},{9,29,46},{8,31,1331},{7,30,102},{12,31,260},{12,31,260},{12,31,260},{12,31,20},{13,27,648},{11,31,181},{11,31,181},{8,29,10},{13,27,648},{8,29,10},{15,28,117},{14,31,61},{14,31,25}, +{13,31,4},{15,28,117},{14,31,117},{13,31,4},{0,30,113},{14,31,117},{0,30,113},{11,0,954},{11,0,954},{11,0,954},{11,0,954},{10,31,122},{10,31,122},{10,31,122},{9,28,40},{7,31,29},{7,31,29},{12,31,2860},{12,31,2260},{12,31,1899},{11,31,1261},{12,31,2932},{11,31,1310},{11,31,685},{10,30,108},{10,31,2731},{7,31,173},{13,31,747},{13,31,475},{13,31,306},{13,31,130},{15,26,1032}, +{12,31,651},{12,31,290},{8,31,40},{14,29,1032},{8,31,40},{12,31,1899},{12,31,1899},{12,31,1899},{11,31,1261},{11,31,1620},{11,31,685},{11,31,685},{10,30,44},{9,31,1524},{8,30,134},{13,31,306},{13,31,306},{13,31,306},{13,31,130},{13,30,648},{12,31,290},{12,31,290},{9,30,5},{12,30,648},{9,30,5},{15,30,18},{15,30,34},{14,31,16},{14,31,0},{15,30,18},{15,30,26},{14,31,0}, +{0,31,36},{15,30,26},{0,31,36},{11,0,900},{11,0,900},{11,0,900},{11,0,900},{10,31,104},{10,31,104},{10,31,104},{10,29,5},{8,30,130},{8,30,130},{13,31,2732},{12,31,2276},{12,31,1915},{12,31,1315},{12,31,2660},{11,31,1414},{11,31,789},{10,31,45},{11,31,2487},{8,31,116},{14,31,524},{14,31,460},{14,31,424},{13,31,170},{14,31,776},{13,31,507},{13,31,338},{10,31,9},{14,30,771}, +{10,31,9},{12,31,1915},{12,31,1915},{12,31,1915},{12,31,1315},{12,31,1699},{11,31,789},{11,31,789},{10,31,45},{10,31,1546},{8,31,116},{14,31,424},{14,31,424},{14,31,424},{13,31,170},{15,26,580},{13,31,338},{13,31,338},{10,31,9},{14,29,580},{10,31,9},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{12,0,954}, 
+{12,0,954},{12,0,954},{12,0,954},{11,31,164},{11,31,164},{11,31,164},{10,30,40},{8,31,116},{8,31,116},{13,31,2156},{13,31,1884},{13,31,1715},{12,31,1251},{13,31,2132},{12,31,1108},{12,31,747},{11,31,5},{11,31,1927},{9,31,180},{14,31,300},{14,31,236},{14,31,200},{14,31,136},{15,28,451},{14,31,328},{13,31,194},{11,31,1},{15,29,456},{11,31,1},{13,31,1715},{13,31,1715},{13,31,1715}, +{12,31,1251},{12,31,1347},{12,31,747},{12,31,747},{11,31,5},{10,31,1242},{9,31,180},{14,31,200},{14,31,200},{14,31,200},{14,31,136},{15,27,338},{13,31,194},{13,31,194},{11,31,1},{13,31,338},{11,31,1},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{12,0,890},{12,0,890},{12,0,890},{12,0,890},{11,31,260}, +{11,31,260},{11,31,260},{11,31,5},{9,31,180},{9,31,180},{13,31,1836},{13,31,1564},{13,31,1395},{13,31,1123},{13,31,1620},{12,31,1012},{12,31,651},{11,31,85},{12,31,1564},{10,31,233},{14,31,204},{14,31,140},{14,31,104},{14,31,40},{15,29,216},{14,31,136},{14,31,100},{12,31,1},{14,31,216},{12,31,1},{13,31,1395},{13,31,1395},{13,31,1395},{13,31,1123},{13,31,1179},{12,31,651},{12,31,651}, +{11,31,85},{11,31,998},{10,31,233},{14,31,104},{14,31,104},{14,31,104},{14,31,40},{15,28,162},{14,31,100},{14,31,100},{12,31,1},{15,29,164},{12,31,1},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{13,0,954},{13,0,954},{13,0,954},{13,0,954},{12,31,290},{12,31,290},{12,31,290},{11,31,85},{10,31,233}, +{10,31,233},{0,17,1568},{0,14,442},{0,10,40},{0,8,485},{0,11,3379},{0,9,2369},{0,8,1061},{0,5,2435},{0,6,3760},{0,5,2660},{0,17,1568},{0,14,442},{0,10,40},{0,8,485},{1,8,3372},{0,9,2369},{0,8,1061},{0,5,2435},{5,1,3371},{0,5,2435},{0,8,0},{0,8,0},{0,8,0},{0,5,1},{0,4,288},{0,4,160},{0,4,160},{0,2,164},{0,2,332},{0,2,200},{0,8,0}, 
+{0,8,0},{0,8,0},{0,5,1},{0,4,288},{0,4,160},{0,4,160},{0,2,164},{2,0,288},{0,2,164},{3,7,1568},{0,14,442},{0,10,40},{0,8,485},{3,7,1568},{8,0,1568},{0,8,485},{0,6,1586},{8,0,1568},{0,6,1586},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,20,1570},{0,16,325},{0,11,5}, +{0,9,392},{0,13,3968},{0,10,2630},{0,9,1121},{0,6,2710},{0,7,4484},{0,6,3034},{0,20,1570},{0,16,325},{0,11,5},{0,9,392},{1,10,3968},{0,10,2630},{0,9,1121},{0,6,2710},{5,2,3968},{0,6,2710},{0,11,1},{0,11,1},{0,11,1},{0,6,4},{0,5,514},{0,5,274},{0,5,274},{0,3,289},{0,3,595},{0,3,370},{0,11,1},{0,11,1},{0,11,1},{0,6,4},{1,2,512}, +{0,5,274},{0,5,274},{0,3,289},{1,2,512},{0,3,289},{5,3,1568},{0,16,325},{0,11,5},{0,9,392},{5,3,1568},{4,7,1568},{0,9,392},{0,7,1586},{4,7,1568},{0,7,1586},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,22,1570},{0,17,225},{0,12,18},{0,11,292},{0,15,4652},{0,11,2945},{0,10,1217}, +{0,7,3035},{0,8,5283},{0,7,3476},{0,22,1570},{0,17,225},{0,12,18},{0,11,292},{2,8,4651},{0,11,2945},{0,10,1217},{0,7,3035},{5,3,4651},{0,7,3035},{0,13,0},{0,13,0},{0,13,0},{0,8,1},{0,7,802},{0,6,424},{0,6,424},{0,4,449},{0,3,931},{0,3,562},{0,13,0},{0,13,0},{0,13,0},{0,8,1},{2,0,800},{0,6,424},{0,6,424},{0,4,449},{0,4,800}, +{0,4,449},{1,19,1568},{0,17,225},{0,12,18},{0,11,292},{1,19,1568},{9,2,1568},{0,11,292},{0,8,1576},{9,2,1568},{0,8,1576},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,25,1570},{0,19,149},{0,13,73},{0,12,194},{0,17,5424},{0,13,3368},{0,11,1349},{0,8,3449},{0,9,6213},{0,7,3956},{0,25,1570}, +{0,19,149},{0,13,73},{0,12,194},{5,0,5419},{0,13,3368},{0,11,1349},{0,8,3449},{5,4,5419},{0,8,3449},{0,16,1},{0,16,1},{0,16,1},{0,9,4},{0,8,1152},{0,7,610},{0,7,610},{0,4,625},{0,4,1328},{0,4,769},{0,16,1},{0,16,1},{0,16,1},{0,9,4},{0,8,1152},{0,7,610},{0,7,610},{0,4,625},{4,0,1152},{0,4,625},{5,8,1568},{0,19,149},{1,13,13}, 
+{0,12,194},{5,8,1568},{4,10,1568},{0,12,194},{0,9,1576},{4,10,1568},{0,9,1576},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,28,1651},{0,21,155},{0,14,281},{0,13,198},{0,20,5424},{0,15,3099},{0,12,996},{0,9,3179},{0,10,6544},{0,8,3890},{1,24,1619},{0,21,155},{1,14,69},{0,13,198},{5,3,5419}, +{0,15,3099},{0,12,996},{0,9,3179},{4,7,5419},{0,9,3179},{0,19,82},{0,19,82},{0,19,82},{0,11,82},{0,11,1152},{0,9,445},{0,9,445},{0,6,505},{0,6,1494},{0,5,737},{1,15,50},{1,15,50},{1,15,50},{1,10,49},{3,1,1152},{0,9,445},{0,9,445},{0,6,505},{3,3,1152},{0,6,505},{5,11,1568},{0,21,74},{1,14,20},{0,13,117},{5,11,1568},{11,3,1568},{0,13,117}, +{0,10,1586},{11,3,1568},{0,10,1586},{0,0,81},{0,0,81},{0,0,81},{0,0,81},{0,3,0},{0,3,0},{0,3,0},{0,2,1},{0,1,25},{0,1,25},{1,27,1825},{0,23,323},{1,15,342},{0,14,361},{0,22,5420},{0,16,2834},{0,13,726},{0,10,2966},{0,11,6916},{0,9,3860},{1,27,1569},{1,21,131},{1,15,86},{1,14,181},{1,19,5419},{0,16,2834},{0,13,726},{0,10,2966},{9,2,5419}, +{0,10,2966},{1,18,257},{1,18,257},{1,18,257},{1,12,261},{0,13,1154},{0,11,337},{0,11,337},{0,7,388},{0,7,1665},{0,6,749},{1,18,1},{1,18,1},{1,18,1},{1,12,5},{4,0,1152},{0,11,337},{0,11,337},{0,7,388},{1,7,1152},{0,7,388},{8,2,1568},{0,23,34},{2,15,5},{0,14,72},{8,2,1568},{13,2,1568},{0,14,72},{0,11,1586},{13,2,1568},{0,11,1586},{1,0,257}, +{1,0,257},{1,0,257},{1,0,257},{0,6,1},{0,6,1},{0,6,1},{0,3,4},{0,2,85},{0,2,85},{1,30,1907},{1,23,411},{1,16,542},{1,15,454},{0,25,5424},{0,18,2630},{0,15,486},{0,11,2771},{0,13,7299},{0,11,3860},{2,26,1634},{1,23,155},{2,16,82},{1,15,198},{5,8,5419},{0,18,2630},{0,15,486},{0,11,2771},{4,10,5419},{0,11,2771},{1,21,338},{1,21,338},{1,21,338}, 
+{1,13,338},{0,16,1152},{0,13,274},{0,13,274},{0,8,305},{0,8,1856},{0,7,797},{2,17,64},{2,17,64},{2,17,64},{2,12,65},{3,6,1152},{0,13,274},{0,13,274},{0,8,305},{3,6,1152},{0,8,305},{3,23,1568},{0,25,17},{2,16,18},{0,15,45},{3,23,1568},{11,6,1568},{0,15,45},{0,12,1576},{11,6,1568},{0,12,1576},{1,0,337},{1,0,337},{1,0,337},{1,0,337},{0,8,1}, +{0,8,1},{0,8,1},{0,5,0},{0,4,169},{0,4,169},{1,31,2145},{1,25,590},{1,17,915},{1,16,619},{0,27,5420},{0,20,2424},{0,16,282},{0,12,2552},{0,15,7711},{0,11,3908},{2,29,1570},{2,23,149},{2,17,73},{2,16,194},{7,4,5419},{0,20,2424},{0,16,282},{0,12,2552},{9,5,5419},{0,12,2552},{1,23,546},{1,23,546},{1,23,546},{1,15,546},{0,19,1154},{0,15,194},{0,15,194}, +{0,9,218},{0,9,2123},{0,8,865},{2,20,1},{2,20,1},{2,20,1},{2,13,4},{5,2,1152},{0,15,194},{0,15,194},{0,9,218},{8,1,1152},{0,9,218},{9,4,1568},{0,27,5},{3,17,13},{0,16,26},{9,4,1568},{9,10,1568},{0,16,26},{0,13,1576},{9,10,1568},{0,13,1576},{1,0,545},{1,0,545},{1,0,545},{1,0,545},{0,11,0},{0,11,0},{0,11,0},{0,7,4},{0,5,289}, +{0,5,289},{2,31,2746},{1,27,945},{2,18,1370},{1,17,977},{0,30,5420},{0,22,2243},{0,17,145},{0,13,2386},{0,16,8161},{0,13,3986},{3,28,1619},{2,25,155},{3,18,69},{2,17,198},{8,2,5419},{0,22,2243},{0,17,145},{0,13,2386},{13,2,5419},{0,13,2386},{1,26,932},{1,26,932},{1,26,932},{1,16,936},{0,22,1154},{0,17,109},{0,17,109},{0,10,145},{0,11,2441},{0,9,1001},{3,19,50}, +{3,19,50},{3,19,50},{3,14,49},{5,5,1152},{0,17,109},{0,17,109},{0,10,145},{5,7,1152},{0,10,145},{11,0,1568},{0,29,10},{3,18,20},{0,18,8},{11,0,1568},{13,7,1568},{0,18,8},{0,14,1586},{13,7,1568},{0,14,1586},{1,0,932},{1,0,932},{1,0,932},{1,0,932},{0,14,1},{0,14,1},{0,14,1},{0,8,1},{0,6,468},{0,6,468},{2,31,3146},{2,27,1412},{2,19,1743}, 
+{1,19,1441},{0,31,5515},{0,23,2096},{0,19,69},{0,14,2251},{0,18,8669},{0,14,4100},{3,31,1569},{3,25,131},{3,19,86},{3,18,181},{3,23,5419},{0,23,2096},{0,19,69},{0,14,2251},{11,6,5419},{0,14,2251},{2,25,1379},{2,25,1379},{2,25,1379},{2,17,1379},{0,24,1152},{0,18,61},{0,18,61},{0,11,100},{0,12,2859},{0,10,1157},{3,22,1},{3,22,1},{3,22,1},{3,16,5},{7,1,1152}, +{0,18,61},{0,18,61},{0,11,100},{10,2,1152},{0,11,100},{10,6,1568},{1,29,2},{4,19,5},{0,19,5},{10,6,1568},{15,6,1568},{0,19,5},{0,15,1586},{15,6,1568},{0,15,1586},{2,0,1378},{2,0,1378},{2,0,1378},{2,0,1378},{0,16,1},{0,16,1},{0,16,1},{0,10,1},{0,8,657},{0,8,657},{2,31,3802},{2,29,1603},{2,21,2148},{2,19,1631},{1,31,5655},{0,25,2005},{0,20,31}, +{0,15,2138},{0,19,8963},{0,15,4070},{4,30,1634},{3,27,155},{4,20,82},{3,19,198},{9,4,5419},{0,25,2001},{0,20,27},{0,15,2134},{9,10,5419},{0,15,2134},{2,28,1587},{2,28,1587},{2,28,1587},{2,18,1590},{0,27,1158},{0,20,22},{0,20,22},{0,12,62},{0,14,3075},{0,11,1221},{4,21,64},{4,21,64},{4,21,64},{4,16,65},{5,10,1152},{0,20,18},{0,20,18},{0,12,58},{12,1,1152}, +{0,12,58},{12,2,1568},{1,31,10},{4,20,18},{0,20,18},{12,2,1568},{13,10,1568},{0,20,18},{0,16,1576},{13,10,1568},{0,16,1576},{2,0,1586},{2,0,1586},{2,0,1586},{2,0,1586},{0,19,4},{0,19,4},{0,19,4},{0,11,8},{0,9,769},{0,9,769},{3,31,3890},{2,31,1623},{3,21,2180},{2,20,1644},{1,31,5863},{0,27,1989},{1,21,109},{0,17,2117},{0,21,8560},{0,16,3545},{4,31,1640}, +{4,27,149},{4,21,73},{4,20,194},{10,3,5419},{0,27,1889},{0,21,49},{0,17,2017},{11,9,5419},{0,17,2017},{2,30,1619},{2,30,1619},{2,30,1619},{2,20,1619},{1,26,1188},{1,20,86},{1,20,86},{1,13,121},{0,16,2801},{0,13,949},{4,24,1},{4,24,1},{4,24,1},{4,17,4},{8,1,1152},{0,22,2},{0,22,2},{0,14,26},{10,5,1152},{0,14,26},{11,8,1568},{2,31,5},{5,21,13}, 
+{1,21,9},{11,8,1568},{11,14,1568},{1,21,9},{0,17,1576},{11,14,1568},{0,17,1576},{2,0,1618},{2,0,1618},{2,0,1618},{2,0,1618},{1,18,37},{1,18,37},{1,18,37},{1,12,37},{0,10,625},{0,10,625},{4,31,4308},{3,31,1589},{3,23,2160},{3,21,1621},{2,31,5895},{1,27,1999},{1,22,33},{1,18,2124},{0,23,8196},{0,17,3043},{5,31,1667},{4,29,155},{5,22,69},{4,21,198},{10,6,5419}, +{0,29,1772},{1,22,24},{0,18,1875},{15,6,5419},{0,18,1875},{3,30,1576},{3,30,1576},{3,30,1576},{3,20,1580},{1,29,1161},{1,22,29},{1,22,29},{1,15,58},{0,17,2529},{0,14,656},{5,23,50},{5,23,50},{5,23,50},{5,18,49},{8,4,1152},{0,24,1},{0,24,1},{0,15,1},{14,2,1152},{0,15,1},{13,4,1568},{3,31,13},{5,22,20},{2,22,8},{13,4,1568},{15,11,1568},{2,22,8}, +{0,18,1586},{15,11,1568},{0,18,1586},{3,0,1576},{3,0,1576},{3,0,1576},{3,0,1576},{1,21,10},{1,21,10},{1,21,10},{1,14,13},{0,12,520},{0,12,520},{4,31,4436},{3,31,1765},{4,23,2175},{3,23,1669},{3,31,6079},{1,29,1977},{2,23,105},{1,19,2107},{0,24,7969},{0,18,2675},{6,31,1832},{5,29,131},{5,23,86},{5,22,181},{12,2,5419},{0,30,1699},{1,23,62},{0,19,1782},{13,10,5419}, +{0,19,1782},{3,31,1665},{3,31,1665},{3,31,1665},{3,22,1640},{2,28,1188},{2,22,97},{2,22,97},{2,15,136},{0,19,2313},{0,15,474},{5,26,1},{5,26,1},{5,26,1},{5,20,5},{3,25,1152},{1,24,5},{1,24,5},{0,16,2},{12,6,1152},{0,16,2},{15,0,1568},{4,31,34},{6,23,5},{2,23,5},{15,0,1568},{12,16,1568},{2,23,5},{0,19,1586},{12,16,1568},{0,19,1586},{3,0,1640}, +{3,0,1640},{3,0,1640},{3,0,1640},{2,20,37},{2,20,37},{2,20,37},{2,14,37},{0,14,400},{0,14,400},{5,31,4740},{4,31,1716},{4,25,2148},{4,23,1631},{3,31,6351},{2,29,2005},{2,24,31},{2,19,2138},{0,26,7669},{0,19,2375},{6,31,1832},{5,31,155},{6,24,82},{5,23,198},{11,8,5419},{0,31,1712},{2,24,27},{0,20,1720},{11,14,5419},{0,20,1720},{4,31,1595},{4,31,1595},{4,31,1595}, 
+{4,22,1590},{2,31,1158},{2,24,22},{2,24,22},{2,16,62},{0,21,2091},{0,17,306},{6,25,64},{6,25,64},{6,25,64},{6,20,65},{9,6,1152},{1,26,1},{1,26,1},{1,17,5},{14,5,1152},{1,17,5},{14,6,1568},{5,31,74},{6,24,18},{2,24,18},{14,6,1568},{15,14,1568},{2,24,18},{0,20,1576},{15,14,1568},{0,20,1576},{4,0,1586},{4,0,1586},{4,0,1586},{4,0,1586},{2,23,4}, +{2,23,4},{2,23,4},{2,15,8},{0,16,277},{0,16,277},{5,31,5060},{5,31,1980},{5,25,2180},{4,24,1644},{4,31,6508},{2,31,1989},{3,25,109},{2,21,2117},{0,28,7364},{0,21,2098},{7,31,1952},{6,31,149},{6,25,73},{6,24,194},{12,7,5419},{1,31,1804},{2,25,49},{0,21,1657},{13,13,5419},{0,21,1657},{4,31,1739},{4,31,1739},{4,31,1739},{4,24,1619},{3,30,1188},{3,24,86},{3,24,86}, +{3,17,121},{0,22,1928},{0,18,194},{6,28,1},{6,28,1},{6,28,1},{6,21,4},{10,5,1152},{2,26,2},{2,26,2},{1,18,2},{12,9,1152},{1,18,2},{13,12,1568},{6,31,149},{7,25,13},{3,25,9},{13,12,1568},{13,18,1568},{3,25,9},{0,21,1576},{13,18,1568},{0,21,1576},{4,0,1618},{4,0,1618},{4,0,1618},{4,0,1618},{3,22,37},{3,22,37},{3,22,37},{3,16,37},{0,18,193}, +{0,18,193},{6,31,5316},{5,31,2160},{5,27,2160},{5,25,1621},{5,31,6800},{3,31,1999},{3,26,33},{3,22,2124},{0,29,7068},{0,22,1836},{7,31,2195},{7,31,270},{7,26,69},{6,25,198},{15,0,5419},{2,31,1970},{3,26,24},{0,22,1611},{12,16,5419},{0,22,1611},{5,31,1676},{5,31,1676},{5,31,1676},{5,24,1580},{3,31,1233},{3,26,29},{3,26,29},{3,19,58},{0,24,1798},{0,19,157},{7,27,50}, +{7,27,50},{7,27,50},{7,22,49},{10,8,1152},{2,28,1},{2,28,1},{2,19,1},{11,12,1152},{2,19,1},{15,8,1568},{7,31,221},{7,26,20},{4,26,8},{15,8,1568},{11,22,1568},{4,26,8},{0,22,1586},{11,22,1568},{0,22,1586},{5,0,1576},{5,0,1576},{5,0,1576},{5,0,1576},{3,25,10},{3,25,10},{3,25,10},{3,18,13},{0,20,106},{0,20,106},{6,31,5828},{6,31,2435},{6,27,2175}, 
+{5,27,1669},{5,31,7184},{4,31,2132},{4,27,105},{3,23,2107},{0,31,6820},{0,23,1690},{8,31,2306},{7,31,334},{7,27,86},{7,26,181},{14,6,5419},{4,31,2096},{3,27,62},{0,23,1590},{15,14,5419},{0,23,1590},{6,31,1859},{6,31,1859},{6,31,1859},{5,26,1640},{4,31,1220},{4,26,97},{4,26,97},{4,19,136},{0,26,1650},{0,21,161},{7,30,1},{7,30,1},{7,30,1},{7,24,5},{9,14,1152}, +{3,28,5},{3,28,5},{2,20,2},{14,10,1152},{2,20,2},{14,14,1568},{7,31,333},{8,27,40},{4,27,5},{14,14,1568},{14,20,1568},{4,27,5},{0,23,1586},{14,20,1568},{0,23,1586},{5,0,1640},{5,0,1640},{5,0,1640},{5,0,1640},{4,24,37},{4,24,37},{4,24,37},{4,18,37},{0,22,58},{0,22,58},{7,31,6036},{6,31,2835},{6,29,2148},{6,27,1631},{6,31,7316},{4,31,2228},{4,28,31}, +{4,23,2138},{0,31,6884},{0,24,1613},{8,31,2402},{8,31,666},{8,28,269},{7,27,198},{13,12,5419},{4,31,2224},{4,28,27},{0,24,1577},{13,18,5419},{0,24,1577},{6,31,1811},{6,31,1811},{6,31,1811},{6,26,1590},{5,31,1356},{4,28,22},{4,28,22},{4,20,62},{0,28,1508},{1,21,137},{7,31,106},{7,31,106},{7,31,106},{7,25,82},{11,10,1152},{3,30,1},{3,30,1},{3,21,5},{5,23,1152}, +{3,21,5},{13,20,1568},{8,31,410},{8,28,13},{4,28,18},{13,20,1568},{13,23,1570},{4,28,18},{0,24,1576},{13,23,1570},{0,24,1576},{6,0,1586},{6,0,1586},{6,0,1586},{6,0,1586},{4,27,4},{4,27,4},{4,27,4},{4,19,8},{0,24,37},{0,24,37},{7,31,6740},{7,31,3135},{7,29,2180},{6,28,1644},{7,31,7676},{5,31,2448},{5,29,109},{4,25,2117},{1,31,7196},{0,25,1593},{9,31,2594}, +{8,31,698},{8,29,82},{8,28,345},{14,11,5419},{5,31,2412},{4,29,49},{1,25,1580},{15,17,5419},{1,25,1580},{7,31,1979},{7,31,1979},{7,31,1979},{6,28,1619},{5,31,1388},{5,28,86},{5,28,86},{5,21,121},{0,30,1416},{1,23,161},{8,30,64},{8,30,64},{8,30,64},{8,25,65},{12,9,1152},{4,30,2},{4,30,2},{3,22,2},{14,13,1152},{3,22,2},{15,16,1568},{9,31,530},{8,29,18}, 
+{5,29,9},{15,16,1568},{15,22,1568},{5,29,9},{0,25,1576},{15,22,1568},{0,25,1576},{6,0,1618},{6,0,1618},{6,0,1618},{6,0,1618},{5,26,37},{5,26,37},{5,26,37},{5,20,37},{0,25,17},{0,25,17},{8,31,6906},{7,31,3909},{7,31,2160},{7,29,1621},{7,31,8144},{6,31,2902},{5,30,33},{5,26,2124},{2,31,7661},{1,26,1615},{9,31,2945},{9,31,1025},{8,30,86},{8,29,181},{14,14,5419}, +{7,31,2694},{5,30,24},{1,26,1590},{14,20,5419},{1,26,1590},{7,31,2060},{7,31,2060},{7,31,2060},{7,28,1580},{6,31,1476},{5,30,29},{5,30,29},{5,23,58},{0,31,1324},{2,23,157},{8,31,37},{8,31,37},{8,31,37},{8,27,5},{15,2,1152},{5,30,20},{5,30,20},{4,23,1},{13,16,1152},{4,23,1},{15,19,1568},{10,31,637},{9,30,5},{6,30,8},{15,19,1568},{13,26,1568},{6,30,8}, +{0,26,1586},{13,26,1568},{0,26,1586},{7,0,1576},{7,0,1576},{7,0,1576},{7,0,1576},{5,29,10},{5,29,10},{5,29,10},{5,22,13},{0,28,10},{0,28,10},{8,31,7386},{8,31,4250},{8,31,2490},{7,31,1669},{8,31,8461},{6,31,3350},{6,31,105},{5,27,2107},{4,31,8004},{1,27,1611},{10,31,3112},{9,31,1361},{9,31,69},{8,30,198},{13,20,5419},{7,31,2950},{5,31,62},{2,27,1590},{11,25,5420}, +{2,27,1590},{8,31,2486},{8,31,2486},{8,31,2486},{7,30,1640},{6,31,1700},{6,30,97},{6,30,97},{6,23,136},{1,31,1424},{2,25,161},{9,31,65},{9,31,65},{9,31,65},{9,27,49},{11,18,1152},{5,31,58},{5,31,58},{4,24,2},{10,21,1152},{4,24,2},{13,28,1568},{11,31,785},{9,31,20},{6,31,5},{13,28,1568},{11,30,1568},{6,31,5},{0,27,1586},{11,30,1568},{0,27,1586},{7,0,1640}, +{7,0,1640},{7,0,1640},{7,0,1640},{6,28,37},{6,28,37},{6,28,37},{6,22,37},{1,28,10},{1,28,10},{9,31,7014},{8,31,4230},{8,31,2294},{8,31,1846},{8,31,7865},{7,31,3114},{6,31,85},{6,27,1706},{4,31,7436},{2,28,1289},{11,31,2852},{10,31,1221},{9,31,145},{9,30,114},{13,22,4803},{8,31,2648},{6,31,81},{2,28,1253},{12,25,4803},{2,28,1253},{8,31,2294},{8,31,2294},{8,31,2294}, 
+{8,30,1811},{7,31,1740},{6,31,85},{6,31,85},{6,24,62},{2,31,1577},{3,25,137},{9,31,145},{9,31,145},{9,31,145},{9,29,5},{13,14,1152},{6,31,81},{6,31,81},{5,25,5},{8,25,1152},{5,25,5},{15,23,1250},{11,31,689},{10,31,4},{7,31,9},{15,23,1250},{11,31,1250},{7,31,9},{0,28,1252},{11,31,1250},{0,28,1252},{8,0,1810},{8,0,1810},{8,0,1810},{8,0,1810},{6,31,4}, +{6,31,4},{6,31,4},{6,23,8},{1,29,8},{1,29,8},{9,31,6534},{9,31,4134},{8,31,2486},{8,31,1590},{9,31,7237},{7,31,2970},{7,31,161},{6,28,1256},{5,31,6748},{2,29,949},{11,31,2340},{10,31,1125},{10,31,164},{9,31,97},{15,17,4056},{9,31,2244},{7,31,125},{3,28,909},{12,26,4056},{3,28,909},{8,31,2486},{8,31,2486},{8,31,2486},{8,31,1590},{7,31,2156},{7,31,161},{7,31,161}, +{7,25,121},{3,31,1729},{3,27,161},{10,31,164},{10,31,164},{10,31,164},{10,29,65},{14,13,1152},{7,31,125},{7,31,125},{5,26,2},{11,23,1152},{5,26,2},{13,31,882},{12,31,482},{11,31,0},{8,31,4},{13,31,882},{15,27,882},{8,31,4},{0,28,900},{15,27,882},{0,28,900},{8,0,1586},{8,0,1586},{8,0,1586},{8,0,1586},{7,30,37},{7,30,37},{7,30,37},{7,24,37},{1,31,16}, +{1,31,16},{10,31,6091},{9,31,4053},{9,31,2609},{8,31,1761},{9,31,6490},{8,31,2622},{7,31,458},{7,28,835},{6,31,6162},{3,29,598},{12,31,1989},{11,31,931},{11,31,306},{10,31,5},{15,19,3318},{10,31,1806},{8,31,202},{4,29,545},{13,26,3318},{4,29,545},{9,31,2609},{9,31,2609},{9,31,2609},{8,31,1761},{8,31,2086},{7,31,458},{7,31,458},{7,27,58},{4,31,1868},{4,27,157},{11,31,306}, +{11,31,306},{11,31,306},{10,31,5},{14,16,1152},{8,31,202},{8,31,202},{6,27,1},{15,20,1152},{6,27,1},{15,26,545},{13,31,313},{12,31,4},{10,31,4},{15,26,545},{14,29,545},{10,31,4},{0,29,545},{14,29,545},{0,29,545},{8,0,1640},{8,0,1640},{8,0,1640},{8,0,1640},{7,31,58},{7,31,58},{7,31,58},{7,26,13},{2,31,13},{2,31,13},{10,31,5723},{10,31,3980},{9,31,2945}, 
+{9,31,1745},{10,31,6083},{8,31,2494},{8,31,558},{7,29,558},{7,31,5674},{4,30,411},{12,31,1573},{11,31,963},{11,31,338},{11,31,49},{13,27,2753},{10,31,1438},{9,31,290},{5,29,341},{13,27,2753},{5,29,341},{9,31,2945},{9,31,2945},{9,31,2945},{9,31,1745},{9,31,2390},{8,31,558},{8,31,558},{7,28,147},{5,31,2064},{4,29,161},{11,31,338},{11,31,338},{11,31,338},{11,31,49},{13,22,1152}, +{9,31,290},{9,31,290},{6,28,2},{12,25,1152},{6,28,2},{15,27,313},{13,31,185},{13,31,16},{11,31,0},{15,27,313},{13,31,313},{11,31,0},{0,29,337},{13,31,313},{0,29,337},{9,0,1576},{9,0,1576},{9,0,1576},{9,0,1576},{8,31,197},{8,31,197},{8,31,197},{7,27,122},{3,31,25},{3,31,25},{11,31,5415},{10,31,3996},{10,31,3035},{10,31,2006},{10,31,5619},{9,31,2378},{8,31,814}, +{8,29,414},{7,31,5338},{5,30,251},{12,31,1413},{12,31,813},{12,31,452},{11,31,65},{13,29,2273},{11,31,1218},{10,31,365},{6,30,146},{14,27,2273},{6,30,146},{10,31,3035},{10,31,3035},{10,31,3035},{10,31,2006},{9,31,2518},{8,31,814},{8,31,814},{8,28,121},{6,31,2329},{5,29,137},{12,31,452},{12,31,452},{12,31,452},{11,31,65},{15,18,1152},{10,31,365},{10,31,365},{7,29,5},{10,29,1152}, +{7,29,5},{15,28,145},{14,31,85},{13,31,16},{12,31,4},{15,28,145},{15,29,149},{12,31,4},{0,30,145},{15,29,149},{0,30,145},{9,0,1640},{9,0,1640},{9,0,1640},{9,0,1640},{8,31,85},{8,31,85},{8,31,85},{8,27,37},{4,31,40},{4,31,40},{11,31,5143},{11,31,4004},{11,31,3379},{10,31,2070},{11,31,5287},{10,31,2431},{9,31,1062},{8,30,133},{8,31,5011},{5,31,161},{13,31,1161}, +{13,31,889},{12,31,548},{12,31,164},{15,24,1878},{11,31,1106},{11,31,481},{7,30,66},{15,27,1881},{7,30,66},{11,31,3379},{11,31,3379},{11,31,3379},{10,31,2070},{10,31,2835},{9,31,1062},{9,31,1062},{8,29,62},{7,31,2577},{5,31,161},{12,31,548},{12,31,548},{12,31,548},{12,31,164},{13,27,1152},{11,31,481},{11,31,481},{7,30,2},{13,27,1152},{7,30,2},{15,29,45},{14,31,37},{14,31,1}, 
+{14,31,9},{15,29,45},{15,30,41},{14,31,9},{0,30,65},{15,30,41},{0,30,65},{10,0,1586},{10,0,1586},{10,0,1586},{10,0,1586},{9,31,221},{9,31,221},{9,31,221},{8,28,8},{5,31,80},{5,31,80},{12,31,4948},{11,31,4157},{11,31,3532},{11,31,2393},{11,31,5008},{10,31,2422},{10,31,1461},{9,31,125},{9,31,4752},{6,31,157},{13,31,1107},{13,31,835},{13,31,666},{12,31,362},{15,26,1536}, +{12,31,1011},{12,31,650},{8,31,16},{14,29,1536},{8,31,16},{11,31,3532},{11,31,3532},{11,31,3532},{11,31,2393},{10,31,3204},{10,31,1461},{10,31,1461},{9,30,114},{8,31,2976},{6,31,157},{13,31,666},{13,31,666},{13,31,666},{12,31,362},{13,30,1152},{12,31,650},{12,31,650},{8,31,16},{12,30,1152},{8,31,16},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0}, +{0,31,0},{15,31,0},{0,31,0},{10,0,1640},{10,0,1640},{10,0,1640},{10,0,1640},{9,31,221},{9,31,221},{9,31,221},{9,29,25},{6,31,157},{6,31,157},{12,31,4212},{12,31,3612},{12,31,3251},{11,31,2201},{12,31,4212},{11,31,2154},{10,31,1301},{9,31,13},{10,31,3939},{7,31,233},{14,31,776},{13,31,659},{13,31,490},{13,31,218},{15,27,1067},{13,31,699},{12,31,442},{9,31,4},{13,31,1067}, +{9,31,4},{12,31,3251},{12,31,3251},{12,31,3251},{11,31,2201},{11,31,2668},{10,31,1301},{10,31,1301},{9,31,13},{8,31,2528},{7,31,233},{13,31,490},{13,31,490},{13,31,490},{13,31,218},{15,25,802},{12,31,442},{12,31,442},{9,31,4},{15,27,802},{9,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{11,0,1576}, +{11,0,1576},{11,0,1576},{11,0,1576},{10,31,340},{10,31,340},{10,31,340},{9,31,13},{7,31,233},{7,31,233},{12,31,3732},{12,31,3132},{12,31,2771},{12,31,2171},{12,31,3444},{11,31,1834},{11,31,1209},{10,31,37},{10,31,3219},{8,31,400},{14,31,456},{14,31,392},{14,31,356},{13,31,170},{14,31,684},{13,31,459},{13,31,290},{10,31,1},{14,30,683},{10,31,1},{12,31,2771},{12,31,2771},{12,31,2771}, 
+{12,31,2171},{11,31,2348},{11,31,1209},{11,31,1209},{10,31,37},{9,31,2156},{8,31,400},{14,31,356},{14,31,356},{14,31,356},{13,31,170},{15,26,512},{13,31,290},{13,31,290},{10,31,1},{14,29,512},{10,31,1},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{11,0,1640},{11,0,1640},{11,0,1640},{11,0,1640},{10,31,436}, +{10,31,436},{10,31,436},{10,31,37},{8,31,400},{8,31,400},{13,31,3172},{13,31,2900},{12,31,2547},{12,31,1947},{12,31,2932},{12,31,1732},{11,31,1145},{10,31,53},{11,31,2695},{8,31,464},{14,31,264},{14,31,200},{14,31,164},{14,31,100},{15,28,387},{14,31,268},{13,31,178},{11,31,1},{15,29,396},{11,31,1},{12,31,2547},{12,31,2547},{12,31,2547},{12,31,1947},{12,31,1971},{11,31,1145},{11,31,1145}, +{10,31,53},{10,31,1794},{8,31,464},{14,31,164},{14,31,164},{14,31,164},{14,31,100},{15,27,290},{13,31,178},{13,31,178},{11,31,1},{14,30,290},{11,31,1},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{12,0,1586},{12,0,1586},{12,0,1586},{12,0,1586},{11,31,520},{11,31,520},{11,31,520},{10,31,53},{8,31,464}, +{8,31,464},{0,23,2665},{0,18,680},{0,13,50},{0,11,785},{0,15,5885},{0,11,4118},{0,10,1800},{0,7,4202},{0,8,6546},{0,7,4643},{0,23,2665},{0,18,680},{0,13,50},{0,11,785},{3,5,5885},{0,11,4118},{0,10,1800},{0,7,4202},{0,9,5885},{0,7,4202},{0,11,0},{0,11,0},{0,11,0},{0,7,4},{0,5,549},{0,5,289},{0,5,289},{0,3,306},{0,3,630},{0,3,387},{0,11,0}, +{0,11,0},{0,11,0},{0,7,4},{1,2,545},{0,5,289},{0,5,289},{0,3,306},{2,1,545},{0,3,306},{6,3,2665},{0,18,680},{0,13,50},{0,11,785},{6,3,2665},{11,0,2665},{0,11,785},{0,8,2689},{11,0,2665},{0,8,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,25,2665},{0,20,521},{0,14,5}, 
+{0,12,625},{0,17,6669},{0,13,4529},{0,11,1890},{0,8,4610},{0,9,7494},{0,7,5171},{0,25,2665},{0,20,521},{0,14,5},{0,12,625},{3,7,6669},{0,13,4529},{0,11,1890},{0,8,4610},{8,0,6669},{0,8,4610},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{0,7,841},{0,6,445},{0,6,445},{0,4,464},{0,3,982},{0,3,595},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{2,0,841}, +{0,6,445},{0,6,445},{0,4,464},{1,3,841},{0,4,464},{7,2,2665},{0,20,521},{0,14,5},{0,12,625},{7,2,2665},{12,0,2665},{0,12,625},{0,9,2689},{12,0,2665},{0,9,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,28,2665},{0,22,405},{0,15,10},{0,13,514},{0,19,7541},{0,14,4934},{0,12,2042}, +{0,9,5045},{0,10,8546},{0,8,5682},{0,28,2665},{0,22,405},{0,15,10},{0,13,514},{5,2,7538},{0,14,4934},{0,12,2042},{0,9,5045},{8,1,7538},{0,9,5045},{0,16,0},{0,16,0},{0,16,0},{0,10,4},{0,8,1201},{0,7,637},{0,7,637},{0,4,656},{0,4,1385},{0,4,800},{0,16,0},{0,16,0},{0,16,0},{0,10,4},{1,5,1201},{0,7,637},{0,7,637},{0,4,656},{4,0,1201}, +{0,4,656},{6,8,2665},{0,22,405},{0,15,10},{0,13,514},{6,8,2665},{11,3,2665},{0,13,514},{0,10,2689},{11,3,2665},{0,10,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,30,2669},{0,23,313},{0,16,68},{0,15,410},{0,20,8498},{0,16,5330},{0,13,2210},{0,10,5530},{0,11,9702},{0,9,6270},{0,30,2669}, +{0,23,313},{0,16,68},{0,15,410},{4,7,8493},{0,16,5330},{0,13,2210},{0,10,5530},{8,2,8493},{0,10,5530},{0,19,1},{0,19,1},{0,19,1},{0,11,1},{0,9,1629},{0,8,832},{0,8,832},{0,5,881},{0,5,1874},{0,5,1106},{0,19,1},{0,19,1},{0,19,1},{0,11,1},{2,3,1625},{0,8,832},{0,8,832},{0,5,881},{4,1,1625},{0,5,881},{8,2,2665},{0,23,313},{1,16,8}, +{0,15,410},{8,2,2665},{14,1,2665},{0,15,410},{0,11,2689},{14,1,2665},{0,11,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,31,2777},{0,26,232},{0,17,197},{0,16,305},{0,22,9674},{0,17,5849},{0,14,2450},{0,10,6106},{0,12,11199},{0,10,7006},{0,31,2777},{0,26,232},{0,17,197},{0,16,305},{1,19,9669}, 
+{0,17,5849},{0,14,2450},{0,10,6106},{9,2,9669},{0,10,6106},{0,22,1},{0,22,1},{0,22,1},{0,13,0},{0,11,2178},{0,10,1125},{0,10,1125},{0,6,1189},{0,6,2520},{0,5,1475},{0,22,1},{0,22,1},{0,22,1},{0,13,0},{3,1,2178},{0,10,1125},{0,10,1125},{0,6,1189},{3,3,2178},{0,6,1189},{9,2,2665},{0,26,232},{1,17,17},{0,16,305},{9,2,2665},{13,4,2665},{0,16,305}, +{0,12,2689},{13,4,2665},{0,12,2689},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{1,31,3045},{0,28,217},{0,19,401},{0,17,282},{0,25,9670},{0,19,5529},{0,16,1970},{0,12,5738},{0,13,11589},{0,11,6898},{1,31,2789},{0,28,217},{1,18,146},{0,17,282},{5,8,9669},{0,19,5529},{0,16,1970},{0,12,5738},{4,10,9669}, +{0,12,5738},{0,24,64},{0,24,64},{0,24,64},{0,15,68},{0,13,2180},{0,11,949},{0,11,949},{0,7,1018},{0,7,2691},{0,6,1433},{1,21,64},{1,21,64},{1,21,64},{1,13,68},{4,0,2178},{0,11,949},{0,11,949},{0,7,1018},{1,7,2178},{0,7,1018},{10,1,2665},{0,28,153},{2,18,5},{0,17,218},{10,1,2665},{15,3,2665},{0,17,218},{0,13,2689},{15,3,2665},{0,13,2689},{0,0,64}, +{0,0,64},{0,0,64},{0,0,64},{0,3,1},{0,3,1},{0,3,1},{0,2,4},{0,1,18},{0,1,18},{1,31,3285},{0,29,341},{1,19,453},{0,18,405},{0,27,9674},{0,20,5170},{0,17,1546},{0,13,5429},{0,15,11993},{0,12,6819},{2,31,2966},{1,28,221},{1,19,197},{1,18,305},{7,4,9669},{0,20,5170},{0,17,1546},{0,13,5429},{9,5,9669},{0,13,5429},{0,27,257},{0,27,257},{0,27,257}, +{1,15,256},{0,16,2178},{0,13,832},{0,13,832},{0,8,881},{0,8,2882},{0,7,1427},{1,23,4},{1,23,4},{1,23,4},{1,15,0},{3,6,2178},{0,13,832},{0,13,832},{0,8,881},{3,6,2178},{0,8,881},{11,0,2665},{0,29,85},{2,19,10},{0,18,149},{11,0,2665},{13,7,2665},{0,18,149},{0,14,2689},{13,7,2665},{0,14,2689},{0,0,256},{0,0,256},{0,0,256},{0,0,256},{0,5,1}, 
+{0,5,1},{0,5,1},{0,3,1},{0,2,72},{0,2,72},{1,31,3909},{1,29,465},{1,21,676},{1,19,538},{0,30,9669},{0,22,4878},{0,18,1190},{0,14,5138},{0,16,12390},{0,13,6789},{2,31,2966},{1,29,209},{2,20,149},{1,19,282},{6,10,9669},{0,22,4878},{0,18,1190},{0,14,5138},{12,3,9669},{0,14,5138},{1,26,320},{1,26,320},{1,26,320},{1,16,324},{0,19,2180},{0,15,680},{0,15,680}, +{0,9,740},{0,9,3149},{0,8,1441},{1,26,64},{1,26,64},{1,26,64},{1,16,68},{5,2,2178},{0,15,680},{0,15,680},{0,9,740},{8,1,2178},{0,9,740},{11,3,2665},{0,31,41},{3,20,8},{0,19,98},{11,3,2665},{15,6,2665},{0,19,98},{0,15,2689},{15,6,2665},{0,15,2689},{1,0,320},{1,0,320},{1,0,320},{1,0,320},{0,8,0},{0,8,0},{0,8,0},{0,5,1},{0,4,160}, +{0,4,160},{2,31,4514},{1,31,630},{1,22,1110},{1,20,694},{0,31,9789},{0,23,4646},{0,20,849},{0,15,4826},{0,18,12955},{0,14,6798},{3,31,3101},{2,30,232},{2,21,197},{2,20,305},{3,23,9669},{0,23,4646},{0,20,849},{0,15,4826},{11,6,9669},{0,15,4826},{1,29,545},{1,29,545},{1,29,545},{1,18,546},{0,22,2180},{0,17,505},{0,17,505},{0,11,610},{0,11,3467},{0,10,1513},{2,26,1}, +{2,26,1},{2,26,1},{2,17,0},{5,5,2178},{0,17,505},{0,17,505},{0,11,610},{5,7,2178},{0,11,610},{11,6,2665},{1,31,85},{3,21,17},{0,20,65},{11,6,2665},{15,8,2665},{0,20,65},{0,16,2689},{15,8,2665},{0,16,2689},{1,0,545},{1,0,545},{1,0,545},{1,0,545},{0,11,0},{0,11,0},{0,11,0},{0,7,4},{0,5,289},{0,5,289},{2,31,5330},{1,31,1110},{2,23,1490}, +{1,21,979},{1,31,9981},{0,26,4406},{0,21,579},{0,16,4610},{0,19,13489},{0,15,6846},{3,31,3341},{2,31,226},{3,22,146},{2,21,282},{9,4,9669},{0,26,4406},{0,21,579},{0,16,4610},{9,10,9669},{0,16,4610},{1,31,885},{1,31,885},{1,31,885},{1,20,885},{0,24,2178},{0,19,389},{0,19,389},{0,12,464},{0,12,3885},{0,11,1603},{3,25,64},{3,25,64},{3,25,64},{3,17,68},{7,1,2178}, 
+{0,19,389},{0,19,389},{0,12,464},{10,2,2178},{0,12,464},{13,2,2665},{2,31,162},{4,22,5},{0,22,37},{13,2,2665},{12,13,2665},{0,22,37},{0,17,2689},{12,13,2665},{0,17,2689},{1,0,881},{1,0,881},{1,0,881},{1,0,881},{0,13,1},{0,13,1},{0,13,1},{0,8,0},{0,6,445},{0,6,445},{3,31,6366},{2,31,1635},{2,24,1886},{1,22,1410},{1,31,10381},{0,28,4146},{0,22,377}, +{0,17,4373},{0,20,14006},{0,16,6915},{4,31,3434},{3,31,242},{3,23,197},{3,22,305},{10,3,9669},{0,28,4146},{0,22,377},{0,17,4373},{11,9,9669},{0,17,4373},{2,31,1346},{2,31,1346},{2,31,1346},{2,20,1345},{0,27,2180},{0,21,274},{0,21,274},{0,13,353},{0,14,4269},{0,11,1763},{3,27,4},{3,27,4},{3,27,4},{3,19,0},{5,10,2178},{0,21,274},{0,21,274},{0,13,353},{12,1,2178}, +{0,13,353},{14,1,2665},{3,31,242},{4,23,10},{0,23,10},{14,1,2665},{15,11,2665},{0,23,10},{0,18,2689},{15,11,2665},{0,18,2689},{1,0,1345},{1,0,1345},{1,0,1345},{1,0,1345},{0,16,0},{0,16,0},{0,16,0},{0,10,4},{0,7,637},{0,7,637},{3,31,7374},{2,31,2339},{2,25,2441},{2,23,1763},{2,31,11019},{0,29,3909},{0,23,243},{0,18,4154},{0,22,14614},{0,17,7029},{5,31,3654}, +{4,31,394},{4,24,149},{3,23,282},{9,9,9669},{0,29,3909},{0,23,243},{0,18,4154},{14,7,9669},{0,18,4154},{2,31,1714},{2,31,1714},{2,31,1714},{2,22,1669},{0,29,2180},{0,23,194},{0,23,194},{0,14,260},{0,15,4686},{0,13,1937},{3,30,64},{3,30,64},{3,30,64},{3,20,68},{8,1,2178},{0,23,194},{0,23,194},{0,14,260},{10,5,2178},{0,14,260},{15,0,2665},{4,31,313},{5,24,8}, +{0,24,4},{15,0,2665},{13,15,2665},{0,24,4},{0,19,2689},{13,15,2665},{0,19,2689},{2,0,1665},{2,0,1665},{2,0,1665},{2,0,1665},{0,19,1},{0,19,1},{0,19,1},{0,11,1},{0,8,832},{0,8,832},{3,31,8967},{3,31,3510},{3,26,3255},{2,24,2243},{2,31,11766},{0,31,3686},{0,25,138},{0,19,3938},{0,23,15369},{0,18,7206},{5,31,3933},{4,31,457},{4,25,197},{4,24,305},{12,2,9669}, 
+{0,31,3686},{0,25,138},{0,19,3938},{13,10,9669},{0,19,3938},{2,31,2434},{2,31,2434},{2,31,2434},{2,23,2182},{0,31,2210},{0,25,137},{0,25,137},{0,15,181},{0,16,5157},{0,14,2163},{4,30,1},{4,30,1},{4,30,1},{4,21,0},{8,4,2178},{0,25,137},{0,25,137},{0,15,181},{14,2,2178},{0,15,181},{15,3,2665},{5,31,421},{5,25,17},{1,25,5},{15,3,2665},{12,18,2665},{1,25,5}, +{0,20,2689},{12,18,2665},{0,20,2689},{2,0,2178},{2,0,2178},{2,0,2178},{2,0,2178},{0,22,1},{0,22,1},{0,22,1},{0,13,0},{0,10,1125},{0,10,1125},{4,31,10234},{3,31,4421},{3,27,3739},{2,26,2742},{2,31,12773},{0,31,3719},{0,26,87},{0,20,3771},{0,25,16061},{0,19,7283},{6,31,4050},{5,31,629},{5,26,146},{4,25,282},{11,8,9669},{0,31,3718},{0,26,86},{0,20,3770},{11,14,9669}, +{0,20,3770},{3,31,3125},{3,31,3125},{3,31,3125},{2,25,2706},{1,31,2411},{0,26,86},{0,26,86},{0,16,129},{0,18,5544},{0,15,2318},{5,29,64},{5,29,64},{5,29,64},{5,21,68},{3,25,2178},{0,26,85},{0,26,85},{0,16,128},{12,6,2178},{0,16,128},{15,6,2665},{5,31,565},{6,26,5},{1,26,2},{15,6,2665},{14,17,2665},{1,26,2},{0,21,2689},{14,17,2665},{0,21,2689},{2,0,2705}, +{2,0,2705},{2,0,2705},{2,0,2705},{0,24,1},{0,24,1},{0,24,1},{0,15,5},{0,11,1348},{0,11,1348},{4,31,10874},{4,31,5018},{3,28,3750},{3,26,2754},{3,31,13045},{1,31,4003},{0,27,183},{0,21,3686},{0,27,15601},{0,20,6570},{7,31,4366},{5,31,965},{5,27,197},{5,26,305},{12,7,9669},{1,31,3954},{0,27,102},{0,21,3605},{13,13,9669},{0,21,3605},{3,31,3173},{3,31,3173},{3,31,3173}, +{3,25,2690},{1,31,2427},{0,28,113},{0,28,113},{0,17,170},{0,20,5170},{0,16,1856},{5,31,4},{5,31,4},{5,31,4},{5,23,0},{9,6,2178},{0,28,32},{0,28,32},{0,17,89},{14,5,2178},{0,17,89},{15,8,2665},{6,31,706},{6,27,10},{2,27,10},{15,8,2665},{12,21,2665},{2,27,10},{0,22,2689},{12,21,2665},{0,22,2689},{3,0,2689},{3,0,2689},{3,0,2689},{3,0,2689},{1,23,53}, 
+{1,23,53},{1,23,53},{1,15,49},{0,13,1217},{0,13,1217},{5,31,11278},{4,31,5402},{4,29,3753},{3,28,2745},{4,31,13566},{1,31,4403},{1,28,77},{0,22,3747},{0,28,15046},{0,21,5958},{7,31,4590},{6,31,1171},{6,28,149},{5,27,282},{14,3,9669},{2,31,4265},{1,28,76},{0,22,3458},{11,17,9669},{0,22,3458},{4,31,3377},{4,31,3377},{4,31,3377},{3,27,2706},{2,31,2532},{1,28,73},{1,28,73}, +{1,18,129},{0,21,4837},{0,17,1490},{6,31,82},{6,31,82},{6,31,82},{5,24,68},{10,5,2178},{0,30,8},{0,30,8},{0,19,49},{12,9,2178},{0,19,49},{15,11,2665},{7,31,850},{7,28,8},{2,28,4},{15,11,2665},{15,19,2665},{2,28,4},{0,23,2689},{15,19,2665},{0,23,2689},{3,0,2705},{3,0,2705},{3,0,2705},{3,0,2705},{1,26,1},{1,26,1},{1,26,1},{1,16,5},{0,14,1037}, +{0,14,1037},{6,31,11954},{5,31,6090},{4,30,3794},{4,28,2754},{5,31,14170},{2,31,4863},{2,29,187},{1,24,3689},{0,30,14558},{0,23,5274},{8,31,5030},{7,31,1556},{6,29,197},{6,28,305},{14,6,9669},{3,31,4594},{1,29,101},{0,24,3265},{15,14,9669},{0,24,3265},{4,31,3530},{4,31,3530},{4,31,3530},{4,27,2693},{2,31,2739},{1,30,134},{1,30,134},{1,19,197},{0,23,4506},{0,19,1109},{6,31,64}, +{6,31,64},{6,31,64},{6,25,0},{10,8,2178},{1,30,34},{1,30,34},{0,20,16},{11,12,2178},{0,20,16},{15,14,2665},{8,31,1053},{7,29,17},{3,29,5},{15,14,2665},{14,22,2665},{3,29,5},{0,24,2689},{14,22,2665},{0,24,2689},{4,0,2689},{4,0,2689},{4,0,2689},{4,0,2689},{2,26,50},{2,26,50},{2,26,50},{2,17,49},{0,16,818},{0,16,818},{6,31,12466},{5,31,6794},{5,31,3739}, +{4,30,2742},{5,31,14554},{3,31,5363},{2,30,87},{1,24,3737},{0,31,14190},{0,24,4785},{8,31,5158},{7,31,2036},{7,30,146},{6,29,282},{13,12,9669},{4,31,4806},{2,30,86},{0,25,3130},{13,18,9669},{0,25,3130},{5,31,3658},{5,31,3658},{5,31,3658},{4,29,2706},{3,31,2795},{2,30,86},{2,30,86},{2,20,129},{0,25,4315},{0,20,809},{7,31,100},{7,31,100},{7,31,100},{7,25,68},{9,14,2178}, 
+{1,31,68},{1,31,68},{0,21,1},{14,10,2178},{0,21,1},{13,23,2665},{9,31,1241},{8,30,50},{3,30,2},{13,23,2665},{11,27,2665},{3,30,2},{0,25,2689},{11,27,2665},{0,25,2689},{4,0,2705},{4,0,2705},{4,0,2705},{4,0,2705},{2,28,1},{2,28,1},{2,28,1},{2,19,5},{0,18,666},{0,18,666},{7,31,13094},{6,31,7445},{5,31,3915},{5,30,2754},{6,31,14998},{4,31,5926},{2,31,183}, +{2,25,3686},{0,31,14254},{0,25,4323},{9,31,5546},{8,31,2478},{7,31,197},{7,30,305},{14,11,9669},{5,31,5138},{2,31,102},{0,26,3013},{15,17,9669},{0,26,3013},{5,31,3914},{5,31,3914},{5,31,3914},{5,29,2690},{4,31,3042},{2,31,182},{2,31,182},{2,21,170},{0,27,4059},{0,21,597},{7,31,196},{7,31,196},{7,31,196},{7,27,0},{11,10,2178},{2,31,101},{2,31,101},{0,22,4},{5,23,2178}, +{0,22,4},{15,19,2665},{10,31,1384},{8,31,5},{4,31,10},{15,19,2665},{14,25,2665},{4,31,10},{0,26,2689},{14,25,2665},{0,26,2689},{5,0,2689},{5,0,2689},{5,0,2689},{5,0,2689},{3,27,53},{3,27,53},{3,27,53},{3,19,49},{0,20,505},{0,20,505},{7,31,12517},{6,31,7482},{6,31,4001},{5,31,2706},{6,31,14185},{4,31,5491},{3,31,154},{2,26,3124},{0,31,13437},{0,26,3306},{9,31,4949}, +{8,31,2261},{8,31,325},{7,30,192},{14,13,8712},{6,31,4686},{3,31,153},{0,27,2403},{11,23,8712},{0,27,2403},{6,31,4001},{6,31,4001},{6,31,4001},{5,31,2706},{4,31,3234},{3,31,154},{3,31,154},{3,22,129},{0,28,3762},{0,23,425},{8,31,325},{8,31,325},{8,31,325},{7,28,68},{12,9,2178},{3,31,153},{3,31,153},{1,23,1},{14,13,2178},{1,23,1},{13,27,2178},{10,31,1157},{9,31,16}, +{5,31,1},{13,27,2178},{13,27,2178},{5,31,1},{0,27,2178},{13,27,2178},{0,27,2178},{5,0,2705},{5,0,2705},{5,0,2705},{5,0,2705},{3,30,1},{3,30,1},{3,30,1},{3,20,5},{0,22,389},{0,22,389},{8,31,12034},{7,31,7195},{6,31,4370},{6,31,2693},{7,31,13066},{5,31,5014},{4,31,261},{3,27,2390},{1,31,12394},{0,27,2277},{10,31,4410},{9,31,2045},{8,31,289},{8,30,192},{14,15,7578}, 
+{7,31,4050},{4,31,212},{0,27,1701},{12,23,7578},{0,27,1701},{6,31,4370},{6,31,4370},{6,31,4370},{6,31,2693},{5,31,3429},{4,31,261},{4,31,261},{3,23,197},{0,30,3509},{0,24,306},{8,31,289},{8,31,289},{8,31,289},{8,28,68},{15,2,2178},{4,31,212},{4,31,212},{1,24,9},{13,16,2178},{1,24,9},{15,22,1625},{11,31,850},{9,31,25},{6,31,4},{15,22,1625},{13,28,1625},{6,31,4}, +{0,27,1665},{13,28,1625},{0,27,1665},{6,0,2689},{6,0,2689},{6,0,2689},{6,0,2689},{4,30,50},{4,30,50},{4,30,50},{4,21,49},{0,24,306},{0,24,306},{8,31,11042},{7,31,7259},{7,31,4450},{6,31,2805},{7,31,12298},{5,31,4742},{4,31,501},{4,27,1875},{2,31,11643},{0,28,1578},{10,31,3802},{9,31,1869},{9,31,425},{8,31,25},{13,20,6661},{7,31,3554},{5,31,292},{0,28,1217},{13,23,6662}, +{0,28,1217},{7,31,4450},{7,31,4450},{7,31,4450},{6,31,2805},{6,31,3714},{4,31,501},{4,31,501},{4,24,129},{0,31,3354},{0,25,244},{9,31,425},{9,31,425},{9,31,425},{8,30,0},{11,18,2178},{5,31,292},{5,31,292},{2,25,1},{10,21,2178},{2,25,1},{15,23,1201},{11,31,674},{10,31,9},{7,31,16},{15,23,1201},{12,30,1201},{7,31,16},{0,28,1201},{12,30,1201},{0,28,1201},{6,0,2705}, +{6,0,2705},{6,0,2705},{6,0,2705},{4,31,17},{4,31,17},{4,31,17},{4,23,5},{0,26,218},{0,26,218},{8,31,10434},{8,31,7186},{7,31,4898},{7,31,2833},{8,31,11595},{6,31,4462},{5,31,629},{4,28,1387},{3,31,10895},{0,28,1002},{11,31,3446},{10,31,1707},{9,31,505},{8,31,73},{13,22,5829},{8,31,3170},{6,31,405},{1,28,869},{12,25,5829},{1,28,869},{7,31,4898},{7,31,4898},{7,31,4898}, +{7,31,2833},{6,31,3906},{5,31,629},{5,31,629},{4,25,170},{0,31,3546},{0,27,228},{9,31,505},{9,31,505},{9,31,505},{9,30,68},{13,14,2178},{6,31,405},{6,31,405},{2,26,4},{8,25,2178},{2,26,4},{13,31,841},{12,31,461},{11,31,1},{8,31,9},{13,31,841},{15,27,841},{8,31,9},{0,28,865},{15,27,841},{0,28,865},{7,0,2689},{7,0,2689},{7,0,2689},{7,0,2689},{5,31,53}, 
+{5,31,53},{5,31,53},{5,23,49},{0,28,137},{0,28,137},{9,31,10014},{8,31,6962},{8,31,5026},{7,31,3105},{8,31,10683},{7,31,4354},{6,31,933},{5,28,1019},{4,31,10078},{0,29,630},{11,31,2934},{10,31,1611},{10,31,650},{9,31,25},{15,17,5082},{8,31,2786},{7,31,521},{1,29,546},{12,26,5082},{1,29,546},{8,31,5026},{8,31,5026},{8,31,5026},{7,31,3105},{7,31,4170},{6,31,933},{6,31,933}, +{5,26,129},{1,31,3814},{0,28,234},{10,31,650},{10,31,650},{10,31,650},{9,31,25},{14,13,2178},{7,31,521},{7,31,521},{3,27,1},{11,23,2178},{3,27,1},{15,26,545},{13,31,313},{12,31,4},{10,31,4},{15,26,545},{14,29,545},{10,31,4},{0,29,545},{14,29,545},{0,29,545},{7,0,2705},{7,0,2705},{7,0,2705},{7,0,2705},{5,31,101},{5,31,101},{5,31,101},{5,24,5},{0,29,85}, +{0,29,85},{9,31,9465},{9,31,7065},{8,31,5233},{8,31,3329},{8,31,10116},{7,31,4183},{6,31,1338},{5,29,645},{5,31,9447},{0,30,441},{11,31,2664},{11,31,1525},{10,31,848},{10,31,113},{15,19,4344},{9,31,2424},{8,31,724},{2,30,321},{13,26,4344},{2,30,321},{8,31,5233},{8,31,5233},{8,31,5233},{8,31,3329},{7,31,4629},{6,31,1338},{6,31,1338},{5,27,197},{2,31,4212},{1,29,213},{10,31,848}, +{10,31,848},{10,31,848},{10,31,113},{14,16,2178},{8,31,724},{8,31,724},{3,28,9},{15,20,2178},{3,28,9},{15,27,290},{13,31,178},{13,31,9},{11,31,1},{15,27,290},{14,30,290},{11,31,1},{0,29,320},{14,30,290},{0,29,320},{8,0,2929},{8,0,2929},{8,0,2929},{8,0,2929},{6,31,113},{6,31,113},{6,31,113},{6,25,49},{0,31,45},{0,31,45},{10,31,9329},{9,31,6985},{9,31,5541}, +{8,31,3473},{9,31,9496},{8,31,4420},{7,31,1630},{6,29,426},{5,31,9031},{1,30,301},{12,31,2275},{11,31,1557},{11,31,932},{10,31,225},{13,27,3779},{10,31,2086},{8,31,884},{3,30,129},{13,27,3779},{3,30,129},{9,31,5541},{9,31,5541},{9,31,5541},{8,31,3473},{8,31,4836},{7,31,1630},{7,31,1630},{6,28,129},{4,31,4442},{1,30,237},{11,31,932},{11,31,932},{11,31,932},{10,31,225},{13,22,2178}, 
+{8,31,884},{8,31,884},{4,29,1},{12,25,2178},{4,29,1},{15,28,130},{14,31,72},{13,31,25},{12,31,9},{15,28,130},{15,29,136},{12,31,9},{0,30,128},{15,29,136},{0,30,128},{8,0,2689},{8,0,2689},{8,0,2689},{8,0,2689},{6,31,257},{6,31,257},{6,31,257},{6,27,5},{1,31,89},{1,31,89},{10,31,8929},{10,31,7186},{9,31,5845},{9,31,3829},{9,31,9208},{8,31,4260},{7,31,2270}, +{6,30,245},{6,31,8708},{2,31,228},{12,31,2115},{12,31,1515},{12,31,1154},{11,31,353},{13,29,3299},{11,31,1938},{10,31,1013},{4,30,68},{14,27,3299},{4,30,68},{9,31,5845},{9,31,5845},{9,31,5845},{9,31,3829},{8,31,5124},{7,31,2270},{7,31,2270},{6,29,170},{4,31,4762},{2,31,228},{12,31,1154},{12,31,1154},{12,31,1154},{11,31,353},{15,18,2178},{10,31,1013},{10,31,1013},{4,30,4},{10,29,2178}, +{4,30,4},{15,30,34},{14,31,40},{14,31,4},{14,31,4},{15,30,34},{15,30,34},{14,31,4},{0,30,64},{15,30,34},{0,30,64},{8,0,2705},{8,0,2705},{8,0,2705},{8,0,2705},{7,31,245},{7,31,245},{7,31,245},{7,27,49},{2,31,164},{2,31,164},{11,31,8857},{10,31,7170},{10,31,6209},{9,31,4133},{10,31,8853},{8,31,4484},{8,31,2548},{7,31,170},{7,31,8388},{3,31,244},{13,31,1971}, +{12,31,1611},{12,31,1250},{11,31,625},{15,24,2904},{11,31,1826},{10,31,1157},{5,31,1},{15,27,2907},{5,31,1},{10,31,6209},{10,31,6209},{10,31,6209},{9,31,4133},{9,31,5460},{8,31,2548},{8,31,2548},{7,30,129},{5,31,5126},{3,31,244},{12,31,1250},{12,31,1250},{12,31,1250},{11,31,625},{13,27,2178},{10,31,1157},{10,31,1157},{5,31,1},{13,27,2178},{5,31,1},{15,31,0},{15,31,0},{15,31,0}, +{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{9,0,2689},{9,0,2689},{9,0,2689},{9,0,2689},{7,31,485},{7,31,485},{7,31,485},{7,28,5},{3,31,244},{3,31,244},{11,31,7705},{11,31,6566},{10,31,5633},{10,31,3890},{10,31,7737},{9,31,3874},{8,31,2386},{7,31,116},{7,31,7398},{4,31,317},{13,31,1458},{13,31,1186},{13,31,1017},{12,31,425},{15,25,2166}, 
+{12,31,1398},{11,31,850},{6,31,4},{12,31,2166},{6,31,4},{10,31,5633},{10,31,5633},{10,31,5633},{10,31,3890},{9,31,4830},{8,31,2386},{8,31,2386},{7,31,116},{6,31,4509},{4,31,317},{13,31,1017},{13,31,1017},{13,31,1017},{12,31,425},{15,22,1625},{11,31,850},{11,31,850},{6,31,4},{13,28,1625},{6,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0}, +{0,31,0},{15,31,0},{0,31,0},{9,0,2725},{9,0,2725},{9,0,2725},{9,0,2725},{8,31,450},{8,31,450},{8,31,450},{7,30,101},{4,31,317},{4,31,317},{11,31,6953},{11,31,5814},{11,31,5189},{10,31,3650},{11,31,6713},{10,31,3531},{9,31,2142},{8,31,74},{8,31,6397},{5,31,425},{13,31,1138},{13,31,866},{13,31,697},{12,31,361},{15,26,1601},{12,31,1046},{11,31,674},{7,31,16},{14,29,1601}, +{7,31,16},{11,31,5189},{11,31,5189},{11,31,5189},{10,31,3650},{10,31,4313},{9,31,2142},{9,31,2142},{8,31,74},{7,31,3981},{5,31,425},{13,31,697},{13,31,697},{13,31,697},{12,31,361},{15,23,1201},{11,31,674},{11,31,674},{7,31,16},{12,30,1201},{7,31,16},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{10,0,2689}, +{10,0,2689},{10,0,2689},{10,0,2689},{8,31,578},{8,31,578},{8,31,578},{8,30,49},{5,31,425},{5,31,425},{12,31,6211},{11,31,5318},{11,31,4693},{11,31,3554},{11,31,5833},{10,31,3067},{10,31,2106},{8,31,10},{9,31,5601},{6,31,580},{14,31,825},{13,31,674},{13,31,505},{13,31,233},{15,27,1122},{13,31,738},{12,31,461},{8,31,9},{13,31,1122},{8,31,9},{11,31,4693},{11,31,4693},{11,31,4693}, +{11,31,3554},{10,31,3849},{10,31,2106},{10,31,2106},{8,31,10},{7,31,3629},{6,31,580},{13,31,505},{13,31,505},{13,31,505},{13,31,233},{13,31,841},{12,31,461},{12,31,461},{8,31,9},{15,27,841},{8,31,9},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{10,0,2705},{10,0,2705},{10,0,2705},{10,0,2705},{9,31,666}, 
+{9,31,666},{9,31,666},{8,31,10},{6,31,580},{6,31,580},{12,31,5427},{12,31,4827},{11,31,4453},{11,31,3314},{12,31,5175},{10,31,2859},{10,31,1898},{9,31,74},{10,31,4842},{7,31,724},{14,31,489},{14,31,425},{14,31,389},{13,31,169},{14,31,729},{13,31,482},{13,31,313},{10,31,4},{14,30,726},{10,31,4},{11,31,4453},{11,31,4453},{11,31,4453},{11,31,3314},{11,31,3445},{10,31,1898},{10,31,1898}, +{9,31,74},{8,31,3213},{7,31,724},{14,31,389},{14,31,389},{14,31,389},{13,31,169},{15,26,545},{13,31,313},{13,31,313},{10,31,4},{14,29,545},{10,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{11,0,2689},{11,0,2689},{11,0,2689},{11,0,2689},{9,31,890},{9,31,890},{9,31,890},{9,31,74},{7,31,724}, +{7,31,724},{2,31,33740},{0,31,5184},{0,22,420},{0,21,4221},{1,31,46089},{0,29,24105},{0,21,8317},{0,18,24790},{0,21,63990},{0,16,38959},{1,31,9704},{0,30,2866},{0,21,389},{0,19,3229},{7,2,18065},{0,20,13257},{0,17,6153},{0,12,13481},{12,0,18065},{0,12,13481},{0,15,1},{0,15,1},{0,15,1},{0,9,1},{0,8,1105},{0,7,585},{0,7,585},{0,4,596},{0,4,1273},{0,4,740},{0,15,1}, +{0,15,1},{0,15,1},{0,9,1},{2,1,1105},{0,7,585},{0,7,585},{0,4,596},{4,0,1105},{0,4,596},{9,6,9248},{0,30,2866},{0,21,389},{0,19,3229},{9,6,9248},{14,5,9248},{0,19,3229},{0,14,9248},{14,5,9248},{0,14,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,31,38380},{0,31,6720},{0,23,245}, +{0,22,3864},{2,31,50747},{0,31,24961},{0,22,8353},{0,19,25735},{0,22,65535},{0,17,41319},{1,31,10152},{0,31,2624},{0,23,229},{0,20,2980},{5,10,19334},{0,20,13769},{0,18,6243},{0,13,14116},{12,1,19334},{0,13,14116},{0,18,0},{0,18,0},{0,18,0},{0,11,1},{0,9,1513},{0,8,772},{0,8,772},{0,5,821},{0,5,1750},{0,4,1028},{0,18,0},{0,18,0},{0,18,0},{0,11,1},{1,6,1513}, 
+{0,8,772},{0,8,772},{0,5,821},{3,2,1513},{0,5,821},{10,5,9248},{0,31,2624},{0,23,229},{0,20,2980},{10,5,9248},{12,9,9248},{0,20,2980},{0,15,9248},{12,9,9248},{0,15,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,31,43788},{0,31,9024},{0,24,126},{0,23,3525},{2,31,56155},{0,31,26241},{0,23,8425}, +{0,20,26793},{0,23,65535},{0,18,43819},{2,31,10787},{0,31,2624},{0,24,122},{0,21,2701},{8,0,20689},{0,22,14385},{0,19,6369},{0,13,14756},{12,2,20689},{0,13,14756},{0,21,1},{0,21,1},{0,21,1},{0,12,4},{0,10,1989},{0,9,1018},{0,9,1018},{0,6,1096},{0,5,2294},{0,5,1334},{0,21,1},{0,21,1},{0,21,1},{0,12,4},{1,7,1985},{0,9,1018},{0,9,1018},{0,6,1096},{1,5,1985}, +{0,6,1096},{12,1,9248},{0,31,2624},{0,24,122},{0,21,2701},{12,1,9248},{15,7,9248},{0,21,2701},{0,16,9250},{15,7,9248},{0,16,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{2,31,49964},{1,31,11512},{0,25,41},{0,24,3109},{2,31,62331},{0,31,28289},{0,24,8585},{0,21,27848},{0,23,65535},{0,19,46459},{2,31,11395}, +{0,31,2880},{0,25,37},{0,22,2440},{8,2,22129},{0,23,15030},{0,20,6509},{0,14,15441},{13,2,22129},{0,14,15441},{0,23,1},{0,23,1},{0,23,1},{0,14,0},{0,12,2525},{0,10,1300},{0,10,1300},{0,6,1384},{0,6,2905},{0,6,1708},{0,23,1},{0,23,1},{0,23,1},{0,14,0},{3,2,2521},{0,10,1300},{0,10,1300},{0,6,1384},{5,1,2521},{0,6,1384},{11,7,9248},{0,31,2880},{0,25,37}, +{0,22,2440},{11,7,9248},{13,11,9248},{0,22,2440},{0,17,9250},{13,11,9248},{0,17,9250},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,31,57022},{1,31,15166},{0,26,20},{0,25,2804},{2,31,65535},{0,31,31511},{0,25,8733},{0,22,29095},{0,26,65535},{0,20,49444},{2,31,12385},{0,31,3474},{0,26,4},{0,23,2173},{8,4,23851}, 
+{0,23,15948},{0,21,6729},{0,15,16274},{14,2,23851},{0,15,16274},{0,26,0},{0,26,0},{0,26,0},{0,16,4},{0,13,3200},{0,11,1665},{0,11,1665},{0,7,1754},{0,7,3691},{0,6,2185},{0,26,0},{0,26,0},{0,26,0},{0,16,4},{1,10,3200},{0,11,1665},{0,11,1665},{0,7,1754},{5,2,3200},{0,7,1754},{11,10,9248},{1,31,3226},{0,26,4},{0,23,2173},{11,10,9248},{5,23,9248},{0,23,2173}, +{0,18,9248},{5,23,9248},{0,18,9248},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,31,63870},{1,31,19230},{0,27,45},{0,27,2520},{3,31,65535},{0,31,35191},{0,26,8925},{0,23,30250},{0,28,65535},{0,21,52374},{3,31,13449},{1,31,4026},{0,27,29},{0,24,1901},{3,24,25472},{0,26,16706},{0,22,6963},{0,16,17124},{14,3,25472}, +{0,16,17124},{0,29,1},{0,29,1},{0,29,1},{0,17,1},{0,14,3874},{0,13,2084},{0,13,2084},{0,8,2165},{0,8,4466},{0,7,2627},{0,29,1},{0,29,1},{0,29,1},{0,17,1},{4,1,3872},{0,13,2084},{0,13,2084},{0,8,2165},{3,5,3872},{0,8,2165},{12,9,9248},{2,31,3593},{0,27,29},{0,24,1901},{12,9,9248},{14,13,9248},{0,24,1901},{0,19,9248},{14,13,9248},{0,19,9248},{0,0,0}, +{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{3,31,65535},{1,31,24002},{0,28,109},{0,27,2268},{3,31,65535},{1,31,39095},{0,27,8825},{0,24,30825},{0,28,65535},{0,22,54996},{3,31,14345},{1,31,4766},{0,29,102},{0,26,1697},{3,26,26744},{0,28,17104},{0,23,6957},{0,17,17625},{15,3,26744},{0,17,17625},{0,31,5},{0,31,5},{0,31,5}, +{0,19,5},{0,16,4418},{0,14,2306},{0,14,2306},{0,9,2420},{0,8,5122},{0,8,2997},{0,31,5},{0,31,5},{0,31,5},{0,19,5},{3,6,4418},{0,14,2306},{0,14,2306},{0,9,2420},{3,6,4418},{0,9,2420},{14,5,9248},{4,31,3904},{1,28,1},{0,26,1693},{14,5,9248},{12,17,9248},{0,26,1693},{0,20,9250},{12,17,9248},{0,20,9250},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,1,1}, 
+{0,1,1},{0,1,1},{0,0,4},{0,0,4},{0,0,4},{3,31,65535},{1,31,29442},{0,29,330},{0,28,2105},{3,31,65535},{1,31,42151},{0,28,7781},{0,25,30108},{0,29,65535},{0,22,56388},{4,31,14976},{2,31,5434},{1,29,62},{0,27,1580},{11,0,26744},{0,29,16547},{0,24,6221},{0,18,17124},{13,7,26744},{0,18,17124},{0,31,181},{0,31,181},{0,31,181},{0,20,101},{0,19,4420},{0,16,2005},{0,16,2005}, +{0,10,2165},{0,9,5389},{0,9,2925},{1,31,37},{1,31,37},{1,31,37},{1,19,37},{5,2,4418},{0,16,2005},{0,16,2005},{0,10,2165},{8,1,4418},{0,10,2165},{13,11,9248},{4,31,4160},{1,29,26},{0,27,1480},{13,11,9248},{15,15,9248},{0,27,1480},{0,21,9250},{15,15,9248},{0,21,9250},{0,0,100},{0,0,100},{0,0,100},{0,0,100},{0,3,1},{0,3,1},{0,3,1},{0,2,0},{0,1,34}, +{0,1,34},{4,31,65535},{2,31,36070},{0,30,822},{0,30,2062},{3,31,65535},{1,31,46660},{0,29,6696},{0,26,29322},{0,31,65535},{0,23,58077},{4,31,15507},{3,31,6253},{1,31,109},{0,28,1646},{11,3,26744},{0,31,15992},{0,26,5346},{0,19,16582},{11,11,26744},{0,19,16582},{1,31,329},{1,31,329},{1,31,329},{1,21,266},{0,22,4420},{0,18,1737},{0,18,1737},{0,11,1898},{0,11,5707},{0,10,2885},{1,31,73}, +{1,31,73},{1,31,73},{1,21,10},{5,5,4418},{0,18,1737},{0,18,1737},{0,11,1898},{5,7,4418},{0,11,1898},{13,14,9248},{5,31,4570},{2,30,4},{0,28,1285},{13,14,9248},{8,25,9248},{0,28,1285},{0,22,9248},{8,25,9248},{0,22,9248},{1,0,265},{1,0,265},{1,0,265},{1,0,265},{0,6,1},{0,6,1},{0,6,1},{0,4,1},{0,3,97},{0,3,97},{4,31,65535},{2,31,40786},{0,31,1405}, +{0,30,2138},{4,31,65535},{1,31,49800},{0,30,5634},{0,27,27967},{0,31,65535},{0,24,58770},{5,31,15531},{3,31,6593},{2,31,61},{1,29,1533},{12,2,26259},{0,31,15284},{0,27,4514},{0,20,15812},{13,10,26259},{0,20,15812},{1,31,633},{1,31,633},{1,31,633},{1,22,381},{0,24,4418},{0,20,1480},{0,20,1480},{0,12,1640},{0,12,6125},{0,11,2891},{2,31,61},{2,31,61},{2,31,61},{2,21,37},{7,1,4418}, 
+{0,20,1480},{0,20,1480},{0,12,1640},{10,2,4418},{0,12,1640},{13,16,8978},{6,31,4777},{2,31,25},{0,29,1040},{13,16,8978},{15,18,8978},{0,29,1040},{0,23,8980},{15,18,8978},{0,23,8980},{1,0,377},{1,0,377},{1,0,377},{1,0,377},{0,9,0},{0,9,0},{0,9,0},{0,5,4},{0,4,193},{0,4,193},{4,31,65535},{2,31,40898},{1,31,2217},{0,31,2125},{4,31,65535},{1,31,47976},{0,30,4194}, +{0,27,24703},{0,31,65535},{0,24,56130},{5,31,14379},{4,31,6051},{2,31,173},{2,29,1284},{9,14,24371},{0,31,13716},{0,28,3402},{0,21,13989},{14,10,24371},{0,21,13989},{1,31,1193},{1,31,1193},{1,31,1193},{1,24,617},{0,27,4420},{0,22,1280},{0,22,1280},{0,13,1445},{0,14,6509},{0,12,2945},{2,31,173},{2,31,173},{2,31,173},{2,23,5},{5,10,4418},{0,22,1280},{0,22,1280},{0,13,1445},{12,1,4418}, +{0,13,1445},{14,14,7938},{7,31,4253},{3,31,9},{0,29,656},{14,14,7938},{14,20,7938},{0,29,656},{0,23,7956},{14,20,7938},{0,23,7956},{1,0,617},{1,0,617},{1,0,617},{1,0,617},{0,11,4},{0,11,4},{0,11,4},{0,7,0},{0,5,325},{0,5,325},{4,31,65535},{2,31,41266},{1,31,3033},{0,31,2333},{4,31,65535},{1,31,46408},{0,30,3010},{0,27,21695},{0,31,65535},{0,25,53636},{6,31,13140}, +{4,31,5571},{3,31,157},{2,29,932},{11,9,22568},{0,31,12404},{0,28,2474},{0,21,12245},{14,11,22568},{0,21,12245},{2,31,1630},{2,31,1630},{2,31,1630},{1,26,989},{0,29,4420},{0,23,1090},{0,23,1090},{0,14,1268},{0,15,6926},{0,13,3029},{3,31,157},{3,31,157},{3,31,157},{3,23,37},{8,1,4418},{0,23,1090},{0,23,1090},{0,14,1268},{10,5,4418},{0,14,1268},{15,12,6962},{7,31,3709},{4,31,1}, +{0,30,353},{15,12,6962},{13,22,6962},{0,30,353},{0,24,6970},{13,22,6962},{0,24,6970},{1,0,985},{1,0,985},{1,0,985},{1,0,985},{0,14,0},{0,14,0},{0,14,0},{0,8,4},{0,6,493},{0,6,493},{4,31,65535},{2,31,41986},{1,31,4257},{0,31,2873},{4,31,65535},{1,31,44950},{0,30,1984},{0,28,18569},{0,31,65535},{0,25,51026},{6,31,11934},{5,31,5125},{4,31,296},{3,29,706},{13,4,20642}, 
+{0,31,11234},{0,29,1634},{0,22,10422},{15,11,20642},{0,22,10422},{2,31,2350},{2,31,2350},{2,31,2350},{2,26,1450},{0,31,4450},{0,25,949},{0,25,949},{0,16,1096},{0,16,7397},{0,14,3171},{4,31,296},{4,31,296},{4,31,296},{3,25,10},{8,4,4418},{0,25,949},{0,25,949},{0,16,1096},{14,2,4418},{0,16,1096},{14,17,5941},{7,31,3250},{5,31,0},{0,30,128},{14,17,5941},{13,23,5941},{0,30,128}, +{0,24,5953},{13,23,5941},{0,24,5953},{2,0,1450},{2,0,1450},{2,0,1450},{2,0,1450},{0,17,0},{0,17,0},{0,17,0},{0,10,1},{0,8,697},{0,8,697},{4,31,65535},{2,31,42898},{1,31,5617},{1,31,3337},{4,31,65535},{1,31,43926},{0,31,1250},{0,28,15865},{0,31,65535},{0,25,48978},{7,31,10938},{5,31,4773},{4,31,360},{3,30,509},{12,9,19021},{1,31,10246},{0,30,1088},{0,23,8945},{14,13,19021}, +{0,23,8945},{2,31,3262},{2,31,3262},{2,31,3262},{2,28,1822},{1,31,4682},{0,28,776},{0,28,776},{0,17,925},{0,18,7893},{0,15,3333},{4,31,360},{4,31,360},{4,31,360},{4,25,37},{3,25,4418},{0,28,776},{0,28,776},{0,17,925},{12,6,4418},{0,17,925},{15,15,5101},{8,31,2777},{6,31,9},{0,31,25},{15,15,5101},{11,26,5101},{0,31,25},{0,25,5105},{11,26,5101},{0,25,5105},{2,0,1818}, +{2,0,1818},{2,0,1818},{2,0,1818},{0,20,1},{0,20,1},{0,20,1},{0,12,1},{0,9,925},{0,9,925},{4,31,65535},{2,31,44066},{1,31,7233},{1,31,3993},{4,31,65535},{2,31,43110},{0,31,738},{0,28,13417},{0,31,65535},{0,25,47186},{7,31,9978},{6,31,4467},{5,31,452},{4,30,357},{15,1,17485},{2,31,9441},{0,30,704},{0,24,7570},{15,13,17485},{0,24,7570},{3,31,4058},{3,31,4058},{3,31,4058}, +{2,29,2315},{1,31,4874},{0,29,610},{0,29,610},{0,18,772},{0,20,8427},{0,16,3497},{5,31,452},{5,31,452},{5,31,452},{4,27,5},{9,6,4418},{0,29,610},{0,29,610},{0,18,772},{14,5,4418},{0,18,772},{15,16,4325},{8,31,2377},{6,31,25},{0,31,9},{15,16,4325},{15,22,4325},{0,31,9},{0,25,4337},{15,22,4325},{0,25,4337},{2,0,2314},{2,0,2314},{2,0,2314},{2,0,2314},{0,22,1}, 
+{0,22,1},{0,22,1},{0,13,4},{0,10,1189},{0,10,1189},{5,31,65535},{3,31,45090},{1,31,9105},{1,31,4905},{4,31,65535},{2,31,42326},{0,31,482},{0,28,11225},{0,31,65535},{0,26,45590},{7,31,9274},{6,31,4179},{5,31,612},{4,30,245},{14,6,16034},{3,31,8633},{0,31,482},{0,24,6242},{15,14,16034},{0,24,6242},{3,31,5066},{3,31,5066},{3,31,5066},{2,31,2939},{1,31,5322},{0,31,482},{0,31,482}, +{0,19,637},{0,20,8939},{0,17,3725},{5,31,612},{5,31,612},{5,31,612},{5,27,37},{10,5,4418},{0,31,482},{0,31,482},{0,19,637},{12,9,4418},{0,19,637},{13,24,3613},{9,31,1973},{7,31,9},{2,31,1},{13,24,3613},{15,23,3613},{2,31,1},{0,26,3617},{15,23,3613},{0,26,3617},{2,0,2938},{2,0,2938},{2,0,2938},{2,0,2938},{0,25,1},{0,25,1},{0,25,1},{0,15,0},{0,11,1489}, +{0,11,1489},{5,31,65535},{3,31,46530},{1,31,11517},{1,31,6237},{4,31,65535},{2,31,41750},{0,31,500},{0,29,8976},{0,31,65535},{0,26,43934},{8,31,8225},{7,31,3853},{6,31,680},{5,30,109},{11,18,14504},{4,31,7667},{0,31,500},{0,25,4979},{10,21,14504},{0,25,4979},{3,31,6506},{3,31,6506},{3,31,6506},{3,31,3701},{2,31,6019},{0,31,500},{0,31,500},{0,20,520},{0,22,9629},{0,18,4035},{6,31,680}, +{6,31,680},{6,31,680},{5,29,10},{10,8,4418},{0,31,500},{0,31,500},{0,20,520},{11,12,4418},{0,20,520},{15,19,2888},{10,31,1537},{8,31,16},{3,31,4},{15,19,2888},{13,26,2888},{3,31,4},{0,26,2906},{13,26,2888},{0,26,2906},{3,0,3697},{3,0,3697},{3,0,3697},{3,0,3697},{0,28,1},{0,28,1},{0,28,1},{0,17,4},{0,11,1930},{0,11,1930},{5,31,65535},{3,31,48082},{1,31,13933}, +{1,31,7693},{4,31,65535},{2,31,41510},{0,31,788},{0,29,7120},{0,31,65535},{0,26,42734},{8,31,7409},{7,31,3693},{7,31,884},{6,30,116},{13,13,13235},{4,31,6899},{1,31,628},{0,25,3987},{15,16,13235},{0,25,3987},{4,31,7686},{4,31,7686},{4,31,7686},{3,31,4437},{2,31,6659},{0,31,788},{0,31,788},{0,21,421},{0,23,10286},{0,20,4305},{7,31,884},{7,31,884},{7,31,884},{6,29,37},{9,14,4418}, 
+{1,31,628},{1,31,628},{0,21,421},{14,10,4418},{0,21,421},{15,20,2312},{10,31,1217},{8,31,16},{4,31,9},{15,20,2312},{12,28,2312},{4,31,9},{0,27,2314},{12,28,2312},{0,27,2314},{3,0,4337},{3,0,4337},{3,0,4337},{3,0,4337},{0,30,1},{0,30,1},{0,30,1},{0,18,1},{0,13,2329},{0,13,2329},{5,31,65535},{3,31,49890},{2,31,16310},{1,31,9405},{4,31,65535},{2,31,41526},{0,31,1332}, +{0,29,5520},{0,31,65535},{0,26,41790},{8,31,6849},{8,31,3601},{7,31,980},{6,31,5},{15,8,12051},{5,31,6275},{2,31,801},{0,26,3066},{11,22,12051},{0,26,3066},{4,31,9062},{4,31,9062},{4,31,9062},{3,31,5429},{2,31,7555},{1,31,1172},{1,31,1172},{0,23,325},{0,23,11118},{0,20,4625},{7,31,980},{7,31,980},{7,31,980},{6,31,5},{11,10,4418},{2,31,801},{2,31,801},{0,23,325},{5,23,4418}, +{0,23,325},{13,28,1800},{11,31,949},{9,31,4},{6,31,1},{13,28,1800},{11,30,1800},{6,31,1},{0,27,1818},{11,30,1800},{0,27,1818},{3,0,5105},{3,0,5105},{3,0,5105},{3,0,5105},{0,31,36},{0,31,36},{0,31,36},{0,20,4},{0,15,2741},{0,15,2741},{5,31,65535},{3,31,51954},{2,31,18790},{1,31,11373},{5,31,65535},{2,31,41798},{0,31,2132},{0,29,4176},{0,31,65535},{0,27,41092},{9,31,6153}, +{8,31,3297},{7,31,1332},{7,31,37},{14,13,10952},{5,31,5763},{3,31,965},{0,27,2291},{11,23,10952},{0,27,2291},{4,31,10694},{4,31,10694},{4,31,10694},{4,31,6566},{3,31,8619},{1,31,1716},{1,31,1716},{0,24,221},{0,26,11876},{0,22,4989},{7,31,1332},{7,31,1332},{7,31,1332},{7,31,37},{12,9,4418},{3,31,965},{3,31,965},{0,24,221},{14,13,4418},{0,24,221},{14,26,1352},{11,31,725},{10,31,0}, +{7,31,1},{14,26,1352},{15,26,1352},{7,31,1},{0,28,1360},{15,26,1352},{0,28,1360},{3,0,6001},{3,0,6001},{3,0,6001},{3,0,6001},{0,31,196},{0,31,196},{0,31,196},{0,21,1},{0,16,3130},{0,16,3130},{5,31,65535},{3,31,54582},{2,31,21886},{1,31,13893},{5,31,65535},{2,31,42410},{0,31,3338},{0,30,2841},{0,31,65535},{0,27,40390},{9,31,5649},{9,31,3249},{8,31,1325},{7,31,109},{14,15,9818}, 
+{6,31,5258},{4,31,1108},{0,27,1589},{12,23,9818},{0,27,1589},{5,31,12376},{5,31,12376},{5,31,12376},{4,31,7844},{3,31,9861},{1,31,2634},{1,31,2634},{0,25,136},{0,28,12696},{0,23,5429},{8,31,1325},{8,31,1325},{8,31,1325},{7,31,109},{15,2,4418},{4,31,1108},{4,31,1108},{0,25,136},{13,16,4418},{0,25,136},{15,24,925},{12,31,505},{11,31,1},{8,31,1},{15,24,925},{15,27,925},{8,31,1}, +{0,28,937},{15,27,925},{0,28,937},{4,0,7060},{4,0,7060},{4,0,7060},{4,0,7060},{1,31,425},{1,31,425},{1,31,425},{0,23,0},{0,17,3665},{0,17,3665},{5,31,65535},{3,31,57190},{2,31,24910},{1,31,16405},{5,31,65535},{2,31,43226},{0,31,4682},{0,30,1833},{0,31,65535},{0,27,40038},{10,31,5202},{9,31,3073},{8,31,1565},{8,31,277},{13,20,8901},{7,31,4814},{5,31,1300},{0,28,1021},{13,23,8902}, +{0,28,1021},{5,31,14136},{5,31,14136},{5,31,14136},{4,31,9252},{3,31,11237},{2,31,3590},{2,31,3590},{0,26,85},{0,29,13491},{0,23,5925},{8,31,1565},{8,31,1565},{8,31,1565},{8,31,277},{11,18,4418},{5,31,1300},{5,31,1300},{0,26,85},{10,21,4418},{0,26,85},{15,25,617},{13,31,365},{12,31,16},{9,31,9},{15,25,617},{13,30,613},{9,31,9},{0,29,617},{13,30,613},{0,29,617},{4,0,7956}, +{4,0,7956},{4,0,7956},{4,0,7956},{1,31,697},{1,31,697},{1,31,697},{0,25,4},{0,18,4181},{0,18,4181},{5,31,65535},{3,31,60054},{2,31,28190},{2,31,18895},{5,31,65535},{2,31,44298},{1,31,6090},{0,30,1081},{0,31,65535},{0,27,39942},{10,31,4850},{10,31,3107},{9,31,1709},{8,31,325},{13,22,8069},{7,31,4574},{6,31,1553},{0,29,602},{12,25,8069},{0,29,602},{6,31,16067},{6,31,16067},{6,31,16067}, +{5,31,10872},{4,31,12824},{2,31,4662},{2,31,4662},{0,27,52},{0,30,14340},{0,25,6449},{9,31,1709},{9,31,1709},{9,31,1709},{8,31,325},{13,14,4418},{6,31,1553},{6,31,1553},{0,27,52},{8,25,4418},{0,27,52},{15,27,365},{13,31,205},{12,31,16},{11,31,4},{15,27,365},{13,31,365},{11,31,4},{0,29,377},{13,31,365},{0,29,377},{4,0,8980},{4,0,8980},{4,0,8980},{4,0,8980},{1,31,1097}, 
+{1,31,1097},{1,31,1097},{0,26,1},{0,20,4682},{0,20,4682},{6,31,65535},{4,31,58981},{2,31,29926},{2,31,19751},{5,31,65535},{3,31,43402},{1,31,6910},{0,30,765},{0,31,65535},{0,28,34909},{11,31,4502},{10,31,3011},{9,31,2045},{9,31,557},{15,17,7322},{8,31,4242},{7,31,1781},{0,29,314},{12,26,7322},{0,29,314},{6,31,16739},{6,31,16739},{6,31,16739},{5,31,11492},{4,31,13636},{3,31,5586},{3,31,5586}, +{0,28,65},{0,31,14139},{0,26,6041},{9,31,2045},{9,31,2045},{9,31,2045},{9,31,557},{14,13,4418},{7,31,1781},{7,31,1781},{0,28,29},{11,23,4418},{0,28,29},{15,28,181},{14,31,117},{13,31,4},{12,31,0},{15,28,181},{15,29,181},{12,31,0},{0,30,185},{15,29,181},{0,30,185},{4,0,9376},{4,0,9376},{4,0,9376},{4,0,9376},{2,31,1405},{2,31,1405},{2,31,1405},{0,28,40},{0,21,4520}, +{0,21,4520},{6,31,65535},{4,31,57316},{3,31,30345},{3,31,20808},{6,31,65535},{3,31,41449},{2,31,8321},{1,31,301},{0,31,65535},{0,28,28330},{11,31,4232},{11,31,3093},{10,31,2248},{9,31,809},{15,19,6584},{9,31,3992},{7,31,2105},{0,30,77},{13,26,6584},{0,30,77},{7,31,17380},{7,31,17380},{7,31,17380},{6,31,12161},{5,31,14315},{3,31,6405},{3,31,6405},{1,29,53},{0,31,13860},{0,28,5286},{10,31,2248}, +{10,31,2248},{10,31,2248},{9,31,809},{14,16,4418},{7,31,2105},{7,31,2105},{0,30,13},{15,20,4418},{0,30,13},{15,29,52},{14,31,36},{14,31,0},{13,31,9},{15,29,52},{15,30,50},{13,31,9},{0,30,68},{15,30,50},{0,30,68},{5,0,9250},{5,0,9250},{5,0,9250},{5,0,9250},{2,31,1549},{2,31,1549},{2,31,1549},{1,28,2},{0,23,4114},{0,23,4114},{6,31,65535},{5,31,55908},{4,31,31583}, +{3,31,21256},{6,31,65535},{4,31,39740},{2,31,9073},{1,31,285},{0,31,65535},{0,29,23356},{12,31,4011},{11,31,3125},{11,31,2500},{10,31,1037},{13,27,6019},{10,31,3738},{8,31,2340},{0,31,4},{13,27,6019},{0,31,4},{7,31,17796},{7,31,17796},{7,31,17796},{6,31,12625},{6,31,14996},{4,31,7139},{4,31,7139},{1,30,86},{0,31,14020},{0,29,4652},{11,31,2500},{11,31,2500},{11,31,2500},{10,31,1037},{13,22,4418}, 
+{8,31,2340},{8,31,2340},{0,31,4},{12,25,4418},{0,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{15,31,4},{0,31,4},{15,31,4},{0,31,4},{6,0,9376},{6,0,9376},{6,0,9376},{6,0,9376},{3,31,1765},{3,31,1765},{3,31,1765},{1,30,50},{0,25,3877},{0,25,3877},{7,31,65535},{5,31,53236},{4,31,30487},{4,31,21367},{6,31,65535},{4,31,37332},{3,31,9385}, +{2,31,36},{0,31,65535},{0,29,18680},{12,31,3443},{12,31,2843},{11,31,2248},{10,31,997},{14,25,5163},{10,31,3218},{9,31,2120},{1,31,4},{12,29,5163},{1,31,4},{7,31,17504},{7,31,17504},{7,31,17504},{7,31,12569},{6,31,14328},{4,31,7227},{4,31,7227},{2,31,20},{0,31,13376},{0,29,3944},{11,31,2248},{11,31,2248},{11,31,2248},{10,31,997},{15,17,3872},{9,31,2120},{9,31,2120},{1,31,4},{12,26,3872}, +{1,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{6,0,9248},{6,0,9248},{6,0,9248},{6,0,9248},{3,31,2005},{3,31,2005},{3,31,2005},{2,30,5},{0,27,3545},{0,27,3545},{7,31,65535},{6,31,50785},{4,31,29687},{4,31,20567},{7,31,65535},{4,31,35412},{3,31,8985},{2,31,196},{1,31,65535},{0,29,14712},{12,31,2883}, +{12,31,2283},{12,31,1922},{11,31,821},{13,29,4267},{11,31,2694},{10,31,1745},{3,31,4},{14,27,4267},{3,31,4},{8,31,16610},{8,31,16610},{8,31,16610},{7,31,12185},{6,31,13528},{5,31,6915},{5,31,6915},{2,31,52},{1,31,12556},{0,30,3314},{12,31,1922},{12,31,1922},{12,31,1922},{11,31,821},{12,28,3200},{10,31,1745},{10,31,1745},{3,31,4},{11,28,3200},{3,31,4},{15,31,0},{15,31,0},{15,31,0}, +{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{6,0,9376},{6,0,9376},{6,0,9376},{6,0,9376},{4,31,2250},{4,31,2250},{4,31,2250},{2,31,52},{0,28,3170},{0,28,3170},{7,31,65535},{6,31,47239},{5,31,28065},{5,31,20409},{7,31,65535},{5,31,32574},{4,31,8965},{3,31,54},{1,31,65206},{0,30,10964},{13,31,2326},{12,31,1806},{12,31,1445},{11,31,650},{13,30,3361}, 
+{11,31,2091},{10,31,1322},{4,31,0},{12,30,3361},{4,31,0},{8,31,15584},{8,31,15584},{8,31,15584},{7,31,12059},{7,31,12522},{6,31,6811},{6,31,6811},{3,31,50},{1,31,11710},{0,31,2834},{12,31,1445},{12,31,1445},{12,31,1445},{11,31,650},{14,23,2521},{10,31,1322},{10,31,1322},{4,31,0},{15,24,2521},{4,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0}, +{0,31,0},{15,31,0},{0,31,0},{7,0,9250},{7,0,9250},{7,0,9250},{7,0,9250},{5,31,2600},{5,31,2600},{5,31,2600},{3,31,50},{0,31,2834},{0,31,2834},{8,31,65535},{6,31,44903},{5,31,27361},{5,31,19705},{7,31,64494},{5,31,30846},{4,31,8677},{3,31,470},{2,31,60777},{0,30,8308},{13,31,1782},{13,31,1510},{12,31,1157},{12,31,557},{13,31,2646},{11,31,1691},{11,31,1066},{5,31,4},{15,27,2646}, +{5,31,4},{8,31,14944},{8,31,14944},{8,31,14944},{8,31,11696},{7,31,11850},{6,31,6555},{6,31,6555},{4,31,164},{2,31,11097},{0,31,2610},{12,31,1157},{12,31,1157},{12,31,1157},{12,31,557},{15,21,1985},{11,31,1066},{11,31,1066},{5,31,4},{15,25,1985},{5,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{7,0,9410}, +{7,0,9410},{7,0,9410},{7,0,9410},{5,31,2792},{5,31,2792},{5,31,2792},{4,31,164},{0,31,2610},{0,31,2610},{8,31,63584},{7,31,42019},{6,31,25930},{5,31,19769},{8,31,60273},{6,31,28860},{5,31,8761},{4,31,276},{3,31,56253},{0,30,6420},{13,31,1366},{13,31,1094},{13,31,925},{12,31,397},{15,25,2018},{12,31,1298},{11,31,794},{7,31,4},{13,30,2017},{7,31,4},{9,31,14244},{9,31,14244},{9,31,14244}, +{8,31,11312},{8,31,11249},{7,31,6499},{7,31,6499},{4,31,260},{3,31,10457},{0,31,2642},{13,31,925},{13,31,925},{13,31,925},{12,31,397},{15,22,1513},{11,31,794},{11,31,794},{7,31,4},{14,27,1513},{7,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{8,0,9376},{8,0,9376},{8,0,9376},{8,0,9376},{6,31,3074}, 
+{6,31,3074},{6,31,3074},{4,31,260},{0,31,2642},{0,31,2642},{8,31,58848},{7,31,39683},{6,31,25130},{6,31,19007},{8,31,54849},{6,31,27132},{5,31,8569},{4,31,756},{4,31,51302},{0,31,5046},{13,31,1078},{13,31,806},{13,31,637},{12,31,365},{15,26,1473},{12,31,978},{12,31,617},{8,31,9},{14,29,1473},{8,31,9},{9,31,13604},{9,31,13604},{9,31,13604},{8,31,11184},{8,31,10433},{7,31,6339},{7,31,6339}, +{5,31,424},{4,31,9713},{0,31,2930},{13,31,637},{13,31,637},{13,31,637},{12,31,365},{14,27,1105},{12,31,617},{12,31,617},{8,31,9},{13,29,1105},{8,31,9},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{8,0,9248},{8,0,9248},{8,0,9248},{8,0,9248},{6,31,3330},{6,31,3330},{6,31,3330},{5,31,424},{0,31,2930}, +{0,31,2930}, diff --git a/thirdparty/basisu/transcoder/basisu_transcoder_tables_pvrtc2_alpha_33.inc b/thirdparty/basisu/transcoder/basisu_transcoder_tables_pvrtc2_alpha_33.inc new file mode 100644 index 000000000..3b9d7022e --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_transcoder_tables_pvrtc2_alpha_33.inc @@ -0,0 +1,481 @@ +{0,0,20},{0,0,20},{0,0,97},{0,0,145},{0,0,56},{0,0,104},{0,0,181},{0,0,406},{0,0,204},{0,0,442},{0,0,20},{0,0,20},{0,0,97},{0,0,145},{0,0,56},{0,0,104},{0,0,181},{0,0,406},{0,0,168},{0,0,406},{0,0,16},{0,0,16},{0,0,16},{0,0,64},{0,0,52},{0,0,100},{0,0,100},{0,0,325},{0,0,200},{0,0,361},{0,0,16}, +{0,0,16},{0,0,16},{0,0,64},{0,0,52},{0,0,100},{0,0,100},{0,0,325},{0,0,164},{0,0,325},{0,0,20},{0,0,20},{0,0,97},{0,0,145},{0,0,20},{0,0,68},{0,0,145},{0,0,306},{0,0,68},{0,0,306},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,1,126},{0,0,88},{0,0,53}, +{0,0,37},{0,0,116},{0,0,36},{0,0,1},{0,0,66},{0,0,88},{0,0,102},{0,1,126},{0,0,88},{0,0,53},{0,0,37},{0,0,116},{0,0,36},{0,0,1},{0,0,66},{0,0,52},{0,0,66},{0,0,52},{0,0,52},{0,0,52},{0,0,36},{0,0,16},{0,0,0},{0,0,0},{0,0,65},{0,0,52},{0,0,101},{0,0,52},{0,0,52},{0,0,52},{0,0,36},{0,0,16}, 
+{0,0,0},{0,0,0},{0,0,65},{0,0,16},{0,0,65},{0,1,90},{0,0,52},{0,0,17},{0,0,1},{0,1,90},{0,0,36},{0,0,1},{0,0,50},{0,0,36},{0,0,50},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,0,0},{0,0,0},{0,0,0},{0,0,16},{0,0,52},{0,0,52},{0,1,286},{0,1,310},{0,0,453},{0,0,373},{0,1,115},{0,1,307},{0,0,241}, +{0,0,130},{0,0,280},{0,0,70},{0,1,222},{0,1,246},{0,0,389},{0,0,309},{0,1,51},{1,0,195},{0,0,177},{0,0,66},{1,0,107},{0,0,66},{0,1,261},{0,1,261},{0,1,261},{0,0,324},{0,1,90},{0,0,192},{0,0,192},{0,0,81},{0,0,84},{0,0,21},{0,1,197},{0,1,197},{0,1,197},{0,0,260},{0,1,26},{0,0,128},{0,0,128},{0,0,17},{0,0,80}, +{0,0,17},{0,1,26},{0,1,50},{1,0,130},{1,0,74},{0,1,26},{1,0,26},{1,0,74},{0,0,50},{1,0,26},{0,0,50},{0,0,260},{0,0,260},{0,0,260},{0,0,260},{0,1,89},{0,1,89},{0,1,89},{0,0,80},{0,0,20},{0,0,20},{1,0,494},{1,0,550},{1,0,694},{1,0,702},{0,2,363},{0,1,291},{1,0,583},{1,0,631},{0,1,116},{1,0,428},{1,0,170}, +{1,0,226},{1,0,370},{1,0,378},{1,0,51},{0,1,35},{1,0,259},{1,0,307},{1,0,91},{1,0,307},{1,0,469},{1,0,469},{1,0,469},{1,0,477},{0,1,314},{0,1,290},{0,1,290},{1,0,406},{0,1,115},{1,0,203},{1,0,145},{1,0,145},{1,0,145},{1,0,153},{1,0,26},{1,0,34},{1,0,34},{1,0,82},{1,0,10},{1,0,82},{1,0,26},{0,1,50},{1,0,226}, +{1,0,234},{1,0,26},{0,1,26},{1,0,234},{0,0,306},{0,1,26},{0,0,306},{1,0,468},{1,0,468},{1,0,468},{1,0,468},{0,1,265},{0,1,265},{0,1,265},{1,0,325},{0,1,90},{0,1,90},{1,1,116},{1,1,124},{1,1,215},{1,1,271},{1,1,188},{1,1,252},{1,1,343},{1,1,606},{0,1,152},{0,1,392},{1,1,35},{1,1,43},{1,1,134},{1,1,190},{1,1,107}, +{1,1,171},{0,1,260},{0,1,356},{2,0,51},{0,1,356},{1,1,115},{1,1,115},{1,1,115},{1,1,171},{1,0,161},{1,0,241},{1,0,241},{1,0,469},{0,1,52},{0,1,292},{1,1,34},{1,1,34},{1,1,34},{1,1,90},{0,2,16},{1,0,160},{1,0,160},{0,1,256},{0,1,16},{0,1,256},{1,1,26},{1,1,34},{1,1,125},{0,1,116},{1,1,26},{2,0,26},{0,1,116}, 
+{0,1,356},{2,0,26},{0,1,356},{1,0,90},{1,0,90},{1,0,90},{1,0,90},{1,0,97},{1,0,97},{1,0,97},{1,0,145},{0,1,36},{0,1,36},{1,1,116},{1,1,60},{1,1,39},{1,1,31},{1,1,92},{1,1,28},{1,1,7},{1,1,94},{1,1,100},{1,1,142},{1,1,115},{1,1,59},{1,1,38},{1,1,30},{0,3,51},{1,1,27},{1,1,6},{1,1,93},{1,1,51}, +{1,1,93},{1,1,35},{1,1,35},{1,1,35},{1,1,27},{1,1,11},{1,1,3},{1,1,3},{1,1,90},{1,1,75},{1,1,138},{1,1,34},{1,1,34},{1,1,34},{1,1,26},{1,1,10},{1,1,2},{1,1,2},{1,1,89},{1,1,26},{1,1,89},{2,0,26},{1,1,34},{1,1,13},{1,1,5},{2,0,26},{3,0,26},{1,1,5},{0,1,68},{3,0,26},{0,1,68},{1,0,26}, +{1,0,26},{1,0,26},{1,0,26},{1,1,2},{1,1,2},{1,1,2},{1,1,26},{1,1,74},{1,1,74},{1,2,238},{1,2,286},{1,1,375},{1,1,303},{1,2,105},{1,1,316},{1,1,183},{1,1,94},{0,2,156},{1,1,46},{1,2,189},{1,2,237},{1,1,326},{1,1,254},{1,2,56},{2,1,232},{1,1,134},{1,1,45},{0,2,56},{1,1,45},{1,2,222},{1,2,222},{1,2,222}, +{1,1,267},{1,2,89},{1,1,147},{1,1,147},{1,1,58},{1,1,59},{1,1,10},{1,2,173},{1,2,173},{1,2,173},{1,1,218},{2,0,10},{1,1,98},{1,1,98},{1,1,9},{3,0,10},{1,1,9},{1,2,20},{1,2,68},{2,1,136},{2,1,72},{1,2,20},{0,2,20},{2,1,72},{0,1,36},{0,2,20},{0,1,36},{1,0,218},{1,0,218},{1,0,218},{1,0,218},{1,2,85}, +{1,2,85},{1,2,85},{1,1,58},{1,1,10},{1,1,10},{2,1,550},{2,1,598},{2,1,730},{2,1,730},{1,3,361},{1,2,265},{2,1,597},{1,1,606},{1,2,152},{2,1,408},{2,1,189},{2,1,237},{2,1,369},{2,1,369},{2,1,56},{1,2,40},{2,1,236},{2,1,264},{4,0,56},{2,1,264},{2,1,534},{2,1,534},{2,1,534},{2,1,534},{1,2,265},{1,2,265},{1,2,265}, +{1,1,410},{2,1,152},{2,1,212},{2,1,173},{2,1,173},{2,1,173},{2,1,173},{0,4,8},{2,1,40},{2,1,40},{2,1,68},{2,1,8},{2,1,68},{2,1,20},{1,2,36},{2,1,200},{2,1,200},{2,1,20},{4,0,20},{2,1,200},{0,1,260},{4,0,20},{0,1,260},{2,0,530},{2,0,530},{2,0,530},{2,0,530},{1,2,229},{1,2,229},{1,2,229},{1,2,325},{1,2,116}, 
+{1,2,116},{2,2,152},{2,2,168},{2,2,273},{2,2,337},{2,2,236},{2,2,316},{2,2,421},{2,2,706},{1,2,116},{1,2,436},{2,2,52},{2,2,68},{2,2,173},{2,2,237},{3,0,56},{1,2,211},{1,2,251},{1,2,411},{3,1,56},{1,2,411},{2,2,152},{2,2,152},{2,2,152},{2,2,216},{2,1,158},{2,1,230},{2,1,230},{2,1,438},{1,2,35},{1,2,315},{2,2,52}, +{2,2,52},{2,2,52},{2,2,116},{1,3,10},{2,1,130},{2,1,130},{1,2,290},{1,2,10},{1,2,290},{3,0,20},{2,2,52},{2,2,157},{1,2,130},{3,0,20},{3,1,20},{1,2,130},{0,2,410},{3,1,20},{0,2,410},{2,0,116},{2,0,116},{2,0,116},{2,0,116},{2,1,109},{2,1,109},{2,1,109},{2,1,149},{1,2,26},{1,2,26},{2,2,88},{2,2,40},{2,2,33}, +{2,2,33},{2,2,76},{2,2,28},{2,2,21},{2,2,130},{2,2,120},{2,2,190},{2,2,84},{2,2,36},{2,2,29},{2,2,29},{1,4,56},{2,2,24},{2,2,17},{2,2,126},{2,2,56},{2,2,126},{2,2,24},{2,2,24},{2,2,24},{2,2,24},{2,2,12},{2,2,12},{2,2,12},{2,2,121},{2,2,104},{2,2,181},{2,2,20},{2,2,20},{2,2,20},{2,2,20},{2,2,8}, +{2,2,8},{2,2,8},{2,2,117},{5,0,8},{2,2,117},{1,4,20},{2,2,20},{2,2,13},{2,2,13},{1,4,20},{2,2,20},{2,2,13},{0,2,90},{2,2,20},{0,2,90},{2,0,20},{2,0,20},{2,0,20},{2,0,20},{2,2,8},{2,2,8},{2,2,8},{2,2,40},{2,2,100},{2,2,100},{2,3,198},{2,3,270},{2,2,305},{2,2,241},{2,3,103},{2,2,252},{2,2,133}, +{2,2,66},{1,3,148},{2,2,30},{2,3,162},{2,3,234},{2,2,269},{2,2,205},{2,3,67},{2,2,216},{2,2,97},{2,2,30},{6,0,67},{2,2,30},{2,3,189},{2,3,189},{2,3,189},{2,2,216},{2,3,94},{2,2,108},{2,2,108},{2,2,41},{2,2,40},{2,2,5},{2,3,153},{2,3,153},{2,3,153},{2,2,180},{3,1,8},{2,2,72},{2,2,72},{2,2,5},{4,1,8}, +{2,2,5},{2,3,18},{2,3,90},{2,2,125},{2,2,61},{2,3,18},{6,0,18},{2,2,61},{0,2,26},{6,0,18},{0,2,26},{2,0,180},{2,0,180},{2,0,180},{2,0,180},{2,2,72},{2,2,72},{2,2,72},{2,2,40},{2,2,4},{2,2,4},{3,2,614},{2,3,622},{3,2,774},{3,2,766},{2,3,343},{2,3,247},{3,2,619},{2,2,514},{2,3,196},{2,2,382},{3,2,214}, 
+{3,2,254},{3,2,374},{3,2,366},{4,0,59},{2,3,51},{3,2,219},{3,2,227},{3,2,59},{3,2,227},{3,2,605},{3,2,605},{3,2,605},{3,2,597},{2,3,222},{2,3,246},{2,3,246},{2,2,345},{3,2,179},{2,2,213},{3,2,205},{3,2,205},{3,2,205},{3,2,197},{4,0,10},{3,2,50},{3,2,50},{3,2,58},{3,2,10},{3,2,58},{3,2,18},{2,3,26},{3,2,178}, +{3,2,170},{3,2,18},{5,1,18},{3,2,170},{0,2,218},{5,1,18},{0,2,218},{2,0,596},{2,0,596},{2,0,596},{2,0,596},{2,3,197},{2,3,197},{2,3,197},{2,2,296},{2,3,146},{2,3,146},{3,3,196},{3,3,220},{3,3,339},{3,3,411},{3,3,292},{3,3,388},{3,3,507},{3,3,814},{2,3,88},{2,3,488},{3,3,75},{3,3,99},{3,3,218},{3,3,290},{4,1,67}, +{2,3,168},{2,3,248},{2,3,472},{4,2,67},{2,3,472},{3,2,182},{3,2,182},{3,2,182},{3,2,246},{3,2,161},{3,2,225},{3,2,225},{3,2,413},{2,3,24},{3,2,308},{3,2,61},{3,2,61},{3,2,61},{3,2,125},{2,4,8},{3,2,104},{3,2,104},{3,2,292},{7,0,8},{3,2,292},{4,1,18},{3,3,74},{3,3,193},{2,3,148},{4,1,18},{4,2,18},{2,3,148}, +{0,3,468},{4,2,18},{0,3,468},{3,0,146},{3,0,146},{3,0,146},{3,0,146},{3,2,125},{3,2,125},{3,2,125},{3,2,157},{2,3,20},{2,3,20},{3,3,68},{3,3,28},{3,3,35},{3,3,43},{3,3,68},{3,3,36},{3,3,43},{3,3,174},{3,3,148},{3,3,246},{3,3,59},{3,3,19},{3,3,26},{3,3,34},{3,3,59},{3,3,27},{3,3,34},{2,3,152},{6,1,59}, +{2,3,152},{3,3,19},{3,3,19},{3,3,19},{3,3,27},{3,3,19},{3,3,27},{3,3,27},{3,3,158},{3,3,139},{3,3,230},{3,3,10},{3,3,10},{3,3,10},{3,3,18},{3,3,10},{3,3,18},{3,3,18},{2,3,136},{6,1,10},{2,3,136},{5,0,18},{3,3,10},{3,3,17},{3,3,25},{5,0,18},{3,3,18},{3,3,25},{0,3,116},{3,3,18},{0,3,116},{3,0,18}, +{3,0,18},{3,0,18},{3,0,18},{3,3,18},{3,3,18},{3,3,18},{3,3,58},{3,3,130},{3,3,130},{3,4,166},{3,4,262},{3,3,243},{3,3,187},{3,4,109},{3,3,196},{3,3,91},{3,3,46},{3,3,148},{3,3,22},{3,4,141},{3,4,237},{3,3,218},{3,3,162},{4,2,59},{3,3,171},{3,3,66},{3,3,21},{5,2,59},{3,3,21},{3,4,162},{3,4,162},{3,4,162}, 
+{3,3,171},{3,4,105},{3,3,75},{3,3,75},{3,3,30},{3,3,27},{3,3,6},{3,4,137},{3,4,137},{3,4,137},{3,3,146},{4,2,10},{3,3,50},{3,3,50},{3,3,5},{5,2,10},{3,3,5},{3,4,20},{3,4,116},{3,3,97},{3,3,41},{3,4,20},{7,1,20},{3,3,41},{0,3,20},{7,1,20},{0,3,20},{3,0,146},{3,0,146},{3,0,146},{3,0,146},{3,3,50}, +{3,3,50},{3,3,50},{3,3,26},{3,3,2},{3,3,2},{3,5,598},{3,4,550},{4,3,826},{4,3,810},{3,4,285},{3,4,237},{4,3,649},{3,3,430},{4,3,248},{3,3,310},{4,3,245},{4,3,277},{4,3,385},{4,3,369},{5,1,52},{3,4,68},{4,3,208},{4,3,196},{4,3,52},{4,3,196},{3,4,546},{3,4,546},{3,4,546},{3,4,594},{3,4,185},{3,4,233},{3,4,233}, +{3,3,286},{4,3,212},{3,3,166},{4,3,241},{4,3,241},{4,3,241},{4,3,225},{5,1,16},{4,3,64},{4,3,64},{4,3,52},{7,1,16},{4,3,52},{4,3,20},{3,4,20},{4,3,160},{4,3,144},{4,3,20},{6,2,20},{4,3,144},{0,3,180},{6,2,20},{0,3,180},{3,0,530},{3,0,530},{3,0,530},{3,0,530},{3,4,169},{3,4,169},{3,4,169},{3,3,250},{3,3,130}, +{3,3,130},{4,4,248},{4,4,280},{4,4,413},{4,4,493},{4,3,291},{4,3,451},{4,4,601},{4,3,835},{3,4,68},{3,4,548},{4,4,104},{4,4,136},{4,4,269},{4,4,349},{6,0,59},{3,4,131},{3,4,251},{3,4,539},{3,4,59},{3,4,539},{4,3,205},{4,3,205},{4,3,205},{4,3,261},{4,3,170},{4,3,226},{4,3,226},{4,3,394},{3,4,19},{4,3,275},{4,3,61}, +{4,3,61},{4,3,61},{4,3,117},{6,0,10},{4,3,82},{4,3,82},{4,3,250},{3,4,10},{4,3,250},{5,2,20},{4,4,100},{4,4,233},{3,4,170},{5,2,20},{5,3,20},{3,4,170},{0,4,530},{5,3,20},{0,4,530},{4,0,180},{4,0,180},{4,0,180},{4,0,180},{4,3,145},{4,3,145},{4,3,145},{4,3,169},{3,4,18},{3,4,18},{4,4,56},{4,4,24},{4,4,45}, +{4,4,61},{4,4,68},{4,4,52},{4,4,73},{4,4,226},{4,4,184},{3,4,292},{4,4,40},{4,4,8},{4,4,29},{4,4,45},{4,4,52},{4,4,36},{4,4,57},{3,4,171},{7,2,52},{3,4,171},{4,4,20},{4,4,20},{4,4,20},{4,4,36},{4,4,32},{4,4,48},{4,4,48},{4,4,201},{4,4,180},{3,4,267},{4,4,4},{4,4,4},{4,4,4},{4,4,20},{5,2,16}, 
+{4,4,32},{4,4,32},{3,4,146},{7,2,16},{3,4,146},{6,1,20},{4,4,4},{4,4,25},{4,4,41},{6,1,20},{4,4,20},{4,4,41},{0,4,146},{4,4,20},{0,4,146},{4,0,20},{4,0,20},{4,0,20},{4,0,20},{4,4,32},{4,4,32},{4,4,32},{4,4,80},{3,4,146},{3,4,146},{4,5,142},{4,5,262},{4,4,189},{4,4,141},{4,5,123},{4,4,148},{4,4,57}, +{4,4,34},{4,4,120},{4,4,22},{4,5,126},{4,5,246},{4,4,173},{4,4,125},{5,3,52},{4,4,132},{4,4,41},{4,4,18},{6,3,52},{4,4,18},{4,5,141},{4,5,141},{4,5,141},{4,4,132},{4,4,96},{4,4,48},{4,4,48},{4,4,25},{4,4,20},{4,4,13},{4,5,125},{4,5,125},{4,5,125},{4,4,116},{6,1,16},{4,4,32},{4,4,32},{4,4,9},{6,3,16}, +{4,4,9},{7,0,26},{4,5,146},{4,4,73},{4,4,25},{7,0,26},{3,5,26},{4,4,25},{0,4,18},{3,5,26},{0,4,18},{4,0,116},{4,0,116},{4,0,116},{4,0,116},{4,4,32},{4,4,32},{4,4,32},{4,4,16},{4,4,4},{4,4,4},{4,5,558},{4,5,486},{4,4,845},{4,4,733},{4,5,235},{4,5,235},{4,4,553},{4,4,354},{5,4,276},{4,4,246},{5,4,282}, +{5,4,306},{5,4,402},{5,4,378},{6,2,51},{4,5,91},{5,4,203},{5,4,171},{5,4,51},{5,4,171},{4,5,477},{4,5,477},{4,5,477},{4,5,549},{4,5,154},{4,5,226},{4,5,226},{4,4,233},{3,5,235},{4,4,125},{5,4,281},{5,4,281},{5,4,281},{5,4,257},{7,0,10},{5,4,82},{5,4,82},{5,4,50},{3,5,10},{5,4,50},{6,2,26},{4,5,18},{5,4,146}, +{5,4,122},{6,2,26},{7,3,26},{5,4,122},{0,4,146},{7,3,26},{0,4,146},{4,0,468},{4,0,468},{4,0,468},{4,0,468},{4,5,145},{4,5,145},{4,5,145},{4,4,208},{4,4,100},{4,4,100},{5,5,308},{5,5,348},{5,5,495},{5,5,583},{5,4,285},{5,4,429},{5,4,633},{5,4,781},{4,5,56},{4,5,616},{5,5,139},{5,5,179},{5,5,326},{5,5,414},{7,1,52}, +{4,5,100},{4,5,260},{5,4,612},{4,5,52},{5,4,612},{5,4,234},{5,4,234},{5,4,234},{5,4,282},{5,4,185},{5,4,233},{5,4,233},{5,4,381},{4,5,20},{5,4,248},{5,4,65},{5,4,65},{5,4,65},{5,4,113},{7,1,16},{5,4,64},{5,4,64},{5,4,212},{7,3,16},{5,4,212},{6,3,26},{5,5,130},{5,5,277},{4,5,196},{6,3,26},{6,4,26},{4,5,196}, 
+{0,4,596},{6,4,26},{0,4,596},{5,0,218},{5,0,218},{5,0,218},{5,0,218},{5,4,169},{5,4,169},{5,4,169},{5,4,185},{4,5,20},{4,5,20},{5,5,52},{5,5,28},{5,5,63},{5,5,87},{5,5,76},{5,5,76},{5,5,111},{5,5,286},{5,5,228},{4,5,296},{5,5,27},{5,5,3},{5,5,38},{5,5,62},{5,5,51},{5,5,51},{5,5,86},{4,5,196},{3,6,51}, +{4,5,196},{5,5,27},{5,5,27},{5,5,27},{5,5,51},{5,5,51},{5,5,75},{5,5,75},{5,5,250},{4,5,180},{4,5,260},{5,5,2},{5,5,2},{5,5,2},{5,5,26},{6,3,10},{5,5,50},{5,5,50},{4,5,160},{6,4,10},{4,5,160},{7,2,26},{5,5,2},{5,5,37},{5,5,61},{7,2,26},{5,5,26},{5,5,61},{0,5,180},{5,5,26},{0,5,180},{5,0,26}, +{5,0,26},{5,0,26},{5,0,26},{5,5,50},{5,5,50},{5,5,50},{5,5,106},{4,5,116},{4,5,116},{5,6,126},{5,5,220},{5,5,143},{5,5,103},{5,6,145},{5,5,108},{5,5,31},{5,5,30},{5,5,100},{5,5,30},{5,6,117},{5,5,211},{5,5,134},{5,5,94},{6,4,51},{5,5,99},{5,5,22},{5,5,21},{7,4,51},{5,5,21},{5,6,126},{5,6,126},{5,6,126}, +{5,5,99},{5,5,67},{5,5,27},{5,5,27},{5,5,26},{5,5,19},{5,5,26},{5,6,117},{5,6,117},{5,6,117},{5,5,90},{7,2,10},{5,5,18},{5,5,18},{5,5,17},{5,5,10},{5,5,17},{6,4,26},{5,5,130},{5,5,53},{5,5,13},{6,4,26},{7,4,26},{5,5,13},{0,5,20},{7,4,26},{0,5,20},{5,0,90},{5,0,90},{5,0,90},{5,0,90},{5,5,18}, +{5,5,18},{5,5,18},{5,5,10},{5,5,10},{5,5,10},{5,6,478},{5,6,430},{5,5,735},{5,5,631},{5,6,193},{5,6,241},{5,5,463},{5,5,286},{4,6,268},{5,5,190},{6,5,325},{5,6,309},{6,5,425},{6,5,393},{7,3,56},{6,5,120},{6,5,204},{6,5,152},{6,5,56},{6,5,152},{5,6,414},{5,6,414},{5,6,414},{5,6,510},{5,6,129},{5,6,225},{5,6,225}, +{5,5,186},{5,5,195},{5,5,90},{5,6,293},{5,6,293},{5,6,293},{6,5,293},{5,6,8},{6,5,104},{6,5,104},{6,5,52},{4,6,8},{6,5,52},{7,3,20},{5,6,20},{6,5,136},{6,5,104},{7,3,20},{6,5,20},{6,5,104},{0,5,116},{6,5,20},{0,5,116},{5,0,410},{5,0,410},{5,0,410},{5,0,410},{5,6,125},{5,6,125},{5,6,125},{5,5,170},{5,5,74}, 
+{5,5,74},{6,5,350},{6,6,424},{6,6,585},{6,5,670},{6,5,287},{6,5,415},{6,5,607},{6,5,735},{5,6,52},{6,5,588},{6,5,154},{6,6,228},{6,6,389},{6,5,474},{5,7,51},{5,6,75},{5,6,275},{6,5,539},{5,6,51},{6,5,539},{6,5,269},{6,5,269},{6,5,269},{6,5,309},{6,5,206},{6,5,246},{6,5,246},{6,5,374},{5,6,27},{6,5,227},{6,5,73}, +{6,5,73},{6,5,73},{6,5,113},{6,5,10},{6,5,50},{6,5,50},{6,5,178},{3,7,10},{6,5,178},{5,7,26},{5,6,146},{6,6,325},{5,6,226},{5,7,26},{5,6,26},{5,6,226},{0,5,530},{5,6,26},{0,5,530},{6,0,260},{6,0,260},{6,0,260},{6,0,260},{6,5,197},{6,5,197},{6,5,197},{6,5,205},{5,6,26},{5,6,26},{6,6,56},{6,6,40},{6,6,89}, +{6,6,121},{6,6,92},{6,6,108},{6,6,157},{6,6,354},{6,6,280},{5,6,308},{6,6,20},{6,6,4},{6,6,53},{6,6,85},{6,6,56},{6,6,72},{6,6,121},{5,6,227},{4,7,56},{5,6,227},{6,6,40},{6,6,40},{6,6,40},{6,6,72},{6,6,76},{6,6,108},{6,6,108},{6,6,305},{5,6,139},{5,6,259},{6,6,4},{6,6,4},{6,6,4},{6,6,36},{7,4,8}, +{6,6,72},{6,6,72},{5,6,178},{7,5,8},{5,6,178},{6,6,20},{6,6,4},{6,6,53},{6,6,85},{6,6,20},{4,7,20},{6,6,85},{0,6,218},{4,7,20},{0,6,218},{6,0,36},{6,0,36},{6,0,36},{6,0,36},{6,6,72},{6,6,72},{6,6,72},{6,6,136},{5,6,90},{5,6,90},{6,7,118},{6,6,168},{6,6,105},{6,6,73},{6,7,175},{6,6,76},{6,6,13}, +{6,6,34},{6,6,88},{6,6,46},{6,7,114},{6,6,164},{6,6,101},{6,6,69},{7,5,56},{6,6,72},{6,6,9},{6,6,30},{6,6,72},{6,6,30},{6,6,104},{6,6,104},{6,6,104},{6,6,72},{6,6,44},{6,6,12},{6,6,12},{6,6,33},{6,6,24},{6,6,45},{6,6,100},{6,6,100},{6,6,100},{6,6,68},{7,5,40},{6,6,8},{6,6,8},{6,6,29},{6,6,8}, +{6,6,29},{7,5,20},{6,6,100},{6,6,37},{6,6,5},{7,5,20},{5,7,50},{6,6,5},{0,6,26},{5,7,50},{0,6,26},{6,0,68},{6,0,68},{6,0,68},{6,0,68},{6,6,8},{6,6,8},{6,6,8},{6,6,8},{6,6,20},{6,6,20},{6,7,406},{6,7,382},{6,6,633},{6,6,537},{6,7,159},{6,7,255},{6,6,381},{6,6,226},{5,7,228},{6,6,142},{6,7,306}, 
+{6,7,282},{7,6,454},{7,6,414},{6,7,59},{7,6,139},{7,6,211},{6,6,126},{5,7,59},{6,6,126},{6,7,357},{6,7,357},{6,7,357},{6,6,456},{6,7,110},{6,7,230},{6,7,230},{6,6,145},{6,6,152},{6,6,61},{6,7,257},{6,7,257},{6,7,257},{7,6,333},{6,7,10},{7,6,130},{7,6,130},{6,6,45},{5,7,10},{6,6,45},{7,6,50},{6,7,26},{7,6,130}, +{7,6,90},{7,6,50},{7,6,18},{7,6,90},{0,6,90},{7,6,18},{0,6,90},{6,0,356},{6,0,356},{6,0,356},{6,0,356},{6,7,109},{6,7,109},{6,7,109},{6,6,136},{6,6,52},{6,6,52},{7,6,374},{7,6,454},{7,6,634},{7,6,666},{7,6,297},{7,6,409},{7,6,589},{7,6,697},{6,7,56},{7,6,536},{7,6,149},{7,6,229},{7,6,409},{7,6,441},{7,6,72}, +{6,7,56},{6,7,296},{7,6,472},{6,7,56},{7,6,472},{7,6,310},{7,6,310},{7,6,310},{7,6,342},{7,6,233},{7,6,265},{7,6,265},{7,6,373},{6,7,40},{7,6,212},{7,6,85},{7,6,85},{7,6,85},{7,6,117},{7,6,8},{7,6,40},{7,6,40},{7,6,148},{7,6,40},{7,6,148},{7,6,68},{6,7,116},{7,6,328},{6,7,260},{7,6,68},{6,7,20},{6,7,260}, +{0,6,468},{6,7,20},{0,6,468},{7,0,306},{7,0,306},{7,0,306},{7,0,306},{7,6,229},{7,6,229},{7,6,229},{7,6,229},{6,7,36},{6,7,36},{7,7,68},{7,7,60},{7,7,123},{7,7,163},{7,7,116},{7,7,148},{7,7,211},{7,7,430},{6,7,248},{6,7,328},{7,7,19},{7,7,11},{7,7,74},{7,7,114},{7,7,67},{7,7,99},{7,7,162},{6,7,264},{7,7,171}, +{6,7,264},{7,7,59},{7,7,59},{7,7,59},{7,7,99},{7,7,107},{7,7,147},{7,7,147},{7,7,366},{6,7,104},{6,7,264},{7,7,10},{7,7,10},{7,7,10},{7,7,50},{7,7,58},{7,7,98},{7,7,98},{6,7,200},{6,7,40},{6,7,200},{7,7,18},{7,7,10},{7,7,73},{6,7,100},{7,7,18},{7,7,50},{6,7,100},{0,7,260},{7,7,50},{0,7,260},{7,0,50}, +{7,0,50},{7,0,50},{7,0,50},{7,6,85},{7,6,85},{7,6,85},{7,6,149},{6,7,68},{6,7,68},{7,7,196},{7,7,124},{7,7,75},{7,7,51},{7,7,148},{7,7,52},{7,7,3},{7,7,46},{7,7,84},{7,7,70},{7,7,195},{7,7,123},{7,7,74},{7,7,50},{7,7,147},{7,7,51},{7,7,2},{7,7,45},{7,7,59},{7,7,45},{7,7,75},{7,7,75},{7,7,75}, 
+{7,7,51},{7,7,27},{7,7,3},{7,7,3},{7,7,46},{7,7,35},{7,7,70},{7,7,74},{7,7,74},{7,7,74},{7,7,50},{7,7,26},{7,7,2},{7,7,2},{7,7,45},{7,7,10},{7,7,45},{7,7,146},{7,7,74},{7,7,25},{7,7,1},{7,7,146},{7,7,50},{7,7,1},{0,7,36},{7,7,50},{0,7,36},{7,0,50},{7,0,50},{7,0,50},{7,0,50},{7,7,2}, +{7,7,2},{7,7,2},{7,7,10},{7,7,34},{7,7,34},{7,7,548},{7,7,476},{7,7,427},{7,7,355},{7,7,404},{7,7,260},{7,7,211},{7,7,106},{7,7,132},{7,7,34},{7,7,467},{7,7,395},{7,7,346},{7,7,274},{7,7,323},{7,7,179},{7,7,130},{7,7,25},{7,7,123},{7,7,25},{7,7,427},{7,7,427},{7,7,427},{7,7,355},{7,7,283},{7,7,211},{7,7,211}, +{7,7,106},{7,7,83},{7,7,34},{7,7,346},{7,7,346},{7,7,346},{7,7,274},{7,7,202},{7,7,130},{7,7,130},{7,7,25},{7,7,74},{7,7,25},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{7,0,306},{7,0,306},{7,0,306},{7,0,306},{7,7,162},{7,7,162},{7,7,162},{7,7,106},{7,7,34}, +{7,7,34},{0,0,122},{0,0,50},{0,0,1},{0,0,25},{0,0,158},{0,0,110},{0,0,61},{0,0,244},{0,0,210},{0,0,280},{0,0,122},{0,0,50},{0,0,1},{0,0,25},{0,0,158},{0,0,110},{0,0,61},{0,0,244},{0,0,174},{0,0,244},{0,0,1},{0,0,1},{0,0,1},{0,0,25},{0,0,37},{0,0,61},{0,0,61},{0,0,244},{0,0,161},{0,0,280},{0,0,1}, +{0,0,1},{0,0,1},{0,0,25},{0,0,37},{0,0,61},{0,0,61},{0,0,244},{0,0,125},{0,0,244},{0,0,122},{0,0,50},{0,0,1},{0,0,25},{0,0,122},{0,0,74},{0,0,25},{0,0,144},{0,0,74},{0,0,144},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,1,81},{0,1,81},{0,0,122}, +{0,0,82},{0,1,328},{0,0,243},{0,0,82},{0,0,129},{0,0,319},{0,0,165},{0,1,81},{0,1,81},{0,0,122},{0,0,82},{0,1,328},{0,0,243},{0,0,82},{0,0,129},{0,0,283},{0,0,129},{0,1,45},{0,1,45},{0,1,45},{0,0,18},{0,0,58},{0,0,18},{0,0,18},{0,0,65},{0,0,94},{0,0,101},{0,1,45},{0,1,45},{0,1,45},{0,0,18},{0,0,58}, 
+{0,0,18},{0,0,18},{0,0,65},{0,0,58},{0,0,65},{0,1,72},{0,1,72},{0,0,113},{0,0,73},{0,1,72},{1,0,104},{0,0,73},{0,0,80},{1,0,104},{0,0,80},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,49},{0,0,85},{0,0,85},{0,2,225},{0,1,129},{0,1,449},{0,0,498},{0,1,264},{0,1,168},{0,0,402}, +{0,0,273},{0,0,687},{0,0,309},{0,2,225},{0,1,129},{0,1,449},{0,0,498},{0,1,264},{0,1,168},{0,0,402},{0,0,273},{1,0,248},{0,0,273},{0,1,125},{0,1,125},{0,1,125},{0,1,221},{0,1,68},{0,0,146},{0,0,146},{0,0,17},{0,0,158},{0,0,53},{0,1,125},{0,1,125},{0,1,125},{0,1,221},{0,1,68},{0,0,146},{0,0,146},{0,0,17},{0,0,122}, +{0,0,17},{1,0,72},{0,1,8},{1,0,292},{1,0,260},{1,0,72},{1,0,104},{1,0,260},{0,0,272},{1,0,104},{0,0,272},{0,0,121},{0,0,121},{0,0,121},{0,0,121},{0,0,25},{0,0,25},{0,0,25},{0,0,1},{0,0,37},{0,0,37},{0,2,514},{0,2,558},{0,1,610},{0,1,514},{0,2,297},{0,1,153},{0,1,153},{0,1,777},{0,1,392},{0,0,802},{1,1,347}, +{1,1,355},{1,1,446},{0,1,465},{0,2,248},{0,1,104},{0,1,104},{0,1,728},{0,1,248},{0,1,728},{0,2,414},{0,2,414},{0,2,414},{0,1,414},{0,1,149},{0,1,53},{0,1,53},{0,0,274},{0,1,292},{0,0,226},{1,0,229},{1,0,229},{1,0,229},{1,0,261},{1,0,68},{0,1,4},{0,1,4},{1,0,160},{1,0,52},{1,0,160},{0,2,104},{1,1,130},{0,1,200}, +{0,1,104},{0,2,104},{2,0,74},{0,1,104},{0,1,584},{2,0,74},{0,1,584},{0,0,410},{0,0,410},{0,0,410},{0,0,410},{0,1,49},{0,1,49},{0,1,49},{0,0,130},{0,0,82},{0,0,82},{1,1,464},{1,1,400},{1,1,365},{1,1,397},{1,1,572},{0,2,499},{0,1,387},{0,1,435},{0,1,428},{0,1,188},{1,1,140},{1,1,76},{1,1,41},{1,1,73},{1,1,248}, +{1,1,216},{0,1,131},{0,1,179},{2,0,264},{0,1,179},{1,1,364},{1,1,364},{1,1,364},{1,1,396},{0,2,314},{0,1,386},{0,1,386},{0,1,434},{0,1,67},{0,1,187},{1,1,40},{1,1,40},{1,1,40},{1,1,72},{0,2,58},{0,1,130},{0,1,130},{0,1,178},{0,1,58},{0,1,178},{0,3,72},{1,1,40},{1,1,5},{1,1,37},{0,3,72},{1,1,72},{1,1,37}, 
+{0,1,170},{1,1,72},{0,1,170},{1,0,360},{1,0,360},{1,0,360},{1,0,360},{0,2,265},{0,2,265},{0,2,265},{0,1,265},{0,1,18},{0,1,18},{1,2,178},{1,2,202},{1,1,189},{1,1,157},{1,2,463},{1,1,316},{1,1,169},{1,1,238},{0,2,412},{0,1,124},{1,2,78},{1,2,102},{1,1,89},{1,1,57},{2,0,248},{0,2,99},{1,1,69},{0,1,99},{3,0,248}, +{0,1,99},{1,1,140},{1,1,140},{1,1,140},{1,1,108},{1,1,152},{1,1,120},{1,1,120},{1,1,189},{0,1,275},{0,1,75},{1,1,40},{1,1,40},{1,1,40},{1,1,8},{1,1,52},{1,1,20},{1,1,20},{0,1,50},{1,1,68},{0,1,50},{1,2,74},{1,2,98},{1,1,85},{1,1,53},{1,2,74},{0,2,74},{1,1,53},{0,1,74},{0,2,74},{0,1,74},{1,0,104}, +{1,0,104},{1,0,104},{1,0,104},{1,1,116},{1,1,116},{1,1,116},{1,1,164},{0,1,50},{0,1,50},{1,3,226},{1,2,106},{1,2,466},{1,1,429},{1,2,255},{1,2,207},{1,1,345},{1,1,238},{0,2,252},{1,1,298},{1,3,222},{1,2,102},{1,2,462},{1,1,425},{1,2,251},{1,2,203},{1,1,341},{1,1,234},{0,2,251},{1,1,234},{1,2,105},{1,2,105},{1,2,105}, +{1,1,204},{1,2,86},{1,1,120},{1,1,120},{1,1,13},{0,2,83},{1,1,73},{1,2,101},{1,2,101},{1,2,101},{1,1,200},{2,0,52},{1,1,116},{1,1,116},{1,1,9},{3,0,52},{1,1,9},{2,1,74},{1,2,2},{2,1,274},{2,1,234},{2,1,74},{4,0,74},{2,1,234},{0,1,234},{4,0,74},{0,1,234},{1,0,104},{1,0,104},{1,0,104},{1,0,104},{1,1,20}, +{1,1,20},{1,1,20},{1,1,4},{1,1,64},{1,1,64},{1,3,450},{1,2,522},{1,2,562},{1,2,490},{1,3,295},{1,2,127},{1,2,167},{1,1,750},{1,2,428},{1,1,714},{2,2,400},{2,2,416},{2,2,521},{1,2,454},{2,1,251},{1,2,91},{1,2,131},{0,2,651},{4,0,251},{0,2,651},{1,3,369},{1,3,369},{1,3,369},{1,2,369},{1,2,118},{1,2,46},{1,2,46}, +{1,1,221},{0,2,163},{1,1,185},{2,1,257},{2,1,257},{2,1,257},{2,1,281},{0,4,50},{1,2,10},{1,2,10},{2,1,146},{2,1,50},{2,1,146},{3,0,80},{2,2,160},{1,2,202},{1,2,130},{3,0,80},{3,1,80},{1,2,130},{0,1,650},{3,1,80},{0,1,650},{1,0,360},{1,0,360},{1,0,360},{1,0,360},{1,2,37},{1,2,37},{1,2,37},{1,1,100},{1,1,64}, 
+{1,1,64},{2,2,500},{2,2,444},{2,2,423},{2,2,463},{1,4,588},{1,3,513},{1,2,329},{1,2,441},{0,3,284},{1,2,232},{2,2,139},{2,2,83},{2,2,62},{2,2,102},{3,0,251},{2,2,243},{1,2,104},{1,2,216},{3,1,251},{1,2,216},{2,2,419},{2,2,419},{2,2,419},{2,2,459},{1,3,277},{1,2,325},{1,2,325},{1,2,437},{1,2,68},{1,2,228},{2,2,58}, +{2,2,58},{2,2,58},{2,2,98},{1,3,52},{1,2,100},{1,2,100},{1,2,212},{1,2,52},{1,2,212},{1,4,74},{2,2,34},{2,2,13},{1,2,40},{1,4,74},{2,2,74},{1,2,40},{0,2,200},{2,2,74},{0,2,200},{2,0,410},{2,0,410},{2,0,410},{2,0,410},{1,3,241},{1,3,241},{1,3,241},{1,2,241},{1,2,32},{1,2,32},{2,3,202},{2,3,250},{2,2,183}, +{2,2,159},{2,2,460},{2,2,316},{2,2,183},{2,2,274},{1,3,468},{1,2,104},{2,3,81},{2,3,129},{2,2,62},{2,2,38},{1,4,251},{1,3,96},{2,2,62},{1,2,88},{2,2,251},{1,2,88},{2,2,147},{2,2,147},{2,2,147},{2,2,123},{2,2,171},{2,2,147},{2,2,147},{2,2,238},{0,3,219},{1,2,68},{2,2,26},{2,2,26},{2,2,26},{2,2,2},{2,2,50}, +{2,2,26},{2,2,26},{1,2,52},{5,0,50},{1,2,52},{2,3,80},{2,3,128},{2,2,61},{2,2,37},{2,3,80},{6,0,80},{2,2,37},{0,2,72},{6,0,80},{0,2,72},{2,0,122},{2,0,122},{2,0,122},{2,0,122},{2,2,146},{2,2,146},{2,2,146},{2,2,202},{1,2,32},{1,2,32},{2,3,234},{2,3,90},{2,2,455},{2,2,367},{2,3,253},{2,3,253},{2,2,295}, +{2,2,210},{1,3,244},{2,2,282},{2,3,225},{2,3,81},{2,2,446},{2,2,358},{2,3,244},{2,3,244},{2,2,286},{2,2,201},{6,0,244},{2,2,201},{2,3,90},{2,3,90},{2,3,90},{2,2,171},{2,3,109},{2,2,99},{2,2,99},{2,2,14},{1,3,100},{2,2,86},{2,3,81},{2,3,81},{2,3,81},{2,2,162},{3,1,50},{2,2,90},{2,2,90},{2,2,5},{4,1,50}, +{2,2,5},{4,0,80},{2,3,0},{3,2,260},{3,2,212},{4,0,80},{5,1,80},{3,2,212},{0,2,200},{5,1,80},{0,2,200},{2,0,90},{2,0,90},{2,0,90},{2,0,90},{2,2,18},{2,2,18},{2,2,18},{2,2,10},{2,2,82},{2,2,82},{2,4,394},{2,3,442},{2,3,522},{2,3,474},{2,4,301},{2,3,109},{2,3,189},{2,2,658},{2,3,472},{2,2,634},{2,4,369}, 
+{2,3,417},{2,3,497},{2,3,449},{3,2,244},{2,3,84},{2,3,164},{3,2,620},{5,1,244},{3,2,620},{2,4,330},{2,4,330},{2,4,330},{2,3,330},{2,3,93},{2,3,45},{2,3,45},{2,2,174},{1,3,132},{2,2,150},{3,2,289},{3,2,289},{3,2,289},{3,2,305},{4,0,52},{2,3,20},{2,3,20},{3,2,136},{3,2,52},{3,2,136},{2,4,80},{2,3,128},{2,3,208}, +{2,3,160},{2,4,80},{7,0,80},{2,3,160},{0,2,584},{7,0,80},{0,2,584},{2,0,314},{2,0,314},{2,0,314},{2,0,314},{2,3,29},{2,3,29},{2,3,29},{2,2,74},{2,2,50},{2,2,50},{3,3,544},{3,3,496},{3,3,489},{3,3,537},{2,4,535},{2,4,535},{2,3,279},{2,3,455},{1,4,292},{2,3,284},{3,3,144},{3,3,96},{3,3,89},{3,3,137},{4,1,244}, +{3,3,276},{2,3,83},{2,3,259},{4,2,244},{2,3,259},{3,3,480},{3,3,480},{3,3,480},{3,3,528},{2,4,246},{2,3,270},{2,3,270},{2,3,446},{2,3,75},{2,3,275},{3,3,80},{3,3,80},{3,3,80},{3,3,128},{2,4,50},{2,3,74},{2,3,74},{2,3,250},{7,0,50},{2,3,250},{5,0,80},{3,3,32},{3,3,25},{2,3,34},{5,0,80},{6,1,80},{2,3,34}, +{0,3,234},{6,1,80},{0,3,234},{3,0,464},{3,0,464},{3,0,464},{3,0,464},{2,4,221},{2,4,221},{2,4,221},{2,3,221},{2,3,50},{2,3,50},{3,4,234},{3,3,304},{3,3,185},{3,3,169},{3,3,452},{3,3,324},{3,3,205},{3,3,318},{1,4,452},{2,3,92},{3,4,90},{3,3,160},{3,3,41},{3,3,25},{5,0,244},{2,4,99},{3,3,61},{2,3,83},{3,3,244}, +{2,3,83},{3,3,160},{3,3,160},{3,3,160},{3,3,144},{3,3,196},{3,3,180},{3,3,180},{3,3,293},{2,3,187},{2,3,67},{3,3,16},{3,3,16},{3,3,16},{3,3,0},{3,3,52},{3,3,36},{3,3,36},{2,3,58},{6,1,52},{2,3,58},{4,2,80},{3,3,160},{3,3,41},{3,3,25},{4,2,80},{5,2,80},{3,3,25},{0,3,74},{5,2,80},{0,3,74},{3,0,144}, +{3,0,144},{3,0,144},{3,0,144},{3,3,180},{3,3,180},{3,3,180},{3,3,244},{2,3,18},{2,3,18},{3,4,202},{3,4,82},{3,3,393},{3,3,313},{3,4,259},{3,4,307},{3,3,253},{3,3,190},{2,4,244},{3,3,274},{3,4,186},{3,4,66},{3,3,377},{3,3,297},{3,4,243},{2,4,243},{3,3,237},{3,3,174},{7,1,243},{3,3,174},{3,4,81},{3,4,81},{3,4,81}, 
+{3,3,144},{3,4,138},{3,3,84},{3,3,84},{3,3,21},{2,4,123},{3,3,105},{3,4,65},{3,4,65},{3,4,65},{3,3,128},{4,2,52},{3,3,68},{3,3,68},{3,3,5},{5,2,52},{3,3,5},{5,1,74},{3,4,2},{4,3,250},{4,3,194},{5,1,74},{4,3,74},{4,3,194},{0,3,170},{4,3,74},{0,3,170},{3,0,80},{3,0,80},{3,0,80},{3,0,80},{3,3,20}, +{3,3,20},{3,3,20},{3,3,20},{3,3,104},{3,3,104},{3,5,346},{3,4,370},{3,4,490},{3,4,466},{3,5,315},{3,4,99},{3,4,219},{3,3,574},{2,4,468},{3,3,562},{3,5,330},{3,4,354},{3,4,474},{3,4,450},{4,3,243},{3,4,83},{3,4,203},{3,3,558},{6,2,243},{3,3,558},{3,5,297},{3,5,297},{3,5,297},{3,4,297},{3,4,74},{3,4,50},{3,4,50}, +{3,3,133},{2,4,107},{3,3,121},{3,5,281},{3,5,281},{3,5,281},{3,4,281},{5,1,58},{3,4,34},{3,4,34},{3,3,117},{7,1,58},{3,3,117},{6,0,74},{3,4,98},{3,4,218},{3,4,194},{6,0,74},{3,4,74},{3,4,194},{0,3,522},{3,4,74},{0,3,522},{3,0,272},{3,0,272},{3,0,272},{3,0,272},{3,4,25},{3,4,25},{3,4,25},{3,3,52},{3,3,40}, +{3,3,40},{4,4,596},{4,4,556},{4,4,563},{4,4,619},{3,5,477},{3,4,477},{3,4,237},{3,4,477},{2,5,308},{3,4,344},{4,4,155},{4,4,115},{4,4,122},{4,4,178},{5,2,243},{2,5,299},{3,4,68},{2,4,308},{5,3,243},{2,4,308},{4,4,547},{4,4,547},{4,4,547},{4,4,603},{3,5,221},{3,4,221},{3,4,221},{3,4,461},{3,4,88},{3,4,328},{4,4,106}, +{4,4,106},{4,4,106},{4,4,162},{6,0,52},{3,4,52},{3,4,52},{2,4,292},{3,4,52},{2,4,292},{4,4,74},{4,4,34},{4,4,41},{3,4,32},{4,4,74},{7,2,74},{3,4,32},{0,4,272},{7,2,74},{0,4,272},{4,0,522},{4,0,522},{4,0,522},{4,0,522},{3,4,205},{3,4,205},{3,4,205},{3,4,205},{3,4,72},{3,4,72},{4,5,274},{4,4,300},{4,4,195}, +{4,4,187},{4,4,452},{4,4,340},{4,4,235},{4,4,370},{2,5,404},{3,4,88},{4,5,105},{4,4,131},{4,4,26},{4,4,18},{6,1,243},{3,5,108},{4,4,66},{3,4,84},{4,4,243},{3,4,84},{4,4,179},{4,4,179},{4,4,179},{4,4,171},{4,4,227},{4,4,219},{4,4,219},{4,4,354},{3,4,152},{3,4,72},{4,4,10},{4,4,10},{4,4,10},{4,4,2},{5,2,58}, 
+{4,4,50},{4,4,50},{3,4,68},{7,2,58},{3,4,68},{5,3,74},{4,4,130},{4,4,25},{4,4,17},{5,3,74},{6,3,74},{4,4,17},{0,4,80},{6,3,74},{0,4,80},{4,0,170},{4,0,170},{4,0,170},{4,0,170},{4,3,205},{4,3,205},{4,3,205},{4,3,269},{3,4,8},{3,4,8},{4,5,178},{4,5,82},{4,4,339},{4,4,267},{4,5,273},{4,5,369},{4,4,219}, +{4,4,178},{3,5,252},{4,4,274},{4,5,153},{4,5,57},{4,4,314},{4,4,242},{7,0,248},{3,5,204},{4,4,194},{4,4,153},{3,5,248},{4,4,153},{4,5,78},{4,5,78},{4,5,78},{4,4,123},{4,4,147},{4,4,75},{4,4,75},{4,4,34},{3,5,152},{4,4,130},{4,5,53},{4,5,53},{4,5,53},{4,4,98},{6,1,58},{4,4,50},{4,4,50},{4,4,9},{6,3,58}, +{4,4,9},{6,2,72},{4,5,8},{5,4,244},{5,4,180},{6,2,72},{5,4,72},{5,4,180},{0,4,144},{5,4,72},{0,4,144},{4,0,74},{4,0,74},{4,0,74},{4,0,74},{4,4,26},{4,4,26},{4,4,26},{4,4,34},{4,4,130},{4,4,130},{4,6,306},{4,5,306},{4,5,466},{4,5,466},{4,6,337},{4,5,97},{4,5,257},{4,4,498},{3,5,412},{4,4,498},{4,6,297}, +{4,5,297},{4,5,457},{4,5,457},{5,4,248},{4,5,88},{4,5,248},{4,4,489},{7,3,248},{4,4,489},{4,5,270},{4,5,270},{4,5,270},{4,5,270},{4,5,61},{4,5,61},{4,5,61},{4,4,98},{3,5,88},{4,4,98},{4,5,261},{4,5,261},{4,5,261},{4,5,261},{7,0,52},{4,5,52},{4,5,52},{4,4,89},{3,5,52},{4,4,89},{7,1,72},{4,5,72},{4,5,232}, +{3,5,232},{7,1,72},{4,5,72},{3,5,232},{0,4,464},{4,5,72},{0,4,464},{4,0,234},{4,0,234},{4,0,234},{4,0,234},{4,5,25},{4,5,25},{4,5,25},{4,4,34},{4,4,34},{4,4,34},{5,5,656},{5,5,624},{5,5,645},{5,5,709},{4,6,427},{4,5,403},{4,5,203},{4,5,507},{4,5,332},{4,5,412},{5,5,172},{5,5,140},{5,5,161},{5,5,225},{6,3,248}, +{4,5,259},{4,5,59},{3,5,339},{6,4,248},{3,5,339},{5,5,620},{5,5,620},{5,5,620},{5,5,684},{4,6,202},{4,5,178},{4,5,178},{4,5,482},{4,5,107},{4,5,387},{5,5,136},{5,5,136},{5,5,136},{5,5,200},{7,1,58},{4,5,34},{4,5,34},{5,4,290},{7,3,58},{5,4,290},{5,5,72},{5,5,40},{5,5,61},{4,5,34},{5,5,72},{3,6,72},{4,5,34}, 
+{0,5,314},{3,6,72},{0,5,314},{5,0,584},{5,0,584},{5,0,584},{5,0,584},{4,5,169},{4,5,169},{4,5,169},{4,5,193},{4,5,98},{4,5,98},{5,6,322},{5,5,304},{5,5,213},{5,5,213},{5,5,460},{5,5,364},{5,5,273},{5,5,430},{3,6,364},{4,5,92},{5,6,126},{5,5,108},{5,5,17},{5,5,17},{7,2,248},{4,6,123},{5,5,77},{4,5,91},{5,5,248}, +{4,5,91},{5,5,204},{5,5,204},{5,5,204},{5,5,204},{5,5,264},{5,5,264},{5,5,264},{5,5,421},{4,5,123},{4,5,83},{5,5,8},{5,5,8},{5,5,8},{5,5,8},{6,3,52},{5,5,68},{5,5,68},{4,5,82},{6,4,52},{4,5,82},{6,4,72},{5,5,104},{5,5,13},{5,5,13},{6,4,72},{7,4,72},{5,5,13},{0,5,90},{7,4,72},{0,5,90},{5,0,200}, +{5,0,200},{5,0,200},{5,0,200},{5,4,221},{5,4,221},{5,4,221},{5,4,277},{4,5,2},{4,5,2},{5,6,162},{5,6,90},{5,5,293},{5,5,229},{5,6,295},{5,5,396},{5,5,193},{5,5,174},{4,6,268},{5,5,282},{5,6,126},{5,6,54},{5,5,257},{5,5,193},{5,6,259},{4,6,171},{5,5,157},{5,5,138},{4,6,259},{5,5,138},{5,6,81},{5,6,81},{5,6,81}, +{5,5,108},{5,5,136},{5,5,72},{5,5,72},{5,5,53},{4,6,187},{5,5,161},{5,6,45},{5,6,45},{5,6,45},{5,5,72},{7,2,52},{5,5,36},{5,5,36},{5,5,17},{5,5,52},{5,5,17},{7,3,74},{5,6,18},{5,5,221},{5,5,157},{7,3,74},{6,5,74},{5,5,157},{0,5,122},{6,5,74},{0,5,122},{5,0,72},{5,0,72},{5,0,72},{5,0,72},{5,5,36}, +{5,5,36},{5,5,36},{5,5,52},{5,5,160},{5,5,160},{5,7,274},{5,6,250},{5,6,450},{5,6,474},{5,6,343},{5,6,103},{5,6,303},{5,5,430},{4,6,364},{5,5,442},{5,7,270},{5,6,246},{5,6,446},{5,6,470},{7,3,251},{5,6,99},{5,6,299},{5,5,426},{6,5,251},{5,5,426},{5,6,225},{5,6,225},{5,6,225},{5,6,249},{5,6,54},{5,6,78},{5,6,78}, +{5,5,69},{4,6,75},{5,5,81},{5,6,221},{5,6,221},{5,6,221},{5,6,245},{5,6,50},{5,6,74},{5,6,74},{5,5,65},{4,6,50},{5,5,65},{5,7,74},{5,6,50},{5,6,250},{4,6,250},{5,7,74},{5,6,74},{4,6,250},{0,5,410},{5,6,74},{0,5,410},{5,0,200},{5,0,200},{5,0,200},{5,0,200},{5,6,29},{5,6,29},{5,6,29},{5,5,20},{5,5,32}, 
+{5,5,32},{6,6,724},{6,6,700},{6,6,735},{5,6,690},{5,7,385},{5,6,337},{5,6,177},{5,6,545},{5,6,328},{5,6,488},{6,6,195},{6,6,171},{6,6,206},{6,6,278},{7,4,259},{5,6,216},{5,6,56},{4,6,376},{7,5,259},{4,6,376},{5,7,654},{5,7,654},{5,7,654},{5,6,654},{5,7,189},{5,6,141},{5,6,141},{5,6,509},{5,6,132},{5,5,450},{6,5,157}, +{6,5,157},{6,5,157},{6,5,221},{6,5,52},{5,6,20},{5,6,20},{6,5,256},{3,7,52},{6,5,256},{6,6,74},{6,6,50},{6,6,85},{5,6,40},{6,6,74},{4,7,74},{5,6,40},{0,6,360},{4,7,74},{0,6,360},{5,0,650},{5,0,650},{5,0,650},{5,0,650},{5,6,137},{5,6,137},{5,6,137},{5,6,185},{5,6,128},{5,6,128},{6,7,378},{6,6,316},{6,6,239}, +{6,6,247},{6,6,476},{6,6,396},{6,6,319},{5,6,465},{4,7,332},{5,6,104},{6,7,153},{6,6,91},{6,6,14},{6,6,22},{6,6,251},{5,7,144},{6,6,94},{5,6,104},{4,7,251},{5,6,104},{6,6,235},{6,6,235},{6,6,235},{6,6,243},{6,6,307},{6,6,315},{6,6,315},{5,6,461},{5,6,100},{5,6,100},{6,6,10},{6,6,10},{6,6,10},{6,6,18},{7,4,50}, +{6,6,90},{6,6,90},{5,6,100},{7,5,50},{5,6,100},{7,5,74},{6,6,82},{6,6,5},{6,6,13},{7,5,74},{6,6,90},{6,6,13},{0,6,104},{6,6,90},{0,6,104},{6,0,234},{6,0,234},{6,0,234},{6,0,234},{6,5,241},{6,5,241},{6,5,241},{6,5,289},{5,6,0},{5,6,0},{6,7,154},{6,7,106},{6,6,255},{6,6,199},{6,7,325},{6,6,364},{6,6,175}, +{6,6,178},{5,7,292},{5,6,232},{6,7,105},{6,7,57},{6,6,206},{6,6,150},{7,5,251},{5,7,144},{6,6,126},{6,6,129},{5,7,276},{6,6,129},{6,7,90},{6,7,90},{6,7,90},{6,6,99},{6,6,131},{6,6,75},{6,6,75},{6,6,78},{6,6,219},{5,6,132},{6,7,41},{6,7,41},{6,7,41},{6,6,50},{7,5,82},{6,6,26},{6,6,26},{6,6,29},{6,6,50}, +{6,6,29},{6,7,80},{6,7,32},{6,6,181},{6,6,125},{6,7,80},{7,6,80},{6,6,125},{0,6,104},{7,6,80},{0,6,104},{6,0,74},{6,0,74},{6,0,74},{6,0,74},{6,6,50},{6,6,50},{6,6,50},{6,6,74},{5,6,128},{5,6,128},{6,7,442},{6,7,202},{6,7,442},{6,7,490},{6,7,309},{6,7,117},{6,7,357},{6,6,370},{5,7,324},{6,6,394},{6,7,441}, 
+{6,7,201},{6,7,441},{6,7,489},{7,6,276},{6,7,116},{6,7,356},{6,6,369},{7,6,244},{6,6,369},{6,7,186},{6,7,186},{6,7,186},{6,7,234},{6,7,53},{6,7,101},{6,7,101},{6,6,46},{5,7,68},{6,6,70},{6,7,185},{6,7,185},{6,7,185},{6,7,233},{6,7,52},{6,7,100},{6,7,100},{6,6,45},{5,7,52},{6,6,45},{7,6,80},{6,7,32},{6,7,272}, +{5,7,272},{7,6,80},{6,7,80},{5,7,272},{0,6,360},{6,7,80},{0,6,360},{6,0,170},{6,0,170},{6,0,170},{6,0,170},{6,7,37},{6,7,37},{6,7,37},{6,6,10},{6,6,34},{6,6,34},{7,7,800},{7,7,784},{6,7,802},{6,7,634},{6,7,903},{6,7,279},{6,7,159},{6,7,591},{6,7,332},{6,7,572},{7,7,224},{7,7,208},{7,7,257},{7,7,337},{7,6,339}, +{6,7,179},{6,7,59},{5,7,419},{6,7,251},{5,7,419},{7,6,745},{7,6,745},{7,6,745},{6,7,585},{6,7,278},{6,7,110},{6,7,110},{6,6,469},{6,7,163},{6,6,385},{7,6,169},{7,6,169},{7,6,169},{7,6,225},{7,6,50},{6,7,10},{6,7,10},{7,6,226},{7,6,82},{7,6,226},{7,7,80},{7,7,64},{7,7,113},{6,7,50},{7,7,80},{7,7,144},{6,7,50}, +{0,7,410},{7,7,144},{0,7,410},{6,0,584},{6,0,584},{6,0,584},{6,0,584},{6,7,109},{6,7,109},{6,7,109},{6,7,181},{6,6,160},{6,6,160},{7,7,393},{7,7,321},{7,7,272},{7,7,288},{7,7,477},{7,7,421},{7,7,372},{6,7,446},{6,7,483},{6,7,123},{7,7,137},{7,7,65},{7,7,16},{7,7,32},{7,7,221},{7,7,165},{7,7,116},{6,7,122},{7,7,261}, +{6,7,122},{7,7,272},{7,7,272},{7,7,272},{7,7,288},{7,7,356},{7,7,372},{7,7,372},{6,7,446},{6,7,83},{6,7,123},{7,7,16},{7,7,16},{7,7,16},{7,7,32},{7,7,100},{7,7,116},{7,7,116},{6,7,122},{6,7,82},{6,7,122},{7,7,121},{7,7,49},{7,7,0},{7,7,16},{7,7,121},{7,7,65},{7,7,16},{0,7,121},{7,7,65},{0,7,121},{7,0,272}, +{7,0,272},{7,0,272},{7,0,272},{7,6,265},{7,6,265},{7,6,265},{7,6,305},{6,7,2},{6,7,2},{7,7,265},{7,7,193},{7,7,144},{7,7,96},{7,7,253},{7,7,133},{7,7,84},{7,7,109},{7,7,297},{6,7,107},{7,7,201},{7,7,129},{7,7,80},{7,7,32},{7,7,189},{7,7,69},{7,7,20},{7,7,45},{7,7,101},{7,7,45},{7,7,144},{7,7,144},{7,7,144}, 
+{7,7,96},{7,7,132},{7,7,84},{7,7,84},{7,7,109},{7,7,248},{6,7,107},{7,7,80},{7,7,80},{7,7,80},{7,7,32},{7,7,68},{7,7,20},{7,7,20},{7,7,45},{7,7,52},{7,7,45},{7,7,185},{7,7,113},{7,7,64},{7,7,16},{7,7,185},{7,7,65},{7,7,16},{0,7,9},{7,7,65},{0,7,9},{7,0,80},{7,0,80},{7,0,80},{7,0,80},{7,7,68}, +{7,7,68},{7,7,68},{7,7,100},{6,7,98},{6,7,98},{7,7,386},{7,7,314},{7,7,265},{7,7,193},{7,7,278},{7,7,134},{7,7,85},{7,7,4},{7,7,138},{7,7,40},{7,7,386},{7,7,314},{7,7,265},{7,7,193},{7,7,278},{7,7,134},{7,7,85},{7,7,4},{7,7,102},{7,7,4},{7,7,265},{7,7,265},{7,7,265},{7,7,193},{7,7,157},{7,7,85},{7,7,85}, +{7,7,4},{7,7,89},{7,7,40},{7,7,265},{7,7,265},{7,7,265},{7,7,193},{7,7,157},{7,7,85},{7,7,85},{7,7,4},{7,7,53},{7,7,4},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{7,0,144},{7,0,144},{7,0,144},{7,0,144},{7,7,36},{7,7,36},{7,7,36},{7,7,4},{7,7,40}, +{7,7,40},{0,1,200},{0,1,104},{0,0,153},{0,0,145},{0,1,561},{0,0,398},{0,0,181},{0,0,308},{0,0,498},{0,0,344},{0,1,200},{0,1,104},{0,0,153},{0,0,145},{0,1,561},{0,0,398},{0,0,181},{0,0,308},{0,0,462},{0,0,308},{0,0,9},{0,0,9},{0,0,9},{0,0,1},{0,0,45},{0,0,37},{0,0,37},{0,0,164},{0,0,137},{0,0,200},{0,0,9}, +{0,0,9},{0,0,9},{0,0,1},{0,0,45},{0,0,37},{0,0,37},{0,0,164},{0,0,101},{0,0,164},{0,1,200},{0,1,104},{0,0,153},{0,0,145},{0,1,200},{1,0,232},{0,0,145},{0,0,208},{1,0,232},{0,0,208},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,2,232},{0,1,40},{0,1,200}, +{0,1,392},{0,1,689},{0,1,593},{0,0,485},{0,0,500},{0,0,914},{0,0,536},{0,2,232},{0,1,40},{0,1,200},{0,1,392},{0,1,689},{0,1,593},{0,0,485},{0,0,500},{1,0,761},{0,0,500},{0,1,4},{0,1,4},{0,1,4},{0,0,49},{0,0,157},{0,0,85},{0,0,85},{0,0,100},{0,0,185},{0,0,136},{0,1,4},{0,1,4},{0,1,4},{0,0,49},{0,0,157}, 
+{0,0,85},{0,0,85},{0,0,100},{0,0,149},{0,0,100},{1,0,200},{0,1,40},{0,1,200},{0,1,392},{1,0,200},{1,0,232},{0,1,392},{0,0,400},{1,0,232},{0,0,400},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,2,281},{0,2,149},{0,1,121},{0,1,121},{0,2,808},{0,1,376},{0,1,216}, +{0,0,857},{0,1,1169},{0,0,893},{0,2,281},{0,2,149},{0,1,121},{0,1,121},{1,0,728},{0,1,376},{0,1,216},{0,0,857},{1,0,744},{0,0,857},{0,1,85},{0,1,85},{0,1,85},{0,1,85},{0,1,180},{0,1,180},{0,1,180},{0,0,73},{0,0,270},{0,0,109},{0,1,85},{0,1,85},{0,1,85},{0,1,85},{0,1,180},{0,1,180},{0,1,180},{0,0,73},{0,0,234}, +{0,0,73},{0,2,232},{0,2,100},{0,1,72},{0,1,72},{0,2,232},{2,0,202},{0,1,72},{0,1,712},{2,0,202},{0,1,712},{0,0,49},{0,0,49},{0,0,49},{0,0,49},{0,0,1},{0,0,1},{0,0,1},{0,0,9},{0,0,45},{0,0,45},{0,3,427},{0,2,229},{0,1,425},{0,1,233},{0,2,744},{0,1,504},{0,1,24},{0,1,584},{0,1,1105},{0,1,945},{0,3,427}, +{0,2,229},{1,1,414},{0,1,233},{0,2,744},{0,1,504},{0,1,24},{0,1,584},{2,0,739},{0,1,584},{0,2,229},{0,2,229},{0,2,229},{0,1,229},{0,1,212},{0,1,20},{0,1,20},{0,0,281},{0,0,590},{0,0,317},{0,2,229},{0,2,229},{0,2,229},{0,1,229},{1,0,180},{0,1,20},{0,1,20},{0,0,281},{1,0,164},{0,0,281},{0,3,202},{0,2,4},{1,1,53}, +{0,1,8},{0,3,202},{1,1,202},{0,1,8},{0,1,328},{1,1,202},{0,1,328},{0,0,225},{0,0,225},{0,0,225},{0,0,225},{0,1,16},{0,1,16},{0,1,16},{0,0,25},{0,0,61},{0,0,61},{0,4,842},{0,3,740},{1,1,1125},{0,1,834},{0,3,744},{0,2,267},{0,1,283},{0,1,267},{0,1,1356},{0,1,476},{1,2,302},{1,2,230},{1,1,225},{1,1,225},{0,3,728}, +{0,2,251},{0,1,267},{0,1,251},{1,1,728},{0,1,251},{0,3,596},{0,3,596},{0,3,596},{0,1,713},{0,2,186},{0,1,162},{0,1,162},{0,1,146},{0,1,395},{0,1,355},{1,1,104},{1,1,104},{1,1,104},{1,1,104},{0,2,170},{0,1,146},{0,1,146},{0,1,130},{0,1,170},{0,1,130},{1,2,202},{1,2,130},{1,1,125},{1,1,125},{1,2,202},{0,2,202},{1,1,125}, 
+{0,1,202},{0,2,202},{0,1,202},{0,0,592},{0,0,592},{0,0,592},{0,0,592},{0,1,41},{0,1,41},{0,1,41},{0,1,65},{0,0,200},{0,0,200},{1,3,738},{1,2,522},{1,2,722},{1,1,885},{0,4,987},{0,2,443},{0,2,379},{0,1,507},{0,2,1100},{0,1,412},{1,3,254},{1,2,38},{1,2,238},{1,1,401},{1,2,739},{0,2,299},{0,2,235},{0,1,363},{0,2,739}, +{0,1,363},{1,2,497},{1,2,497},{1,2,497},{1,1,524},{0,3,324},{0,2,154},{0,2,154},{0,1,146},{0,1,411},{0,1,51},{1,2,13},{1,2,13},{1,2,13},{1,1,40},{1,1,164},{0,2,10},{0,2,10},{0,1,2},{1,1,180},{0,1,2},{2,1,202},{1,2,34},{1,2,234},{0,2,234},{2,1,202},{4,0,202},{0,2,234},{0,1,362},{4,0,202},{0,1,362},{1,0,488}, +{1,0,488},{1,0,488},{1,0,488},{0,2,145},{0,2,145},{0,2,145},{0,1,145},{0,1,50},{0,1,50},{1,3,450},{1,3,362},{1,2,306},{1,2,330},{1,2,1015},{1,2,583},{1,2,463},{1,1,990},{0,2,940},{0,1,860},{1,3,254},{1,3,166},{1,2,110},{1,2,134},{0,4,731},{0,3,288},{0,2,155},{1,1,794},{2,1,731},{1,1,794},{1,2,257},{1,2,257},{1,2,257}, +{1,2,281},{1,2,390},{1,1,392},{1,1,392},{1,1,261},{0,2,315},{0,1,131},{1,2,61},{1,2,61},{1,2,61},{1,2,85},{2,0,164},{0,2,106},{0,2,106},{1,1,65},{3,0,164},{1,1,65},{3,0,208},{1,3,130},{1,2,74},{0,2,74},{3,0,208},{3,1,208},{0,2,74},{0,1,778},{3,1,208},{0,1,778},{1,0,232},{1,0,232},{1,0,232},{1,0,232},{1,1,196}, +{1,1,196},{1,1,196},{1,1,212},{0,1,82},{0,1,82},{1,4,440},{1,3,234},{1,2,402},{1,2,234},{1,3,767},{1,2,503},{1,2,63},{1,2,687},{0,3,1140},{0,2,396},{1,4,404},{1,3,198},{1,2,366},{1,2,198},{1,3,731},{0,3,224},{1,2,27},{0,2,387},{1,2,731},{0,2,387},{1,3,233},{1,3,233},{1,3,233},{1,2,233},{1,2,230},{1,2,62},{1,2,62}, +{1,1,277},{0,2,203},{1,1,385},{1,3,197},{1,3,197},{1,3,197},{1,2,197},{0,4,162},{1,2,26},{1,2,26},{1,1,241},{2,1,162},{1,1,241},{2,2,208},{1,3,2},{2,2,65},{1,2,2},{2,2,208},{5,0,208},{1,2,2},{0,2,362},{5,0,208},{0,2,362},{1,0,232},{1,0,232},{1,0,232},{1,0,232},{1,2,61},{1,2,61},{1,2,61},{1,1,52},{0,2,34}, 
+{0,2,34},{1,5,810},{1,3,702},{1,2,1122},{1,2,738},{1,4,748},{1,3,281},{1,2,225},{1,2,273},{0,3,780},{0,2,252},{2,3,329},{2,3,281},{2,2,222},{2,2,230},{2,2,731},{1,3,272},{1,2,216},{0,2,216},{5,0,731},{0,2,216},{1,4,547},{1,4,547},{1,4,547},{1,2,638},{1,3,173},{1,2,125},{1,2,125},{1,2,173},{0,3,339},{0,2,152},{2,2,122}, +{2,2,122},{2,2,122},{2,2,130},{1,3,164},{1,2,116},{1,2,116},{0,2,116},{1,2,164},{0,2,116},{2,3,208},{2,3,160},{2,2,101},{2,2,109},{2,3,208},{6,0,208},{2,2,109},{0,2,200},{6,0,208},{0,2,200},{1,0,538},{1,0,538},{1,0,538},{1,0,538},{1,2,25},{1,2,25},{1,2,25},{1,2,73},{0,2,52},{0,2,52},{2,3,810},{2,3,570},{2,3,810}, +{2,2,887},{1,4,940},{1,3,393},{1,3,393},{1,2,449},{0,3,1004},{1,2,392},{2,3,281},{2,3,41},{2,3,281},{2,2,358},{3,1,731},{1,3,272},{1,3,272},{1,2,328},{4,1,731},{1,2,328},{2,3,554},{2,3,554},{2,3,554},{2,2,563},{1,4,315},{1,3,137},{1,3,137},{1,2,125},{0,3,163},{1,2,68},{2,3,25},{2,3,25},{2,3,25},{2,2,34},{2,2,162}, +{1,3,16},{1,3,16},{1,2,4},{5,0,162},{1,2,4},{4,0,208},{2,3,32},{2,3,272},{1,3,272},{4,0,208},{5,1,208},{1,3,272},{0,2,328},{5,1,208},{0,2,328},{2,0,538},{2,0,538},{2,0,538},{2,0,538},{1,3,121},{1,3,121},{1,3,121},{1,2,121},{1,2,64},{1,2,64},{2,4,458},{2,3,410},{2,3,330},{2,3,378},{2,3,1013},{2,3,629},{1,3,505}, +{2,2,962},{0,4,772},{1,2,776},{2,4,233},{2,3,185},{2,3,105},{2,3,153},{4,0,724},{1,4,323},{1,3,144},{2,2,737},{3,2,724},{2,2,737},{2,3,266},{2,3,266},{2,3,266},{2,3,314},{2,3,437},{2,2,395},{2,2,395},{2,2,286},{1,3,356},{1,2,100},{2,3,41},{2,3,41},{2,3,41},{2,3,89},{3,1,162},{1,3,80},{1,3,80},{2,2,61},{4,1,162}, +{2,2,61},{2,4,208},{2,3,160},{2,3,80},{1,3,80},{2,4,208},{7,0,208},{1,3,80},{0,2,712},{7,0,208},{0,2,712},{2,0,250},{2,0,250},{2,0,250},{2,0,250},{2,2,226},{2,2,226},{2,2,226},{2,2,250},{1,2,64},{1,2,64},{2,5,436},{2,4,222},{2,3,362},{2,3,218},{2,4,773},{2,3,485},{2,3,85},{2,3,773},{0,4,804},{1,3,452},{2,5,387}, 
+{2,4,173},{2,3,313},{2,3,169},{2,4,724},{1,4,211},{2,3,36},{1,3,436},{7,0,724},{1,3,436},{2,4,218},{2,4,218},{2,4,218},{2,3,218},{2,3,229},{2,3,85},{2,3,85},{2,2,254},{1,3,196},{2,2,374},{2,4,169},{2,4,169},{2,4,169},{2,3,169},{4,0,164},{2,3,36},{2,3,36},{2,2,205},{3,2,164},{2,2,205},{3,3,202},{2,4,4},{3,3,81}, +{2,3,0},{3,3,202},{6,1,202},{2,3,0},{0,3,400},{6,1,202},{0,3,400},{2,0,218},{2,0,218},{2,0,218},{2,0,218},{2,3,85},{2,3,85},{2,3,85},{2,2,58},{1,3,52},{1,3,52},{2,5,760},{2,4,618},{2,3,1010},{2,3,650},{2,5,760},{2,4,303},{2,3,175},{2,3,287},{1,4,788},{1,3,236},{3,4,362},{3,4,338},{3,3,225},{3,3,241},{3,3,724}, +{2,4,299},{2,3,171},{1,3,211},{6,1,724},{1,3,211},{2,5,504},{2,5,504},{2,5,504},{2,3,569},{2,4,166},{2,3,94},{2,3,94},{2,3,206},{0,4,363},{1,3,155},{3,3,144},{3,3,144},{3,3,144},{3,3,160},{2,4,162},{2,3,90},{2,3,90},{1,3,130},{7,0,162},{1,3,130},{4,2,208},{2,4,130},{3,3,81},{3,3,97},{4,2,208},{5,2,208},{3,3,97}, +{0,3,202},{5,2,208},{0,3,202},{2,0,488},{2,0,488},{2,0,488},{2,0,488},{2,3,13},{2,3,13},{2,3,13},{2,3,85},{1,3,34},{1,3,34},{3,4,842},{3,4,626},{3,4,906},{3,3,897},{2,5,888},{2,4,351},{2,4,415},{2,3,399},{0,5,788},{2,3,380},{3,4,266},{3,4,50},{3,4,330},{3,3,321},{4,2,724},{2,4,251},{2,4,315},{2,3,299},{5,2,724}, +{2,3,299},{3,4,617},{3,4,617},{3,4,617},{3,3,608},{2,5,312},{2,4,126},{2,4,126},{2,3,110},{1,4,164},{2,3,91},{3,4,41},{3,4,41},{3,4,41},{3,3,32},{3,3,164},{2,4,26},{2,4,26},{2,3,10},{6,1,164},{2,3,10},{5,1,202},{3,4,34},{3,4,314},{3,3,305},{5,1,202},{4,3,202},{3,3,305},{0,3,298},{4,3,202},{0,3,298},{3,0,592}, +{3,0,592},{3,0,592},{3,0,592},{2,4,101},{2,4,101},{2,4,101},{2,3,101},{2,3,82},{2,3,82},{3,5,474},{3,4,402},{3,4,362},{3,4,434},{3,4,1019},{3,4,683},{2,4,463},{3,3,942},{1,5,804},{2,3,700},{3,5,218},{3,4,146},{3,4,106},{3,4,178},{5,1,723},{2,5,364},{2,4,139},{3,3,686},{4,3,723},{3,3,686},{3,4,281},{3,4,281},{3,4,281}, 
+{3,4,353},{3,4,490},{2,4,382},{2,4,382},{3,3,317},{1,4,324},{2,3,75},{3,4,25},{3,4,25},{3,4,25},{3,4,97},{4,2,164},{2,4,58},{2,4,58},{3,3,61},{5,2,164},{3,3,61},{6,0,202},{3,4,130},{3,4,90},{2,4,90},{6,0,202},{3,4,202},{2,4,90},{0,3,650},{3,4,202},{0,3,650},{3,0,272},{3,0,272},{3,0,272},{3,0,272},{3,3,260}, +{3,3,260},{3,3,260},{3,3,292},{2,3,50},{2,3,50},{3,6,440},{3,5,218},{3,4,330},{3,4,210},{3,5,787},{3,4,475},{3,4,115},{3,4,867},{1,5,772},{2,4,516},{3,6,376},{3,5,154},{3,4,266},{3,4,146},{6,0,723},{2,5,204},{3,4,51},{1,4,478},{3,4,723},{1,4,478},{3,5,209},{3,5,209},{3,5,209},{3,4,209},{3,4,234},{3,4,114},{3,4,114}, +{3,3,237},{2,4,195},{3,3,369},{3,5,145},{3,5,145},{3,5,145},{3,4,145},{5,1,170},{3,4,50},{3,4,50},{3,3,173},{7,1,170},{3,3,173},{4,4,200},{3,5,10},{4,4,101},{3,4,2},{4,4,200},{7,2,200},{3,4,2},{0,4,442},{7,2,200},{0,4,442},{3,0,208},{3,0,208},{3,0,208},{3,0,208},{3,3,100},{3,3,100},{3,3,100},{3,3,68},{2,4,74}, +{2,4,74},{3,6,692},{3,5,542},{3,4,906},{3,4,570},{3,6,780},{3,5,333},{3,4,133},{3,4,309},{2,5,804},{2,4,228},{4,5,401},{4,4,395},{4,4,234},{4,4,258},{4,4,723},{3,5,332},{3,4,132},{2,4,212},{7,2,723},{2,4,212},{3,6,467},{3,6,467},{3,6,467},{3,4,506},{3,5,165},{3,4,69},{3,4,69},{3,4,245},{1,5,324},{2,4,164},{4,4,170}, +{4,4,170},{4,4,170},{4,4,194},{6,0,164},{3,4,68},{3,4,68},{2,4,148},{3,4,164},{2,4,148},{5,3,202},{3,5,100},{4,4,65},{4,4,89},{5,3,202},{6,3,202},{4,4,89},{0,4,208},{6,3,202},{0,4,208},{3,0,442},{3,0,442},{3,0,442},{3,0,442},{3,4,5},{3,4,5},{3,4,5},{3,4,101},{2,4,20},{2,4,20},{4,5,882},{4,5,690},{4,4,955}, +{4,4,915},{3,6,844},{3,5,317},{3,5,445},{3,4,357},{1,6,772},{3,4,376},{4,5,257},{4,5,65},{4,4,330},{4,4,290},{5,3,723},{3,5,236},{3,5,364},{3,4,276},{6,3,723},{3,4,276},{4,5,686},{4,5,686},{4,5,686},{4,4,659},{3,6,315},{3,5,121},{3,5,121},{3,4,101},{2,5,171},{3,4,120},{4,5,61},{4,5,61},{4,5,61},{4,4,34},{5,2,170}, 
+{3,5,40},{3,5,40},{3,4,20},{7,2,170},{3,4,20},{6,2,200},{4,5,40},{4,4,305},{4,4,265},{6,2,200},{5,4,200},{4,4,265},{0,4,272},{5,4,200},{0,4,272},{4,0,650},{4,0,650},{4,0,650},{4,0,650},{3,5,85},{3,5,85},{3,5,85},{3,4,85},{3,4,104},{3,4,104},{4,6,498},{4,5,402},{4,5,402},{4,5,498},{3,7,1017},{3,6,700},{3,5,429}, +{3,4,917},{2,6,844},{3,4,632},{4,6,209},{4,5,113},{4,5,113},{4,5,209},{6,2,728},{3,6,411},{3,5,140},{3,4,628},{5,4,728},{3,4,628},{4,5,302},{4,5,302},{4,5,302},{4,5,398},{3,6,459},{3,5,329},{3,5,329},{3,4,341},{2,5,283},{3,4,56},{4,5,13},{4,5,13},{4,5,13},{4,5,109},{6,1,170},{3,5,40},{3,5,40},{3,4,52},{6,3,170}, +{3,4,52},{7,1,200},{4,5,104},{4,5,104},{3,5,104},{7,1,200},{4,5,200},{3,5,104},{0,4,592},{4,5,200},{0,4,592},{4,0,298},{4,0,298},{4,0,298},{4,0,298},{4,4,298},{4,4,298},{4,4,298},{3,4,325},{3,4,40},{3,4,40},{4,7,452},{4,6,222},{4,5,306},{4,5,210},{4,6,809},{4,5,473},{4,5,153},{4,5,969},{2,6,748},{3,5,588},{4,7,371}, +{4,6,141},{4,5,225},{4,5,129},{7,1,728},{3,6,203},{4,5,72},{2,5,513},{4,5,728},{2,5,513},{4,6,206},{4,6,206},{4,6,206},{4,5,206},{4,5,245},{4,5,149},{4,5,149},{4,4,226},{3,5,200},{4,4,370},{4,6,125},{4,6,125},{4,6,125},{4,5,125},{7,0,164},{4,5,68},{4,5,68},{4,4,145},{3,5,164},{4,4,145},{5,5,202},{4,6,20},{4,5,104}, +{4,5,8},{5,5,202},{3,6,202},{4,5,8},{0,5,488},{3,6,202},{0,5,488},{4,0,202},{4,0,202},{4,0,202},{4,0,202},{4,4,106},{4,4,106},{4,4,106},{4,4,82},{3,5,100},{3,5,100},{4,7,632},{4,6,474},{4,5,810},{4,5,498},{4,7,808},{4,6,371},{4,5,99},{4,5,339},{3,6,828},{3,5,228},{5,6,446},{5,5,396},{5,5,249},{5,5,281},{5,5,728}, +{4,6,371},{4,5,99},{3,5,219},{3,6,728},{3,5,219},{4,7,436},{4,7,436},{4,7,436},{4,5,449},{4,6,170},{4,5,50},{4,5,50},{4,5,290},{2,6,291},{3,5,179},{5,5,200},{5,5,200},{5,5,200},{5,5,232},{7,1,170},{4,5,50},{4,5,50},{3,5,170},{7,3,170},{3,5,170},{6,4,200},{4,6,74},{5,5,53},{5,5,85},{6,4,200},{7,4,200},{5,5,85}, 
+{0,5,218},{7,4,200},{0,5,218},{4,0,400},{4,0,400},{4,0,400},{4,0,400},{4,5,1},{4,5,1},{4,5,1},{4,4,100},{3,5,10},{3,5,10},{5,6,930},{5,6,762},{5,5,973},{5,5,941},{4,7,808},{4,6,291},{4,6,483},{4,5,323},{2,7,764},{4,5,380},{5,6,254},{5,6,86},{5,5,297},{5,5,265},{6,4,728},{4,6,227},{5,5,373},{4,5,259},{7,4,728}, +{4,5,259},{5,5,748},{5,5,748},{5,5,748},{5,5,716},{4,6,298},{4,6,122},{4,6,122},{4,5,98},{3,6,184},{4,5,155},{5,5,72},{5,5,72},{5,5,72},{5,5,40},{6,3,164},{4,6,58},{4,6,58},{4,5,34},{6,4,164},{4,5,34},{7,3,202},{5,6,50},{5,5,261},{5,5,229},{7,3,202},{6,5,202},{5,5,229},{0,5,250},{6,5,202},{0,5,250},{5,0,712}, +{5,0,712},{5,0,712},{5,0,712},{4,6,73},{4,6,73},{4,6,73},{4,5,73},{4,5,130},{4,5,130},{5,7,530},{5,6,410},{5,6,450},{5,6,570},{5,6,1055},{4,7,720},{4,6,403},{4,5,819},{3,7,892},{4,5,572},{5,7,206},{5,6,86},{5,6,126},{5,6,246},{5,6,731},{3,7,387},{4,6,147},{4,5,563},{4,6,731},{4,5,563},{5,6,329},{5,6,329},{5,6,329}, +{5,5,428},{4,7,420},{4,6,282},{4,6,282},{4,5,290},{3,6,248},{4,5,43},{5,6,5},{5,6,5},{5,6,5},{5,5,104},{7,2,164},{4,6,26},{4,6,26},{4,5,34},{5,5,164},{4,5,34},{5,7,202},{5,6,82},{5,6,122},{4,6,122},{5,7,202},{5,6,202},{4,6,122},{0,5,538},{5,6,202},{0,5,538},{5,0,328},{5,0,328},{5,0,328},{5,0,328},{4,6,281}, +{4,6,281},{4,6,281},{4,5,281},{4,5,34},{4,5,34},{5,7,498},{5,7,234},{5,6,290},{5,6,218},{5,7,839},{5,6,479},{5,6,199},{5,6,1079},{3,7,732},{4,6,668},{5,7,398},{5,7,134},{5,6,190},{5,6,118},{6,5,731},{4,7,208},{5,6,99},{3,6,554},{3,7,731},{3,6,554},{5,7,209},{5,7,209},{5,7,209},{5,6,209},{5,6,262},{5,6,190},{5,6,190}, +{5,5,221},{4,6,211},{4,5,315},{5,7,109},{5,7,109},{5,7,109},{5,6,109},{5,6,162},{5,6,90},{5,6,90},{5,5,121},{4,6,162},{5,5,121},{7,4,208},{5,7,34},{5,6,90},{5,6,18},{7,4,208},{7,5,208},{5,6,18},{0,6,538},{7,5,208},{0,6,538},{5,0,200},{5,0,200},{5,0,200},{5,0,200},{5,5,116},{5,5,116},{5,5,116},{5,5,100},{4,6,130}, 
+{4,6,130},{5,7,1074},{5,7,414},{5,6,722},{5,6,434},{5,7,857},{5,7,417},{5,6,73},{5,6,377},{4,7,860},{4,6,236},{6,7,497},{6,6,403},{6,6,270},{6,6,310},{7,4,731},{4,7,379},{5,6,72},{4,6,232},{7,5,731},{4,6,232},{5,7,398},{5,7,398},{5,7,398},{5,6,398},{5,7,181},{5,6,37},{5,6,37},{5,6,341},{3,7,264},{4,6,200},{6,6,234}, +{6,6,234},{6,6,234},{6,6,274},{6,5,164},{5,6,36},{5,6,36},{4,6,196},{3,7,164},{4,6,196},{7,5,202},{5,7,52},{6,6,45},{5,6,72},{7,5,202},{6,6,218},{5,6,72},{0,6,232},{6,6,218},{0,6,232},{5,0,362},{5,0,362},{5,0,362},{5,0,362},{5,6,1},{5,6,1},{5,6,1},{5,5,82},{4,6,4},{4,6,4},{6,7,986},{6,7,842},{6,6,999}, +{6,6,975},{5,7,1417},{5,7,273},{5,6,505},{5,6,297},{4,7,828},{5,6,392},{6,7,257},{6,7,113},{6,6,270},{6,6,246},{7,5,739},{5,7,224},{6,6,366},{5,6,248},{6,6,731},{5,6,248},{6,6,803},{6,6,803},{6,6,803},{6,6,779},{5,7,261},{5,7,129},{5,7,129},{5,6,101},{4,7,203},{5,6,196},{6,6,74},{6,6,74},{6,6,74},{6,6,50},{7,4,162}, +{5,7,80},{5,7,80},{5,6,52},{7,5,162},{5,6,52},{6,7,208},{6,7,64},{6,6,221},{6,6,197},{6,7,208},{7,6,208},{6,6,197},{0,6,232},{7,6,208},{0,6,232},{5,0,778},{5,0,778},{5,0,778},{5,0,778},{5,7,65},{5,7,65},{5,7,65},{5,6,65},{5,6,160},{5,6,160},{6,7,762},{6,7,426},{6,7,506},{6,7,650},{6,7,1085},{5,7,641},{5,7,385}, +{5,6,729},{5,7,980},{5,6,520},{6,7,401},{6,7,65},{6,7,145},{6,7,289},{6,7,724},{5,7,416},{5,7,160},{5,6,504},{5,7,724},{5,6,504},{6,7,362},{6,7,362},{6,7,362},{6,6,443},{6,6,555},{5,7,241},{5,7,241},{5,6,245},{4,7,219},{5,6,36},{6,7,1},{6,7,1},{6,7,1},{6,6,82},{7,5,194},{5,7,16},{5,7,16},{5,6,20},{6,6,162}, +{5,6,20},{7,6,208},{6,7,64},{6,7,144},{5,7,144},{7,6,208},{6,7,208},{5,7,144},{0,6,488},{6,7,208},{0,6,488},{6,0,362},{6,0,362},{6,0,362},{6,0,362},{5,7,241},{5,7,241},{5,7,241},{5,6,241},{5,6,32},{5,6,32},{6,7,1050},{6,7,522},{6,7,282},{6,7,234},{6,7,1069},{6,7,493},{6,7,253},{6,6,1122},{5,7,1012},{5,7,756},{7,7,843}, 
+{6,7,401},{6,7,161},{6,7,113},{7,6,724},{6,7,372},{6,7,132},{4,7,601},{6,7,756},{4,7,601},{6,7,266},{6,7,266},{6,7,266},{6,7,218},{6,7,285},{6,7,237},{6,7,237},{6,6,222},{5,7,228},{5,6,260},{6,7,145},{6,7,145},{6,7,145},{6,7,97},{6,7,164},{6,7,116},{6,7,116},{6,6,101},{5,7,164},{6,6,101},{7,7,218},{7,7,178},{6,7,80}, +{6,7,32},{7,7,218},{6,7,272},{6,7,32},{0,7,592},{6,7,272},{0,7,592},{6,0,202},{6,0,202},{6,0,202},{6,0,202},{6,6,130},{6,6,130},{6,6,130},{6,6,122},{5,6,160},{5,6,160},{6,7,1641},{6,7,1017},{6,7,617},{6,7,353},{6,7,1318},{6,7,430},{6,7,30},{6,7,398},{6,7,1035},{5,7,227},{7,7,393},{7,7,321},{7,7,272},{7,7,320},{7,7,621}, +{6,7,426},{6,7,26},{5,7,226},{6,7,594},{5,7,226},{6,7,617},{6,7,617},{6,7,617},{6,7,353},{6,7,294},{6,7,30},{6,7,30},{6,7,398},{5,7,291},{5,7,227},{7,7,272},{7,7,272},{7,7,272},{7,7,320},{7,6,162},{6,7,26},{6,7,26},{5,7,226},{7,6,194},{5,7,226},{7,7,137},{7,7,65},{7,7,16},{6,7,25},{7,7,137},{7,7,113},{6,7,25}, +{0,7,225},{7,7,113},{0,7,225},{6,0,328},{6,0,328},{6,0,328},{6,0,328},{6,7,5},{6,7,5},{6,7,5},{6,6,68},{5,7,2},{5,7,2},{7,7,985},{7,7,913},{7,7,864},{7,7,848},{7,7,1117},{6,7,654},{6,7,254},{6,7,110},{6,7,763},{5,7,179},{7,7,201},{7,7,129},{7,7,80},{7,7,64},{7,7,333},{7,7,245},{7,7,196},{6,7,74},{7,7,373}, +{6,7,74},{7,7,864},{7,7,864},{7,7,864},{7,7,848},{6,7,710},{6,7,254},{6,7,254},{6,7,110},{6,7,363},{5,7,179},{7,7,80},{7,7,80},{7,7,80},{7,7,64},{7,7,212},{7,7,196},{7,7,196},{6,7,74},{6,7,194},{6,7,74},{7,7,137},{7,7,65},{7,7,16},{7,7,0},{7,7,137},{7,7,49},{7,7,0},{0,7,49},{7,7,49},{0,7,49},{6,0,712}, +{6,0,712},{6,0,712},{6,0,712},{6,7,85},{6,7,85},{6,7,85},{6,7,61},{5,7,130},{5,7,130},{7,7,642},{7,7,570},{7,7,521},{7,7,449},{7,7,678},{7,7,534},{7,7,485},{6,7,205},{6,7,834},{6,7,34},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,278},{7,7,134},{7,7,85},{6,7,9},{7,7,198},{6,7,9},{7,7,521},{7,7,521},{7,7,521}, 
+{7,7,449},{7,7,557},{7,7,485},{7,7,485},{6,7,205},{6,7,434},{6,7,34},{7,7,121},{7,7,121},{7,7,121},{7,7,49},{7,7,157},{7,7,85},{7,7,85},{6,7,9},{7,7,149},{6,7,9},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{7,0,400},{7,0,400},{7,0,400},{7,0,400},{6,7,421}, +{6,7,421},{6,7,421},{6,7,205},{6,7,34},{6,7,34},{7,7,450},{7,7,378},{7,7,329},{7,7,257},{7,7,390},{7,7,246},{7,7,197},{7,7,148},{7,7,426},{6,7,130},{7,7,306},{7,7,234},{7,7,185},{7,7,113},{7,7,246},{7,7,102},{7,7,53},{7,7,4},{7,7,102},{7,7,4},{7,7,329},{7,7,329},{7,7,329},{7,7,257},{7,7,269},{7,7,197},{7,7,197}, +{7,7,148},{7,7,377},{6,7,130},{7,7,185},{7,7,185},{7,7,185},{7,7,113},{7,7,125},{7,7,53},{7,7,53},{7,7,4},{7,7,53},{7,7,4},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{7,0,208},{7,0,208},{7,0,208},{7,0,208},{7,7,148},{7,7,148},{7,7,148},{7,7,148},{6,7,130}, +{6,7,130},{0,2,445},{0,1,157},{0,1,117},{0,1,405},{0,1,926},{0,1,806},{0,0,670},{0,0,741},{0,0,1169},{0,0,777},{0,2,445},{0,1,157},{0,1,117},{0,1,405},{0,1,926},{0,1,806},{0,0,670},{0,0,741},{1,0,990},{0,0,741},{0,1,36},{0,1,36},{0,1,36},{0,0,9},{0,0,85},{0,0,45},{0,0,45},{0,0,116},{0,0,145},{0,0,152},{0,1,36}, +{0,1,36},{0,1,36},{0,0,9},{0,0,85},{0,0,45},{0,0,45},{0,0,116},{0,0,109},{0,0,116},{1,0,421},{0,1,157},{0,1,117},{0,1,405},{1,0,421},{0,1,445},{0,1,405},{0,0,641},{0,1,445},{0,0,641},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,2,461},{0,2,109},{0,1,5}, +{0,1,101},{0,1,1326},{0,1,822},{0,1,462},{0,0,1205},{0,1,1783},{0,0,1241},{0,2,461},{0,2,109},{0,1,5},{0,1,101},{1,0,1294},{0,1,822},{0,1,462},{0,0,1205},{1,0,1262},{0,0,1205},{0,1,4},{0,1,4},{0,1,4},{0,1,100},{0,0,261},{0,0,157},{0,0,157},{0,0,116},{0,0,257},{0,0,152},{0,1,4},{0,1,4},{0,1,4},{0,1,100},{0,0,261}, 
+{0,0,157},{0,0,157},{0,0,116},{0,0,221},{0,0,116},{1,1,461},{0,2,109},{0,1,5},{0,1,101},{1,1,461},{2,0,421},{0,1,101},{0,1,901},{2,0,421},{0,1,901},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,3,430},{0,2,38},{0,1,158},{0,1,62},{0,2,1517},{0,1,989},{0,1,309}, +{0,1,1317},{0,1,1878},{0,1,1678},{0,3,430},{0,2,38},{0,1,158},{0,1,62},{0,2,1517},{0,1,989},{0,1,309},{0,1,1317},{0,1,1517},{0,1,1317},{0,2,13},{0,2,13},{0,2,13},{0,1,13},{0,1,356},{0,1,260},{0,1,260},{0,0,193},{0,0,446},{0,0,229},{0,2,13},{0,2,13},{0,2,13},{0,1,13},{0,1,356},{0,1,260},{0,1,260},{0,0,193},{0,0,410}, +{0,0,193},{0,3,421},{0,2,29},{0,1,149},{0,1,53},{0,3,421},{1,1,421},{0,1,53},{0,1,533},{1,1,421},{0,1,533},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,9},{0,0,49},{0,0,85},{0,0,85},{0,4,602},{0,3,234},{0,2,518},{0,1,382},{0,3,1622},{0,2,825},{0,1,325},{0,1,821},{0,1,2022},{0,1,1182},{0,4,602}, +{0,3,234},{0,2,518},{0,1,382},{1,1,1526},{0,2,825},{0,1,325},{0,1,821},{2,0,1526},{0,1,821},{0,2,157},{0,2,157},{0,2,157},{0,1,157},{0,1,388},{0,1,100},{0,1,100},{0,0,401},{0,0,766},{0,0,437},{0,2,157},{0,2,157},{0,2,157},{0,1,157},{1,0,356},{0,1,100},{0,1,100},{0,0,401},{1,0,340},{0,0,401},{1,2,425},{0,3,113},{1,1,234}, +{0,1,261},{1,2,425},{0,2,425},{0,1,261},{0,1,421},{0,2,425},{0,1,421},{0,0,121},{0,0,121},{0,0,121},{0,0,121},{0,0,25},{0,0,25},{0,0,25},{0,0,1},{0,0,37},{0,0,37},{0,4,845},{0,3,405},{0,2,725},{0,2,549},{0,3,1541},{0,2,654},{0,2,270},{0,1,722},{0,2,2583},{0,1,1083},{1,3,657},{1,2,345},{1,2,345},{0,2,549},{2,0,1517}, +{0,2,654},{0,2,270},{0,1,722},{3,0,1517},{0,1,722},{0,3,404},{0,3,404},{0,3,404},{0,2,449},{0,2,346},{0,2,170},{0,2,170},{0,1,146},{0,1,707},{0,1,507},{1,1,232},{1,1,232},{1,1,232},{1,1,200},{0,2,346},{0,2,170},{0,2,170},{0,1,146},{0,1,346},{0,1,146},{2,1,421},{0,3,5},{1,2,149},{0,2,149},{2,1,421},{4,0,421},{0,2,149}, 
+{0,1,601},{4,0,421},{0,1,601},{0,0,400},{0,0,400},{0,0,400},{0,0,400},{0,1,1},{0,1,1},{0,1,1},{0,0,100},{0,0,136},{0,0,136},{0,5,1209},{0,4,937},{1,2,1197},{0,2,789},{0,4,1526},{0,3,737},{0,2,14},{0,1,1042},{0,2,2487},{0,1,1403},{1,3,481},{1,3,173},{1,2,41},{1,2,161},{1,2,1526},{0,3,737},{0,2,14},{0,1,1042},{2,1,1526}, +{0,1,1042},{0,4,793},{0,4,793},{0,4,793},{0,2,785},{0,3,356},{0,2,10},{0,2,10},{0,1,18},{0,1,899},{0,1,379},{1,2,37},{1,2,37},{1,2,37},{1,1,136},{1,1,340},{0,2,10},{0,2,10},{0,1,18},{1,1,356},{0,1,18},{3,0,425},{1,3,137},{1,2,5},{0,2,5},{3,0,425},{3,1,425},{0,2,5},{0,2,965},{3,1,425},{0,2,965},{0,0,784}, +{0,0,784},{0,0,784},{0,0,784},{0,2,9},{0,2,9},{0,2,9},{0,1,9},{0,0,360},{0,0,360},{1,4,1158},{1,3,758},{1,2,850},{1,2,778},{0,4,1671},{0,3,546},{0,2,191},{0,2,903},{0,2,2390},{0,2,1430},{1,4,429},{1,3,29},{1,2,121},{1,2,49},{2,1,1526},{0,3,497},{0,2,142},{0,2,854},{4,0,1526},{0,2,854},{1,3,742},{1,3,742},{1,3,742}, +{1,2,742},{0,3,437},{0,2,155},{0,2,155},{0,1,195},{0,2,946},{0,1,290},{1,3,13},{1,3,13},{1,3,13},{1,2,13},{2,0,340},{0,2,106},{0,2,106},{0,1,146},{3,0,340},{0,1,146},{1,4,425},{1,3,25},{1,2,117},{1,2,45},{1,4,425},{2,2,425},{1,2,45},{0,2,565},{2,2,425},{0,2,565},{1,0,733},{1,0,733},{1,0,733},{1,0,733},{0,2,74}, +{0,2,74},{0,2,74},{0,1,74},{0,1,169},{0,1,169},{1,4,966},{1,4,606},{1,3,886},{1,2,682},{0,5,1742},{0,3,866},{0,3,641},{0,2,727},{0,3,2382},{0,2,758},{1,4,605},{1,4,245},{1,3,525},{1,2,321},{3,0,1517},{0,3,641},{1,2,302},{0,2,502},{3,1,1517},{0,2,502},{1,3,486},{1,3,486},{1,3,486},{1,2,486},{0,4,563},{0,3,241},{0,3,241}, +{0,2,531},{0,2,626},{0,1,546},{1,3,125},{1,3,125},{1,3,125},{1,2,125},{0,4,338},{0,3,16},{0,3,16},{0,2,306},{2,1,338},{0,2,306},{3,1,433},{1,4,145},{2,2,212},{1,2,221},{3,1,433},{6,0,433},{1,2,221},{0,2,421},{6,0,433},{0,2,421},{1,0,461},{1,0,461},{1,0,461},{1,0,461},{0,3,225},{0,3,225},{0,3,225},{0,2,306},{0,1,185}, 
+{0,1,185},{1,5,894},{1,4,462},{1,3,778},{1,3,646},{1,4,1626},{1,3,749},{1,3,429},{1,2,809},{0,3,2022},{0,2,614},{2,4,706},{1,4,362},{2,3,410},{1,3,546},{3,1,1526},{0,4,441},{0,3,227},{0,2,565},{4,1,1526},{0,2,565},{1,4,462},{1,4,462},{1,4,462},{1,3,525},{1,3,440},{1,2,296},{1,2,296},{1,2,280},{0,2,725},{0,2,85},{2,2,250}, +{2,2,250},{2,2,250},{2,2,226},{1,3,340},{0,3,106},{0,3,106},{0,2,36},{1,2,340},{0,2,36},{3,2,425},{1,4,1},{2,3,185},{0,3,146},{3,2,425},{5,1,425},{0,3,146},{0,2,565},{5,1,425},{0,2,565},{1,0,461},{1,0,461},{1,0,461},{1,0,461},{1,2,100},{1,2,100},{1,2,100},{1,1,181},{0,2,49},{0,2,49},{1,6,1166},{1,4,878},{1,3,1226}, +{1,3,742},{1,5,1545},{1,4,798},{1,3,29},{1,2,985},{0,3,2246},{0,2,1030},{2,4,482},{2,4,218},{2,3,58},{2,3,202},{2,3,1517},{0,4,521},{1,3,25},{1,2,981},{6,0,1517},{1,2,981},{1,5,749},{1,5,749},{1,5,749},{1,3,733},{1,4,374},{1,3,20},{1,3,20},{1,2,24},{0,3,482},{0,2,69},{2,3,49},{2,3,49},{2,3,49},{2,2,130},{2,2,338}, +{1,3,16},{1,3,16},{1,2,20},{5,0,338},{1,2,20},{4,1,433},{1,4,145},{2,3,9},{1,3,9},{4,1,433},{7,0,433},{1,3,9},{0,2,965},{7,0,433},{0,2,965},{1,0,733},{1,0,733},{1,0,733},{1,0,733},{1,3,20},{1,3,20},{1,3,20},{1,2,20},{0,2,65},{0,2,65},{2,5,1218},{2,4,810},{2,3,874},{2,3,826},{1,5,1625},{1,4,542},{1,3,141}, +{1,3,961},{0,4,1806},{0,3,642},{2,5,434},{2,4,26},{2,3,90},{2,3,42},{3,2,1517},{1,4,506},{1,3,105},{0,3,626},{5,1,1517},{0,3,626},{2,4,801},{2,4,801},{2,4,801},{2,3,801},{1,4,406},{1,3,116},{1,3,116},{1,2,152},{0,3,482},{1,2,285},{2,4,17},{2,4,17},{2,4,17},{2,3,17},{3,1,338},{1,3,80},{1,3,80},{1,2,116},{4,1,338}, +{1,2,116},{3,3,425},{2,4,25},{2,3,89},{2,3,41},{3,3,425},{6,1,425},{2,3,41},{0,3,601},{6,1,425},{0,3,601},{2,0,785},{2,0,785},{2,0,785},{2,0,785},{1,3,52},{1,3,52},{1,3,52},{1,2,52},{0,3,41},{0,3,41},{2,5,962},{2,4,618},{2,3,906},{2,3,666},{1,6,1710},{1,4,798},{1,4,663},{1,3,721},{0,4,1838},{0,3,450},{2,5,562}, 
+{2,4,218},{2,3,506},{2,3,266},{4,1,1514},{0,5,474},{2,3,285},{0,3,434},{4,2,1514},{0,3,434},{2,4,497},{2,4,497},{2,4,497},{2,3,497},{1,5,536},{1,4,222},{1,4,222},{1,3,552},{0,4,469},{0,3,281},{2,4,97},{2,4,97},{2,4,97},{2,3,97},{4,0,340},{1,4,26},{1,4,26},{0,3,265},{3,2,340},{0,3,265},{4,2,425},{2,4,137},{3,3,194}, +{2,3,185},{4,2,425},{5,2,425},{2,3,185},{0,3,425},{5,2,425},{0,3,425},{2,0,481},{2,0,481},{2,0,481},{2,0,481},{1,4,197},{1,4,197},{1,4,197},{1,3,296},{0,3,25},{0,3,25},{2,6,870},{2,5,446},{2,4,758},{2,4,670},{2,5,1638},{2,4,771},{2,4,515},{2,3,823},{0,5,1710},{1,3,598},{3,4,737},{2,5,325},{3,4,481},{2,4,549},{5,0,1517}, +{1,5,458},{1,4,224},{1,3,534},{3,3,1517},{1,3,534},{2,5,445},{2,5,445},{2,5,445},{2,4,526},{2,4,459},{2,3,291},{2,3,291},{2,3,339},{0,4,370},{1,3,114},{3,3,272},{3,3,272},{3,3,272},{3,3,256},{2,4,338},{1,4,80},{1,4,80},{1,3,50},{7,0,338},{1,3,50},{5,1,425},{2,5,1},{3,4,225},{1,4,160},{5,1,425},{4,3,425},{1,4,160}, +{0,3,533},{4,3,425},{0,3,533},{2,0,445},{2,0,445},{2,0,445},{2,0,445},{2,3,122},{2,3,122},{2,3,122},{2,2,185},{1,3,65},{1,3,65},{2,7,1130},{2,5,798},{2,4,1142},{2,4,702},{2,6,1571},{2,4,819},{2,4,51},{2,3,935},{0,5,1614},{1,3,950},{3,5,489},{3,5,269},{3,4,81},{3,4,249},{3,4,1514},{1,5,490},{2,4,42},{2,3,926},{7,1,1514}, +{2,3,926},{2,6,710},{2,6,710},{2,6,710},{2,4,686},{2,5,397},{2,4,35},{2,4,35},{2,3,35},{1,4,509},{1,3,50},{3,4,65},{3,4,65},{3,4,65},{3,3,128},{3,3,340},{2,4,26},{2,4,26},{2,3,26},{6,1,340},{2,3,26},{6,0,425},{2,5,113},{3,4,17},{2,4,17},{6,0,425},{3,4,425},{2,4,17},{0,3,901},{3,4,425},{0,3,901},{2,0,685}, +{2,0,685},{2,0,685},{2,0,685},{2,4,34},{2,4,34},{2,4,34},{2,3,34},{1,3,49},{1,3,49},{3,6,1286},{3,5,870},{3,4,906},{3,4,882},{2,6,1587},{2,5,546},{2,4,99},{2,4,1027},{1,5,1838},{1,4,702},{3,6,445},{3,5,29},{3,4,65},{3,4,41},{4,3,1514},{2,5,521},{2,4,74},{1,4,677},{6,2,1514},{1,4,677},{3,5,866},{3,5,866},{3,5,866}, 
+{3,4,866},{2,5,381},{2,4,83},{2,4,83},{2,3,115},{0,5,349},{2,3,286},{3,5,25},{3,5,25},{3,5,25},{3,4,25},{4,2,340},{2,4,58},{2,4,58},{2,3,90},{5,2,340},{2,3,90},{4,4,421},{3,5,29},{3,4,65},{3,4,41},{4,4,421},{7,2,421},{3,4,41},{0,4,641},{7,2,421},{0,4,641},{3,0,841},{3,0,841},{3,0,841},{3,0,841},{2,4,34}, +{2,4,34},{2,4,34},{2,3,34},{1,4,61},{1,4,61},{3,6,966},{3,5,614},{3,4,874},{3,4,658},{2,7,1686},{2,5,738},{2,4,659},{2,4,723},{0,6,1518},{1,4,446},{3,6,525},{3,5,173},{3,4,433},{3,4,217},{5,2,1517},{1,6,457},{3,4,274},{1,4,437},{5,3,1517},{1,4,437},{3,5,514},{3,5,514},{3,5,514},{3,4,514},{2,6,515},{2,5,209},{2,5,209}, +{2,3,579},{1,5,510},{1,4,302},{3,5,73},{3,5,73},{3,5,73},{3,4,73},{5,1,346},{2,5,40},{2,5,40},{3,3,293},{7,1,346},{3,3,293},{5,3,421},{3,5,109},{4,4,180},{3,4,153},{5,3,421},{6,3,421},{3,4,153},{0,4,433},{6,3,421},{0,4,433},{3,0,505},{3,0,505},{3,0,505},{3,0,505},{2,5,173},{2,5,173},{2,5,173},{2,3,290},{1,4,13}, +{1,4,13},{3,7,854},{3,6,438},{3,5,746},{3,5,702},{3,6,1658},{3,5,801},{3,5,609},{3,4,845},{1,6,1758},{2,4,590},{3,7,710},{3,6,294},{4,5,558},{3,5,558},{6,1,1514},{2,6,481},{2,5,227},{2,4,509},{4,4,1514},{2,4,509},{3,6,434},{3,6,434},{3,6,434},{3,5,533},{3,5,484},{3,4,292},{3,4,292},{3,4,404},{1,5,357},{2,4,149},{3,6,290}, +{3,6,290},{3,6,290},{4,4,290},{6,0,340},{2,5,58},{2,5,58},{2,4,68},{3,4,340},{2,4,68},{6,2,421},{3,6,5},{4,5,269},{2,5,178},{6,2,421},{5,4,421},{2,5,178},{0,4,505},{5,4,421},{0,4,505},{3,0,433},{3,0,433},{3,0,433},{3,0,433},{3,4,148},{3,4,148},{3,4,148},{3,3,193},{2,4,85},{2,4,85},{3,7,1206},{3,6,726},{3,5,1066}, +{3,5,670},{3,7,1605},{3,5,785},{3,5,81},{3,4,893},{1,6,1598},{2,4,878},{4,6,502},{4,5,310},{4,5,110},{4,5,302},{7,0,1517},{2,6,465},{3,5,65},{2,4,877},{3,5,1517},{2,4,877},{3,7,677},{3,7,677},{3,7,677},{3,5,645},{3,6,426},{3,5,56},{3,5,56},{3,4,52},{2,5,542},{2,4,37},{4,5,85},{4,5,85},{4,5,85},{4,4,130},{5,2,346}, 
+{3,5,40},{3,5,40},{2,4,36},{7,2,346},{2,4,36},{7,1,421},{3,6,85},{4,5,29},{3,5,29},{7,1,421},{4,5,421},{3,5,29},{0,4,841},{4,5,421},{0,4,841},{3,0,641},{3,0,641},{3,0,641},{3,0,641},{3,4,52},{3,4,52},{3,4,52},{3,4,52},{2,4,37},{2,4,37},{4,7,1362},{4,6,938},{4,5,946},{4,5,946},{3,7,1557},{3,6,558},{3,5,65}, +{3,5,1101},{0,7,1662},{2,5,770},{4,7,462},{4,6,38},{4,5,46},{4,5,46},{5,4,1517},{3,6,542},{3,5,49},{1,5,721},{7,3,1517},{1,5,721},{4,5,937},{4,5,937},{4,5,937},{4,5,937},{3,6,362},{3,5,56},{3,5,56},{3,4,84},{1,6,350},{3,4,293},{4,5,37},{4,5,37},{4,5,37},{4,5,37},{6,1,346},{3,5,40},{3,5,40},{3,4,68},{6,3,346}, +{3,4,68},{5,5,421},{4,6,37},{4,5,45},{3,5,45},{5,5,421},{3,6,421},{3,5,45},{0,5,685},{3,6,421},{0,5,685},{4,0,901},{4,0,901},{4,0,901},{4,0,901},{3,5,20},{3,5,20},{3,5,20},{3,4,20},{2,5,85},{2,5,85},{4,7,978},{4,6,618},{4,5,850},{4,5,658},{3,7,2021},{3,6,686},{3,5,561},{3,5,733},{1,7,1530},{2,5,450},{4,7,494}, +{4,6,134},{4,5,366},{4,5,174},{6,3,1526},{2,7,446},{4,5,269},{2,5,446},{6,4,1526},{2,5,446},{4,6,537},{4,6,537},{4,6,537},{4,5,537},{3,7,500},{3,6,202},{3,6,202},{3,4,500},{1,6,510},{2,5,329},{4,6,53},{4,6,53},{4,6,53},{4,5,53},{7,0,340},{3,6,58},{3,6,58},{4,4,265},{3,5,340},{4,4,265},{6,4,421},{4,6,85},{5,5,170}, +{4,5,125},{6,4,421},{7,4,421},{4,5,125},{0,5,445},{7,4,421},{0,5,445},{4,0,533},{4,0,533},{4,0,533},{4,0,533},{3,6,153},{3,6,153},{3,6,153},{3,4,244},{2,5,5},{2,5,5},{4,7,1158},{4,7,438},{4,6,742},{4,6,742},{4,7,1686},{4,6,839},{3,6,677},{4,5,875},{1,7,1710},{3,5,590},{5,6,769},{4,7,269},{4,6,573},{4,6,573},{7,2,1517}, +{3,7,510},{3,6,236},{3,5,490},{5,5,1517},{3,5,490},{4,7,429},{4,7,429},{4,7,429},{4,5,546},{4,6,515},{4,5,299},{4,5,299},{4,5,475},{2,6,350},{3,5,190},{4,7,260},{4,7,260},{4,7,260},{5,5,328},{7,1,346},{3,6,40},{3,6,40},{3,5,90},{7,3,346},{3,5,90},{7,3,421},{4,7,13},{4,6,317},{3,6,200},{7,3,421},{6,5,421},{3,6,200}, 
+{0,5,481},{6,5,421},{0,5,481},{4,0,425},{4,0,425},{4,0,425},{4,0,425},{4,5,178},{4,5,178},{4,5,178},{4,4,205},{3,5,109},{3,5,109},{4,7,1862},{4,7,662},{4,6,998},{4,6,646},{4,7,1686},{4,6,759},{4,6,119},{4,5,859},{2,7,1590},{3,5,814},{5,7,521},{5,6,305},{5,6,145},{5,6,361},{5,6,1526},{3,7,446},{4,6,94},{3,5,810},{4,6,1526}, +{3,5,810},{4,7,637},{4,7,637},{4,7,637},{4,6,610},{4,6,435},{4,6,83},{4,6,83},{4,5,75},{1,7,565},{3,5,30},{5,6,109},{5,6,109},{5,6,109},{5,5,136},{6,3,340},{4,6,58},{4,6,58},{3,5,26},{6,4,340},{3,5,26},{5,7,421},{4,7,61},{5,6,45},{4,6,45},{5,7,421},{5,6,421},{4,6,45},{0,5,785},{5,6,421},{0,5,785},{4,0,601}, +{4,0,601},{4,0,601},{4,0,601},{4,5,50},{4,5,50},{4,5,50},{4,5,74},{3,5,29},{3,5,29},{5,7,1498},{5,7,1014},{5,6,994},{5,6,1018},{4,7,2198},{4,7,578},{4,6,39},{4,6,1183},{2,7,1878},{3,6,846},{5,7,537},{5,7,53},{5,6,33},{5,6,57},{7,3,1526},{4,7,569},{4,6,30},{2,6,758},{6,5,1526},{2,6,758},{5,6,990},{5,6,990},{5,6,990}, +{5,6,1014},{4,7,349},{4,6,35},{4,6,35},{4,5,59},{2,7,357},{3,5,254},{5,6,29},{5,6,29},{5,6,29},{5,6,53},{7,2,340},{4,6,26},{4,6,26},{4,5,50},{5,5,340},{4,5,50},{6,6,425},{5,7,49},{5,6,29},{4,6,29},{6,6,425},{4,7,425},{4,6,29},{0,6,733},{4,7,425},{0,6,733},{5,0,965},{5,0,965},{5,0,965},{5,0,965},{4,6,10}, +{4,6,10},{4,6,10},{4,5,10},{3,6,113},{3,6,113},{5,7,1466},{5,7,630},{5,6,834},{5,6,666},{5,7,2055},{4,7,642},{4,6,471},{4,6,751},{3,7,1766},{3,6,462},{5,7,937},{5,7,101},{5,6,305},{5,6,137},{5,7,1526},{4,7,521},{5,6,270},{3,6,461},{5,6,1526},{3,6,461},{5,7,566},{5,7,566},{5,7,566},{5,6,566},{4,7,621},{4,7,201},{4,7,201}, +{4,5,427},{2,7,469},{3,6,362},{5,7,37},{5,7,37},{5,7,37},{5,6,37},{5,6,338},{4,7,80},{4,7,80},{5,5,241},{4,6,338},{5,5,241},{7,5,425},{5,7,65},{6,6,164},{5,6,101},{7,5,425},{6,6,433},{5,6,101},{0,6,461},{6,6,433},{0,6,461},{5,0,565},{5,0,565},{5,0,565},{5,0,565},{4,7,137},{4,7,137},{4,7,137},{4,5,202},{3,6,1}, 
+{3,6,1},{5,7,2042},{5,7,810},{5,7,746},{5,7,790},{5,7,2073},{5,7,885},{4,7,651},{4,6,877},{4,7,2102},{4,6,598},{6,7,794},{6,7,530},{5,7,550},{5,7,594},{6,6,1526},{5,7,689},{4,7,251},{4,6,477},{6,6,1526},{4,6,477},{5,7,521},{5,7,521},{5,7,521},{5,6,521},{5,7,552},{5,6,312},{5,6,312},{4,6,516},{3,7,349},{4,6,237},{5,7,325}, +{5,7,325},{5,7,325},{5,6,325},{6,5,340},{4,7,26},{4,7,26},{4,6,116},{3,7,340},{4,6,116},{6,7,433},{6,7,169},{5,7,325},{4,7,226},{6,7,433},{7,6,425},{4,7,226},{0,6,461},{7,6,425},{0,6,461},{5,0,421},{5,0,421},{5,0,421},{5,0,421},{5,6,212},{5,6,212},{5,6,212},{5,5,221},{4,6,137},{4,6,137},{6,7,2362},{5,7,1514},{5,7,938}, +{5,7,630},{5,7,2633},{5,7,741},{5,7,165},{5,6,833},{4,7,2070},{4,6,758},{6,7,762},{6,7,306},{6,7,186},{6,7,426},{7,5,1526},{5,7,705},{5,7,129},{4,6,749},{5,7,1541},{4,6,749},{5,7,889},{5,7,889},{5,7,889},{5,7,581},{5,7,424},{5,7,116},{5,7,116},{5,6,104},{3,7,525},{4,6,29},{6,7,137},{6,7,137},{6,7,137},{6,6,146},{7,4,338}, +{5,7,80},{5,7,80},{4,6,20},{7,5,338},{4,6,20},{7,6,433},{6,7,185},{6,7,65},{5,7,65},{7,6,433},{6,7,425},{5,7,65},{0,6,733},{6,7,425},{0,6,733},{5,0,565},{5,0,565},{5,0,565},{5,0,565},{5,6,52},{5,6,52},{5,6,52},{5,6,100},{4,6,25},{4,6,25},{6,7,2073},{6,7,1449},{6,7,1049},{5,7,981},{6,7,2548},{5,7,1044},{5,7,20}, +{5,6,1196},{5,7,2365},{4,7,929},{6,7,1049},{6,7,425},{6,7,25},{6,7,73},{7,6,1492},{6,7,948},{5,7,16},{3,7,800},{7,6,1460},{3,7,800},{6,7,1049},{6,7,1049},{6,7,1049},{5,7,981},{5,7,680},{5,7,20},{5,7,20},{5,6,40},{4,7,434},{4,6,205},{6,7,25},{6,7,25},{6,7,25},{6,7,73},{7,5,370},{5,7,16},{5,7,16},{5,6,36},{6,6,338}, +{5,6,36},{7,7,410},{7,7,338},{6,7,16},{5,7,16},{7,7,410},{6,7,464},{5,7,16},{0,7,784},{6,7,464},{0,7,784},{5,0,965},{5,0,965},{5,0,965},{5,0,965},{5,7,4},{5,7,4},{5,7,4},{5,6,4},{4,7,145},{4,7,145},{6,7,1769},{6,7,1145},{6,7,745},{6,7,601},{6,7,1940},{6,7,1172},{5,7,308},{5,7,696},{5,7,1805},{4,7,401},{7,7,1043}, 
+{6,7,569},{6,7,169},{6,7,25},{7,6,1076},{6,7,596},{6,7,196},{4,7,401},{6,7,1076},{4,7,401},{6,7,745},{6,7,745},{6,7,745},{6,7,601},{6,7,916},{5,7,308},{5,7,308},{5,6,360},{4,7,626},{5,6,341},{6,7,169},{6,7,169},{6,7,169},{6,7,25},{6,7,340},{6,7,196},{6,7,196},{6,6,221},{5,7,340},{6,6,221},{7,7,202},{7,7,130},{7,7,81}, +{6,7,0},{7,7,202},{7,7,218},{6,7,0},{0,7,400},{7,7,218},{0,7,400},{6,0,601},{6,0,601},{6,0,601},{6,0,601},{5,7,164},{5,7,164},{5,7,164},{5,6,164},{4,7,1},{4,7,1},{6,7,1886},{6,7,1262},{6,7,862},{6,7,502},{6,7,1715},{6,7,731},{6,7,331},{5,7,507},{5,7,1634},{4,7,266},{7,7,521},{7,7,449},{7,7,400},{6,7,277},{7,7,797}, +{6,7,506},{6,7,106},{5,7,146},{6,7,770},{5,7,146},{6,7,862},{6,7,862},{6,7,862},{6,7,502},{6,7,691},{6,7,331},{6,7,331},{5,7,507},{5,7,610},{4,7,266},{7,7,400},{7,7,400},{7,7,400},{6,7,277},{7,6,338},{6,7,106},{6,7,106},{5,7,146},{7,6,370},{5,7,146},{7,7,121},{7,7,49},{7,7,0},{7,7,16},{7,7,121},{7,7,65},{7,7,16}, +{0,7,121},{7,7,65},{0,7,121},{6,0,421},{6,0,421},{6,0,421},{6,0,421},{6,7,250},{6,7,250},{6,7,250},{6,6,241},{4,7,145},{4,7,145},{7,7,2010},{6,7,1774},{6,7,1374},{6,7,822},{6,7,1923},{6,7,747},{6,7,347},{6,7,139},{6,7,1446},{5,7,34},{7,7,329},{7,7,257},{7,7,208},{7,7,160},{7,7,509},{7,7,389},{6,7,298},{5,7,18},{7,7,549}, +{5,7,18},{6,7,1374},{6,7,1374},{6,7,1374},{6,7,822},{6,7,899},{6,7,347},{6,7,347},{6,7,139},{5,7,866},{5,7,34},{7,7,208},{7,7,208},{7,7,208},{7,7,160},{7,7,388},{6,7,298},{6,7,298},{5,7,18},{6,7,370},{5,7,18},{7,7,185},{7,7,113},{7,7,64},{7,7,16},{7,7,185},{7,7,65},{7,7,16},{0,7,9},{7,7,65},{0,7,9},{6,0,533}, +{6,0,533},{6,0,533},{6,0,533},{6,7,58},{6,7,58},{6,7,58},{6,7,130},{5,7,25},{5,7,25},{7,7,1347},{7,7,1275},{7,7,1226},{7,7,1154},{7,7,1431},{6,7,922},{6,7,522},{6,7,2},{6,7,1125},{5,7,137},{7,7,258},{7,7,186},{7,7,137},{7,7,65},{7,7,342},{7,7,198},{7,7,149},{6,7,1},{7,7,294},{6,7,1},{7,7,1226},{7,7,1226},{7,7,1226}, 
+{7,7,1154},{6,7,1146},{6,7,522},{6,7,522},{6,7,2},{6,7,725},{5,7,137},{7,7,137},{7,7,137},{7,7,137},{7,7,65},{7,7,221},{7,7,149},{7,7,149},{6,7,1},{7,7,245},{6,7,1},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{6,0,901},{6,0,901},{6,0,901},{6,0,901},{6,7,122}, +{6,7,122},{6,7,122},{6,7,2},{5,7,137},{5,7,137},{7,7,883},{7,7,811},{7,7,762},{7,7,690},{7,7,871},{7,7,727},{7,7,678},{6,7,130},{6,7,949},{6,7,149},{7,7,258},{7,7,186},{7,7,137},{7,7,65},{7,7,246},{7,7,102},{7,7,53},{7,7,36},{7,7,134},{7,7,36},{7,7,762},{7,7,762},{7,7,762},{7,7,690},{7,7,750},{7,7,678},{7,7,678}, +{6,7,130},{6,7,549},{6,7,149},{7,7,137},{7,7,137},{7,7,137},{7,7,65},{7,7,125},{7,7,53},{7,7,53},{7,7,36},{7,7,85},{7,7,36},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{7,0,641},{7,0,641},{7,0,641},{7,0,641},{6,7,442},{6,7,442},{6,7,442},{6,7,130},{6,7,149}, +{6,7,149},{0,3,932},{0,2,218},{0,1,82},{0,1,250},{0,2,1971},{0,1,1371},{0,1,611},{0,0,1950},{0,1,2332},{0,0,1986},{0,3,932},{0,2,218},{0,1,82},{0,1,250},{1,0,1899},{0,1,1371},{0,1,611},{0,0,1950},{1,0,1923},{0,0,1950},{0,1,1},{0,1,1},{0,1,1},{0,0,64},{0,0,180},{0,0,100},{0,0,100},{0,0,101},{0,0,200},{0,0,137},{0,1,1}, +{0,1,1},{0,1,1},{0,0,64},{0,0,180},{0,0,100},{0,0,100},{0,0,101},{0,0,164},{0,0,101},{1,1,884},{0,2,218},{0,1,82},{0,1,250},{1,1,884},{2,0,900},{0,1,250},{0,1,1170},{2,0,900},{0,1,1170},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,3,900},{0,2,250},{0,2,314}, +{0,1,314},{0,2,2355},{0,1,1755},{0,1,675},{0,1,1875},{0,1,2716},{0,1,2236},{0,3,900},{0,2,250},{0,2,314},{0,1,314},{0,2,2355},{0,1,1755},{0,1,675},{0,1,1875},{0,1,2355},{0,1,1875},{0,2,25},{0,2,25},{0,2,25},{0,1,25},{0,1,410},{0,0,292},{0,0,292},{0,0,181},{0,0,392},{0,0,217},{0,2,25},{0,2,25},{0,2,25},{0,1,25},{0,1,410}, 
+{0,0,292},{0,0,292},{0,0,181},{0,0,356},{0,0,181},{2,0,884},{0,2,250},{0,2,314},{0,1,314},{2,0,884},{3,0,884},{0,1,314},{0,1,914},{3,0,884},{0,1,914},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,4,890},{0,3,104},{0,2,90},{0,2,442},{0,2,2995},{0,2,1851},{0,1,995}, +{0,1,1875},{0,1,3356},{0,1,2236},{0,4,890},{0,3,104},{0,2,90},{0,2,442},{1,1,2932},{0,2,1851},{0,1,995},{0,1,1875},{2,0,2900},{0,1,1875},{0,2,9},{0,2,9},{0,2,9},{0,1,9},{0,1,586},{0,1,370},{0,1,370},{0,0,389},{0,0,712},{0,0,425},{0,2,9},{0,2,9},{0,2,9},{0,1,9},{0,1,586},{0,1,370},{0,1,370},{0,0,389},{1,0,650}, +{0,0,389},{1,2,890},{0,3,104},{0,2,90},{0,2,442},{1,2,890},{2,1,890},{0,2,442},{0,1,914},{2,1,890},{0,1,914},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,4,990},{0,3,140},{0,2,158},{0,2,158},{0,3,3048},{0,2,1707},{0,2,747},{0,1,1795},{0,1,3916},{0,1,2156},{0,4,990}, +{0,3,140},{0,2,158},{0,2,158},{0,3,3048},{0,2,1707},{0,2,747},{0,1,1795},{1,1,3048},{0,1,1795},{0,3,40},{0,3,40},{0,3,40},{0,1,157},{0,1,698},{0,1,290},{0,1,290},{0,0,641},{0,0,1076},{0,0,677},{0,3,40},{0,3,40},{0,3,40},{0,1,157},{1,0,666},{0,1,290},{0,1,290},{0,0,641},{1,0,650},{0,0,641},{2,1,890},{0,3,104},{0,2,122}, +{0,2,122},{2,1,890},{4,0,890},{0,2,122},{0,1,1170},{4,0,890},{0,1,1170},{0,0,36},{0,0,36},{0,0,36},{0,0,36},{0,0,0},{0,0,0},{0,0,0},{0,0,16},{0,0,52},{0,0,52},{0,5,1115},{0,4,265},{0,2,689},{0,2,293},{0,4,3096},{0,3,1731},{0,2,324},{0,1,2020},{0,2,4009},{0,1,2381},{0,5,1115},{0,4,265},{1,2,429},{0,2,293},{1,2,3048}, +{0,3,1731},{0,2,324},{0,1,2020},{0,2,3048},{0,1,2020},{0,4,261},{0,4,261},{0,4,261},{0,2,229},{0,2,656},{0,2,260},{0,2,260},{0,1,256},{0,1,1017},{0,1,617},{0,4,261},{0,4,261},{0,4,261},{0,2,229},{0,2,656},{0,2,260},{0,2,260},{0,1,256},{0,1,656},{0,1,256},{3,0,890},{0,4,40},{1,2,68},{0,2,68},{3,0,890},{5,0,890},{0,2,68}, 
+{0,2,1220},{5,0,890},{0,2,1220},{0,0,225},{0,0,225},{0,0,225},{0,0,225},{0,1,16},{0,1,16},{0,1,16},{0,0,25},{0,0,61},{0,0,61},{0,6,1419},{0,4,569},{0,3,1078},{0,2,821},{0,4,3096},{0,3,1395},{0,2,356},{0,2,1832},{0,2,4201},{0,2,2793},{1,4,1011},{1,3,353},{1,2,413},{1,2,413},{2,1,3048},{0,3,1395},{0,2,356},{0,2,1832},{4,0,3048}, +{0,2,1832},{0,4,533},{0,4,533},{0,4,533},{0,2,565},{0,3,666},{0,2,100},{0,2,100},{0,1,128},{0,1,1209},{0,1,489},{1,2,157},{1,2,157},{1,2,157},{1,2,157},{1,1,650},{0,2,100},{0,2,100},{0,1,128},{1,1,666},{0,1,128},{3,1,890},{0,4,40},{1,2,292},{0,2,292},{3,1,890},{4,1,890},{0,2,292},{0,2,932},{4,1,890},{0,2,932},{0,0,529}, +{0,0,529},{0,0,529},{0,0,529},{0,1,16},{0,1,16},{0,1,16},{0,1,64},{0,0,205},{0,0,205},{0,6,1915},{0,5,1019},{1,3,1269},{0,3,1110},{0,5,3051},{0,3,1443},{0,3,318},{0,2,1384},{0,2,4777},{0,2,2345},{1,5,909},{1,4,131},{1,3,113},{1,3,509},{3,0,3051},{0,3,1443},{0,3,318},{0,2,1384},{3,1,3051},{0,2,1384},{0,5,970},{0,5,970},{0,5,970}, +{0,3,1010},{0,3,698},{0,2,196},{0,2,196},{0,1,256},{0,2,1641},{0,1,617},{1,3,13},{1,3,13},{1,3,13},{1,2,13},{2,0,650},{0,2,196},{0,2,196},{0,1,256},{3,0,650},{0,1,256},{2,3,884},{0,5,58},{1,3,104},{0,3,149},{2,3,884},{6,0,884},{0,3,149},{0,2,900},{6,0,884},{0,2,900},{0,0,961},{0,0,961},{0,0,961},{0,0,961},{0,2,0}, +{0,2,0},{0,2,0},{0,1,0},{0,1,361},{0,1,361},{1,5,2113},{1,4,1271},{1,3,1285},{1,3,1329},{0,6,3123},{0,4,1208},{0,3,30},{0,2,1320},{0,3,5011},{0,2,2281},{1,5,957},{1,4,115},{1,3,129},{1,3,173},{1,4,3051},{0,4,1208},{0,3,30},{0,2,1320},{2,2,3051},{0,2,1320},{1,4,1190},{1,4,1190},{1,4,1190},{1,2,1281},{0,4,648},{0,3,26},{0,3,26}, +{0,2,296},{0,2,1641},{0,1,1001},{1,4,34},{1,4,34},{1,4,34},{1,2,125},{0,4,648},{0,3,26},{0,3,26},{0,2,296},{2,1,648},{0,2,296},{3,2,884},{0,5,26},{1,3,104},{0,3,5},{3,2,884},{5,1,884},{0,3,5},{0,2,1124},{5,1,884},{0,2,1124},{1,0,1181},{1,0,1181},{1,0,1181},{1,0,1181},{0,3,25},{0,3,25},{0,3,25},{0,1,64},{0,1,425}, 
+{0,1,425},{1,6,1864},{1,5,1038},{1,3,1390},{1,3,1038},{0,6,3132},{0,4,1199},{0,3,201},{0,2,1743},{0,3,4924},{0,2,2332},{1,6,1080},{1,5,254},{2,3,458},{1,3,254},{2,3,3051},{0,4,1163},{0,3,165},{0,2,1707},{6,0,3051},{0,2,1707},{1,4,1016},{1,4,1016},{1,4,1016},{1,3,989},{0,5,716},{0,3,152},{0,3,152},{0,2,62},{0,2,1611},{0,2,651},{1,4,232}, +{1,4,232},{1,4,232},{1,3,205},{1,3,650},{0,3,116},{0,3,116},{0,2,26},{1,2,650},{0,2,26},{4,1,884},{1,5,58},{2,3,58},{1,3,58},{4,1,884},{4,2,884},{1,3,58},{0,3,1274},{4,2,884},{0,3,1274},{1,0,980},{1,0,980},{1,0,980},{1,0,980},{0,3,52},{0,3,52},{0,3,52},{0,2,61},{0,1,458},{0,1,458},{1,7,1784},{1,5,910},{1,4,1441}, +{1,3,1134},{0,7,3247},{0,5,1292},{0,4,567},{0,3,1474},{0,4,4900},{0,3,2178},{2,5,1028},{2,4,362},{2,3,394},{2,3,418},{3,2,3051},{0,5,1096},{1,3,331},{0,3,1278},{5,1,3051},{0,3,1278},{1,5,885},{1,5,885},{1,5,885},{1,3,909},{0,5,876},{0,4,206},{0,4,206},{0,2,254},{0,3,1548},{0,2,347},{2,3,169},{2,3,169},{2,3,169},{2,3,193},{2,2,648}, +{0,4,10},{0,4,10},{0,2,58},{5,0,648},{0,2,58},{5,0,884},{1,5,26},{2,3,250},{1,3,250},{5,0,884},{3,3,884},{1,3,250},{0,3,954},{3,3,884},{0,3,954},{1,0,884},{1,0,884},{1,0,884},{1,0,884},{0,4,197},{0,4,197},{0,4,197},{0,2,205},{0,2,298},{0,2,298},{1,7,1976},{1,6,1124},{1,4,1649},{1,4,1229},{1,6,3204},{0,5,1452},{1,4,525}, +{0,3,1474},{0,4,4420},{0,3,1474},{2,6,930},{2,5,160},{2,4,138},{2,3,546},{4,1,3060},{0,5,968},{0,4,195},{0,3,990},{4,2,3060},{0,3,990},{1,6,1060},{1,6,1060},{1,6,1060},{1,4,1108},{1,4,824},{1,3,314},{1,3,314},{1,2,370},{0,3,1260},{0,2,427},{2,4,17},{2,4,17},{2,4,17},{2,3,17},{3,1,648},{0,4,74},{0,4,74},{1,2,226},{4,1,648}, +{1,2,226},{3,4,882},{1,6,80},{2,4,122},{0,4,146},{3,4,882},{7,1,882},{0,4,146},{0,3,890},{7,1,882},{0,3,890},{1,0,1044},{1,0,1044},{1,0,1044},{1,0,1044},{1,3,145},{1,3,145},{1,3,145},{1,2,145},{0,2,202},{0,2,202},{2,6,2374},{1,6,1476},{2,4,1550},{1,4,1469},{1,6,3172},{1,5,1259},{1,4,61},{1,3,1323},{0,4,4452},{0,3,1282},{2,6,930}, 
+{2,5,96},{2,4,106},{2,4,194},{5,0,3060},{0,6,1144},{1,4,45},{0,3,1086},{6,1,3060},{0,3,1086},{2,5,1476},{2,5,1476},{2,5,1476},{1,4,1460},{1,5,666},{1,4,52},{1,4,52},{1,3,362},{0,3,1356},{0,3,321},{2,5,32},{2,5,32},{2,5,32},{2,3,97},{4,0,650},{1,4,36},{1,4,36},{0,3,125},{3,2,650},{0,3,125},{4,3,882},{1,6,16},{2,4,90}, +{1,4,9},{4,3,882},{6,2,882},{1,4,9},{0,3,1082},{6,2,882},{0,3,1082},{1,0,1460},{1,0,1460},{1,0,1460},{1,0,1460},{1,4,52},{1,4,52},{1,4,52},{1,2,65},{0,3,200},{0,3,200},{2,7,1892},{2,6,1090},{2,4,1370},{2,4,1062},{1,7,3100},{1,5,1169},{1,4,151},{1,3,1665},{0,5,4036},{0,3,1678},{2,7,1051},{2,6,249},{3,4,493},{2,4,221},{3,4,3060}, +{0,6,883},{1,4,126},{0,4,1528},{7,1,3060},{0,4,1528},{2,5,1035},{2,5,1035},{2,5,1035},{2,4,1026},{1,6,723},{1,4,115},{1,4,115},{1,3,65},{0,4,1004},{0,3,78},{2,5,194},{2,5,194},{2,5,194},{2,4,185},{2,4,648},{1,4,90},{1,4,90},{1,3,40},{7,0,648},{1,3,40},{5,2,882},{2,6,80},{3,4,52},{2,4,52},{5,2,882},{5,3,882},{2,4,52}, +{0,4,1332},{5,3,882},{0,4,1332},{2,0,1010},{2,0,1010},{2,0,1010},{2,0,1010},{1,4,34},{1,4,34},{1,4,34},{1,3,61},{0,3,74},{0,3,74},{2,7,1892},{2,6,898},{2,5,1451},{2,4,1094},{2,6,3501},{1,6,1308},{1,5,589},{1,4,1510},{0,5,3940},{0,4,1116},{3,6,1051},{3,5,377},{3,4,381},{3,4,429},{5,1,3060},{0,6,1059},{2,4,312},{0,4,1016},{6,2,3060}, +{0,4,1016},{2,6,882},{2,6,882},{2,6,882},{2,4,898},{1,6,835},{1,5,189},{1,5,189},{1,3,209},{0,4,1036},{0,3,270},{3,4,185},{3,4,185},{3,4,185},{3,4,233},{3,3,650},{1,5,20},{1,5,20},{1,3,40},{6,1,650},{1,3,40},{6,1,882},{2,6,16},{3,4,212},{2,4,212},{6,1,882},{4,4,882},{2,4,212},{0,4,980},{4,4,882},{0,4,980},{2,0,882}, +{2,0,882},{2,0,882},{2,0,882},{1,5,173},{1,5,173},{1,5,173},{1,3,173},{0,4,136},{0,4,136},{2,7,2404},{2,7,1116},{2,5,1595},{2,5,1235},{2,7,3244},{1,6,1404},{2,5,619},{1,4,1446},{0,6,3804},{0,4,892},{3,7,957},{3,6,195},{3,5,169},{3,4,509},{6,0,3060},{0,7,936},{1,5,196},{0,4,888},{3,4,3060},{0,4,888},{2,7,1035},{2,7,1035},{2,7,1035}, 
+{2,5,1091},{2,5,835},{2,4,317},{2,4,317},{2,3,369},{0,5,875},{1,3,396},{3,5,25},{3,5,25},{3,5,25},{3,4,25},{4,2,650},{1,5,52},{1,5,52},{2,3,200},{5,2,650},{2,3,200},{7,0,884},{2,7,106},{3,5,144},{1,5,160},{7,0,884},{3,5,884},{1,5,160},{0,4,884},{3,5,884},{0,4,884},{2,0,1010},{2,0,1010},{2,0,1010},{2,0,1010},{2,4,173}, +{2,4,173},{2,4,173},{2,3,173},{0,4,8},{0,4,8},{3,7,2430},{2,7,1404},{3,5,1610},{2,5,1411},{2,7,3148},{2,6,1309},{2,5,91},{2,4,1325},{0,6,3484},{0,4,1180},{3,7,909},{3,6,83},{3,5,89},{3,5,221},{4,4,3051},{0,7,1000},{2,5,66},{1,4,1053},{7,2,3051},{1,4,1053},{2,7,1403},{2,7,1403},{2,7,1403},{2,5,1395},{2,6,681},{2,5,75},{2,5,75}, +{2,4,425},{0,5,795},{0,4,280},{3,6,34},{3,6,34},{3,6,34},{3,4,73},{5,1,656},{2,5,50},{2,5,50},{1,4,153},{7,1,656},{1,4,153},{5,4,884},{2,7,10},{3,5,80},{2,5,17},{5,4,884},{7,3,884},{2,5,17},{0,4,1044},{7,3,884},{0,4,1044},{2,0,1394},{2,0,1394},{2,0,1394},{2,0,1394},{2,4,61},{2,4,61},{2,4,61},{2,3,61},{0,4,136}, +{0,4,136},{3,7,2214},{3,7,1150},{3,5,1358},{3,5,1094},{2,7,3652},{2,6,1147},{2,5,109},{2,4,1595},{0,7,3724},{0,5,1402},{3,7,1314},{3,7,250},{3,5,458},{3,5,194},{5,3,3060},{1,7,888},{2,5,93},{0,5,1398},{6,3,3060},{0,5,1398},{3,6,1060},{3,6,1060},{3,6,1060},{3,5,1069},{2,7,736},{2,5,84},{2,5,84},{2,4,74},{0,6,820},{1,4,81},{3,6,160}, +{3,6,160},{3,6,160},{3,5,169},{6,0,650},{2,5,68},{2,5,68},{1,4,45},{3,4,650},{1,4,45},{6,3,884},{3,7,106},{4,5,50},{3,5,50},{6,3,884},{6,4,884},{3,5,50},{0,5,1394},{6,4,884},{0,5,1394},{3,0,1044},{3,0,1044},{3,0,1044},{3,0,1044},{2,5,20},{2,5,20},{2,5,20},{2,4,65},{0,5,8},{0,5,8},{3,7,2566},{3,7,894},{3,6,1469}, +{3,5,1062},{3,7,3535},{2,7,1332},{2,6,619},{2,5,1554},{0,7,3276},{0,5,1146},{4,7,1080},{4,6,398},{4,5,374},{4,5,446},{6,2,3051},{1,7,1016},{3,5,299},{0,5,1046},{5,4,3051},{0,5,1046},{3,7,885},{3,7,885},{3,7,885},{3,5,893},{2,7,800},{2,6,178},{2,6,178},{2,4,170},{0,6,660},{1,4,225},{4,5,205},{4,5,205},{4,5,205},{4,5,277},{5,2,656}, 
+{2,6,34},{2,6,34},{2,4,26},{7,2,656},{2,4,26},{7,2,884},{3,7,10},{4,5,178},{3,5,178},{7,2,884},{5,5,884},{3,5,178},{0,5,1010},{5,5,884},{0,5,1010},{3,0,884},{3,0,884},{3,0,884},{3,0,884},{2,6,153},{2,6,153},{2,6,153},{2,4,145},{0,5,136},{0,5,136},{4,7,3320},{3,7,1150},{3,6,1549},{3,6,1249},{3,7,3487},{2,7,1364},{2,6,603}, +{2,5,1426},{0,7,3340},{1,5,892},{4,7,1016},{4,7,236},{4,6,206},{4,5,478},{7,1,3051},{2,7,964},{2,6,203},{1,5,883},{4,5,3051},{1,5,883},{3,7,1029},{3,7,1029},{3,7,1029},{3,6,1080},{3,6,852},{3,5,326},{3,5,326},{3,4,374},{0,6,884},{2,4,371},{4,6,37},{4,6,37},{4,6,37},{4,5,37},{6,1,656},{2,6,34},{2,6,34},{3,4,178},{6,3,656}, +{3,4,178},{5,6,890},{3,7,170},{4,6,170},{2,6,178},{5,6,890},{4,6,890},{2,6,178},{0,5,882},{4,6,890},{0,5,882},{3,0,980},{3,0,980},{3,0,980},{3,0,980},{3,5,205},{3,5,205},{3,5,205},{3,4,205},{1,5,10},{1,5,10},{4,7,2936},{4,7,1676},{4,6,1678},{3,6,1361},{3,7,3951},{3,7,1367},{3,6,129},{3,5,1335},{1,7,3496},{1,5,1116},{4,7,1336}, +{4,7,76},{4,6,78},{4,6,254},{5,5,3048},{2,7,1124},{3,6,93},{2,5,1026},{3,6,3048},{2,5,1026},{3,7,1557},{3,7,1557},{3,7,1557},{3,6,1336},{3,7,702},{3,6,104},{3,6,104},{3,5,494},{0,7,667},{1,5,275},{4,7,40},{4,7,40},{4,7,40},{4,5,53},{7,0,650},{3,6,68},{3,6,68},{2,5,185},{3,5,650},{2,5,185},{7,3,890},{4,7,72},{4,6,74}, +{3,6,29},{7,3,890},{6,5,890},{3,6,29},{0,5,1010},{6,5,890},{0,5,1010},{3,0,1332},{3,0,1332},{3,0,1332},{3,0,1332},{3,5,61},{3,5,61},{3,5,61},{3,4,61},{1,5,106},{1,5,106},{4,7,3116},{4,7,1316},{4,6,1354},{4,6,1134},{4,7,4084},{3,7,1133},{3,6,75},{3,5,1533},{1,7,3676},{1,6,1470},{5,7,1429},{4,7,355},{4,6,393},{4,6,173},{6,4,3051}, +{3,7,1124},{3,6,66},{1,6,1469},{7,4,3051},{1,6,1469},{4,7,1091},{4,7,1091},{4,7,1091},{4,6,1118},{3,7,729},{3,6,59},{3,6,59},{3,5,89},{0,7,820},{2,5,90},{4,7,130},{4,7,130},{4,7,130},{4,6,157},{7,1,656},{3,6,50},{3,6,50},{2,5,41},{7,3,656},{2,5,41},{7,4,890},{4,7,234},{5,6,52},{4,6,52},{7,4,890},{7,5,890},{4,6,52}, 
+{0,5,1460},{7,5,890},{0,5,1460},{4,0,1082},{4,0,1082},{4,0,1082},{4,0,1082},{3,6,10},{3,6,10},{3,6,10},{3,5,73},{1,6,10},{1,6,10},{4,7,3820},{4,7,1540},{4,7,1495},{4,6,1038},{4,7,4084},{3,7,1469},{3,6,571},{3,6,1606},{2,7,3916},{1,6,1150},{5,7,1349},{5,7,425},{5,6,373},{5,6,469},{7,3,3048},{3,7,1348},{4,6,292},{1,6,1069},{6,5,3048}, +{1,6,1069},{4,7,1011},{4,7,1011},{4,7,1011},{4,6,894},{3,7,1161},{3,7,173},{3,7,173},{3,5,137},{1,7,659},{2,5,186},{5,6,229},{5,6,229},{5,6,229},{5,6,325},{6,3,650},{3,7,52},{3,7,52},{3,5,16},{6,4,650},{3,5,16},{6,6,890},{5,7,200},{5,6,148},{4,6,148},{6,6,890},{6,6,890},{4,6,148},{0,6,1044},{6,6,890},{0,6,1044},{4,0,890}, +{4,0,890},{4,0,890},{4,0,890},{3,7,137},{3,7,137},{3,7,137},{3,5,121},{1,6,106},{1,6,106},{5,7,4054},{4,7,2276},{4,7,1511},{4,7,1271},{4,7,4596},{4,7,1596},{3,7,577},{3,6,1414},{2,7,4204},{2,6,900},{5,7,1653},{5,7,377},{5,7,249},{5,6,453},{5,7,3048},{4,7,1371},{3,7,216},{2,6,884},{5,6,3048},{2,6,884},{4,7,1315},{4,7,1315},{4,7,1315}, +{4,6,1054},{4,7,875},{4,6,341},{4,6,341},{4,5,385},{1,7,835},{3,5,352},{5,7,53},{5,7,53},{5,7,53},{5,6,53},{7,2,650},{3,7,20},{3,7,20},{4,5,160},{5,5,650},{4,5,160},{7,5,890},{5,7,328},{5,7,200},{3,7,200},{7,5,890},{5,7,900},{3,7,200},{0,6,884},{5,7,900},{0,6,884},{4,0,954},{4,0,954},{4,0,954},{4,0,954},{4,6,241}, +{4,6,241},{4,6,241},{4,5,241},{2,6,16},{2,6,16},{5,7,4022},{5,7,2394},{5,7,1754},{4,7,1319},{5,7,4921},{4,7,1660},{4,7,175},{4,6,1353},{3,7,4380},{2,6,1060},{6,7,2021},{5,7,713},{5,7,73},{5,7,293},{6,6,3051},{4,7,1611},{4,7,126},{3,6,1005},{4,7,3051},{3,6,1005},{5,7,1718},{5,7,1718},{5,7,1718},{4,7,1283},{4,7,859},{4,7,139},{4,7,139}, +{4,5,465},{2,7,779},{2,6,276},{5,7,37},{5,7,37},{5,7,37},{5,6,37},{5,6,648},{4,7,90},{4,7,90},{3,6,221},{4,6,648},{3,6,221},{7,6,900},{6,7,452},{5,7,72},{4,7,45},{7,6,900},{7,6,884},{4,7,45},{0,6,980},{7,6,884},{0,6,980},{4,0,1274},{4,0,1274},{4,0,1274},{4,0,1274},{4,6,65},{4,6,65},{4,6,65},{4,5,65},{2,6,80}, 
+{2,6,80},{5,7,4265},{5,7,2373},{5,7,1349},{5,7,1173},{5,7,4606},{4,7,2065},{4,7,40},{4,6,1266},{3,7,4455},{3,6,1261},{6,7,1649},{6,7,1025},{5,7,325},{5,7,149},{7,5,2817},{5,7,1514},{4,7,36},{3,6,1197},{6,6,2841},{3,6,1197},{5,7,1349},{5,7,1349},{5,7,1349},{5,7,1173},{4,7,1300},{4,7,40},{4,7,40},{4,6,110},{2,7,1040},{3,6,105},{5,7,325}, +{5,7,325},{5,7,325},{5,7,149},{6,5,650},{4,7,36},{4,7,36},{3,6,41},{3,7,650},{3,6,41},{7,6,801},{6,7,449},{6,7,49},{4,7,36},{7,6,801},{6,7,761},{4,7,36},{0,6,1181},{6,7,761},{0,6,1181},{5,0,1124},{5,0,1124},{5,0,1124},{5,0,1124},{4,7,4},{4,7,4},{4,7,4},{4,6,85},{2,7,16},{2,7,16},{5,7,4345},{5,7,2453},{5,7,1429}, +{5,7,901},{5,7,4190},{5,7,1770},{4,7,360},{4,6,1266},{4,7,3861},{2,7,1041},{6,7,1281},{6,7,657},{6,7,257},{5,7,325},{6,7,2250},{5,7,1194},{5,7,170},{2,7,977},{5,7,2250},{2,7,977},{5,7,1429},{5,7,1429},{5,7,1429},{5,7,901},{5,7,1274},{4,7,360},{4,7,360},{4,6,110},{3,7,979},{3,6,153},{6,7,257},{6,7,257},{6,7,257},{5,7,325},{7,4,648}, +{5,7,170},{5,7,170},{4,6,10},{7,5,648},{4,6,10},{7,7,521},{6,7,401},{6,7,1},{5,7,1},{7,7,521},{6,7,521},{5,7,1},{0,7,961},{6,7,521},{0,7,961},{5,0,900},{5,0,900},{5,0,900},{5,0,900},{4,7,164},{4,7,164},{4,7,164},{4,6,101},{2,7,80},{2,7,80},{6,7,3669},{5,7,2917},{5,7,1893},{5,7,1013},{5,7,4158},{5,7,1386},{5,7,362}, +{4,7,1049},{4,7,3381},{3,7,555},{6,7,1169},{6,7,545},{6,7,145},{6,7,73},{7,6,1802},{5,7,1130},{5,7,106},{3,7,530},{7,6,1770},{3,7,530},{5,7,1893},{5,7,1893},{5,7,1893},{5,7,1013},{5,7,1242},{5,7,362},{5,7,362},{5,6,402},{3,7,1251},{4,6,339},{6,7,145},{6,7,145},{6,7,145},{6,7,73},{7,5,680},{5,7,106},{5,7,106},{5,6,146},{6,6,648}, +{5,6,146},{7,7,265},{7,7,193},{6,7,81},{6,7,9},{7,7,265},{7,7,305},{6,7,9},{0,7,529},{7,7,305},{0,7,529},{5,0,932},{5,0,932},{5,0,932},{5,0,932},{5,7,281},{5,7,281},{5,7,281},{5,6,281},{3,7,26},{3,7,26},{6,7,3077},{6,7,2453},{6,7,2053},{5,7,1509},{6,7,3438},{5,7,1386},{5,7,362},{5,7,650},{5,7,3195},{3,7,283},{7,7,1293}, 
+{6,7,689},{6,7,289},{6,7,25},{7,6,1386},{6,7,786},{5,7,298},{4,7,261},{6,7,1386},{4,7,261},{6,7,2053},{6,7,2053},{6,7,2053},{5,7,1509},{5,7,1594},{5,7,362},{5,7,362},{5,6,434},{4,7,1260},{3,7,283},{6,7,289},{6,7,289},{6,7,289},{6,7,25},{6,7,650},{5,7,298},{5,7,298},{4,7,261},{5,7,650},{4,7,261},{7,7,137},{7,7,65},{7,7,16}, +{6,7,25},{7,7,137},{7,7,113},{6,7,25},{0,7,225},{7,7,113},{0,7,225},{5,0,1220},{5,0,1220},{5,0,1220},{5,0,1220},{5,7,73},{5,7,73},{5,7,73},{5,6,73},{3,7,58},{3,7,58},{6,7,2870},{6,7,2246},{6,7,1846},{6,7,1366},{6,7,2889},{6,7,1785},{5,7,821},{5,7,137},{5,7,2700},{4,7,126},{7,7,771},{7,7,699},{7,7,650},{6,7,277},{7,7,1107}, +{6,7,696},{6,7,296},{4,7,45},{6,7,1080},{4,7,45},{6,7,1846},{6,7,1846},{6,7,1846},{6,7,1366},{6,7,1865},{5,7,821},{5,7,821},{5,7,137},{4,7,1611},{4,7,126},{7,7,650},{7,7,650},{7,7,650},{6,7,277},{7,6,648},{6,7,296},{6,7,296},{4,7,45},{7,6,680},{4,7,45},{7,7,146},{7,7,74},{7,7,25},{7,7,1},{7,7,146},{7,7,50},{7,7,1}, +{0,7,36},{7,7,50},{0,7,36},{6,0,1170},{6,0,1170},{6,0,1170},{6,0,1170},{5,7,145},{5,7,145},{5,7,145},{5,7,101},{4,7,90},{4,7,90},{6,7,2962},{6,7,2338},{6,7,1938},{6,7,1314},{6,7,2677},{6,7,1429},{6,7,1029},{5,7,85},{5,7,2536},{4,7,122},{7,7,531},{7,7,459},{7,7,410},{7,7,338},{7,7,771},{7,7,627},{6,7,404},{5,7,4},{7,7,827}, +{5,7,4},{6,7,1938},{6,7,1938},{6,7,1938},{6,7,1314},{6,7,1653},{6,7,1029},{6,7,1029},{5,7,85},{5,7,1512},{4,7,122},{7,7,410},{7,7,410},{7,7,410},{7,7,338},{7,7,650},{6,7,404},{6,7,404},{5,7,4},{6,7,596},{5,7,4},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{6,0,914}, +{6,0,914},{6,0,914},{6,0,914},{5,7,481},{5,7,481},{5,7,481},{5,7,85},{4,7,122},{4,7,122},{7,7,2924},{6,7,2338},{6,7,1938},{6,7,1314},{6,7,2373},{6,7,1125},{6,7,725},{5,7,325},{6,7,2132},{5,7,232},{7,7,323},{7,7,251},{7,7,202},{7,7,130},{7,7,467},{7,7,323},{7,7,274},{5,7,36},{7,7,459},{5,7,36},{6,7,1938},{6,7,1938},{6,7,1938}, 
+{6,7,1314},{6,7,1349},{6,7,725},{6,7,725},{5,7,325},{5,7,1256},{5,7,232},{7,7,202},{7,7,202},{7,7,202},{7,7,130},{7,7,346},{7,7,274},{7,7,274},{5,7,36},{7,7,410},{5,7,36},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{6,0,914},{6,0,914},{6,0,914},{6,0,914},{6,7,325}, +{6,7,325},{6,7,325},{5,7,325},{5,7,232},{5,7,232},{7,7,2092},{7,7,2020},{7,7,1971},{6,7,1570},{7,7,2140},{6,7,1077},{6,7,677},{6,7,85},{6,7,1588},{5,7,232},{7,7,243},{7,7,171},{7,7,122},{7,7,50},{7,7,291},{7,7,147},{7,7,98},{6,7,4},{7,7,219},{6,7,4},{7,7,1971},{7,7,1971},{7,7,1971},{6,7,1570},{6,7,1301},{6,7,677},{6,7,677}, +{6,7,85},{6,7,1188},{5,7,232},{7,7,122},{7,7,122},{7,7,122},{7,7,50},{7,7,170},{7,7,98},{7,7,98},{6,7,4},{7,7,170},{6,7,4},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{6,0,1170},{6,0,1170},{6,0,1170},{6,0,1170},{6,7,277},{6,7,277},{6,7,277},{6,7,85},{5,7,232}, +{5,7,232},{0,4,1618},{0,3,436},{0,2,74},{0,2,866},{0,2,3411},{0,2,2531},{0,1,1251},{0,1,2531},{0,1,3772},{0,1,2892},{0,4,1618},{0,3,436},{0,2,74},{0,2,866},{0,2,3411},{0,2,2531},{0,1,1251},{0,1,2531},{2,0,3376},{0,1,2531},{0,1,25},{0,1,25},{0,1,25},{0,1,49},{0,0,360},{0,0,232},{0,0,232},{0,0,149},{0,0,332},{0,0,185},{0,1,25}, +{0,1,25},{0,1,25},{0,1,49},{0,0,360},{0,0,232},{0,0,232},{0,0,149},{0,0,296},{0,0,149},{1,2,1570},{0,3,436},{0,2,74},{0,2,866},{1,2,1570},{0,2,1570},{0,2,866},{0,1,1570},{0,2,1570},{0,1,1570},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,4,1586},{0,3,340},{0,2,10}, +{0,2,450},{0,3,4016},{0,2,2627},{0,2,1411},{0,1,2691},{0,1,4572},{0,1,3052},{0,4,1586},{0,3,340},{0,2,10},{0,2,450},{1,1,3968},{0,2,2627},{0,2,1411},{0,1,2691},{2,0,4016},{0,1,2691},{0,2,1},{0,2,1},{0,2,1},{0,1,1},{0,1,530},{0,1,362},{0,1,362},{0,0,325},{0,0,620},{0,0,361},{0,2,1},{0,2,1},{0,2,1},{0,1,1},{0,1,530}, 
+{0,1,362},{0,1,362},{0,0,325},{0,0,584},{0,0,325},{2,1,1570},{0,3,340},{0,2,10},{0,2,450},{2,1,1570},{4,0,1570},{0,2,450},{0,1,1730},{4,0,1570},{0,1,1730},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,5,1576},{0,4,290},{0,2,202},{0,2,290},{0,3,4656},{0,2,2979},{0,2,1251}, +{0,1,3107},{0,1,5628},{0,1,3468},{0,5,1576},{0,4,290},{0,2,202},{0,2,290},{0,3,4656},{0,2,2979},{0,2,1251},{0,1,3107},{1,1,4656},{0,1,3107},{0,3,16},{0,3,16},{0,3,16},{0,1,81},{0,1,802},{0,1,442},{0,1,442},{0,0,629},{0,0,1036},{0,0,665},{0,3,16},{0,3,16},{0,3,16},{0,1,81},{0,1,802},{0,1,442},{0,1,442},{0,0,629},{1,0,818}, +{0,0,629},{3,0,1576},{0,4,290},{0,2,202},{0,2,290},{3,0,1576},{3,1,1576},{0,2,290},{0,1,2146},{3,1,1576},{0,1,2146},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,5,1640},{0,4,162},{0,3,241},{0,2,386},{0,4,5539},{0,3,3512},{0,2,1347},{0,1,3779},{0,2,6396},{0,1,4140},{0,5,1640}, +{0,4,162},{0,3,241},{0,2,386},{1,2,5435},{0,3,3512},{0,2,1347},{0,1,3779},{0,2,5435},{0,1,3779},{0,3,16},{0,3,16},{0,3,16},{0,2,25},{0,1,1202},{0,1,650},{0,1,650},{0,1,970},{0,0,1580},{0,0,1097},{0,3,16},{0,3,16},{0,3,16},{0,2,25},{1,0,1170},{0,1,650},{0,1,650},{0,1,970},{1,0,1154},{0,1,970},{2,2,1576},{0,4,162},{0,3,241}, +{0,2,386},{2,2,1576},{5,0,1576},{0,2,386},{0,2,1730},{5,0,1576},{0,2,1730},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,6,1667},{0,5,259},{0,3,286},{0,3,406},{0,4,5440},{0,3,3035},{0,2,1284},{0,2,3504},{0,2,6513},{0,2,4465},{0,6,1667},{0,5,259},{0,3,286},{0,3,406},{2,1,5424}, +{0,3,3035},{0,2,1284},{0,2,3504},{4,0,5424},{0,2,3504},{0,4,81},{0,4,81},{0,4,81},{0,2,97},{0,2,1160},{0,2,500},{0,2,500},{0,1,520},{0,1,1521},{0,1,881},{0,4,81},{0,4,81},{0,4,81},{0,2,97},{0,2,1160},{0,2,500},{0,2,500},{0,1,520},{0,1,1160},{0,1,520},{2,3,1576},{0,5,178},{1,3,100},{0,3,325},{2,3,1576},{6,0,1576},{0,3,325}, 
+{0,2,1568},{6,0,1576},{0,2,1568},{0,0,81},{0,0,81},{0,0,81},{0,0,81},{0,0,9},{0,0,9},{0,0,9},{0,0,1},{0,0,37},{0,0,37},{0,7,1865},{0,5,339},{0,3,734},{0,3,374},{0,5,5435},{0,3,3019},{0,3,814},{0,2,2992},{0,2,7025},{0,2,3953},{1,5,1865},{0,5,339},{1,3,293},{0,3,374},{3,0,5435},{0,3,3019},{0,3,814},{0,2,2992},{3,1,5435}, +{0,2,2992},{0,5,314},{0,5,314},{0,5,314},{0,3,370},{0,3,1170},{0,2,340},{0,2,340},{0,1,392},{0,1,1713},{0,1,753},{1,3,289},{1,3,289},{1,3,289},{1,2,289},{1,1,1154},{0,2,340},{0,2,340},{0,1,392},{1,1,1170},{0,1,392},{4,0,1576},{0,5,50},{1,3,4},{0,3,85},{4,0,1576},{5,1,1576},{0,3,85},{0,2,1696},{5,1,1576},{0,2,1696},{0,0,289}, +{0,0,289},{0,0,289},{0,0,289},{0,1,4},{0,1,4},{0,1,4},{0,0,49},{0,0,85},{0,0,85},{0,7,2265},{0,6,787},{1,3,1401},{0,3,726},{0,5,5515},{0,4,2664},{0,3,462},{0,2,2864},{0,3,7363},{0,2,3825},{1,6,1667},{1,5,405},{1,3,245},{1,3,377},{2,2,5427},{0,4,2664},{0,3,462},{0,2,2864},{5,0,5427},{0,2,2864},{0,5,634},{0,5,634},{0,5,634}, +{0,3,626},{0,3,1202},{0,3,362},{0,3,362},{0,1,520},{0,2,2145},{0,1,881},{1,4,106},{1,4,106},{1,4,106},{1,2,145},{2,0,1154},{0,3,362},{0,3,362},{0,1,520},{3,0,1154},{0,1,520},{2,4,1576},{0,6,162},{1,3,164},{0,3,101},{2,4,1576},{7,0,1576},{0,3,101},{0,2,2080},{7,0,1576},{0,2,2080},{0,0,625},{0,0,625},{0,0,625},{0,0,625},{0,1,36}, +{0,1,36},{0,1,36},{0,1,36},{0,0,261},{0,0,261},{1,6,2775},{0,6,1091},{1,4,1422},{0,3,1462},{0,6,5427},{0,4,2536},{0,3,494},{0,2,3120},{0,3,7635},{0,2,4081},{1,6,1619},{1,5,165},{1,4,266},{1,3,361},{3,1,5427},{0,4,2536},{0,3,494},{0,2,3120},{4,1,5427},{0,2,3120},{0,6,1090},{0,6,1090},{0,6,1090},{0,3,1138},{0,4,1152},{0,3,170},{0,3,170}, +{0,2,416},{0,2,2145},{0,1,1265},{1,4,10},{1,4,10},{1,4,10},{1,3,37},{0,4,1152},{0,3,170},{0,3,170},{0,2,416},{2,1,1152},{0,2,416},{3,3,1570},{0,6,2},{1,4,265},{1,3,360},{3,3,1570},{6,1,1570},{1,3,360},{0,3,1768},{6,1,1570},{0,3,1768},{0,0,1089},{0,0,1089},{0,0,1089},{0,0,1089},{0,2,4},{0,2,4},{0,2,4},{0,1,4},{0,1,365}, 
+{0,1,365},{1,7,2796},{1,6,1432},{1,4,1413},{1,4,1593},{0,7,5435},{0,5,2360},{0,4,299},{0,3,2594},{0,3,8400},{0,3,4530},{1,7,1640},{1,6,276},{1,4,257},{1,4,437},{4,0,5427},{0,5,2360},{0,4,299},{0,3,2594},{3,2,5427},{0,3,2594},{1,5,1221},{1,5,1221},{1,5,1221},{1,3,1229},{0,5,1184},{0,4,250},{0,4,250},{0,2,146},{0,2,2451},{0,2,1107},{1,5,65}, +{1,5,65},{1,5,65},{1,3,73},{1,3,1154},{0,4,250},{0,4,250},{0,2,146},{1,2,1154},{0,2,146},{4,2,1576},{0,6,128},{2,4,130},{0,4,74},{4,2,1576},{5,2,1576},{0,4,74},{0,3,1570},{5,2,1576},{0,3,1570},{1,0,1220},{1,0,1220},{1,0,1220},{1,0,1220},{0,3,4},{0,3,4},{0,3,4},{0,1,121},{0,1,482},{0,1,482},{1,7,3180},{1,6,1464},{1,4,1813}, +{1,4,1513},{0,7,5515},{0,5,2168},{0,4,59},{0,3,2242},{0,4,8764},{0,3,4178},{2,6,1894},{1,6,308},{2,4,326},{1,4,357},{2,4,5427},{0,5,2168},{0,4,59},{0,3,2242},{7,0,5427},{0,3,2242},{1,6,1448},{1,6,1448},{1,6,1448},{1,4,1512},{0,5,1184},{0,4,58},{0,4,58},{0,2,178},{0,2,2995},{0,2,1139},{1,6,292},{1,6,292},{1,6,292},{2,3,325},{2,2,1152}, +{0,4,58},{0,4,58},{0,2,178},{5,0,1152},{0,2,178},{5,1,1570},{0,7,34},{2,4,2},{0,4,10},{5,1,1570},{4,3,1570},{0,4,10},{0,3,1666},{4,3,1570},{0,3,1666},{1,0,1412},{1,0,1412},{1,0,1412},{1,0,1412},{0,3,36},{0,3,36},{0,3,36},{0,2,9},{0,1,722},{0,1,722},{1,7,3816},{1,6,1748},{1,5,2450},{1,4,1685},{0,7,5983},{0,6,2180},{0,4,207}, +{0,3,2278},{0,4,9004},{0,3,4038},{2,7,1700},{2,6,462},{2,4,230},{2,4,406},{3,3,5420},{0,6,2176},{0,4,203},{0,3,2274},{6,1,5420},{0,3,2274},{1,6,1604},{1,6,1604},{1,6,1604},{1,4,1604},{0,6,1156},{0,4,126},{0,4,126},{0,3,429},{0,3,3044},{0,2,1307},{2,5,136},{2,5,136},{2,5,136},{2,3,149},{3,1,1152},{0,4,122},{0,4,122},{0,3,425},{4,1,1152}, 
+{0,3,425},{6,0,1570},{0,7,130},{2,4,130},{1,4,85},{6,0,1570},{3,4,1570},{1,4,85},{0,3,2018},{3,4,1570},{0,3,2018},{1,0,1600},{1,0,1600},{1,0,1600},{1,0,1600},{0,4,5},{0,4,5},{0,4,5},{0,2,29},{0,2,866},{0,2,866},{1,7,4520},{1,7,1608},{1,5,2418},{1,4,1925},{1,7,5996},{0,6,1956},{0,5,409},{0,4,2751},{0,5,9020},{0,3,3846},{2,7,1604}, +{2,6,174},{2,5,297},{2,4,342},{4,2,5420},{0,6,1856},{0,5,309},{0,4,2651},{5,2,5420},{0,4,2651},{1,7,1604},{1,7,1604},{1,7,1604},{1,4,1636},{0,7,1302},{0,5,120},{0,5,120},{0,3,189},{0,3,2820},{0,3,1245},{2,5,8},{2,5,8},{2,5,8},{2,4,53},{4,0,1154},{0,5,20},{0,5,20},{0,3,89},{3,2,1154},{0,3,89},{4,4,1568},{1,7,8},{2,5,293}, +{0,5,293},{4,4,1568},{7,2,1568},{0,5,293},{0,4,1810},{7,2,1568},{0,4,1810},{1,0,1600},{1,0,1600},{1,0,1600},{1,0,1600},{0,5,116},{0,5,116},{0,5,116},{0,3,164},{0,2,610},{0,2,610},{2,7,4356},{1,7,2004},{2,5,2635},{1,5,2006},{1,7,5924},{0,6,2316},{0,5,499},{0,4,2337},{0,5,8300},{0,4,3420},{2,7,1955},{2,7,299},{2,5,234},{2,5,474},{5,1,5420}, +{0,6,1955},{0,5,138},{0,4,1976},{4,3,5420},{0,4,1976},{1,7,1955},{1,7,1955},{1,7,1955},{1,5,1942},{1,6,1427},{0,5,435},{0,5,435},{0,3,378},{0,4,2628},{0,3,642},{2,6,53},{2,6,53},{2,6,53},{2,4,53},{2,4,1152},{0,5,74},{0,5,74},{0,3,17},{7,0,1152},{0,3,17},{5,3,1570},{1,7,98},{3,5,164},{1,5,100},{5,3,1570},{6,3,1570},{1,5,100}, +{0,4,1576},{6,3,1570},{0,4,1576},{1,0,1906},{1,0,1906},{1,0,1906},{1,0,1906},{1,4,234},{1,4,234},{1,4,234},{1,2,325},{0,3,626},{0,3,626},{2,7,4356},{2,7,1964},{2,5,2267},{2,5,2027},{1,7,6404},{1,6,2220},{1,5,117},{1,4,2314},{0,5,8204},{0,4,2684},{3,7,1929},{2,7,283},{3,5,365},{2,5,346},{6,0,5420},{0,7,1712},{1,5,68},{0,4,1784},{3,4,5420}, +{0,4,1784},{2,6,1942},{2,6,1942},{2,6,1942},{2,4,2006},{1,6,1219},{1,5,117},{1,5,117},{1,3,209},{0,4,2340},{0,3,514},{2,6,261},{2,6,261},{2,6,261},{2,4,325},{3,3,1154},{0,6,50},{0,6,50},{1,3,160},{6,1,1154},{1,3,160},{6,2,1568},{2,7,58},{3,5,4},{1,5,4},{6,2,1568},{5,4,1568},{1,5,4},{0,4,1640},{5,4,1568},{0,4,1640},{2,0,1906}, 
+{2,0,1906},{2,0,1906},{2,0,1906},{1,4,74},{1,4,74},{1,4,74},{1,3,65},{0,3,370},{0,3,370},{2,7,4868},{2,7,1740},{2,5,2411},{2,5,1691},{2,7,6548},{1,7,2244},{1,5,165},{1,4,2250},{0,6,7668},{0,4,2460},{3,7,1817},{3,7,525},{3,5,221},{3,5,441},{4,4,5419},{0,7,1712},{1,5,164},{0,4,1976},{7,2,5419},{0,4,1976},{2,7,1619},{2,7,1619},{2,7,1619}, +{2,5,1627},{1,7,1155},{1,5,101},{1,5,101},{1,3,417},{0,5,2379},{0,4,696},{3,5,157},{3,5,157},{3,5,157},{3,4,157},{4,2,1154},{0,6,82},{0,6,82},{0,4,212},{5,2,1154},{0,4,212},{7,1,1568},{2,7,122},{3,5,100},{2,5,73},{7,1,1568},{4,5,1568},{2,5,73},{0,4,1960},{4,5,1568},{0,4,1960},{2,0,1618},{2,0,1618},{2,0,1618},{2,0,1618},{1,5,1}, +{1,5,1},{1,5,1},{1,3,17},{0,3,370},{0,3,370},{3,7,5570},{2,7,2028},{2,6,2394},{2,5,1867},{2,7,6452},{1,7,1956},{1,6,439},{1,4,2698},{0,6,7348},{0,4,2748},{3,7,2089},{3,7,189},{3,6,334},{3,5,329},{5,3,5419},{1,7,1875},{1,6,358},{0,5,2145},{6,3,5419},{0,5,2145},{2,7,1667},{2,7,1667},{2,7,1667},{2,5,1611},{1,7,1331},{1,6,115},{1,6,115}, +{1,4,198},{0,5,1979},{0,4,248},{3,6,10},{3,6,10},{3,6,10},{3,5,73},{5,1,1160},{1,6,34},{1,6,34},{0,4,52},{7,1,1160},{0,4,52},{5,5,1570},{3,7,180},{4,5,320},{2,5,281},{5,5,1570},{3,6,1570},{2,5,281},{0,5,1856},{3,6,1570},{0,5,1856},{2,0,1586},{2,0,1586},{2,0,1586},{2,0,1586},{1,6,106},{1,6,106},{1,6,106},{1,4,162},{0,4,212}, +{0,4,212},{3,7,5354},{3,7,2770},{3,6,2717},{2,6,1986},{2,7,6956},{1,7,2244},{1,6,457},{1,5,2351},{0,7,7268},{0,5,1974},{4,7,2384},{3,7,270},{3,6,217},{3,6,517},{6,2,5419},{1,7,1920},{1,6,133},{0,5,1650},{5,4,5419},{0,5,1650},{2,7,2180},{2,7,2180},{2,7,2180},{2,6,1905},{2,7,1480},{1,6,376},{1,6,376},{1,4,333},{0,5,1988},{0,4,203},{3,7,45}, 
+{3,7,45},{3,7,45},{3,5,37},{6,0,1154},{1,6,52},{1,6,52},{1,4,9},{3,4,1154},{1,4,9},{6,4,1568},{3,7,234},{3,6,181},{1,6,117},{6,4,1568},{7,4,1568},{1,6,117},{0,5,1586},{7,4,1568},{0,5,1586},{2,0,1856},{2,0,1856},{2,0,1856},{2,0,1856},{2,5,272},{2,5,272},{2,5,272},{1,4,324},{0,4,194},{0,4,194},{3,7,5706},{3,7,2514},{3,6,2285}, +{3,6,2105},{3,7,7359},{2,7,2244},{2,6,147},{2,5,2358},{0,7,6820},{0,5,1718},{4,7,2256},{3,7,750},{4,6,410},{3,6,341},{7,1,5419},{2,7,2180},{2,6,83},{0,5,1618},{4,5,5419},{0,5,1618},{3,7,1985},{3,7,1985},{3,7,1985},{3,5,2041},{2,7,1224},{2,6,146},{2,6,146},{2,4,210},{0,6,1644},{1,4,509},{3,7,221},{3,7,221},{3,7,221},{3,5,277},{5,2,1160}, +{1,7,64},{1,7,64},{2,4,146},{7,2,1160},{2,4,146},{7,3,1570},{4,7,356},{4,6,10},{2,6,2},{7,3,1570},{6,5,1570},{2,6,2},{0,5,1618},{6,5,1570},{0,5,1618},{3,0,1960},{3,0,1960},{3,0,1960},{3,0,1960},{2,5,80},{2,5,80},{2,5,80},{2,4,89},{0,5,100},{0,5,100},{3,7,6570},{3,7,2770},{3,6,2365},{3,6,1705},{3,7,7311},{2,7,2276},{2,6,131}, +{2,5,2230},{0,7,6884},{0,5,1974},{4,7,2512},{4,7,532},{4,6,218},{4,6,482},{5,5,5424},{2,7,2276},{2,6,131},{1,5,1931},{3,6,5424},{1,5,1931},{3,7,1809},{3,7,1809},{3,7,1809},{3,6,1656},{2,7,1352},{2,6,82},{2,6,82},{2,4,370},{0,6,1548},{0,5,293},{4,6,169},{4,6,169},{4,6,169},{4,5,169},{6,1,1160},{1,7,64},{1,7,64},{1,5,250},{6,3,1160}, +{1,5,250},{5,7,1570},{4,7,388},{4,6,74},{3,6,65},{5,7,1570},{5,6,1570},{3,6,65},{0,5,1906},{5,6,1570},{0,5,1906},{3,0,1640},{3,0,1640},{3,0,1640},{3,0,1640},{2,6,1},{2,6,1},{2,6,1},{2,4,9},{0,5,68},{0,5,68},{4,7,6752},{3,7,3538},{3,7,2378},{3,6,1817},{3,7,7775},{2,7,2820},{2,7,477},{2,5,2614},{1,7,7360},{0,6,1978},{5,7,3110}, +{4,7,692},{4,7,377},{4,6,322},{6,4,5424},{3,7,2539},{2,7,413},{0,6,1942},{7,4,5424},{0,6,1942},{3,7,2017},{3,7,2017},{3,7,2017},{3,6,1592},{3,7,1846},{2,7,116},{2,7,116},{2,5,213},{0,7,1531},{1,5,283},{4,7,16},{4,7,16},{4,7,16},{4,6,97},{7,0,1154},{2,7,52},{2,7,52},{1,5,58},{3,5,1154},{1,5,58},{7,4,1576},{5,7,610},{5,6,306}, 
+{3,6,241},{7,4,1576},{7,5,1576},{3,6,241},{0,6,1906},{7,5,1576},{0,6,1906},{3,0,1576},{3,0,1576},{3,0,1576},{3,0,1576},{2,7,100},{2,7,100},{2,7,100},{2,5,164},{0,6,72},{0,6,72},{4,7,6932},{4,7,3932},{4,7,2807},{3,7,1974},{4,7,8428},{3,7,2981},{2,7,423},{2,6,2373},{1,7,7540},{0,6,1618},{5,7,2921},{4,7,1331},{4,7,206},{4,7,566},{7,3,5424}, +{3,7,2692},{2,7,134},{0,6,1609},{6,5,5424},{0,6,1609},{4,7,2707},{4,7,2707},{4,7,2707},{3,7,1874},{3,7,1513},{2,7,323},{2,7,323},{2,5,294},{0,7,1324},{1,5,184},{4,7,106},{4,7,106},{4,7,106},{4,6,25},{7,1,1160},{2,7,34},{2,7,34},{2,5,5},{7,3,1160},{2,5,5},{7,5,1570},{5,7,628},{4,7,181},{2,7,125},{7,5,1570},{6,6,1586},{2,7,125}, +{0,6,1600},{6,6,1586},{0,6,1600},{3,0,1810},{3,0,1810},{3,0,1810},{3,0,1810},{2,7,298},{2,7,298},{2,7,298},{2,5,290},{0,6,18},{0,6,18},{4,7,7636},{4,7,4156},{4,7,2311},{4,7,2191},{4,7,8428},{3,7,3317},{3,7,185},{3,6,2410},{2,7,8180},{1,6,1722},{5,7,3161},{5,7,1357},{5,7,461},{4,7,342},{5,7,5424},{4,7,2979},{3,7,104},{1,6,1601},{5,6,5424}, +{1,6,1601},{4,7,2307},{4,7,2307},{4,7,2307},{4,6,2082},{3,7,1625},{3,7,181},{3,7,181},{3,5,217},{0,7,1548},{1,5,504},{5,7,457},{5,7,457},{5,7,457},{4,6,233},{6,3,1154},{3,7,100},{3,7,100},{3,5,136},{6,4,1154},{3,5,136},{6,7,1576},{5,7,916},{5,7,20},{3,7,4},{6,7,1576},{7,6,1576},{3,7,4},{0,6,1600},{7,6,1576},{0,6,1600},{4,0,2018}, +{4,0,2018},{4,0,2018},{4,0,2018},{3,6,90},{3,6,90},{3,6,90},{3,5,117},{0,7,104},{0,7,104},{5,7,7862},{4,7,4316},{4,7,2291},{4,7,1691},{4,7,8004},{3,7,3433},{3,7,69},{3,6,1774},{2,7,7580},{1,6,1470},{5,7,3101},{5,7,1209},{5,7,185},{4,7,466},{7,4,4803},{4,7,2579},{3,7,68},{2,6,1448},{7,5,4803},{2,6,1448},{4,7,2291},{4,7,2291},{4,7,2291}, 
+{4,7,1691},{3,7,2121},{3,7,69},{3,7,69},{3,5,329},{1,7,1539},{1,6,314},{5,7,185},{5,7,185},{5,7,185},{5,6,185},{7,2,1154},{3,7,68},{3,7,68},{2,6,292},{5,5,1154},{2,6,292},{7,6,1252},{6,7,724},{5,7,16},{4,7,25},{7,6,1252},{7,6,1268},{4,7,25},{0,6,1412},{7,6,1268},{0,6,1412},{4,0,1666},{4,0,1666},{4,0,1666},{4,0,1666},{3,7,5}, +{3,7,5},{3,7,5},{3,5,5},{0,7,40},{0,7,40},{5,7,6806},{4,7,4684},{4,7,2659},{4,7,1579},{4,7,7668},{4,7,2988},{3,7,341},{3,6,1390},{2,7,7084},{1,6,1470},{6,7,2645},{5,7,1193},{5,7,169},{5,7,125},{6,6,4059},{4,7,2259},{4,7,234},{2,6,1224},{4,7,4059},{2,6,1224},{4,7,2659},{4,7,2659},{4,7,2659},{4,7,1579},{4,7,2043},{3,7,341},{3,7,341}, +{3,6,234},{1,7,1779},{1,6,314},{5,7,169},{5,7,169},{5,7,169},{5,7,125},{5,6,1152},{4,7,234},{4,7,234},{2,6,68},{4,6,1152},{2,6,68},{7,6,900},{6,7,500},{6,7,100},{4,7,9},{7,6,900},{6,7,884},{4,7,9},{0,6,1220},{6,7,884},{0,6,1220},{4,0,1570},{4,0,1570},{4,0,1570},{4,0,1570},{3,7,85},{3,7,85},{3,7,85},{3,5,149},{1,7,98}, +{1,7,98},{5,7,6077},{5,7,4185},{5,7,3161},{4,7,1912},{5,7,6790},{4,7,2529},{4,7,504},{3,6,1417},{3,7,6199},{1,7,1097},{6,7,1925},{6,7,1301},{5,7,457},{5,7,17},{7,5,3321},{5,7,1754},{4,7,180},{1,7,1093},{6,6,3345},{1,7,1093},{5,7,3161},{5,7,3161},{5,7,3161},{4,7,1912},{4,7,2124},{4,7,504},{4,7,504},{3,6,261},{2,7,1944},{2,6,171},{5,7,457}, +{5,7,457},{5,7,457},{5,7,17},{6,5,1154},{4,7,180},{4,7,180},{3,6,5},{3,7,1154},{3,6,5},{7,7,605},{6,7,401},{6,7,1},{5,7,1},{7,7,605},{6,7,569},{5,7,1},{0,7,1089},{6,7,569},{0,7,1089},{4,0,1768},{4,0,1768},{4,0,1768},{4,0,1768},{4,7,360},{4,7,360},{4,7,360},{3,6,260},{1,7,8},{1,7,8},{5,7,5837},{5,7,3945},{5,7,2921}, +{5,7,2129},{5,7,6054},{4,7,2529},{4,7,504},{4,6,1386},{3,7,5767},{2,7,773},{6,7,1557},{6,7,933},{6,7,533},{5,7,193},{6,7,2754},{5,7,1434},{4,7,404},{2,7,629},{5,7,2754},{2,7,629},{5,7,2921},{5,7,2921},{5,7,2921},{5,7,2129},{4,7,2604},{4,7,504},{4,7,504},{4,6,230},{2,7,2264},{2,6,443},{6,7,533},{6,7,533},{6,7,533},{5,7,193},{7,4,1152}, 
+{4,7,404},{4,7,404},{3,6,117},{7,5,1152},{3,6,117},{7,7,317},{7,7,245},{6,7,49},{6,7,25},{7,7,317},{7,7,373},{6,7,25},{0,7,625},{7,7,373},{0,7,625},{5,0,2080},{5,0,2080},{5,0,2080},{5,0,2080},{4,7,104},{4,7,104},{4,7,104},{4,6,149},{2,7,148},{2,7,148},{5,7,5981},{5,7,4089},{5,7,3065},{5,7,1921},{5,7,5702},{5,7,2666},{4,7,888}, +{4,7,693},{4,7,5325},{2,7,341},{6,7,1445},{6,7,821},{6,7,421},{6,7,205},{7,6,2306},{5,7,1370},{5,7,346},{2,7,325},{7,6,2274},{2,7,325},{5,7,3065},{5,7,3065},{5,7,3065},{5,7,1921},{5,7,2786},{4,7,888},{4,7,888},{4,6,294},{3,7,2355},{2,7,341},{6,7,421},{6,7,421},{6,7,421},{6,7,205},{7,5,1184},{5,7,346},{5,7,346},{4,6,290},{6,6,1152}, +{4,6,290},{7,7,157},{7,7,85},{7,7,36},{6,7,9},{7,7,157},{7,7,149},{6,7,9},{0,7,289},{7,7,149},{0,7,289},{5,0,1696},{5,0,1696},{5,0,1696},{5,0,1696},{4,7,104},{4,7,104},{4,7,104},{4,6,5},{2,7,52},{2,7,52},{6,7,5433},{5,7,4617},{5,7,3593},{5,7,2097},{5,7,5734},{5,7,2346},{5,7,1322},{4,7,261},{4,7,4909},{2,7,293},{6,7,1589}, +{6,7,965},{6,7,565},{6,7,157},{7,6,1890},{6,7,1146},{5,7,538},{3,7,82},{6,7,1890},{3,7,82},{5,7,3593},{5,7,3593},{5,7,3593},{5,7,2097},{5,7,2818},{5,7,1322},{5,7,1322},{4,7,261},{3,7,2691},{2,7,293},{6,7,565},{6,7,565},{6,7,565},{6,7,157},{6,7,1154},{5,7,538},{5,7,538},{3,7,82},{5,7,1154},{3,7,82},{7,7,125},{7,7,53},{7,7,4}, +{7,7,4},{7,7,125},{7,7,53},{7,7,4},{0,7,81},{7,7,53},{0,7,81},{5,0,1568},{5,0,1568},{5,0,1568},{5,0,1568},{4,7,360},{4,7,360},{4,7,360},{4,6,117},{2,7,212},{2,7,212},{6,7,4866},{6,7,4242},{6,7,3842},{5,7,2754},{6,7,5113},{5,7,2445},{5,7,1421},{4,7,234},{5,7,4804},{3,7,164},{7,7,1203},{7,7,1131},{6,7,1033},{6,7,409},{7,7,1611}, +{6,7,1056},{6,7,656},{4,7,9},{6,7,1584},{4,7,9},{6,7,3842},{6,7,3842},{6,7,3842},{5,7,2754},{5,7,3313},{5,7,1421},{5,7,1421},{4,7,234},{4,7,2875},{3,7,164},{6,7,1033},{6,7,1033},{6,7,1033},{6,7,409},{7,6,1152},{6,7,656},{6,7,656},{4,7,9},{7,6,1184},{4,7,9},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49}, 
+{0,7,0},{7,7,98},{0,7,0},{5,0,1730},{5,0,1730},{5,0,1730},{5,0,1730},{5,7,397},{5,7,397},{5,7,397},{4,7,234},{3,7,164},{3,7,164},{6,7,4194},{6,7,3570},{6,7,3170},{6,7,2546},{6,7,4137},{5,7,2365},{5,7,1341},{5,7,185},{5,7,3876},{3,7,324},{7,7,771},{7,7,699},{7,7,650},{6,7,521},{7,7,1083},{6,7,864},{6,7,464},{4,7,25},{7,7,1187}, +{4,7,25},{6,7,3170},{6,7,3170},{6,7,3170},{6,7,2546},{6,7,3113},{5,7,1341},{5,7,1341},{5,7,185},{4,7,2491},{3,7,324},{7,7,650},{7,7,650},{7,7,650},{6,7,521},{7,6,832},{6,7,464},{6,7,464},{4,7,25},{6,7,800},{4,7,25},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{5,0,2146}, +{5,0,2146},{5,0,2146},{5,0,2146},{5,7,317},{5,7,317},{5,7,317},{5,7,185},{3,7,324},{3,7,324},{6,7,3778},{6,7,3154},{6,7,2754},{6,7,2130},{6,7,3417},{6,7,2169},{5,7,1517},{5,7,9},{5,7,3204},{4,7,338},{7,7,467},{7,7,395},{7,7,346},{7,7,274},{7,7,683},{7,7,539},{6,7,400},{5,7,0},{7,7,723},{5,7,0},{6,7,2754},{6,7,2754},{6,7,2754}, +{6,7,2130},{6,7,2393},{5,7,1517},{5,7,1517},{5,7,9},{5,7,2180},{4,7,338},{7,7,346},{7,7,346},{7,7,346},{7,7,274},{7,7,562},{6,7,400},{6,7,400},{5,7,0},{6,7,544},{5,7,0},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{6,0,1730},{6,0,1730},{6,0,1730},{6,0,1730},{5,7,493}, +{5,7,493},{5,7,493},{5,7,9},{4,7,338},{4,7,338},{6,7,3618},{6,7,2994},{6,7,2594},{6,7,1970},{6,7,2953},{6,7,1705},{6,7,1305},{5,7,89},{5,7,2788},{4,7,466},{7,7,291},{7,7,219},{7,7,170},{7,7,98},{7,7,411},{7,7,267},{7,7,218},{6,7,16},{7,7,387},{6,7,16},{6,7,2594},{6,7,2594},{6,7,2594},{6,7,1970},{6,7,1929},{6,7,1305},{6,7,1305}, +{5,7,89},{5,7,1764},{4,7,466},{7,7,170},{7,7,170},{7,7,170},{7,7,98},{7,7,290},{7,7,218},{7,7,218},{6,7,16},{7,7,338},{6,7,16},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{6,0,1570},{6,0,1570},{6,0,1570},{6,0,1570},{6,7,905},{6,7,905},{6,7,905},{5,7,89},{4,7,466}, 
+{4,7,466},{0,5,2665},{0,4,697},{0,3,290},{0,2,841},{0,3,5901},{0,2,4170},{0,2,1802},{0,1,4310},{0,1,6951},{0,1,4671},{0,5,2665},{0,4,697},{0,3,290},{0,2,841},{2,0,5893},{0,2,4170},{0,2,1802},{0,1,4310},{3,0,5893},{0,1,4310},{0,2,4},{0,2,4},{0,2,4},{0,1,4},{0,1,557},{0,1,365},{0,1,365},{0,0,356},{0,0,665},{0,0,392},{0,2,4}, +{0,2,4},{0,2,4},{0,1,4},{0,1,557},{0,1,365},{0,1,365},{0,0,356},{0,0,629},{0,0,356},{3,0,2665},{0,4,697},{0,3,290},{0,2,841},{3,0,2665},{3,1,2665},{0,2,841},{0,2,3145},{3,1,2665},{0,2,3145},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,6,2705},{0,4,617},{0,3,34}, +{0,2,985},{0,4,6754},{0,3,4625},{0,2,1946},{0,1,5030},{0,2,7635},{0,1,5391},{0,6,2705},{0,4,617},{0,3,34},{0,2,985},{1,2,6674},{0,3,4625},{0,2,1946},{0,1,5030},{0,2,6674},{0,1,5030},{0,3,9},{0,3,9},{0,3,9},{0,1,100},{0,1,845},{0,1,461},{0,1,461},{0,0,676},{0,0,1097},{0,0,712},{0,3,9},{0,3,9},{0,3,9},{0,1,100},{0,1,845}, +{0,1,461},{0,1,461},{0,0,676},{1,0,853},{0,0,676},{1,4,2665},{0,4,617},{0,3,34},{0,2,985},{1,4,2665},{2,2,2665},{0,2,985},{0,2,2777},{2,2,2665},{0,2,2777},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,6,2689},{0,5,477},{0,3,34},{0,3,634},{0,4,7538},{0,3,4865},{0,2,2346}, +{0,2,5474},{0,2,8547},{0,1,6367},{0,6,2689},{0,5,477},{0,3,34},{0,3,634},{0,4,7538},{0,3,4865},{0,2,2346},{0,2,5474},{2,1,7538},{0,2,5474},{0,3,25},{0,3,25},{0,3,25},{0,2,16},{0,1,1261},{0,1,685},{0,1,685},{0,1,965},{0,1,1646},{0,0,1160},{0,3,25},{0,3,25},{0,3,25},{0,2,16},{1,0,1213},{0,1,685},{0,1,685},{0,1,965},{1,0,1205}, +{0,1,965},{2,3,2669},{0,5,477},{0,3,34},{0,3,634},{2,3,2669},{6,0,2669},{0,3,634},{0,2,2665},{6,0,2669},{0,2,2665},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,7,2669},{0,5,365},{0,3,290},{0,3,410},{0,4,8578},{0,3,5361},{0,3,2346},{0,2,5618},{0,2,9715},{0,2,6579},{0,7,2669}, 
+{0,5,365},{0,3,290},{0,3,410},{2,1,8498},{0,3,5361},{0,3,2346},{0,2,5618},{4,0,8498},{0,2,5618},{0,4,0},{0,4,0},{0,4,0},{0,2,16},{0,2,1637},{0,2,977},{0,2,977},{0,1,997},{0,1,1998},{0,1,1358},{0,4,0},{0,4,0},{0,4,0},{0,2,16},{1,0,1629},{0,2,977},{0,2,977},{0,1,997},{0,1,1637},{0,1,997},{3,2,2669},{0,5,365},{1,3,185}, +{0,3,410},{3,2,2669},{5,1,2669},{0,3,410},{0,2,2809},{5,1,2669},{0,2,2809},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,7,2777},{0,6,257},{0,4,277},{0,3,464},{0,5,9677},{0,4,6026},{0,3,2400},{0,2,6086},{0,2,11335},{0,2,7047},{0,7,2777},{0,6,257},{0,4,277},{0,3,464},{3,0,9677}, +{0,4,6026},{0,3,2400},{0,2,6086},{3,1,9677},{0,2,6086},{0,5,16},{0,5,16},{0,5,16},{0,3,64},{0,2,2186},{0,2,1130},{0,2,1130},{0,1,1186},{0,1,2547},{0,1,1547},{0,5,16},{0,5,16},{0,5,16},{0,3,64},{0,2,2186},{0,2,1130},{0,2,1130},{0,1,1186},{0,1,2186},{0,1,1186},{4,1,2669},{0,6,257},{0,4,277},{0,3,464},{4,1,2669},{4,2,2669},{0,3,464}, +{0,3,3209},{4,2,2669},{0,3,3209},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{0,7,3209},{0,6,209},{0,4,389},{0,4,541},{0,6,9765},{0,4,5546},{0,3,2080},{0,2,5990},{0,3,11621},{0,2,6951},{0,7,3209},{0,6,209},{0,4,389},{0,4,541},{1,4,9677},{0,4,5546},{0,3,2080},{0,2,5990},{2,2,9677}, +{0,2,5990},{0,5,80},{0,5,80},{0,5,80},{0,3,64},{0,3,2196},{0,2,970},{0,2,970},{0,1,1058},{0,1,2739},{0,1,1419},{0,5,80},{0,5,80},{0,5,80},{0,3,64},{1,1,2180},{0,2,970},{0,2,970},{0,1,1058},{1,1,2196},{0,1,1058},{5,0,2669},{0,6,145},{1,4,52},{0,4,477},{5,0,2669},{3,3,2669},{0,4,477},{0,3,2809},{3,3,2669},{0,3,2809},{0,0,64}, +{0,0,64},{0,0,64},{0,0,64},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,40},{0,0,40},{0,7,4025},{0,7,441},{0,4,885},{0,4,429},{0,6,9685},{0,4,5450},{0,4,1854},{0,3,6029},{0,3,11925},{0,2,7239},{1,7,3001},{0,7,441},{1,4,344},{0,4,429},{3,1,9685},{0,4,5450},{0,4,1854},{0,3,6029},{4,1,9685},{0,3,6029},{0,6,256},{0,6,256},{0,6,256}, 
+{0,3,320},{0,3,2228},{0,3,848},{0,3,848},{0,1,1186},{0,2,3171},{0,1,1547},{0,6,256},{0,6,256},{0,6,256},{0,3,320},{2,0,2180},{0,3,848},{0,3,848},{0,1,1186},{3,0,2180},{0,1,1186},{4,2,2677},{0,7,185},{1,4,20},{0,4,173},{4,2,2677},{7,1,2677},{0,4,173},{0,3,2665},{7,1,2677},{0,3,2665},{0,0,256},{0,0,256},{0,0,256},{0,0,256},{0,1,9}, +{0,1,9},{0,1,9},{0,0,36},{0,0,72},{0,0,72},{1,7,4141},{0,7,617},{1,4,1500},{0,4,701},{0,7,9690},{0,5,5001},{0,4,1214},{0,3,5277},{0,3,12613},{0,3,7213},{1,7,2985},{1,6,465},{1,4,344},{1,4,524},{4,0,9674},{0,5,5001},{0,4,1214},{0,3,5277},{3,2,9674},{0,3,5277},{0,7,601},{0,7,601},{0,7,601},{0,4,601},{0,4,2178},{0,3,656},{0,3,656}, +{0,2,866},{0,2,3171},{0,2,1827},{1,5,101},{1,5,101},{1,5,101},{1,3,109},{0,4,2178},{0,3,656},{0,3,656},{0,2,866},{2,1,2178},{0,2,866},{5,1,2669},{0,7,41},{2,4,181},{0,4,125},{5,1,2669},{4,3,2669},{0,4,125},{0,3,2777},{4,3,2669},{0,3,2777},{0,0,576},{0,0,576},{0,0,576},{0,0,576},{0,1,25},{0,1,25},{0,1,25},{0,1,49},{0,0,232}, +{0,0,232},{1,7,4582},{0,7,1274},{1,5,1446},{0,4,1466},{0,7,9789},{0,5,4794},{0,4,953},{0,3,4890},{0,4,13038},{0,3,6826},{1,7,3426},{1,7,290},{1,5,290},{1,4,443},{2,4,9685},{0,5,4794},{0,4,953},{0,3,4890},{7,0,9685},{0,3,4890},{0,7,1105},{0,7,1105},{0,7,1105},{0,4,1105},{0,5,2210},{0,4,592},{0,4,592},{0,2,596},{0,2,3477},{0,2,1557},{1,6,26}, +{1,6,26},{1,6,26},{1,4,82},{1,3,2180},{0,4,592},{0,4,592},{0,2,596},{1,2,2180},{0,2,596},{6,0,2677},{0,7,185},{1,5,289},{0,4,377},{6,0,2677},{5,3,2677},{0,4,377},{0,3,3209},{5,3,2677},{0,3,3209},{0,0,1089},{0,0,1089},{0,0,1089},{0,0,1089},{0,2,4},{0,2,4},{0,2,4},{0,1,4},{0,1,365},{0,1,365},{1,7,5382},{1,7,1350},{1,5,1510}, 
+{1,5,1738},{0,7,10285},{0,6,4406},{0,5,971},{0,3,4954},{0,4,13534},{0,3,6890},{2,7,3454},{1,7,194},{1,5,354},{1,5,582},{3,3,9674},{0,6,4406},{0,5,971},{0,3,4954},{6,1,9674},{0,3,4954},{1,6,1214},{1,6,1214},{1,6,1214},{1,4,1206},{0,5,2210},{0,4,400},{0,4,400},{0,2,628},{0,2,4021},{0,2,1589},{1,6,58},{1,6,58},{1,6,58},{1,4,50},{2,2,2178}, +{0,4,400},{0,4,400},{0,2,628},{5,0,2178},{0,2,628},{4,4,2669},{1,7,145},{2,5,74},{0,5,130},{4,4,2669},{7,2,2669},{0,5,130},{0,4,2845},{7,2,2669},{0,4,2845},{1,0,1205},{1,0,1205},{1,0,1205},{1,0,1205},{0,3,9},{0,3,9},{0,3,9},{0,1,100},{0,1,461},{0,1,461},{1,7,6566},{1,7,1638},{1,5,1958},{1,5,1578},{1,7,10830},{0,6,4118},{0,5,443}, +{0,4,4785},{0,4,14414},{0,3,7338},{2,7,3390},{1,7,482},{2,5,371},{1,5,422},{4,2,9674},{0,6,4118},{0,5,443},{0,4,4785},{5,2,9674},{0,4,4785},{1,7,1382},{1,7,1382},{1,7,1382},{1,4,1430},{0,6,2178},{0,5,442},{0,5,442},{0,3,641},{0,3,4242},{0,2,1877},{1,7,226},{1,7,226},{1,7,226},{1,4,274},{3,1,2178},{0,5,442},{0,5,442},{0,3,641},{4,1,2178}, +{0,3,641},{5,3,2669},{1,7,257},{2,5,10},{0,5,2},{5,3,2669},{6,3,2669},{0,5,2},{0,4,2669},{6,3,2669},{0,4,2669},{1,0,1381},{1,0,1381},{1,0,1381},{1,0,1381},{0,3,25},{0,3,25},{0,3,25},{0,2,16},{0,1,685},{0,1,685},{1,7,8134},{1,7,2310},{1,6,2671},{1,5,1802},{1,7,11086},{0,7,4109},{0,5,299},{0,4,4193},{0,5,14830},{0,4,7442},{2,7,3710}, +{2,7,490},{2,5,323},{2,5,563},{5,1,9669},{0,7,4109},{0,5,299},{0,4,4193},{4,3,9669},{0,4,4193},{1,7,1734},{1,7,1734},{1,7,1734},{1,5,1721},{0,7,2228},{0,5,218},{0,5,218},{0,3,305},{0,3,4626},{0,3,2241},{2,6,125},{2,6,125},{2,6,125},{2,4,125},{4,0,2180},{0,5,218},{0,5,218},{0,3,305},{3,2,2180},{0,3,305},{6,2,2665},{2,7,369},{3,5,181}, 
+{1,5,117},{6,2,2665},{5,4,2665},{1,5,117},{0,4,2749},{5,4,2665},{0,4,2749},{1,0,1685},{1,0,1685},{1,0,1685},{1,0,1685},{0,4,0},{0,4,0},{0,4,0},{0,2,16},{0,2,977},{0,2,977},{2,7,9153},{1,7,3525},{1,6,3220},{1,5,2513},{1,7,11833},{0,7,3686},{0,6,548},{0,4,3986},{0,5,15577},{0,4,7235},{3,7,4141},{2,7,481},{2,6,309},{2,5,428},{6,0,9674}, +{0,7,3686},{0,6,548},{0,4,3986},{3,4,9674},{0,4,3986},{1,7,2436},{1,7,2436},{1,7,2436},{1,5,2189},{0,7,2210},{0,5,272},{0,5,272},{0,3,233},{0,3,5364},{0,3,2169},{2,7,40},{2,7,40},{2,7,40},{2,5,104},{2,4,2178},{0,5,272},{0,5,272},{0,3,233},{7,0,2178},{0,3,233},{7,1,2669},{2,7,477},{2,6,305},{0,6,292},{7,1,2669},{4,5,2669},{0,6,292}, +{0,4,3145},{4,5,2669},{0,4,3145},{1,0,2180},{1,0,2180},{1,0,2180},{1,0,2180},{0,5,16},{0,5,16},{0,5,16},{0,3,64},{0,2,1130},{0,2,1130},{2,7,10154},{1,7,4946},{1,6,4049},{1,6,2853},{1,7,12838},{0,7,3719},{0,6,133},{0,4,4211},{0,6,16286},{0,4,7346},{3,7,4061},{2,7,881},{2,6,325},{2,6,629},{4,4,9669},{0,7,3718},{0,6,132},{0,4,4210},{7,2,9669}, +{0,4,4210},{1,7,3265},{1,7,3265},{1,7,3265},{1,6,2789},{0,7,2467},{0,6,69},{0,6,69},{0,3,442},{0,4,5602},{0,3,2290},{2,7,40},{2,7,40},{2,7,40},{2,5,40},{3,3,2180},{0,6,68},{0,6,68},{0,3,441},{6,1,2180},{0,3,441},{5,5,2665},{3,7,617},{3,6,100},{0,6,68},{5,5,2665},{3,6,2665},{0,6,68},{0,5,2885},{3,6,2665},{0,5,2885},{1,0,2689}, +{1,0,2689},{1,0,2689},{1,0,2689},{0,5,17},{0,5,17},{0,5,17},{0,3,1},{0,2,1341},{0,2,1341},{2,7,10666},{2,7,5146},{2,6,4206},{1,6,2933},{2,7,13606},{0,7,4215},{0,6,181},{0,5,3914},{0,6,15454},{0,4,7122},{3,7,4365},{3,7,1097},{3,6,404},{2,6,421},{5,3,9669},{1,7,4133},{0,6,100},{0,5,3833},{6,3,9669},{0,5,3833},{2,7,3777},{2,7,3777},{2,7,3777}, 
+{1,6,2933},{1,7,2805},{0,6,181},{0,6,181},{0,4,275},{0,4,5282},{0,3,2130},{2,7,296},{2,7,296},{2,7,296},{2,5,232},{4,2,2180},{0,6,100},{0,6,100},{0,4,194},{5,2,2180},{0,4,194},{6,4,2665},{3,7,697},{3,6,4},{1,6,4},{6,4,2665},{7,4,2665},{1,6,4},{0,5,2677},{7,4,2665},{0,5,2677},{1,0,2929},{1,0,2929},{1,0,2929},{1,0,2929},{0,6,81}, +{0,6,81},{0,6,81},{0,3,145},{0,3,1289},{0,3,1289},{2,7,11690},{2,7,5434},{2,7,4085},{2,6,3198},{2,7,13510},{1,7,4470},{1,6,585},{0,5,3690},{0,6,15134},{0,5,6210},{3,7,5053},{3,7,1177},{3,6,308},{2,6,597},{6,2,9670},{1,7,4181},{1,6,296},{0,5,3401},{5,4,9670},{0,5,3401},{2,7,3409},{2,7,3409},{2,7,3409},{2,6,3134},{1,7,2565},{0,7,323},{0,7,323}, +{0,4,323},{0,5,5085},{0,4,1634},{3,7,153},{3,7,153},{3,7,153},{3,5,145},{5,1,2186},{0,7,34},{0,7,34},{0,4,34},{7,1,2186},{0,4,34},{7,3,2665},{4,7,937},{3,6,164},{1,6,100},{7,3,2665},{6,5,2665},{1,6,100},{0,5,2725},{6,5,2665},{0,5,2725},{2,0,3085},{2,0,3085},{2,0,3085},{2,0,3085},{1,5,290},{1,5,290},{1,5,290},{1,3,298},{0,3,985}, +{0,3,985},{3,7,12062},{2,7,6370},{2,7,3743},{2,6,3018},{2,7,14014},{1,7,4758},{1,6,603},{1,5,4049},{0,7,14638},{0,5,5310},{4,7,4958},{3,7,1726},{3,7,334},{3,6,419},{7,1,9669},{1,7,4694},{0,7,341},{0,5,3374},{4,5,9669},{0,5,3374},{2,7,3454},{2,7,3454},{2,7,3454},{2,6,2729},{1,7,2754},{1,6,314},{1,6,314},{1,4,289},{0,5,4626},{0,4,1121},{3,7,45}, +{3,7,45},{3,7,45},{3,5,109},{6,0,2180},{0,7,52},{0,7,52},{0,4,160},{3,4,2180},{0,4,160},{5,7,2665},{4,7,1009},{4,6,293},{2,6,293},{5,7,2665},{5,6,2665},{2,6,293},{0,5,3085},{5,6,2665},{0,5,3085},{2,0,2725},{2,0,2725},{2,0,2725},{2,0,2725},{1,6,89},{1,6,89},{1,6,89},{1,4,145},{0,3,949},{0,3,949},{3,7,12414},{3,7,7246},{2,7,3983}, 
+{2,7,2879},{2,7,15006},{1,7,5558},{1,7,163},{1,5,4161},{0,7,14190},{0,5,5054},{4,7,5246},{4,7,2186},{3,7,302},{3,6,659},{5,5,9670},{2,7,4926},{1,7,163},{0,5,3758},{3,6,9670},{0,5,3758},{2,7,3902},{2,7,3902},{2,7,3902},{2,6,2777},{1,7,3330},{1,7,82},{1,7,82},{1,4,401},{0,6,4490},{0,4,1073},{3,7,221},{3,7,221},{3,7,221},{3,6,34},{5,2,2186}, +{1,7,82},{1,7,82},{0,5,277},{7,2,2186},{0,5,277},{6,6,2665},{5,7,1313},{4,7,130},{1,7,82},{6,6,2665},{4,7,2665},{1,7,82},{0,6,2929},{4,7,2665},{0,6,2929},{2,0,2677},{2,0,2677},{2,0,2677},{2,0,2677},{1,6,9},{1,6,9},{1,6,9},{1,4,1},{0,4,673},{0,4,673},{3,7,13278},{3,7,7502},{3,7,4254},{2,7,2895},{3,7,15045},{2,7,6114},{1,7,147}, +{1,6,3978},{0,7,14254},{0,6,4818},{4,7,5918},{4,7,2378},{4,7,443},{3,7,426},{6,4,9670},{2,7,5438},{1,7,83},{0,6,3218},{7,4,9670},{0,6,3218},{3,7,4253},{3,7,4253},{3,7,4253},{2,7,2894},{2,7,3054},{1,7,146},{1,7,146},{1,5,296},{0,6,3978},{0,5,821},{4,7,442},{4,7,442},{4,7,442},{3,6,194},{6,1,2186},{1,7,82},{1,7,82},{0,5,37},{6,3,2186}, +{0,5,37},{7,5,2665},{5,7,1361},{4,7,2},{2,7,10},{7,5,2665},{6,6,2689},{2,7,10},{0,6,2689},{6,6,2689},{0,6,2689},{2,0,2885},{2,0,2885},{2,0,2885},{2,0,2885},{1,7,65},{1,7,65},{1,7,65},{1,4,113},{0,4,625},{0,4,625},{3,7,13261},{3,7,7409},{3,7,4045},{3,7,3209},{3,7,14116},{2,7,5615},{2,7,574},{1,6,3165},{0,7,13437},{0,6,3429},{5,7,5269}, +{4,7,2275},{4,7,250},{3,7,505},{5,6,8712},{3,7,4724},{2,7,250},{0,6,2405},{4,6,8712},{0,6,2405},{3,7,4045},{3,7,4045},{3,7,4045},{3,7,3209},{2,7,3150},{2,7,574},{2,7,574},{1,5,296},{0,6,3850},{0,5,453},{4,7,250},{4,7,250},{4,7,250},{4,6,169},{7,0,2180},{2,7,250},{2,7,250},{1,5,40},{3,5,2180},{1,5,40},{6,7,2180},{5,7,1168},{4,7,81}, 
+{2,7,25},{6,7,2180},{5,7,2180},{2,7,25},{0,6,2180},{5,7,2180},{0,6,2180},{3,0,3145},{3,0,3145},{3,0,3145},{3,0,3145},{1,7,305},{1,7,305},{1,7,305},{1,5,292},{0,5,449},{0,5,449},{4,7,11894},{3,7,7634},{3,7,4270},{3,7,2750},{3,7,13315},{2,7,5354},{2,7,313},{1,6,2634},{1,7,12414},{0,6,2250},{5,7,4369},{4,7,2365},{4,7,340},{4,7,160},{7,3,7590}, +{3,7,4094},{2,7,232},{0,6,1721},{6,5,7590},{0,6,1721},{3,7,4270},{3,7,4270},{3,7,4270},{3,7,2750},{2,7,3717},{2,7,313},{2,7,313},{2,5,302},{0,7,3546},{0,5,498},{4,7,340},{4,7,340},{4,7,340},{4,6,97},{7,1,2186},{2,7,232},{2,7,232},{1,5,130},{7,3,2186},{1,5,130},{6,7,1649},{6,7,1025},{5,7,9},{3,7,1},{6,7,1649},{7,6,1625},{3,7,1}, +{0,6,1685},{7,6,1625},{0,6,1685},{3,0,2749},{3,0,2749},{3,0,2749},{3,0,2749},{2,7,117},{2,7,117},{2,7,117},{2,5,181},{0,5,377},{0,5,377},{4,7,11078},{4,7,7478},{3,7,4878},{3,7,2750},{4,7,12662},{3,7,5031},{2,7,489},{2,6,1911},{1,7,11470},{0,6,1610},{5,7,3841},{5,7,1949},{4,7,692},{4,7,32},{5,7,6662},{4,7,3641},{3,7,442},{0,6,1385},{5,6,6662}, +{0,6,1385},{3,7,4878},{3,7,4878},{3,7,4878},{3,7,2750},{3,7,3795},{2,7,489},{2,7,489},{2,5,366},{0,7,3354},{0,6,454},{4,7,692},{4,7,692},{4,7,692},{4,7,32},{6,3,2180},{3,7,442},{3,7,442},{0,6,229},{6,4,2180},{0,6,229},{7,6,1201},{6,7,689},{5,7,25},{4,7,16},{7,6,1201},{7,6,1225},{4,7,16},{0,6,1381},{7,6,1225},{0,6,1381},{3,0,2669}, +{3,0,2669},{3,0,2669},{3,0,2669},{2,7,5},{2,7,5},{2,7,5},{2,5,5},{0,6,229},{0,6,229},{4,7,10646},{4,7,7046},{4,7,5021},{3,7,3134},{4,7,11526},{3,7,4503},{2,7,1049},{2,6,1479},{1,7,10910},{0,6,1354},{5,7,3569},{5,7,1677},{5,7,653},{4,7,160},{7,4,5829},{4,7,3065},{3,7,410},{1,6,1209},{7,5,5829},{1,6,1209},{4,7,5021},{4,7,5021},{4,7,5021}, 
+{3,7,3134},{3,7,3875},{2,7,1049},{2,7,1049},{2,6,323},{0,7,3546},{0,6,198},{5,7,653},{5,7,653},{5,7,653},{4,7,160},{7,2,2180},{3,7,410},{3,7,410},{1,6,53},{5,5,2180},{1,6,53},{7,6,865},{6,7,481},{6,7,81},{4,7,16},{7,6,865},{6,7,841},{4,7,16},{0,6,1205},{6,7,841},{0,6,1205},{3,0,2845},{3,0,2845},{3,0,2845},{3,0,2845},{2,7,149}, +{2,7,149},{2,7,149},{2,5,85},{0,6,149},{0,6,149},{4,7,10598},{4,7,6998},{4,7,4973},{4,7,3353},{4,7,10774},{3,7,4359},{3,7,995},{2,6,1431},{2,7,10294},{0,7,1242},{6,7,3329},{5,7,1661},{5,7,637},{5,7,197},{6,6,5085},{4,7,2745},{3,7,634},{0,7,1098},{4,7,5085},{0,7,1098},{4,7,4973},{4,7,4973},{4,7,4973},{4,7,3353},{3,7,4339},{3,7,995},{3,7,995}, +{2,6,275},{1,7,3845},{0,6,326},{5,7,637},{5,7,637},{5,7,637},{5,7,197},{5,6,2178},{3,7,634},{3,7,634},{1,6,37},{4,6,2178},{1,6,37},{7,7,605},{6,7,401},{6,7,1},{5,7,1},{7,7,605},{6,7,569},{5,7,1},{0,7,1089},{6,7,569},{0,7,1089},{4,0,3209},{4,0,3209},{4,0,3209},{4,0,3209},{3,7,370},{3,7,370},{3,7,370},{2,6,274},{0,7,153}, +{0,7,153},{5,7,9925},{4,7,7403},{4,7,5378},{4,7,3218},{4,7,10387},{4,7,4627},{3,7,1292},{3,6,1477},{2,7,9727},{0,7,621},{6,7,2609},{5,7,1949},{5,7,925},{5,7,89},{7,5,4347},{5,7,2384},{4,7,666},{0,7,612},{6,6,4371},{0,7,612},{4,7,5378},{4,7,5378},{4,7,5378},{4,7,3218},{4,7,4762},{3,7,1292},{3,7,1292},{3,6,321},{1,7,4106},{1,6,469},{5,7,925}, +{5,7,925},{5,7,925},{5,7,89},{6,5,2180},{4,7,666},{4,7,666},{2,6,104},{3,7,2180},{2,6,104},{7,7,290},{7,7,218},{6,7,64},{6,7,16},{7,7,290},{7,7,338},{6,7,16},{0,7,576},{7,7,338},{0,7,576},{4,0,2777},{4,0,2777},{4,0,2777},{4,0,2777},{3,7,136},{3,7,136},{3,7,136},{3,5,200},{0,7,45},{0,7,45},{5,7,9269},{5,7,7377},{4,7,6146}, 
+{4,7,3506},{5,7,10044},{4,7,4211},{3,7,1964},{3,7,836},{3,7,9185},{0,7,477},{6,7,2241},{6,7,1617},{6,7,1217},{5,7,265},{6,7,3780},{5,7,2064},{4,7,890},{1,7,257},{5,7,3780},{1,7,257},{4,7,6146},{4,7,6146},{4,7,6146},{4,7,3506},{4,7,4826},{3,7,1964},{3,7,1964},{3,6,337},{2,7,4590},{0,7,477},{6,7,1217},{6,7,1217},{6,7,1217},{5,7,265},{7,4,2178}, +{4,7,890},{4,7,890},{1,7,257},{7,5,2178},{1,7,257},{7,7,146},{7,7,74},{7,7,25},{6,7,16},{7,7,146},{7,7,130},{6,7,16},{0,7,256},{7,7,130},{0,7,256},{4,0,2665},{4,0,2665},{4,0,2665},{4,0,2665},{3,7,200},{3,7,200},{3,7,200},{3,6,13},{0,7,221},{0,7,221},{5,7,8997},{5,7,7105},{5,7,6081},{4,7,4178},{5,7,9276},{4,7,4179},{4,7,2154}, +{3,7,356},{3,7,8721},{1,7,209},{6,7,2129},{6,7,1505},{6,7,1105},{6,7,673},{7,6,3332},{5,7,2000},{5,7,976},{2,7,73},{7,6,3300},{2,7,73},{5,7,6081},{5,7,6081},{5,7,6081},{4,7,4178},{4,7,5274},{4,7,2154},{4,7,2154},{3,7,356},{2,7,4878},{1,7,209},{6,7,1105},{6,7,1105},{6,7,1105},{6,7,673},{7,5,2210},{5,7,976},{5,7,976},{2,7,73},{6,6,2178}, +{2,7,73},{7,7,130},{7,7,58},{7,7,9},{7,7,1},{7,7,130},{7,7,50},{7,7,1},{0,7,64},{7,7,50},{0,7,64},{4,0,2809},{4,0,2809},{4,0,2809},{4,0,2809},{3,7,520},{3,7,520},{3,7,520},{3,6,61},{1,7,145},{1,7,145},{5,7,9109},{5,7,7217},{5,7,6193},{5,7,4301},{5,7,8892},{4,7,4531},{4,7,2506},{3,7,260},{4,7,8587},{1,7,289},{6,7,2273}, +{6,7,1649},{6,7,1249},{6,7,625},{7,6,2916},{6,7,1956},{5,7,1168},{2,7,25},{6,7,2916},{2,7,25},{5,7,6193},{5,7,6193},{5,7,6193},{5,7,4301},{5,7,5976},{4,7,2506},{4,7,2506},{3,7,260},{3,7,5277},{1,7,289},{6,7,1249},{6,7,1249},{6,7,1249},{6,7,625},{6,7,2180},{5,7,1168},{5,7,1168},{2,7,25},{5,7,2180},{2,7,25},{7,7,242},{7,7,170},{7,7,121}, 
+{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{4,0,3209},{4,0,3209},{4,0,3209},{4,0,3209},{4,7,481},{4,7,481},{4,7,481},{3,7,260},{1,7,289},{1,7,289},{6,7,8325},{5,7,6749},{5,7,5725},{5,7,3833},{5,7,7866},{5,7,4082},{4,7,2470},{4,7,265},{4,7,7219},{2,7,365},{7,7,1842},{6,7,1460},{6,7,1060},{6,7,436},{7,6,2241}, +{6,7,1425},{6,7,1025},{3,7,1},{6,7,2169},{3,7,1},{5,7,5725},{5,7,5725},{5,7,5725},{5,7,3833},{5,7,4950},{4,7,2470},{4,7,2470},{4,7,265},{3,7,4521},{2,7,365},{6,7,1060},{6,7,1060},{6,7,1060},{6,7,436},{6,7,1649},{6,7,1025},{6,7,1025},{3,7,1},{7,6,1625},{3,7,1},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49}, +{0,7,0},{7,7,98},{0,7,0},{5,0,2809},{5,0,2809},{5,0,2809},{5,0,2809},{4,7,445},{4,7,445},{4,7,445},{4,6,202},{2,7,365},{2,7,365},{6,7,7093},{6,7,6469},{5,7,5581},{5,7,3689},{5,7,7226},{5,7,3442},{5,7,2418},{4,7,25},{4,7,6275},{2,7,509},{7,7,1266},{7,7,1194},{6,7,1028},{6,7,404},{7,7,1686},{6,7,1089},{6,7,689},{4,7,16},{6,7,1641}, +{4,7,16},{5,7,5581},{5,7,5581},{5,7,5581},{5,7,3689},{5,7,4310},{5,7,2418},{5,7,2418},{4,7,25},{3,7,4121},{2,7,509},{6,7,1028},{6,7,1028},{6,7,1028},{6,7,404},{7,6,1201},{6,7,689},{6,7,689},{4,7,16},{7,6,1225},{4,7,16},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{5,0,2665}, +{5,0,2665},{5,0,2665},{5,0,2665},{4,7,685},{4,7,685},{4,7,685},{4,7,25},{2,7,509},{2,7,509},{6,7,6117},{6,7,5493},{6,7,5093},{5,7,3801},{6,7,6098},{5,7,3058},{5,7,2034},{4,7,41},{4,7,5587},{3,7,613},{7,7,818},{7,7,746},{7,7,697},{6,7,500},{7,7,1142},{6,7,881},{6,7,481},{4,7,16},{6,7,1241},{4,7,16},{6,7,5093},{6,7,5093},{6,7,5093}, +{5,7,3801},{5,7,3926},{5,7,2034},{5,7,2034},{4,7,41},{4,7,3562},{3,7,613},{7,7,697},{7,7,697},{7,7,697},{6,7,500},{7,6,865},{6,7,481},{6,7,481},{4,7,16},{6,7,841},{4,7,16},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{5,0,2777},{5,0,2777},{5,0,2777},{5,0,2777},{5,7,1010}, 
+{5,7,1010},{5,7,1010},{4,7,41},{3,7,613},{3,7,613},{6,7,5397},{6,7,4773},{6,7,4373},{6,7,3749},{6,7,5074},{5,7,2930},{5,7,1906},{4,7,313},{5,7,4753},{3,7,725},{7,7,498},{7,7,426},{7,7,377},{7,7,305},{7,7,726},{7,7,582},{6,7,401},{5,7,1},{7,7,774},{5,7,1},{6,7,4373},{6,7,4373},{6,7,4373},{6,7,3749},{5,7,3798},{5,7,1906},{5,7,1906}, +{4,7,313},{4,7,3130},{3,7,725},{7,7,377},{7,7,377},{7,7,377},{7,7,305},{7,7,605},{6,7,401},{6,7,401},{5,7,1},{6,7,569},{5,7,1},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{5,0,3145},{5,0,3145},{5,0,3145},{5,0,3145},{5,7,882},{5,7,882},{5,7,882},{4,7,313},{3,7,725}, +{3,7,725},{1,7,34142},{0,7,5184},{0,5,609},{0,5,4841},{1,7,46442},{0,7,24449},{0,5,9741},{0,4,24761},{0,5,65162},{0,3,40820},{0,7,10048},{0,7,2880},{0,5,545},{0,4,3204},{1,4,18070},{0,4,13297},{0,4,6453},{0,2,13857},{2,2,18070},{0,2,13857},{0,3,9},{0,3,9},{0,3,9},{0,2,36},{0,1,1145},{0,1,617},{0,1,617},{0,1,977},{0,0,1505},{0,0,1036},{0,3,9}, +{0,3,9},{0,3,9},{0,2,36},{1,0,1129},{0,1,617},{0,1,617},{0,1,977},{1,0,1105},{0,1,977},{4,2,9250},{0,7,2880},{0,5,545},{0,4,3204},{4,2,9250},{5,2,9250},{0,4,3204},{0,3,9280},{5,2,9250},{0,3,9280},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{1,7,38782},{0,7,6720},{0,5,305}, +{0,5,3801},{1,7,51082},{0,7,24961},{0,5,8701},{0,4,25849},{0,5,65535},{0,4,42094},{1,7,10502},{0,7,2624},{0,5,241},{0,4,3044},{3,1,19334},{0,4,14065},{0,4,6293},{0,3,14756},{4,1,19334},{0,3,14756},{0,4,4},{0,4,4},{0,4,4},{0,2,4},{0,2,1537},{0,1,937},{0,1,937},{0,1,977},{0,1,1898},{0,1,1338},{0,4,4},{0,4,4},{0,4,4},{0,2,4},{1,0,1513}, +{0,1,937},{0,1,937},{0,1,977},{0,1,1537},{0,1,977},{5,1,9256},{0,7,2624},{0,5,241},{0,4,3044},{5,1,9256},{7,1,9256},{0,4,3044},{0,3,9280},{7,1,9256},{0,3,9280},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{1,7,44190},{0,7,9024},{0,5,769},{0,5,3529},{1,7,56490},{0,7,26241},{0,5,8429}, 
+{0,4,27705},{0,5,65535},{0,4,43950},{1,7,10854},{0,7,2624},{0,5,193},{0,5,2953},{2,3,20689},{0,5,14598},{0,4,6389},{0,3,15012},{6,0,20689},{0,3,15012},{0,4,36},{0,4,36},{0,4,36},{0,2,100},{0,2,1985},{0,2,1061},{0,2,1061},{0,1,1105},{0,1,2346},{0,1,1466},{0,4,36},{0,4,36},{0,4,36},{0,2,100},{0,2,1985},{0,2,1061},{0,2,1061},{0,1,1105},{0,1,1985}, +{0,1,1105},{6,0,9256},{0,7,2624},{0,5,193},{0,5,2953},{6,0,9256},{6,2,9256},{0,5,2953},{0,3,9536},{6,2,9256},{0,3,9536},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{1,7,50366},{0,7,12096},{0,6,1101},{0,5,4025},{1,7,62666},{0,7,28289},{0,5,8925},{0,5,28912},{0,6,65535},{0,4,46574},{1,7,11462}, +{0,7,2880},{0,6,317},{0,5,2425},{4,0,22137},{0,5,15206},{0,4,6741},{0,3,15524},{3,2,22137},{0,3,15524},{0,5,1},{0,5,1},{0,5,1},{0,3,9},{0,2,2561},{0,2,1285},{0,2,1285},{0,1,1361},{0,1,2922},{0,1,1722},{0,5,1},{0,5,1},{0,5,1},{0,3,9},{1,1,2561},{0,2,1285},{0,2,1285},{0,1,1361},{2,0,2521},{0,1,1361},{5,2,9250},{0,7,2880},{0,6,317}, +{0,5,2425},{5,2,9250},{5,3,9250},{0,5,2425},{0,4,9640},{5,3,9250},{0,4,9640},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{1,7,58232},{0,7,16470},{0,6,129},{0,6,3489},{1,7,65535},{0,7,31511},{0,6,10213},{0,5,29110},{0,6,65535},{0,4,50444},{1,7,12452},{0,7,3474},{0,6,29},{0,5,2137},{2,4,23851}, +{0,6,16172},{0,5,7037},{0,3,16406},{7,0,23851},{0,3,16406},{0,6,4},{0,6,4},{0,6,4},{0,3,36},{0,3,3232},{0,2,1690},{0,2,1690},{0,1,1802},{0,1,3723},{0,1,2163},{0,6,4},{0,6,4},{0,6,4},{0,3,36},{1,1,3200},{0,2,1690},{0,2,1690},{0,1,1802},{2,0,3232},{0,1,1802},{6,1,9256},{1,7,3232},{0,6,29},{0,5,2137},{6,1,9256},{6,3,9256},{0,5,2137}, 
+{0,4,9298},{6,3,9256},{0,4,9298},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{1,7,65535},{0,7,21174},{0,6,81},{0,6,2545},{1,7,65535},{0,7,35191},{0,6,9269},{0,5,30102},{0,6,65535},{0,5,54602},{1,7,13604},{0,7,4274},{0,6,45},{0,5,2153},{4,1,25472},{0,6,16620},{0,5,7053},{0,3,17462},{4,2,25472}, +{0,3,17462},{0,6,36},{0,6,36},{0,6,36},{0,4,49},{0,3,3872},{0,3,2132},{0,3,2132},{0,1,2330},{0,1,4571},{0,1,2691},{0,6,36},{0,6,36},{0,6,36},{0,4,49},{0,3,3872},{0,3,2132},{0,3,2132},{0,1,2330},{1,1,3872},{0,1,2330},{7,0,9250},{1,7,3488},{0,6,45},{0,5,2153},{7,0,9250},{3,5,9250},{0,5,2153},{0,4,9266},{3,5,9250},{0,4,9266},{0,0,0}, +{0,0,0},{0,0,0},{0,0,0},{0,0,36},{0,0,36},{0,0,36},{0,0,100},{0,0,136},{0,0,136},{2,7,65535},{1,7,25204},{0,6,805},{0,6,2373},{1,7,65535},{0,7,39263},{0,6,8769},{0,5,31402},{0,6,65535},{0,5,55902},{1,7,14880},{1,7,5024},{0,6,321},{0,6,1889},{5,0,26756},{0,6,17000},{0,5,7049},{0,4,18139},{6,1,26756},{0,4,18139},{0,7,5},{0,7,5},{0,7,5}, +{0,4,5},{0,3,4468},{0,3,2248},{0,3,2248},{0,2,2722},{0,2,5411},{0,1,3227},{0,7,5},{0,7,5},{0,7,5},{0,4,5},{2,0,4420},{0,3,2248},{0,3,2248},{0,2,2722},{3,0,4420},{0,2,2722},{5,4,9250},{1,7,4000},{1,6,185},{0,6,1885},{5,4,9250},{7,3,9250},{0,6,1885},{0,4,9490},{7,3,9250},{0,4,9490},{0,0,4},{0,0,4},{0,0,4},{0,0,4},{0,0,16}, +{0,0,16},{0,0,16},{0,0,64},{0,0,100},{0,0,100},{2,7,65535},{1,7,29620},{0,7,1306},{0,6,3061},{2,7,65535},{0,7,43055},{0,6,8145},{0,6,31878},{0,7,65535},{0,5,56670},{2,7,14924},{1,7,5344},{0,7,406},{0,6,1617},{4,2,26756},{0,6,16744},{0,5,6553},{0,4,17195},{5,2,26756},{0,4,17195},{0,7,181},{0,7,181},{0,7,181},{0,4,181},{0,4,4418},{0,3,2056},{0,3,2056}, 
+{0,2,2210},{0,2,5411},{0,2,3171},{0,7,181},{0,7,181},{0,7,181},{0,4,181},{0,4,4418},{0,3,2056},{0,3,2056},{0,2,2210},{2,1,4418},{0,2,2210},{6,3,9248},{2,7,4292},{0,7,306},{0,6,1517},{6,3,9248},{6,4,9248},{0,6,1517},{0,5,9698},{6,4,9248},{0,5,9698},{0,0,100},{0,0,100},{0,0,100},{0,0,100},{0,0,16},{0,0,16},{0,0,16},{0,0,0},{0,0,36}, +{0,0,36},{2,7,65535},{1,7,35659},{0,7,865},{0,7,2657},{2,7,65535},{1,7,48315},{0,7,8072},{0,6,29745},{0,7,65535},{0,5,58605},{2,7,15347},{1,7,6163},{1,7,270},{0,6,1770},{5,1,26756},{0,7,15992},{0,6,5378},{0,4,16592},{6,2,26756},{0,4,16592},{0,7,685},{0,7,685},{0,7,685},{0,5,370},{0,5,4450},{0,4,1768},{0,4,1768},{0,2,1940},{0,2,5717},{0,2,2901},{1,7,234}, +{1,7,234},{1,7,234},{1,4,250},{1,3,4420},{0,4,1768},{0,4,1768},{0,2,1940},{1,2,4420},{0,2,1940},{7,2,9250},{3,7,4820},{1,7,45},{0,6,1409},{7,2,9250},{5,5,9250},{0,6,1409},{0,5,9320},{5,5,9250},{0,5,9320},{0,0,361},{0,0,361},{0,0,361},{0,0,361},{0,1,0},{0,1,0},{0,1,0},{0,0,81},{0,0,117},{0,0,117},{2,7,65535},{1,7,40299},{0,7,1405}, +{0,7,2173},{2,7,65535},{1,7,50747},{0,7,6068},{0,6,28101},{0,7,65535},{0,5,59897},{2,7,15659},{1,7,6963},{1,7,74},{1,6,2046},{6,0,26264},{0,7,15284},{0,6,4470},{0,4,16052},{3,4,26264},{0,4,16052},{1,7,1230},{1,7,1230},{1,7,1230},{0,5,754},{0,5,4450},{0,4,1576},{0,4,1576},{0,2,1972},{0,2,6261},{0,2,2933},{1,7,74},{1,7,74},{1,7,74},{1,5,113},{2,2,4418}, +{0,4,1576},{0,4,1576},{0,2,1972},{5,0,4418},{0,2,1972},{5,6,8980},{3,7,4808},{1,7,25},{0,7,1444},{5,6,8980},{4,6,8980},{0,7,1444},{0,5,8980},{4,6,8980},{0,5,8980},{0,0,729},{0,0,729},{0,0,729},{0,0,729},{0,2,16},{0,2,16},{0,2,16},{0,1,16},{0,0,325},{0,0,325},{2,7,65535},{1,7,40395},{0,7,2381},{0,7,2125},{2,7,65535},{1,7,48635},{0,7,4500}, 
+{0,6,24853},{0,7,65535},{0,5,57545},{3,7,14605},{2,7,6211},{1,7,170},{1,6,1598},{5,2,24379},{0,7,13716},{0,6,3446},{0,5,14549},{5,3,24379},{0,5,14549},{1,7,1326},{1,7,1326},{1,7,1326},{1,5,1157},{0,6,4418},{0,5,1394},{0,5,1394},{0,3,1621},{0,3,6482},{0,2,3221},{1,7,170},{1,7,170},{1,7,170},{1,5,1},{3,1,4418},{0,5,1394},{0,5,1394},{0,3,1621},{4,1,4418}, +{0,3,1621},{7,3,7940},{3,7,4264},{2,7,81},{0,7,900},{7,3,7940},{6,5,7940},{0,7,900},{0,5,7988},{6,5,7940},{0,5,7988},{1,0,1157},{1,0,1157},{1,0,1157},{1,0,1157},{0,2,16},{0,2,16},{0,2,16},{0,1,16},{0,1,377},{0,1,377},{2,7,65535},{1,7,40747},{0,7,3613},{0,7,2333},{2,7,65535},{1,7,46779},{0,7,3188},{0,6,21861},{0,7,65535},{0,5,55449},{3,7,13181}, +{2,7,5667},{1,7,522},{1,7,1306},{6,1,22571},{0,7,12404},{0,6,2678},{0,5,12453},{4,4,22571},{0,5,12453},{1,7,1678},{1,7,1678},{1,7,1678},{1,5,1301},{0,7,4468},{0,5,1170},{0,5,1170},{0,3,1285},{0,3,6866},{0,3,3221},{1,7,522},{1,7,522},{1,7,522},{1,5,145},{4,0,4420},{0,5,1170},{0,5,1170},{0,3,1285},{3,2,4420},{0,3,1285},{6,5,6964},{3,7,3848},{2,7,1}, +{0,7,484},{6,5,6964},{3,7,6964},{0,7,484},{0,5,7124},{3,7,6964},{0,5,7124},{1,0,1237},{1,0,1237},{1,0,1237},{1,0,1237},{0,3,1},{0,3,1},{0,3,1},{0,2,100},{0,1,505},{0,1,505},{2,7,65535},{1,7,41449},{0,7,5305},{0,7,2873},{2,7,65535},{1,7,44997},{0,7,2018},{0,6,18801},{0,7,65535},{0,6,52421},{3,7,11885},{2,7,5361},{2,7,320},{1,7,1000},{5,3,20645}, +{0,7,11234},{0,7,2018},{0,5,10401},{6,3,20645},{0,5,10401},{1,7,2380},{1,7,2380},{1,7,2380},{1,6,1496},{0,7,4450},{0,6,964},{0,6,964},{0,3,1213},{0,3,7604},{0,3,3149},{2,7,320},{2,7,320},{2,7,320},{2,5,272},{2,4,4418},{0,6,964},{0,6,964},{0,3,1213},{7,0,4418},{0,3,1213},{5,7,5941},{4,7,3181},{2,7,64},{0,7,169},{5,7,5941},{5,6,5941},{0,7,169}, 
+{0,5,6305},{5,6,5941},{0,5,6305},{1,0,1480},{1,0,1480},{1,0,1480},{1,0,1480},{0,4,25},{0,4,25},{0,4,25},{0,2,1},{0,1,802},{0,1,802},{2,7,65535},{1,7,42345},{0,7,7081},{0,7,3625},{2,7,65535},{1,7,43685},{0,7,1250},{0,6,16353},{0,7,65535},{0,6,49973},{3,7,11005},{3,7,5153},{2,7,320},{2,7,964},{7,0,19026},{1,7,10349},{0,7,1250},{0,5,8849},{3,5,19026}, +{0,5,8849},{1,7,3276},{1,7,3276},{1,7,3276},{1,6,1848},{0,7,4706},{0,6,740},{0,6,740},{0,4,1226},{0,4,7955},{0,3,3357},{2,7,320},{2,7,320},{2,7,320},{2,6,145},{3,3,4420},{0,6,740},{0,6,740},{0,4,1226},{6,1,4420},{0,4,1226},{7,4,5105},{4,7,2701},{3,7,25},{0,7,25},{7,4,5105},{7,5,5105},{0,7,25},{0,6,5645},{7,5,5105},{0,6,5645},{1,0,1832}, +{1,0,1832},{1,0,1832},{1,0,1832},{0,4,9},{0,4,9},{0,4,9},{0,2,49},{0,2,1010},{0,2,1010},{2,7,65535},{1,7,43497},{1,7,9052},{0,7,4633},{2,7,65535},{1,7,42629},{0,7,738},{0,6,14161},{0,7,65535},{0,6,47781},{3,7,10381},{3,7,4529},{2,7,576},{2,7,484},{5,4,17490},{1,7,9293},{0,7,738},{0,5,7553},{7,3,17490},{0,5,7553},{1,7,4428},{1,7,4428},{1,7,4428}, +{1,7,2412},{0,7,5218},{0,7,738},{0,7,738},{0,4,810},{0,4,8467},{0,3,3821},{2,7,576},{2,7,576},{2,7,576},{2,6,1},{4,2,4420},{0,7,738},{0,7,738},{0,4,810},{5,2,4420},{0,4,810},{6,6,4329},{4,7,2349},{3,7,9},{0,7,9},{6,6,4329},{4,7,4329},{0,7,9},{0,6,4637},{4,7,4329},{0,6,4637},{1,0,2312},{1,0,2312},{1,0,2312},{1,0,2312},{0,5,4}, +{0,5,4},{0,5,4},{0,3,36},{0,2,1186},{0,2,1186},{2,7,65535},{1,7,44905},{1,7,10460},{0,7,5897},{2,7,65535},{1,7,41829},{0,7,482},{0,6,12225},{0,7,65535},{0,6,45845},{4,7,9325},{3,7,4161},{3,7,797},{2,7,260},{7,1,16034},{1,7,8493},{0,7,482},{0,5,6513},{4,5,16034},{0,5,6513},{2,7,5712},{2,7,5712},{2,7,5712},{1,7,2924},{1,7,5672},{0,7,482},{0,7,482}, 
+{0,4,650},{0,4,9235},{0,4,3899},{3,7,797},{3,7,797},{3,7,797},{2,6,113},{5,1,4426},{0,7,482},{0,7,482},{0,4,650},{7,1,4426},{0,4,650},{6,6,3625},{5,7,1985},{4,7,100},{1,7,4},{6,6,3625},{6,6,3617},{1,7,4},{0,6,3757},{6,6,3617},{0,6,3757},{1,0,2920},{1,0,2920},{1,0,2920},{1,0,2920},{0,5,36},{0,5,36},{0,5,36},{0,3,4},{0,2,1490}, +{0,2,1490},{2,7,65535},{1,7,46795},{1,7,12350},{0,7,7625},{2,7,65535},{1,7,41235},{0,7,500},{0,6,10353},{0,7,65535},{0,6,43973},{4,7,8227},{3,7,4053},{3,7,689},{2,7,314},{6,3,14507},{2,7,7875},{0,7,500},{0,5,5649},{6,4,14507},{0,5,5649},{2,7,6594},{2,7,6594},{2,7,6594},{1,7,3806},{1,7,6086},{0,7,500},{0,7,500},{0,4,776},{0,5,9830},{0,4,4025},{3,7,689}, +{3,7,689},{3,7,689},{3,6,298},{6,0,4420},{0,7,500},{0,7,500},{0,4,776},{3,4,4420},{0,4,776},{7,5,2890},{5,7,1508},{4,7,1},{2,7,25},{7,5,2890},{6,6,2906},{2,7,25},{0,6,2920},{6,6,2906},{0,6,2920},{1,0,3757},{1,0,3757},{1,0,3757},{1,0,3757},{0,6,9},{0,6,9},{0,6,9},{0,4,100},{0,2,1985},{0,2,1985},{2,7,65535},{1,7,48747},{1,7,14302}, +{0,7,9433},{2,7,65535},{1,7,40979},{0,7,788},{0,6,8961},{0,7,65535},{0,6,42581},{4,7,7523},{4,7,3923},{3,7,865},{3,7,181},{7,2,13243},{2,7,7075},{1,7,754},{0,6,4337},{5,5,13243},{0,6,4337},{2,7,7650},{2,7,7650},{2,7,7650},{1,7,4862},{1,7,6726},{0,7,788},{0,7,788},{0,5,529},{0,5,10470},{0,4,4409},{3,7,865},{3,7,865},{3,7,865},{3,7,181},{5,2,4426}, +{1,7,754},{1,7,754},{0,5,529},{7,2,4426},{0,5,529},{6,7,2320},{5,7,1220},{4,7,49},{2,7,9},{6,7,2320},{5,7,2320},{2,7,9},{0,6,2312},{5,7,2320},{0,6,2312},{1,0,4637},{1,0,4637},{1,0,4637},{1,0,4637},{0,7,4},{0,7,4},{0,7,4},{0,4,4},{0,3,2297},{0,3,2297},{2,7,65535},{1,7,50955},{1,7,16510},{1,7,10798},{2,7,65535},{1,7,40979},{0,7,1332}, 
+{0,7,6964},{0,7,65535},{0,6,41445},{4,7,7075},{4,7,3475},{3,7,1297},{3,7,5},{6,4,12051},{2,7,6531},{1,7,754},{0,6,3201},{7,4,12051},{0,6,3201},{2,7,8962},{2,7,8962},{2,7,8962},{2,7,5834},{1,7,7622},{0,7,1332},{0,7,1332},{0,5,289},{0,6,11342},{0,4,5049},{3,7,1297},{3,7,1297},{3,7,1297},{3,7,5},{6,1,4426},{1,7,754},{1,7,754},{0,5,289},{6,3,4426}, +{0,5,289},{6,7,1808},{5,7,1060},{5,7,36},{3,7,4},{6,7,1808},{7,6,1808},{3,7,4},{0,6,1832},{7,6,1808},{0,6,1832},{1,0,5645},{1,0,5645},{1,0,5645},{1,0,5645},{0,7,36},{0,7,36},{0,7,36},{0,4,36},{0,3,2665},{0,3,2665},{2,7,65535},{1,7,53419},{1,7,18974},{1,7,12366},{2,7,65535},{1,7,41235},{0,7,2132},{0,7,5204},{0,7,65535},{0,6,40565},{5,7,6641}, +{4,7,3283},{4,7,1258},{3,7,85},{5,6,10952},{3,7,5900},{1,7,1010},{0,6,2321},{4,6,10952},{0,6,2321},{2,7,10530},{2,7,10530},{2,7,10530},{2,7,6666},{1,7,8774},{0,7,2132},{0,7,2132},{0,5,305},{0,6,11790},{0,5,5205},{4,7,1258},{4,7,1258},{4,7,1258},{3,7,85},{7,0,4420},{1,7,1010},{1,7,1010},{0,5,305},{3,5,4420},{0,5,305},{7,6,1360},{6,7,800},{5,7,4}, +{3,7,36},{7,6,1360},{7,6,1360},{3,7,36},{0,6,1480},{7,6,1360},{0,6,1480},{2,0,6305},{2,0,6305},{2,0,6305},{2,0,6305},{0,7,196},{0,7,196},{0,7,196},{0,5,49},{0,3,3161},{0,3,3161},{2,7,65535},{2,7,56301},{1,7,22052},{1,7,14436},{2,7,65535},{1,7,41829},{0,7,3338},{0,7,3530},{0,7,65535},{0,6,39881},{5,7,5741},{4,7,3373},{4,7,1348},{4,7,328},{7,3,9830}, +{3,7,5270},{2,7,1184},{0,6,1637},{6,5,9830},{0,6,1637},{2,7,12600},{2,7,12600},{2,7,12600},{2,7,7908},{1,7,10376},{1,7,2760},{1,7,2760},{0,6,481},{0,6,12600},{0,5,5529},{4,7,1348},{4,7,1348},{4,7,1348},{4,7,328},{7,1,4426},{2,7,1184},{2,7,1184},{0,6,481},{7,3,4426},{0,6,481},{7,6,937},{6,7,521},{5,7,121},{4,7,4},{7,6,937},{6,7,929},{4,7,4}, 
+{0,6,1237},{6,7,929},{0,6,1237},{2,0,7124},{2,0,7124},{2,0,7124},{2,0,7124},{0,7,529},{0,7,529},{0,7,529},{0,5,4},{0,4,3778},{0,4,3778},{2,7,65535},{2,7,58413},{1,7,25060},{1,7,16548},{2,7,65535},{1,7,42629},{0,7,4682},{0,7,2314},{0,7,65535},{0,6,39545},{5,7,5213},{5,7,3321},{4,7,1700},{4,7,200},{5,7,8902},{3,7,4982},{2,7,1440},{0,6,1301},{5,6,8902}, +{0,6,1301},{3,7,14701},{3,7,14701},{3,7,14701},{2,7,9284},{2,7,11492},{1,7,3560},{1,7,3560},{0,6,145},{0,6,13592},{0,5,6089},{4,7,1700},{4,7,1700},{4,7,1700},{4,7,200},{6,3,4420},{2,7,1440},{2,7,1440},{0,6,145},{6,4,4420},{0,6,145},{7,6,697},{6,7,409},{6,7,9},{5,7,9},{7,6,697},{6,7,625},{5,7,9},{0,6,1157},{6,7,625},{0,6,1157},{2,0,7988}, +{2,0,7988},{2,0,7988},{2,0,7988},{0,7,961},{0,7,961},{0,7,961},{0,5,100},{0,4,4210},{0,4,4210},{3,7,65535},{2,7,60781},{1,7,28324},{1,7,18916},{2,7,65535},{1,7,43685},{0,7,6282},{0,7,1354},{0,7,65535},{0,6,39465},{5,7,4941},{5,7,3049},{5,7,2025},{4,7,328},{7,4,8069},{4,7,4465},{3,7,1586},{0,6,1221},{7,5,8069},{0,6,1221},{3,7,16189},{3,7,16189},{3,7,16189}, +{2,7,10916},{2,7,12740},{1,7,4616},{1,7,4616},{0,6,65},{0,7,14411},{0,6,6789},{5,7,2025},{5,7,2025},{5,7,2025},{4,7,328},{7,2,4420},{3,7,1586},{3,7,1586},{0,6,65},{5,5,4420},{0,6,65},{7,7,377},{7,7,305},{6,7,25},{5,7,25},{7,7,377},{6,7,449},{5,7,25},{0,7,729},{6,7,449},{0,7,729},{2,0,8980},{2,0,8980},{2,0,8980},{2,0,8980},{1,7,1480}, +{1,7,1480},{1,7,1480},{0,6,16},{0,4,4770},{0,4,4770},{3,7,65535},{2,7,59505},{1,7,29984},{1,7,19680},{2,7,65535},{1,7,43137},{0,7,8318},{0,7,830},{0,7,65535},{0,6,34901},{5,7,4925},{5,7,3033},{5,7,2009},{4,7,712},{6,6,7325},{4,7,4145},{3,7,1810},{0,7,650},{4,7,7325},{0,7,650},{3,7,16745},{3,7,16745},{3,7,16745},{2,7,12024},{2,7,13464},{1,7,5556},{1,7,5556}, 
+{0,6,277},{0,7,14139},{0,6,6017},{5,7,2009},{5,7,2009},{5,7,2009},{4,7,712},{5,6,4418},{3,7,1810},{3,7,1810},{0,6,241},{4,6,4418},{0,6,241},{7,7,185},{7,7,113},{7,7,64},{6,7,1},{7,7,185},{7,7,193},{6,7,1},{0,7,361},{7,7,193},{0,7,361},{2,0,9320},{2,0,9320},{2,0,9320},{2,0,9320},{1,7,1460},{1,7,1460},{1,7,1460},{0,6,52},{0,5,4772}, +{0,5,4772},{3,7,65535},{2,7,57588},{1,7,32135},{1,7,20823},{3,7,65535},{1,7,42804},{1,7,8359},{0,7,1289},{0,7,65535},{0,6,28970},{6,7,4317},{5,7,3321},{5,7,2297},{5,7,845},{7,5,6587},{5,7,4008},{4,7,2066},{0,7,164},{6,6,6611},{0,7,164},{3,7,17366},{3,7,17366},{3,7,17366},{3,7,12274},{2,7,14427},{2,7,6699},{2,7,6699},{0,7,389},{0,7,13860},{0,6,5234},{5,7,2297}, +{5,7,2297},{5,7,2297},{5,7,845},{6,5,4420},{4,7,2066},{4,7,2066},{0,7,164},{3,7,4420},{0,7,164},{7,7,122},{7,7,50},{7,7,1},{7,7,9},{7,7,122},{7,7,58},{7,7,9},{0,7,100},{7,7,58},{0,7,100},{2,0,9698},{2,0,9698},{2,0,9698},{2,0,9698},{1,7,1586},{1,7,1586},{1,7,1586},{0,7,289},{0,5,4250},{0,5,4250},{3,7,65535},{2,7,56836},{2,7,31631}, +{1,7,22791},{3,7,65535},{2,7,40532},{1,7,9015},{1,7,778},{0,7,65535},{0,6,24650},{6,7,3949},{6,7,3325},{5,7,2825},{5,7,1021},{6,7,6020},{5,7,3688},{4,7,2290},{0,7,4},{5,7,6020},{0,7,4},{3,7,18326},{3,7,18326},{3,7,18326},{3,7,12626},{3,7,15077},{2,7,7227},{2,7,7227},{1,7,294},{0,7,14020},{0,6,4946},{5,7,2825},{5,7,2825},{5,7,2825},{5,7,1021},{7,4,4418}, +{4,7,2290},{4,7,2290},{0,7,4},{7,5,4418},{0,7,4},{7,7,202},{7,7,130},{7,7,81},{7,7,25},{7,7,202},{7,7,74},{7,7,25},{0,7,4},{7,7,74},{0,7,4},{3,0,9490},{3,0,9490},{3,0,9490},{3,0,9490},{1,7,1970},{1,7,1970},{1,7,1970},{1,6,202},{0,6,3922},{0,6,3922},{3,7,65535},{3,7,55466},{2,7,30335},{2,7,21687},{3,7,65535},{2,7,37932},{1,7,9535}, 
+{1,7,70},{0,7,65535},{0,7,20544},{6,7,3417},{6,7,2793},{6,7,2393},{5,7,1033},{6,7,5184},{5,7,3204},{5,7,2180},{1,7,25},{7,6,5168},{1,7,25},{4,7,17611},{4,7,17611},{4,7,17611},{3,7,12630},{3,7,14321},{2,7,7251},{2,7,7251},{1,7,34},{0,7,13376},{0,7,4160},{6,7,2393},{6,7,2393},{6,7,2393},{5,7,1033},{6,6,3874},{5,7,2180},{5,7,2180},{1,7,25},{4,7,3874}, +{1,7,25},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{3,0,9266},{3,0,9266},{3,0,9266},{3,0,9266},{2,7,2210},{2,7,2210},{2,7,2210},{1,7,34},{0,6,3442},{0,6,3442},{3,7,65535},{3,7,51210},{2,7,29343},{2,7,20695},{3,7,65535},{2,7,35820},{1,7,10495},{1,7,134},{1,7,65535},{0,7,15936},{6,7,2889}, +{6,7,2265},{6,7,1865},{5,7,1049},{7,6,4288},{5,7,2724},{5,7,1700},{1,7,9},{7,6,4272},{1,7,9},{4,7,16555},{4,7,16555},{4,7,16555},{3,7,12662},{3,7,13441},{2,7,7251},{2,7,7251},{1,7,34},{0,7,12608},{0,7,3392},{6,7,1865},{6,7,1865},{6,7,1865},{5,7,1049},{7,5,3218},{5,7,1700},{5,7,1700},{1,7,9},{6,6,3202},{1,7,9},{7,7,242},{7,7,170},{7,7,121}, +{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{3,0,9298},{3,0,9298},{3,0,9298},{3,0,9298},{2,7,2210},{2,7,2210},{2,7,2210},{1,7,34},{0,6,3218},{0,6,3218},{4,7,65535},{3,7,47340},{2,7,29145},{2,7,20497},{3,7,65535},{2,7,34362},{2,7,9157},{1,7,1124},{1,7,64598},{0,7,11670},{6,7,2448},{6,7,1824},{6,7,1424},{6,7,800},{7,6,3361}, +{6,7,2321},{5,7,1313},{2,7,0},{6,7,3401},{2,7,0},{4,7,15673},{4,7,15673},{4,7,15673},{4,7,12073},{3,7,12757},{3,7,6905},{3,7,6905},{1,7,340},{1,7,11657},{0,7,2834},{6,7,1424},{6,7,1424},{6,7,1424},{6,7,800},{7,5,2525},{5,7,1313},{5,7,1313},{2,7,0},{5,7,2545},{2,7,0},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49}, 
+{0,7,0},{7,7,98},{0,7,0},{3,0,9640},{3,0,9640},{3,0,9640},{3,0,9640},{2,7,2516},{2,7,2516},{2,7,2516},{1,7,340},{0,7,2834},{0,7,2834},{4,7,65535},{3,7,44716},{3,7,27896},{2,7,21137},{4,7,65535},{3,7,31853},{2,7,8677},{2,7,784},{1,7,59734},{0,7,8694},{6,7,2192},{6,7,1568},{6,7,1168},{6,7,544},{7,6,2673},{6,7,1761},{5,7,1105},{3,7,25},{6,7,2649}, +{3,7,25},{4,7,15161},{4,7,15161},{4,7,15161},{4,7,11561},{4,7,12169},{3,7,6569},{3,7,6569},{2,7,208},{1,7,10889},{0,7,2610},{6,7,1168},{6,7,1168},{6,7,1168},{6,7,544},{6,7,1985},{5,7,1105},{5,7,1105},{3,7,25},{5,7,1985},{3,7,25},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{4,0,9536}, +{4,0,9536},{4,0,9536},{4,0,9536},{2,7,3060},{2,7,3060},{2,7,3060},{2,7,208},{0,7,2610},{0,7,2610},{4,7,63766},{3,7,42860},{3,7,26040},{3,7,20188},{4,7,60070},{3,7,29085},{2,7,8965},{2,7,336},{1,7,55638},{0,7,6486},{7,7,1686},{6,7,1440},{6,7,1040},{6,7,416},{7,6,2113},{6,7,1329},{6,7,929},{3,7,9},{6,7,2025},{3,7,9},{4,7,14905},{4,7,14905},{4,7,14905}, +{4,7,11305},{4,7,11209},{3,7,6489},{3,7,6489},{2,7,272},{1,7,10377},{0,7,2642},{6,7,1040},{6,7,1040},{6,7,1040},{6,7,416},{7,6,1537},{6,7,929},{6,7,929},{3,7,9},{7,6,1513},{3,7,9},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{4,0,9280},{4,0,9280},{4,0,9280},{4,0,9280},{3,7,3125}, +{3,7,3125},{3,7,3125},{2,7,272},{0,7,2642},{0,7,2642},{4,7,59414},{4,7,41414},{3,7,24952},{3,7,19100},{4,7,55014},{3,7,27085},{2,7,10021},{2,7,656},{1,7,52310},{0,7,5046},{7,7,1142},{7,7,1070},{7,7,1021},{6,7,416},{7,7,1538},{6,7,1025},{6,7,625},{4,7,4},{6,7,1529},{4,7,4},{5,7,13964},{5,7,13964},{5,7,13964},{4,7,11305},{4,7,10505},{3,7,6665},{3,7,6665}, 
+{2,7,592},{2,7,9973},{0,7,2930},{7,7,1021},{7,7,1021},{7,7,1021},{6,7,416},{7,6,1105},{6,7,625},{6,7,625},{4,7,4},{6,7,1129},{4,7,4},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{4,0,9280},{4,0,9280},{4,0,9280},{4,0,9280},{3,7,3301},{3,7,3301},{3,7,3301},{2,7,592},{0,7,2930}, +{0,7,2930}, diff --git a/thirdparty/basisu/transcoder/basisu_transcoder_uastc.h b/thirdparty/basisu/transcoder/basisu_transcoder_uastc.h new file mode 100644 index 000000000..457bd51e3 --- /dev/null +++ b/thirdparty/basisu/transcoder/basisu_transcoder_uastc.h @@ -0,0 +1,294 @@ +// basisu_transcoder_uastc.h +#pragma once +#include "basisu_transcoder_internal.h" + +namespace basist +{ + struct color_quad_u8 + { + uint8_t m_c[4]; + }; + + const uint32_t TOTAL_UASTC_MODES = 19; + const uint32_t UASTC_MODE_INDEX_SOLID_COLOR = 8; + + const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS2 = 30; + const uint32_t TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 = 27; // BC6H only supports only 5-bit pattern indices, BC7 supports 4-bit or 6-bit + const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS3 = 11; + const uint32_t TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS = 19; + + extern const uint8_t g_uastc_mode_weight_bits[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_weight_ranges[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_endpoint_ranges[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_subsets[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_planes[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_comps[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_etc1_bias[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_bc1_hint0[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_bc1_hint1[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_alpha[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_is_la[TOTAL_UASTC_MODES]; + + struct astc_bc7_common_partition2_desc + { + uint8_t m_bc7; + uint16_t m_astc; + bool 
m_invert; + }; + + extern const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2]; + + struct bc73_astc2_common_partition_desc + { + uint8_t m_bc73; + uint16_t m_astc2; + uint8_t k; // 0-5 - how to modify the BC7 3-subset pattern to match the ASTC pattern (LSB=invert) + }; + + extern const bc73_astc2_common_partition_desc g_bc7_3_astc2_common_partitions[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS]; + + struct astc_bc7_common_partition3_desc + { + uint8_t m_bc7; + uint16_t m_astc; + uint8_t m_astc_to_bc7_perm; // converts ASTC to BC7 partition using g_astc_bc7_partition_index_perm_tables[][] + }; + + extern const astc_bc7_common_partition3_desc g_astc_bc7_common_partitions3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3]; + + extern const uint8_t g_astc_bc7_patterns2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][16]; + extern const uint8_t g_astc_bc7_patterns3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][16]; + extern const uint8_t g_bc7_3_astc2_patterns2[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][16]; + + extern const uint8_t g_astc_bc7_pattern2_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][3]; + extern const uint8_t g_astc_bc7_pattern3_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][3]; + extern const uint8_t g_bc7_3_astc2_patterns2_anchors[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][3]; + + extern const uint32_t g_uastc_mode_huff_codes[TOTAL_UASTC_MODES + 1][2]; + + extern const uint8_t g_astc_to_bc7_partition_index_perm_tables[6][3]; + extern const uint8_t g_bc7_to_astc_partition_index_perm_tables[6][3]; // inverse of g_astc_to_bc7_partition_index_perm_tables + + extern const uint8_t* s_uastc_to_bc1_weights[6]; + + uint32_t bc7_convert_partition_index_3_to_2(uint32_t p, uint32_t k); + + inline uint32_t astc_interpolate(uint32_t l, uint32_t h, uint32_t w, bool srgb) + { + if (srgb) + { + l = (l << 8) | 0x80; + h = (h << 8) | 0x80; + } + else + { + l = (l << 8) | l; + h = (h << 8) | h; + } + + uint32_t k = (l * (64 - w) + h * w + 32) >> 6; + + return k >> 8; + } + + struct 
astc_block_desc + { + int m_weight_range; // weight BISE range + + int m_subsets; // number of ASTC partitions + int m_partition_seed; // partition pattern seed + int m_cem; // color endpoint mode used by all subsets + + int m_ccs; // color component selector (dual plane only) + bool m_dual_plane; // true if dual plane + + // Weight and endpoint BISE values. + // Note these values are NOT linear, they must be BISE encoded. See Table 97 and Table 107. + uint8_t m_endpoints[18]; // endpoint values, in RR GG BB etc. order + uint8_t m_weights[64]; // weight index values, raster order, in P0 P1, P0 P1, etc. or P0, P0, P0, P0, etc. order + }; + + const uint32_t BC7ENC_TOTAL_ASTC_RANGES = 21; + + // See tables 81, 93, 18.13.Endpoint Unquantization + const uint32_t TOTAL_ASTC_RANGES = 21; + extern const int g_astc_bise_range_table[TOTAL_ASTC_RANGES][3]; + + struct astc_quant_bin + { + uint8_t m_unquant; // unquantized value + uint8_t m_index; // sorted index + }; + + extern astc_quant_bin g_astc_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [ASTC encoded endpoint index] + + int astc_get_levels(int range); + bool astc_is_valid_endpoint_range(uint32_t range); + uint32_t unquant_astc_endpoint(uint32_t packed_bits, uint32_t packed_trits, uint32_t packed_quints, uint32_t range); + uint32_t unquant_astc_endpoint_val(uint32_t packed_val, uint32_t range); + + const uint8_t* get_anchor_indices(uint32_t subsets, uint32_t mode, uint32_t common_pattern, const uint8_t*& pPartition_pattern); + + // BC7 + const uint32_t BC7ENC_BLOCK_SIZE = 16; + + struct bc7_block + { + uint64_t m_qwords[2]; + }; + + struct bc7_optimization_results + { + uint32_t m_mode; + uint32_t m_partition; + uint8_t m_selectors[16]; + uint8_t m_alpha_selectors[16]; + color_quad_u8 m_low[3]; + color_quad_u8 m_high[3]; + uint32_t m_pbits[3][2]; + uint32_t m_index_selector; + uint32_t m_rotation; + }; + + extern const uint32_t g_bc7_weights1[2]; + extern const uint32_t g_bc7_weights2[4]; + extern const uint32_t 
g_bc7_weights3[8]; + extern const uint32_t g_bc7_weights4[16]; + extern const uint32_t g_astc_weights4[16]; + extern const uint32_t g_astc_weights5[32]; + extern const uint32_t g_astc_weights_3levels[3]; + extern const uint8_t g_bc7_partition1[16]; + extern const uint8_t g_bc7_partition2[64 * 16]; + extern const uint8_t g_bc7_partition3[64 * 16]; + extern const uint8_t g_bc7_table_anchor_index_second_subset[64]; + extern const uint8_t g_bc7_table_anchor_index_third_subset_1[64]; + extern const uint8_t g_bc7_table_anchor_index_third_subset_2[64]; + extern const uint8_t g_bc7_num_subsets[8]; + extern const uint8_t g_bc7_partition_bits[8]; + extern const uint8_t g_bc7_color_index_bitcount[8]; + extern const uint8_t g_bc7_mode_has_p_bits[8]; + extern const uint8_t g_bc7_mode_has_shared_p_bits[8]; + extern const uint8_t g_bc7_color_precision_table[8]; + extern const int8_t g_bc7_alpha_precision_table[8]; + extern const uint8_t g_bc7_alpha_index_bitcount[8]; + + inline bool get_bc7_mode_has_seperate_alpha_selectors(int mode) { return (mode == 4) || (mode == 5); } + inline int get_bc7_color_index_size(int mode, int index_selection_bit) { return g_bc7_color_index_bitcount[mode] + index_selection_bit; } + inline int get_bc7_alpha_index_size(int mode, int index_selection_bit) { return g_bc7_alpha_index_bitcount[mode] - index_selection_bit; } + + struct endpoint_err + { + uint16_t m_error; uint8_t m_lo; uint8_t m_hi; + }; + + extern endpoint_err g_bc7_mode_6_optimal_endpoints[256][2]; // [c][pbit] + const uint32_t BC7ENC_MODE_6_OPTIMAL_INDEX = 5; + + extern endpoint_err g_bc7_mode_5_optimal_endpoints[256]; // [c] + const uint32_t BC7ENC_MODE_5_OPTIMAL_INDEX = 1; + + // Packs a BC7 block from a high-level description. Handles all BC7 modes. + void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults); + + // Packs an ASTC block + // Constraints: Always 4x4, all subset CEM's must be equal, only tested with LDR CEM's. 
+ bool pack_astc_block(uint32_t* pDst, const astc_block_desc* pBlock, uint32_t mode); + + void pack_astc_solid_block(void* pDst_block, const color32& color); + +#ifdef _DEBUG + int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block); +#endif + + struct uastc_block + { + union + { + uint8_t m_bytes[16]; + uint32_t m_dwords[4]; + }; + }; + + struct unpacked_uastc_block + { + astc_block_desc m_astc; + + uint32_t m_mode; + uint32_t m_common_pattern; + + color32 m_solid_color; + + bool m_bc1_hint0; + bool m_bc1_hint1; + + bool m_etc1_flip; + bool m_etc1_diff; + uint32_t m_etc1_inten0; + uint32_t m_etc1_inten1; + + uint32_t m_etc1_bias; + + uint32_t m_etc2_hints; + + uint32_t m_etc1_selector; + uint32_t m_etc1_r, m_etc1_g, m_etc1_b; + }; + + color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock); + + struct decoder_etc_block; + struct eac_block; + + bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb); + bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb); + + bool unpack_uastc(const uastc_block& blk, color32* pPixels, bool srgb); + bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool undo_blue_contract, bool read_hints = true); + + bool transcode_uastc_to_astc(const uastc_block& src_blk, void* pDst); + + bool transcode_uastc_to_bc7(const unpacked_uastc_block& unpacked_src_blk, bc7_optimization_results& dst_blk); + bool transcode_uastc_to_bc7(const uastc_block& src_blk, bc7_optimization_results& dst_blk); + bool transcode_uastc_to_bc7(const uastc_block& src_blk, void* pDst); + + void transcode_uastc_to_etc1(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst); + bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst); + bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst, 
uint32_t channel); + + void transcode_uastc_to_etc2_eac_a8(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst); + bool transcode_uastc_to_etc2_rgba(const uastc_block& src_blk, void* pDst); + + // Packs 16 scalar values to BC4. Same PSNR as stb_dxt's BC4 encoder, around 13% faster. + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride); + + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb); + + enum + { + cEncodeBC1HighQuality = 1, + cEncodeBC1HigherQuality = 2, + cEncodeBC1UseSelectors = 4, + }; + void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags); + + // Alternate PCA-free encoder, around 15% faster, same (or slightly higher) avg. PSNR + void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags); + + void transcode_uastc_to_bc1_hint0(const unpacked_uastc_block& unpacked_src_blk, void* pDst); + void transcode_uastc_to_bc1_hint1(const unpacked_uastc_block& unpacked_src_blk, const color32 block_pixels[4][4], void* pDst, bool high_quality); + + bool transcode_uastc_to_bc1(const uastc_block& src_blk, void* pDst, bool high_quality); + bool transcode_uastc_to_bc3(const uastc_block& src_blk, void* pDst, bool high_quality); + bool transcode_uastc_to_bc4(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0); + bool transcode_uastc_to_bc5(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1); + + bool transcode_uastc_to_etc2_eac_r11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0); + bool transcode_uastc_to_etc2_eac_rg11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1); + + bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha); + bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t 
num_blocks_x, uint32_t num_blocks_y, bool high_quality); + + // uastc_init() MUST be called before using this module. + void uastc_init(); + +} // namespace basist diff --git a/thirdparty/basisu/zstd/LICENSE b/thirdparty/basisu/zstd/LICENSE new file mode 100644 index 000000000..a793a8028 --- /dev/null +++ b/thirdparty/basisu/zstd/LICENSE @@ -0,0 +1,30 @@ +BSD License + +For Zstandard software + +Copyright (c) 2016-present, Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Facebook nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/thirdparty/basisu/zstd/zstd.c b/thirdparty/basisu/zstd/zstd.c new file mode 100644 index 000000000..e83128be3 --- /dev/null +++ b/thirdparty/basisu/zstd/zstd.c @@ -0,0 +1,38720 @@ +/** + * \file zstd.c + * Single-file Zstandard library. + * + * Generate using: + * \code + * combine.sh -r ../../lib -o zstd.c zstd-in.c + * \endcode + */ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ +/* + * Settings to bake for the single library file. + * + * Note: It's important that none of these affects 'zstd.h' (only the + * implementation files we're amalgamating). + * + * Note: MEM_MODULE stops xxhash redefining BYTE, U16, etc., which are also + * defined in mem.h (breaking C99 compatibility). + * + * Note: the undefs for xxHash allow Zstd's implementation to coinside with with + * standalone xxHash usage (with global defines). + * + * Note: multithreading is enabled for all platforms apart from Emscripten. + */ +#define DEBUGLEVEL 0 +#define MEM_MODULE +#undef XXH_NAMESPACE +#define XXH_NAMESPACE ZSTD_ +#undef XXH_PRIVATE_API +#define XXH_PRIVATE_API +#undef XXH_INLINE_ALL +#define XXH_INLINE_ALL +#define ZSTD_LEGACY_SUPPORT 0 +#ifndef __EMSCRIPTEN__ +#define ZSTD_MULTITHREAD +#endif +#define ZSTD_TRACE 0 + +/* Include zstd_deps.h first with all the options we need enabled. */ +#define ZSTD_DEPS_NEED_MALLOC +#define ZSTD_DEPS_NEED_MATH64 +/**** start inlining common/zstd_deps.h ****/ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This file provides common libc dependencies that zstd requires. + * The purpose is to allow replacing this file with a custom implementation + * to compile zstd without libc support. + */ + +/* Need: + * NULL + * INT_MAX + * UINT_MAX + * ZSTD_memcpy() + * ZSTD_memset() + * ZSTD_memmove() + */ +#ifndef ZSTD_DEPS_COMMON +#define ZSTD_DEPS_COMMON + +#include +#include +#include + +#if defined(__GNUC__) && __GNUC__ >= 4 +# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l)) +#else +# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) memset((p),(v),(l)) +#endif + +#endif /* ZSTD_DEPS_COMMON */ + +/* Need: + * ZSTD_malloc() + * ZSTD_free() + * ZSTD_calloc() + */ +#ifdef ZSTD_DEPS_NEED_MALLOC +#ifndef ZSTD_DEPS_MALLOC +#define ZSTD_DEPS_MALLOC + +#include + +#define ZSTD_malloc(s) malloc(s) +#define ZSTD_calloc(n,s) calloc((n), (s)) +#define ZSTD_free(p) free((p)) + +#endif /* ZSTD_DEPS_MALLOC */ +#endif /* ZSTD_DEPS_NEED_MALLOC */ + +/* + * Provides 64-bit math support. 
+ * Need: + * U64 ZSTD_div64(U64 dividend, U32 divisor) + */ +#ifdef ZSTD_DEPS_NEED_MATH64 +#ifndef ZSTD_DEPS_MATH64 +#define ZSTD_DEPS_MATH64 + +#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor)) + +#endif /* ZSTD_DEPS_MATH64 */ +#endif /* ZSTD_DEPS_NEED_MATH64 */ + +/* Need: + * assert() + */ +#ifdef ZSTD_DEPS_NEED_ASSERT +#ifndef ZSTD_DEPS_ASSERT +#define ZSTD_DEPS_ASSERT + +#include + +#endif /* ZSTD_DEPS_ASSERT */ +#endif /* ZSTD_DEPS_NEED_ASSERT */ + +/* Need: + * ZSTD_DEBUG_PRINT() + */ +#ifdef ZSTD_DEPS_NEED_IO +#ifndef ZSTD_DEPS_IO +#define ZSTD_DEPS_IO + +#include +#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) + +#endif /* ZSTD_DEPS_IO */ +#endif /* ZSTD_DEPS_NEED_IO */ + +/* Only requested when is known to be present. + * Need: + * intptr_t + */ +#ifdef ZSTD_DEPS_NEED_STDINT +#ifndef ZSTD_DEPS_STDINT +#define ZSTD_DEPS_STDINT + +#include + +#endif /* ZSTD_DEPS_STDINT */ +#endif /* ZSTD_DEPS_NEED_STDINT */ +/**** ended inlining common/zstd_deps.h ****/ + +/**** start inlining common/debug.c ****/ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + + +/* + * This module only hosts one global variable + * which can be used to dynamically influence the verbosity of traces, + * such as DEBUGLOG and RAWLOG + */ + +/**** start inlining debug.h ****/ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * The purpose of this header is to enable debug functions. + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, + * and DEBUG_STATIC_ASSERT() for compile-time. + * + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. + * + * Level 1 enables assert() only. + * Starting level 2, traces can be generated and pushed to stderr. + * The higher the level, the more verbose the traces. + * + * It's possible to dynamically adjust level using variable g_debug_level, + * which is only declared if DEBUGLEVEL>=2, + * and is a global variable, not multi-thread protected (use with care) + */ + +#ifndef DEBUG_H_12987983217 +#define DEBUG_H_12987983217 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* static assert is triggered at compile time, leaving no runtime artefact. + * static assert only works with compile-time constants. + * Also, this variant can only be used inside a function. */ +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 
1 : -1]) + + +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif + + +/* recommended values for DEBUGLEVEL : + * 0 : release mode, no debug, all run-time checks disabled + * 1 : enables assert() only, no display + * 2 : reserved, for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (verbose) + * 7+: events at every position (*very* verbose) + * + * It's generally inconvenient to output traces > 5. + * In which case, it's possible to selectively trigger high verbosity levels + * by modifying g_debug_level. + */ + +#if (DEBUGLEVEL>=1) +# define ZSTD_DEPS_NEED_ASSERT +/**** skipping file: zstd_deps.h ****/ +#else +# ifndef assert /* assert may be already defined, due to prior #include */ +# define assert(condition) ((void)0) /* disable assert (default) */ +# endif +#endif + +#if (DEBUGLEVEL>=2) +# define ZSTD_DEPS_NEED_IO +/**** skipping file: zstd_deps.h ****/ +extern int g_debuglevel; /* the variable is only declared, + it actually lives in debug.c, + and is shared by the whole process. + It's not thread-safe. + It's useful when enabling very verbose levels + on selective conditions (such as position in src) */ + +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ + ZSTD_DEBUG_PRINT(" \n"); \ + } } +#else +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) 
{} /* disabled */ +#endif + + +#if defined (__cplusplus) +} +#endif + +#endif /* DEBUG_H_12987983217 */ +/**** ended inlining debug.h ****/ + +int g_debuglevel = DEBUGLEVEL; +/**** ended inlining common/debug.c ****/ +/**** start inlining common/entropy_common.c ****/ +/* ****************************************************************** + * Common functions of New Generation Entropy library + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************* +* Dependencies +***************************************/ +/**** start inlining mem.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-**************************************** +* Dependencies +******************************************/ +#include /* size_t, ptrdiff_t */ +/**** start inlining compiler.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPILER_H +#define ZSTD_COMPILER_H + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +/* force inlining */ + +#if !defined(ZSTD_NO_INLINE) +#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD +#endif + +#if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif + +#else + +#define INLINE_KEYWORD +#define FORCE_INLINE_ATTR + +#endif + +/** + On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC). + This explictly marks such functions as __cdecl so that the code will still compile + if a CC other than __cdecl has been made the default. +*/ +#if defined(_MSC_VER) +# define WIN_CDECL __cdecl +#else +# define WIN_CDECL +#endif + +/** + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant + * parameters. They must be inlined for the compiler to eliminate the constant + * branches. + */ +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +/** + * HINT_INLINE is used to help the compiler generate better code. It is *not* + * used for "templates", so it can be tweaked based on the compilers + * performance. + * + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the + * always_inline attribute. 
+ * + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline + * attribute. + */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 +# define HINT_INLINE static INLINE_KEYWORD +#else +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR +#endif + +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#if defined(__GNUC__) +# define UNUSED_ATTR __attribute__((unused)) +#else +# define UNUSED_ATTR +#endif + +/* force no inlining */ +#ifdef _MSC_VER +# define FORCE_NOINLINE static __declspec(noinline) +#else +# if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_NOINLINE static __attribute__((__noinline__)) +# else +# define FORCE_NOINLINE static +# endif +#endif + + +/* target attribute */ +#ifndef __has_attribute + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ +#endif +#if defined(__GNUC__) || defined(__ICCARM__) +# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) +#else +# define TARGET_ATTRIBUTE(target) +#endif + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. 
+ */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X86)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/* prefetch + * can be disabled, by declaring NO_PREFETCH build macro */ +#if defined(NO_PREFETCH) +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +#else +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +//rg: fix for ARM64EC compilation +//# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +# elif defined(__aarch64__) +# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) +# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) +# else +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* NO_PREFETCH */ + +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) { \ + const char* const _ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH_L2(_ptr + _pos); \ + } \ +} + +/* vectorization + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ +#if 
!defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) +# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# else +# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") +# endif +#else +# define DONT_VECTORIZE +#endif + +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. + */ +#if defined(__GNUC__) +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + +/* disable warnings */ +#ifdef _MSC_VER /* Visual Studio */ +# include /* For Visual 2005 */ +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#endif + +/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ +#ifndef STATIC_BMI2 +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) +# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 +# define STATIC_BMI2 1 +# endif +# endif +#endif + +#ifndef STATIC_BMI2 + #define STATIC_BMI2 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/* compat. 
with non-clang compilers */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +/* detects whether we are being compiled under msan */ +#ifndef ZSTD_MEMORY_SANITIZER +# if __has_feature(memory_sanitizer) +# define ZSTD_MEMORY_SANITIZER 1 +# else +# define ZSTD_MEMORY_SANITIZER 0 +# endif +#endif + +#if ZSTD_MEMORY_SANITIZER +/* Not all platforms that support msan provide sanitizers/msan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ +#include /* size_t */ +#define ZSTD_DEPS_NEED_STDINT +/**** skipping file: zstd_deps.h ****/ + +/* Make memory region fully initialized (without changing its contents). */ +void __msan_unpoison(const volatile void *a, size_t size); + +/* Make memory region fully uninitialized (without changing its contents). + This is a legacy interface that does not update origin information. Use + __msan_allocated_memory() instead. */ +void __msan_poison(const volatile void *a, size_t size); + +/* Returns the offset of the first (at least partially) poisoned byte in the + memory range, or -1 if the whole range is good. */ +intptr_t __msan_test_shadow(const volatile void *x, size_t size); +#endif + +/* detects whether we are being compiled under asan */ +#ifndef ZSTD_ADDRESS_SANITIZER +# if __has_feature(address_sanitizer) +# define ZSTD_ADDRESS_SANITIZER 1 +# elif defined(__SANITIZE_ADDRESS__) +# define ZSTD_ADDRESS_SANITIZER 1 +# else +# define ZSTD_ADDRESS_SANITIZER 0 +# endif +#endif + +#if ZSTD_ADDRESS_SANITIZER +/* Not all platforms that support asan provide sanitizers/asan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ +#include /* size_t */ + +/** + * Marks a memory region ([addr, addr+size)) as unaddressable. + * + * This memory must be previously allocated by your program. Instrumented + * code is forbidden from accessing addresses in this region until it is + * unpoisoned. 
This function is not guaranteed to poison the entire region - + * it could poison only a subregion of [addr, addr+size) due to ASan + * alignment restrictions. + * + * \note This function is not thread-safe because no two threads can poison or + * unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. */ +void __asan_poison_memory_region(void const volatile *addr, size_t size); + +/** + * Marks a memory region ([addr, addr+size)) as addressable. + * + * This memory must be previously allocated by your program. Accessing + * addresses in this region is allowed until this region is poisoned again. + * This function could unpoison a super-region of [addr, addr+size) due + * to ASan alignment restrictions. + * + * \note This function is not thread-safe because no two threads can + * poison or unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. 
*/ +void __asan_unpoison_memory_region(void const volatile *addr, size_t size); +#endif + +#endif /* ZSTD_COMPILER_H */ +/**** ended inlining compiler.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: zstd_deps.h ****/ + + +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(_MSC_VER) /* Visual Studio */ +# include /* _byteswap_ulong */ +# include /* _byteswap_* */ +#endif +#if defined(__GNUC__) +# define MEM_STATIC static __inline __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + +/*-************************************************************** +* Basic Types +*****************************************************************/ +#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# if defined(_AIX) +# include +# else +# include /* intptr_t */ +# endif + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else +# include +#if CHAR_BIT != 8 +# error "this implementation requires char to be exactly 8-bit type" +#endif + typedef unsigned char BYTE; +#if USHRT_MAX != 65535 +# error "this implementation requires short to be exactly 16-bit type" +#endif + typedef unsigned short U16; + typedef signed short S16; +#if UINT_MAX != 4294967295 +# error "this implementation requires int to be exactly 32-bit type" +#endif + typedef unsigned int U32; + typedef signed int S32; +/* note : there are no limits defined for long long type in C90. 
+ * limits exist in C99, however, in such case, is preferred */ + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + +/*-************************************************************** +* Memory I/O API +*****************************************************************/ +/*=== Static platform detection ===*/ +MEM_STATIC unsigned MEM_32bits(void); +MEM_STATIC unsigned MEM_64bits(void); +MEM_STATIC unsigned MEM_isLittleEndian(void); + +/*=== Native unaligned read/write ===*/ +MEM_STATIC U16 MEM_read16(const void* memPtr); +MEM_STATIC U32 MEM_read32(const void* memPtr); +MEM_STATIC U64 MEM_read64(const void* memPtr); +MEM_STATIC size_t MEM_readST(const void* memPtr); + +MEM_STATIC void MEM_write16(void* memPtr, U16 value); +MEM_STATIC void MEM_write32(void* memPtr, U32 value); +MEM_STATIC void MEM_write64(void* memPtr, U64 value); + +/*=== Little endian unaligned read/write ===*/ +MEM_STATIC U16 MEM_readLE16(const void* memPtr); +MEM_STATIC U32 MEM_readLE24(const void* memPtr); +MEM_STATIC U32 MEM_readLE32(const void* memPtr); +MEM_STATIC U64 MEM_readLE64(const void* memPtr); +MEM_STATIC size_t MEM_readLEST(const void* memPtr); + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val); +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val); +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val); + +/*=== Big endian unaligned read/write ===*/ +MEM_STATIC U32 MEM_readBE32(const void* memPtr); +MEM_STATIC U64 MEM_readBE64(const void* memPtr); +MEM_STATIC size_t MEM_readBEST(const void* memPtr); + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val); + +/*=== Byteswap ===*/ +MEM_STATIC U32 MEM_swap32(U32 in); +MEM_STATIC U64 MEM_swap64(U64 in); +MEM_STATIC size_t MEM_swapST(size_t in); + + 
+/*-************************************************************** +* Memory I/O Implementation +*****************************************************************/ +/* MEM_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets depending on alignment. + * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard, by lying on structure alignment. +Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } +MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + __pragma( pack(push, 1) ) + typedef struct { U16 v; } unalign16; + typedef struct { 
U32 v; } unalign32; + typedef struct { U64 v; } unalign64; + typedef struct { size_t v; } unalignArch; + __pragma( pack(pop) ) +#else + typedef struct { U16 v; } __attribute__((packed)) unalign16; + typedef struct { U32 v; } __attribute__((packed)) unalign32; + typedef struct { U64 v; } __attribute__((packed)) unalign64; + typedef struct { size_t v; } __attribute__((packed)) unalignArch; +#endif + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } + +#else + +/* default method, safe and standard. 
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC size_t MEM_readST(const void* memPtr) +{ + size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* MEM_FORCE_MEMORY_ACCESS */ + +MEM_STATIC U32 MEM_swap32(U32 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_ulong(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap32)) + return __builtin_bswap32(in); +#else + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +#endif +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap64)) + return __builtin_bswap64(in); +#else + return ((in << 56) & 0xff00000000000000ULL) | + ((in << 40) & 0x00ff000000000000ULL) | + ((in << 24) & 0x0000ff0000000000ULL) | + ((in << 8) & 0x000000ff00000000ULL) | + ((in >> 8) & 0x00000000ff000000ULL) | + ((in >> 24) & 0x0000000000ff0000ULL) | + ((in >> 40) & 0x000000000000ff00ULL) | + ((in >> 56) & 0x00000000000000ffULL); +#endif +} + +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) 
+ return (size_t)MEM_swap32((U32)in); + else + return (size_t)MEM_swap64((U64)in); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) { + MEM_write16(memPtr, val); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE*)memPtr)[2] = (BYTE)(val>>16); +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + return MEM_swap32(MEM_read32(memPtr)); +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, val32); + else + MEM_write32(memPtr, MEM_swap32(val32)); +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + return MEM_swap64(MEM_read64(memPtr)); +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, val64); + else + MEM_write64(memPtr, MEM_swap64(val64)); +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + +/*=== Big endian r/w ===*/ + +MEM_STATIC U32 MEM_readBE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap32(MEM_read32(memPtr)); + else + return MEM_read32(memPtr); +} + +MEM_STATIC void 
MEM_writeBE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, MEM_swap32(val32)); + else + MEM_write32(memPtr, val32); +} + +MEM_STATIC U64 MEM_readBE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap64(MEM_read64(memPtr)); + else + return MEM_read64(memPtr); +} + +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, MEM_swap64(val64)); + else + MEM_write64(memPtr, val64); +} + +MEM_STATIC size_t MEM_readBEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readBE32(memPtr); + else + return (size_t)MEM_readBE64(memPtr); +} + +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeBE32(memPtr, (U32)val); + else + MEM_writeBE64(memPtr, (U64)val); +} + +/* code only tested on 32 and 64 bits systems */ +MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } + + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ +/**** ended inlining mem.h ****/ +/**** start inlining error_private.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* Note : this module is expected to remain private, do not expose it */ + +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************************** +* Dependencies +******************************************/ +/**** skipping file: zstd_deps.h ****/ +/**** start inlining zstd_errors.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*===== dependency =====*/ +#include /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDERRORLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDERRORLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +#endif + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. 
+ **********************************************/ +typedef enum { + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + ZSTD_error_dstBuffer_null = 74, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ +} ZSTD_ErrorCode; + +/*! 
ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_ERRORS_H_398273423 */ +/**** ended inlining zstd_errors.h ****/ + + +/* **************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Customization (error_public.h) +******************************************/ +typedef ZSTD_ErrorCode ERR_enum; +#define PREFIX(name) ZSTD_error_##name + + +/*-**************************************** +* Error codes handling +******************************************/ +#undef ERROR /* already defined on Visual Studio */ +#define ERROR(name) ZSTD_ERROR(name) +#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } + +/* check and forward error code */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + + +/*-**************************************** +* Error Strings +******************************************/ + +const 
char* ERR_getErrorString(ERR_enum code); /* error_private.c */ + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + return ERR_getErrorString(ERR_getErrorCode(code)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ +/**** ended inlining error_private.h ****/ +#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ +/**** start inlining fse.h ****/ +/* ****************************************************************** + * FSE : Finite State Entropy codec + * Public Prototypes declaration + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef FSE_H +#define FSE_H + + +/*-***************************************** +* Dependencies +******************************************/ +/**** skipping file: zstd_deps.h ****/ + + +/*-***************************************** +* FSE_PUBLIC_API : control library symbols visibility +******************************************/ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define FSE_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define FSE_PUBLIC_API +#endif + +/*------ Version ------*/ +#define FSE_VERSION_MAJOR 0 +#define FSE_VERSION_MINOR 9 +#define FSE_VERSION_RELEASE 0 + +#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE +#define FSE_QUOTE(str) #str +#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) +#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) + +#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) +FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ + + +/*-**************************************** +* FSE simple functions +******************************************/ +/*! FSE_compress() : + Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. + 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). + @return : size of compressed data (<= dstCapacity). 
+ Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. + if FSE_isError(return), compression failed (more details using FSE_getErrorName()) +*/ +FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/*! FSE_decompress(): + Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', + into already allocated destination buffer 'dst', of size 'dstCapacity'. + @return : size of regenerated data (<= maxDstSize), + or an error code, which can be tested using FSE_isError() . + + ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! + Why ? : making this distinction requires a header. + Header management is intentionally delegated to the user layer, which can better manage special cases. +*/ +FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); + + +/*-***************************************** +* Tool functions +******************************************/ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE advanced functions +******************************************/ +/*! FSE_compress2() : + Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' + Both parameters can be defined as '0' to mean : use default value + @return : size of compressed data + Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. 
+ if FSE_isError(return), it's an error code. +*/ +FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] (see hist.h) +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + useLowProbCount is a boolean parameter which trades off compressed size for + faster header decoding. When it is set to 1, the compressed data will be slightly + smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be + faster. 
If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 + is a good default, since header deserialization makes a big speed difference. + Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount); + +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, unsigned tableLog); + +/*! Constructor and Destructor of FSE_CTable. + Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ +FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. 
+ @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). + +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). 
+ +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_headerBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). + +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). + +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). +Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ + + +/* *** DECOMPRESSION *** */ + +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). + maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize); + +/*! FSE_readNCount_bmi2(): + * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. + */ +FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize, int bmi2); + +/*! 
Constructor and Destructor of FSE_DTable. + Note that its size depends on 'tableLog' */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); +FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); + +/*! FSE_buildDTable(): + Builds 'dt', which must be already allocated, using FSE_createDTable(). + return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_decompress_usingDTable(): + Decompress compressed source `cSrc` of size `cSrcSize` using `dt` + into `dst` which must be already allocated. + @return : size of regenerated data (necessarily <= `dstCapacity`), + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); + +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) + +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. +The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). 
+ +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). +The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) +*/ + +#endif /* FSE_H */ + +#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY) +#define FSE_H_FSE_STATIC_LINKING_ONLY + +/* *** Dependency *** */ +/**** start inlining bitstream.h ****/ +/* ****************************************************************** + * bitstream + * Part of FSE library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif +/* +* This API consists of small unitary functions, which must be inlined for best performance. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. 
+*/ + +/*-**************************************** +* Dependencies +******************************************/ +/**** skipping file: mem.h ****/ +/**** skipping file: compiler.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: error_private.h ****/ + + +/*========================================= +* Target specific +=========================================*/ +#ifndef ZSTD_NO_INTRINSICS +# if defined(__BMI__) && defined(__GNUC__) +# include /* support for bextr (experimental) */ +# elif defined(__ICCARM__) +# include +# endif +#endif + +#define STREAM_ACCUMULATOR_MIN_32 25 +#define STREAM_ACCUMULATOR_MIN_64 57 +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) + + +/*-****************************************** +* bitStream encoding API (write forward) +********************************************/ +/* bitStream can mix input from multiple sources. + * A critical property of these streams is that they encode and decode in **reverse** direction. + * So the first bit sequence you add will be the last to be read, like a LIFO stack. + */ +typedef struct { + size_t bitContainer; + unsigned bitPos; + char* startPtr; + char* ptr; + char* endPtr; +} BIT_CStream_t; + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); + +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. +* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. +* +* bits are first added to a local register. +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. 
+* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. +* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ + + +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef struct { + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; + const char* limitPtr; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. 
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). +*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/*-************************************************************** +* Internal functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val) ^ 31; +# else + unsigned long r = 0; + return _BitScanReverse(&r, val) ? 
(unsigned)r : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; +# endif + } +} + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) + +/*-************************************************************** +* bitStream encoding +****************************************************************/ +/*! BIT_initCStream() : + * `dstCapacity` must be > sizeof(size_t) + * @return : 0 if success, + * otherwise an error code (can be tested using ERR_isError()) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, + void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); + return 0; +} + +/*! BIT_addBits() : + * can add up to 31 bits into `bitC`. + * Note : does not check for register overflow ! 
*/ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); + assert(nbBits < BIT_MASK_SIZE); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, + * meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + assert((value>>nbBits) == 0); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_flushBits() : + * assumption : bitContainer has not overflowed + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. + * overflow will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! 
BIT_closeCStream() : + * @return : size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : + * Initialize a BIT_DStream_t. + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. + * `srcSize` must be the *exact* size of the bitStream, in bytes. + * @return : size of stream (== srcSize), or an errorCode if a problem is detected + */ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + bitD->start = (const char*)srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + } else { + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + /* fall-through */ + + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + /* fall-through */ + + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + /* fall-through */ + + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + /* fall-through */ + + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + /* fall-through */ + + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + /* fall-through */ + + default: break; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ + } + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +{ + return bitContainer >> start; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +{ + U32 const regMask = sizeof(bitContainer)*8 - 1; + /* if start > regMask, bitstream is corrupted, and result is undefined */ + assert(nbBits < BIT_MASK_SIZE); + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ +#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 + return _bzhi_u64(bitContainer, nbBits); +#else + assert(nbBits < BIT_MASK_SIZE); + return bitContainer & BIT_mask[nbBits]; +#endif +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +{ + /* arbitrate between double-shift and shift+mask */ +#if 1 + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, + * bitstream is likely corrupted, and result is undefined */ + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); +#else + /* this code path is slower on my os-x laptop */ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); +#endif +} + +/*! 
BIT_lookBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); +} + +MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : + * unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +{ + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; +} + +/*! BIT_reloadDStream() : + * Refill `bitD` from buffer previously set in BIT_initDStream() . + * This function is safe, it guarantees it will not read beyond src buffer. 
+ * @return : status of `BIT_DStream_t` internal register. + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->limitPtr) { + return BIT_reloadDStreamFast(bitD); + } + if (bitD->ptr == bitD->start) { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + /* start < ptr < limitPtr */ + { U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ + return result; + } +} + +/*! BIT_endOfDStream() : + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). 
+ */ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ +/**** ended inlining bitstream.h ****/ + + +/* ***************************************** +* Static allocation +*******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog))) + +/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */ +#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable)) +#define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable)) + + +/* ***************************************** + * FSE advanced API + ***************************************** */ + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); +/**< same as FSE_optimalTableLog(), which used `minus==2` */ + +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). + * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. 
+ */ +#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) +size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); +/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ + +size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); +/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. + */ +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2))) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) +size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) +#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) +FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ + +size_t FSE_buildDTable_raw (FSE_DTable* dt, 
unsigned nbBits); +/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ + +size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); +/**< build a fake FSE_DTable, designed to always generate the same symbolValue */ + +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)) +#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); +/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ + +typedef enum { + FSE_repeat_none, /**< Cannot use the previous table */ + FSE_repeat_check, /**< Can use the previous table but it must be checked */ + FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } FSE_repeat; + +/* ***************************************** +* FSE symbol compression API +*******************************************/ +/*! + This API consists of small unitary functions, which highly benefit from being inlined. + Hence their body are included in next section. 
+*/ +typedef struct { + ptrdiff_t value; + const void* stateTable; + const void* symbolTT; + unsigned stateLog; +} FSE_CState_t; + +static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct); + +static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol); + +static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); + +/**< +These functions are inner components of FSE_compress_usingCTable(). +They allow the creation of custom streams, mixing multiple tables and bit sources. + +A key property to keep in mind is that encoding and decoding are done **in reverse direction**. +So the first symbol you will encode is the last you will decode, like a LIFO stack. + +You will need a few variables to track your CStream. They are : + +FSE_CTable ct; // Provided by FSE_buildCTable() +BIT_CStream_t bitStream; // bitStream tracking structure +FSE_CState_t state; // State tracking structure (can have several) + + +The first thing to do is to init bitStream and state. + size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize); + FSE_initCState(&state, ct); + +Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError(); +You can then encode your input data, byte after byte. +FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time. +Remember decoding will be done in reverse direction. + FSE_encodeByte(&bitStream, &state, symbol); + +At any time, you can also add any bit sequence. +Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders + BIT_addBits(&bitStream, bitField, nbBits); + +The above methods don't commit data to memory, they just store it into local register, for speed. +Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +Writing data to memory is a manual operation, performed by the flushBits function. 
+ BIT_flushBits(&bitStream); + +Your last FSE encoding operation shall be to flush your last state value(s). + FSE_flushState(&bitStream, &state); + +Finally, you must close the bitStream. +The function returns the size of CStream in bytes. +If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible) +If there is an error, it returns an errorCode (which can be tested using FSE_isError()). + size_t size = BIT_closeCStream(&bitStream); +*/ + + +/* ***************************************** +* FSE symbol decompression API +*******************************************/ +typedef struct { + size_t state; + const void* table; /* precise table may vary, depending on U16 */ +} FSE_DState_t; + + +static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt); + +static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); + +/**< +Let's now decompose FSE_decompress_usingDTable() into its unitary components. +You will decode FSE-encoded symbols from the bitStream, +and also any other bitFields you put in, **in reverse order**. + +You will need a few variables to track your bitStream. They are : + +BIT_DStream_t DStream; // Stream context +FSE_DState_t DState; // State context. Multiple ones are possible +FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable() + +The first thing to do is to init the bitStream. + errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize); + +You should then retrieve your initial state(s) +(in reverse flushing order if you have several ones) : + errorCode = FSE_initDState(&DState, &DStream, DTablePtr); + +You can then decode your data, symbol after symbol. +For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'. +Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out). 
+ unsigned char symbol = FSE_decodeSymbol(&DState, &DStream); + +You can retrieve any bitfield you eventually stored into the bitStream (in reverse order) +Note : maximum allowed nbBits is 25, for 32-bits compatibility + size_t bitField = BIT_readBits(&DStream, nbBits); + +All above operations only read from local register (which size depends on size_t). +Refueling the register from memory is manually performed by the reload method. + endSignal = FSE_reloadDStream(&DStream); + +BIT_reloadDStream() result tells if there is still some more data to read from DStream. +BIT_DStream_unfinished : there is still some data left into the DStream. +BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled. +BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed. +BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted. + +When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop, +to properly detect the exact end of stream. +After each decoded symbol, check if DStream is fully consumed using this simple test : + BIT_reloadDStream(&DStream) >= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. 
+ FSE_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+* FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+* Implementation of inlined functions
+*******************************************/
+typedef struct {
+ int deltaFindState;
+ U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
+{
+ const void* ptr = ct;
+ const U16* u16ptr = (const U16*) ptr;
+ const U32 tableLog = MEM_read16(ptr);
+ statePtr->value = (ptrdiff_t)1<<tableLog;
+ statePtr->stateTable = u16ptr+2;
+ statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
+ statePtr->stateLog = tableLog;
+}
+
+
+/*! FSE_initCState2() :
+* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
+* uses the smallest state value possible, saving the cost of this symbol */
+MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
+{
+ FSE_initCState(statePtr, ct);
+ { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+ const U16* stateTable = (const U16*)(statePtr->stateTable);
+ U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16);
+ statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
+ statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+ }
+}
+
+MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
+{
+ FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+ const U16* const stateTable = (const U16*)(statePtr->stateTable);
+ U32 const nbBitsOut = (U32)((statePtr->value
+ symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + + +/* FSE_getMaxNbBits() : + * Approximate maximum cost of a symbol, in bits. + * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; +} + +/* FSE_bitCost() : + * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(tableLog < 16); + assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ + { U32 const tableSize = 1 << tableLog; + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + 
assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + } +} + + +/* ====== Decompression ====== */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! 
FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + + +#ifndef FSE_COMMONDEFS_ONLY + +/* ************************************************************** +* Tuning parameters +****************************************************************/ +/*!MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#ifndef FSE_MAX_MEMORY_USAGE +# define FSE_MAX_MEMORY_USAGE 14 +#endif +#ifndef FSE_DEFAULT_MEMORY_USAGE +# define FSE_DEFAULT_MEMORY_USAGE 13 +#endif +#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE) +# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE" +#endif + +/*!FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. 
+* Required for proper stack allocation */
+#ifndef FSE_MAX_SYMBOL_VALUE
+# define FSE_MAX_SYMBOL_VALUE 255
+#endif
+
+/* **************************************************************
+* template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+#endif /* !FSE_COMMONDEFS_ONLY */
+
+
+/* ***************************************************************
+* Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
+
+
+#endif /* FSE_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+/**** ended inlining fse.h ****/
+#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
+/**** start inlining huf.h ****/
+/* ******************************************************************
+ * huff0 huffman codec,
+ * part of Finite State Entropy library
+ * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef HUF_H_298734234 +#define HUF_H_298734234 + +/* *** Dependencies *** */ +/**** skipping file: zstd_deps.h ****/ + + +/* *** library symbols visibility *** */ +/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, + * HUF symbols remain "private" (internal symbols for library only). + * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define HUF_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ +#else +# define HUF_PUBLIC_API +#endif + + +/* ========================== */ +/* *** simple functions *** */ +/* ========================== */ + +/** HUF_compress() : + * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. + * 'dst' buffer must be already allocated. + * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). + * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + * @return : size of compressed data (<= `dstCapacity`). + * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) + */ +HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/** HUF_decompress() : + * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', + * into already allocated buffer 'dst', of minimum size 'dstSize'. 
+ * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. + * Note : in contrast with FSE, HUF_decompress can regenerate + * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + * because it knows size to regenerate (originalSize). + * @return : size of regenerated data (== originalSize), + * or an error code, which can be tested using HUF_isError() + */ +HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize); + + +/* *** Tool functions *** */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ + +/* Error Management */ +HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ + + +/* *** Advanced function *** */ + +/** HUF_compress2() : + * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. + * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . + * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ +HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog); + +/** HUF_compress4X_wksp() : + * Same as HUF_compress2(), but uses externally allocated `workSpace`. 
+ * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ +#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) +#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) +HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize); + +#endif /* HUF_H_298734234 */ + +/* ****************************************************************** + * WARNING !! + * The following section contains advanced and experimental definitions + * which shall never be used in the context of a dynamic library, + * because they are not guaranteed to remain stable in the future. + * Only consider them in association with static linking. + * *****************************************************************/ +#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY) +#define HUF_H_HUF_STATIC_LINKING_ONLY + +/* *** Dependencies *** */ +/**** skipping file: mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ + + +/* *** Constants *** */ +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ +#define HUF_SYMBOLVALUE_MAX 255 + +#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" 
+#endif + + +/* **************************************** +* Static allocation +******************************************/ +/* HUF buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of HUF's Compression Table */ +/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt */ +typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */ +#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */ + +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) +#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } + + +/* **************************************** +* Advanced decompression functions +******************************************/ +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +#endif + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, 
void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif + + +/* **************************************** + * HUF detailed API + * ****************************************/ + +/*! HUF_compress() does the following: + * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") + * 2. (optional) refine tableLog using HUF_optimalTableLog() + * 3. build Huffman table from count using HUF_buildCTable() + * 4. save Huffman table to memory buffer using HUF_writeCTable() + * 5. encode the data stream using HUF_compress4X_usingCTable() + * + * The following API allows targeting specific sub-functions for advanced tasks. + * For example, it's possible to compress several blocks using the same 'CTable', + * or to save and regenerate 'CTable' using external methods. 
+ */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); + +typedef enum { + HUF_repeat_none, /**< Cannot use the previous table */ + HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } HUF_repeat; +/** HUF_compress4X_repeat() : + * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress4X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. 
+ * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. + */ +#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) +#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_buildCTable_wksp (HUF_CElt* tree, + const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize); + +/*! HUF_readStats() : + * Read compact Huffman tree, saved by HUF_writeCTable(). + * `huffWeight` is destination buffer. + * @return : size read from `src` , or an error Code . + * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize); + +/*! HUF_readStats_wksp() : + * Same as HUF_readStats() but takes an external workspace which must be + * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
+ */ +#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) +#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + int bmi2); + +/** HUF_readCTable() : + * Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); + +/** HUF_getNbBits() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : is not inlined, as HUF_CElt definition is private + * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); + +/* + * HUF_decompress() does the following: + * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics + * 2. build Huffman table from save, using HUF_readDTableX?() + * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() + */ + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); + +/** + * The minimum workspace size for the `workSpace` used in + * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). + * + * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when + * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. + * Buffer overflow errors may potentially occur if code modifications result in + * a required workspace size greater than that specified in the following + * macro. 
+ */ +#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) +#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif + + +/* ====================== */ +/* single stream variants */ +/* ====================== */ + +size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +/** HUF_compress1X_repeat() : + * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. 
+ * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress1X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +#endif + +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_usingDTable(void* dst, size_t 
maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif + +/* BMI2 variants. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. + */ +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif + +#endif /* HUF_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif +/**** ended inlining huf.h ****/ + + +/*=== Version ===*/ +unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } + + +/*=== Error Management ===*/ +unsigned FSE_isError(size_t code) { return ERR_isError(code); } +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } + +unsigned HUF_isError(size_t code) { return ERR_isError(code); } +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } + + +/*-************************************************************** +* FSE NCount encoding-decoding +****************************************************************/ +static U32 FSE_ctz(U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + 
return _BitScanForward(&r, val) ? (unsigned)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_ctz(val); +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return __CTZ(val); +# else /* Software version */ + U32 count = 0; + while ((val & 1) == 0) { + val >>= 1; + ++count; + } + return count; +# endif + } +} + +FORCE_INLINE_TEMPLATE +size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + unsigned const maxSV1 = *maxSVPtr + 1; + int previous0 = 0; + + if (hbSize < 8) { + /* This function only works when hbSize >= 8 */ + char buffer[8] = {0}; + ZSTD_memcpy(buffer, headerBuffer, hbSize); + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, + buffer, sizeof(buffer)); + if (FSE_isError(countSize)) return countSize; + if (countSize > hbSize) return ERROR(corruption_detected); + return countSize; + } } + assert(hbSize >= 8); + + /* init */ + ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ + bitStream = MEM_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<> 1; + while (repeats >= 12) { + charnum += 3 * 12; + if (LIKELY(ip <= iend-7)) { + ip += 3; + } else { + bitCount -= (int)(8 * (iend - 7 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; + } + charnum += 3 * repeats; + bitStream >>= 2 * repeats; + bitCount += 2 * 
repeats; + + /* Add the final repeat which isn't 0b11. */ + assert((bitStream & 3) < 3); + charnum += bitStream & 3; + bitCount += 2; + + /* This is an error, but break and return an error + * at the end, because returning out of a loop makes + * it harder for the compiler to optimize. + */ + if (charnum >= maxSV1) break; + + /* We don't need to set the normalized count to 0 + * because we already memset the whole buffer to 0. + */ + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert((bitCount >> 3) <= 3); /* For first condition to work */ + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } + { + int const max = (2*threshold-1) - remaining; + int count; + + if ((bitStream & (threshold-1)) < (U32)max) { + count = bitStream & (threshold-1); + bitCount += nbBits-1; + } else { + count = bitStream & (2*threshold-1); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + /* When it matters (small blocks), this is a + * predictable branch, because we don't use -1. + */ + if (count >= 0) { + remaining -= count; + } else { + assert(count == -1); + remaining += count; + } + normalizedCounter[charnum++] = (short)count; + previous0 = !count; + + assert(threshold > 1); + if (remaining < threshold) { + /* This branch can be folded into the + * threshold update condition because we + * know that threshold > 1. 
+ */ + if (remaining <= 1) break; + nbBits = BIT_highbit32(remaining) + 1; + threshold = 1 << (nbBits - 1); + } + if (charnum >= maxSV1) break; + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } } + if (remaining != 1) return ERROR(corruption_detected); + /* Only possible when there are too many zeros. */ + if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall); + if (bitCount > 32) return ERROR(corruption_detected); + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + return ip-istart; +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_readNCount_body_default( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} +#endif + +size_t FSE_readNCount_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); + } +#endif + (void)bmi2; + return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +size_t FSE_readNCount( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); +} + + 
+/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . +*/ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); +} + +FORCE_INLINE_TEMPLATE size_t +HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ + U32 weightTotal; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; + /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... 
*/ + + if (iSize >= 128) { /* special header */ + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + { U32 n; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } } } + else { /* header compressed with FSE (normal case) */ + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2); + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + weightTotal = 0; + { U32 n; for (n=0; n= HUF_TABLELOG_MAX) return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } } + if (weightTotal == 0) return ERROR(corruption_detected); + + /* get last non-null symbol weight (implied, total must be 2^n) */ + { U32 const tableLog = BIT_highbit32(weightTotal) + 1; + if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); + *tableLogPtr = tableLog; + /* determine last weight */ + { U32 const total = 1 << tableLog; + U32 const rest = total - weightTotal; + U32 const verif = 1 << BIT_highbit32(rest); + U32 const lastWeight = BIT_highbit32(rest) + 1; + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + return iSize+1; +} + +/* Avoids the FORCE_INLINE of the _body() function. 
*/ +static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1); +} +#endif + +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); + } +#endif + (void)bmi2; + return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); +} +/**** ended inlining common/entropy_common.c ****/ +/**** start inlining common/error_private.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* The purpose of this file is to have a single list of error strings embedded in binary */ + +/**** skipping file: error_private.h ****/ + +const char* ERR_getErrorString(ERR_enum code) +{ +#ifdef ZSTD_STRIP_ERROR_STRINGS + (void)code; + return "Error strings stripped"; +#else + static const char* const notErrorCode = "Unspecified error code"; + switch( code ) + { + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(version_unsupported): return "Version not supported"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; + case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case 
PREFIX(srcSize_wrong): return "Src size is incorrect"; + case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; + /* following error codes are not stable and may be removed or changed in a future version */ + case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; + case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; + case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; + case PREFIX(maxCode): + default: return notErrorCode; + } +#endif +} +/**** ended inlining common/error_private.c ****/ +/**** start inlining common/fse_decompress.c ****/ +/* ****************************************************************** + * FSE : Finite State Entropy decoder + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + + +/* ************************************************************** +* Includes +****************************************************************/ +/**** skipping file: debug.h ****/ +/**** skipping file: bitstream.h ****/ +/**** skipping file: compiler.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ +/**** skipping file: error_private.h ****/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError +#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ +FSE_DTable* FSE_createDTable (unsigned tableLog) +{ + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); +} + +void FSE_freeDTable (FSE_DTable* dt) +{ + ZSTD_free(dt); +} + +static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) +{ + void* const tdPtr = dt+1; /* because *dt is unsigned, 
32-bits aligned on 32-bits */ + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); + U16* symbolNext = (U16*)workSpace; + BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + { FSE_DTableHeader DTableH; + DTableH.tableLog = (U16)tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; utableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + void* dPtr = dt + 1; + FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSV1 = tableMask+1; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) { + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state1); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state2); + break; + } + + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state2); + if 
(BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state1); + break; + } } + + return op-ostart; +} + + +size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); +} + +FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( + void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize, + unsigned maxLog, void* workSpace, size_t wkspSize, + int bmi2) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + short counting[FSE_MAX_SYMBOL_VALUE+1]; + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + FSE_DTable* const dtable = (FSE_DTable*)workSpace; + + /* normal FSE decoding mode */ + size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); + if (FSE_isError(NCountLength)) return NCountLength; + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + assert(NCountLength <= cSrcSize); + ip += NCountLength; + cSrcSize -= NCountLength; + + if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); + workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog); + wkspSize -= FSE_DTABLE_SIZE(tableLog); + + CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) ); + + { + const void* ptr = 
dtable; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0); + } +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); +} +#endif + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); + } +#endif + (void)bmi2; + return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); +} + + +typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { + U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)]; + return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp)); +} + +size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) +{ + /* Static analyzer seems unable to 
understand this table will be properly initialized later */ + U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp)); +} +#endif + + +#endif /* FSE_COMMONDEFS_ONLY */ +/**** ended inlining common/fse_decompress.c ****/ +/**** start inlining common/threading.c ****/ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This file will hold wrapper for systems, which do not support pthreads + */ + +/**** start inlining threading.h ****/ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef THREADING_H_938743 +#define THREADING_H_938743 + +/**** skipping file: debug.h ****/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ +#ifdef WINVER +# undef WINVER +#endif +#define WINVER 0x0600 + +#ifdef _WIN32_WINNT +# undef _WIN32_WINNT +#endif +#define _WIN32_WINNT 0x0600 + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#include +#undef ERROR +#define ERROR(name) ZSTD_ERROR(name) + + +/* mutex */ +#define ZSTD_pthread_mutex_t CRITICAL_SECTION +#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0) +#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a)) +#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a)) + +/* condition variable */ +#define ZSTD_pthread_cond_t CONDITION_VARIABLE +#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) +#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a)) +#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a)) + +/* ZSTD_pthread_create() and ZSTD_pthread_join() */ +typedef struct { + HANDLE handle; + void* (*start_routine)(void*); + void* arg; +} ZSTD_pthread_t; + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg); + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); + +/** + * add here more wrappers as required + */ + + +#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ +/* === POSIX Systems === */ +# include + +#if DEBUGLEVEL < 1 + +#define 
ZSTD_pthread_mutex_t pthread_mutex_t +#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b)) +#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a)) +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a)) + +#define ZSTD_pthread_cond_t pthread_cond_t +#define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b)) +#define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a)) +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#else /* DEBUGLEVEL >= 1 */ + +/* Debug implementation of threading. + * In this implementation we use pointers for mutexes and condition variables. + * This way, if we forget to init/destroy them the program will crash or ASAN + * will report leaks. 
+ */ + +#define ZSTD_pthread_mutex_t pthread_mutex_t* +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr); +int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex); +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a)) + +#define ZSTD_pthread_cond_t pthread_cond_t* +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr); +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond); +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#endif + +#else /* ZSTD_MULTITHREAD not defined */ +/* No multithreading support */ + +typedef int ZSTD_pthread_mutex_t; +#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_mutex_destroy(a) ((void)(a)) +#define ZSTD_pthread_mutex_lock(a) ((void)(a)) +#define ZSTD_pthread_mutex_unlock(a) ((void)(a)) + +typedef int ZSTD_pthread_cond_t; +#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b)) +#define ZSTD_pthread_cond_signal(a) ((void)(a)) +#define ZSTD_pthread_cond_broadcast(a) ((void)(a)) + +/* do not use ZSTD_pthread_t */ + +#endif /* ZSTD_MULTITHREAD */ + +#if defined (__cplusplus) +} +#endif + +#endif /* THREADING_H_938743 */ +/**** ended inlining threading.h ****/ + +/* create fake symbol to avoid empty translation unit warning */ +int g_ZSTD_threading_useless_symbol; + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + 
*/ + + +/* === Dependencies === */ +#include +#include + + +/* === Implementation === */ + +static unsigned __stdcall worker(void *arg) +{ + ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg) +{ + (void)unused; + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) +{ + DWORD result; + + if (!thread.handle) return 0; + + result = WaitForSingleObject(thread.handle, INFINITE); + switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) *value_ptr = thread.arg; + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return GetLastError(); + } +} + +#endif /* ZSTD_MULTITHREAD */ + +#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32) + +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ + +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) +{ + *mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t)); + if (!*mutex) + return 1; + return pthread_mutex_init(*mutex, attr); +} + +int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) +{ + if (!*mutex) + return 0; + { + int const ret = pthread_mutex_destroy(*mutex); + ZSTD_free(*mutex); + return ret; + } +} + +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) +{ + *cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t)); + if (!*cond) + return 1; + return pthread_cond_init(*cond, attr); +} + +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) +{ + if (!*cond) + return 0; + { + int const ret = pthread_cond_destroy(*cond); + ZSTD_free(*cond); + return ret; + } +} + +#endif +/**** ended inlining 
common/threading.c ****/ +/**** start inlining common/pool.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ====== Dependencies ======= */ +/**** skipping file: zstd_deps.h ****/ +/**** skipping file: debug.h ****/ +/**** start inlining zstd_internal.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE + +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. + * It also contains a few functions useful to at least 2 of them + * and which benefit from being inlined */ + +/*-************************************* +* Dependencies +***************************************/ +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) +#include +#endif +/**** skipping file: compiler.h ****/ +/**** skipping file: mem.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: error_private.h ****/ +#define ZSTD_STATIC_LINKING_ONLY +/**** start inlining ../zstd.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 + +/* ====== Dependency ======*/ +#include /* INT_MAX */ +#include /* size_t */ + + +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDLIB_API ZSTDLIB_VISIBILITY +#endif + + +/******************************************************************************* + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). 
+ + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ + +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 4 +#define ZSTD_VERSION_RELEASE 9 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. 
*/ +ZSTDLIB_API const char* ZSTD_versionString(void); + +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + +/* ************************************* + * Constants + ***************************************/ + +/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. 
+ * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. + * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). 
+ * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_findFrameCompressedSize() : + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + + +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ + + +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. 
+ * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. + */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*************************************** +* Advanced compression API +***************************************/ + +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. 
+ * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supercedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ + + +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. 
+ * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. + * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. 
+ * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. + * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. 
+ * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ + + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ + + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. 
+ */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. + * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. 
+ * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009 +} ZSTD_cParameter; + +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; + +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); + +/*! 
ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. + * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. 
+ */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); + +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; + +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. + * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); + +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). 
+ */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*************************************** +* Advanced decompression API +***************************************/ + +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. + */ + +typedef enum { + + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. 
+ * @return : a structure, ZSTD_bounds, which contains
+ * - an error status field, which must be tested using ZSTD_isError()
+ * - both lower and upper bounds, inclusive
+ */
+ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam);
+
+/*! ZSTD_DCtx_setParameter() :
+ * Set one decompression parameter, selected by enum ZSTD_dParameter.
+ * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds().
+ * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ * Setting a parameter is only possible during frame initialization (before starting decompression).
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value);
+
+/*! ZSTD_DCtx_reset() :
+ * Return a DCtx to clean state.
+ * Session and parameters can be reset jointly or separately.
+ * Parameters can only be reset when no active frame is being decompressed.
+ * @return : 0, or an error code, which can be tested with ZSTD_isError()
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
+
+
+/****************************
+* Streaming
+****************************/
+
+typedef struct ZSTD_inBuffer_s {
+ const void* src; /**< start of input buffer */
+ size_t size; /**< size of input buffer */
+ size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_inBuffer;
+
+typedef struct ZSTD_outBuffer_s {
+ void* dst; /**< start of output buffer */
+ size_t size; /**< size of output buffer */
+ size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_outBuffer;
+
+
+
+/*-***********************************************************************
+* Streaming compression - HowTo
+*
+* A ZSTD_CStream object is required to track streaming operation.
+* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. +* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). 
+* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+* +* *******************************************************************/ + +typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ +/*===== ZSTD_CStream management functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); + +/*===== Streaming compression functions =====*/ +typedef enum { + ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ + ZSTD_e_flush=1, /* flush any data provided so far, + * it creates (at least) one new block, that can be decoded immediately on reception; + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ + ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. + * note that frame is only closed after compressed data is fully flushed (return value == 0). + * After that point, any additional data starts a new frame. + * note : each frame is independent (does not reference any content from previous frame). + : note : multithreaded compression will block to flush as much output as possible. */ +} ZSTD_EndDirective; + +/*! ZSTD_compressStream2() : + * Behaves about the same as ZSTD_compressStream, with additional control on end directive. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) + * - output->pos must be <= dstCapacity, input->pos must be <= srcSize + * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. 
+ * - endOp must be a valid directive + * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. + * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. + * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. + * - @return provides a minimum amount of data remaining to be flushed from internal buffers + * or an error code, which can be tested using ZSTD_isError(). + * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. + * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. + * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), + * only ZSTD_e_end or ZSTD_e_flush operations are allowed. + * Before starting a new compression job, or changing compression parameters, + * it is required to fully flush internal buffers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. 
+ * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ + + +/* ***************************************************************************** + * This following is a legacy streaming API. + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Advanced parameters and dictionary compression can only be used through the + * new API. + ******************************************************************************/ + +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error). 
+ */ +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); + + +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos` fields. +* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting output buffer size. +* If `output.pos < output.size`, decoder has flushed everything it could. +* But if `output.pos == output.size`, there might be some data left within internal buffers., +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. 
+* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ + +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); + +/*===== Streaming decompression functions =====*/ + +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + + +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see dictBuilder/zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. 
+ * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. 
*/ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); + +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/******************************** + * Dictionary helper functions + *******************************/ + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! 
ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompressed the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary to be decoded (most common case). + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); + + +/******************************************************************************* + * Advanced dictionary and prefix API + * + * This API allows dictionaries to be used with ZSTD_compress2(), + * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and + * only reset with the context is reset with ZSTD_reset_parameters or + * ZSTD_reset_session_and_parameters. Prefixes are single-use. + ******************************************************************************/ + + +/*! ZSTD_CCtx_loadDictionary() : + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). 
+ * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_refCDict() : + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. + * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. 
*/ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); + +/*! ZSTD_DCtx_loadDictionary() : + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). 
+ * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_refDDict() : + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. + * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +/*! ZSTD_DCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) to decompress next frame. 
+ * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); + +/* === Memory management === */ + +/*! ZSTD_sizeof_*() : + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + +#endif /* ZSTD_H_235446 */ + + +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. 
+ * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. + * ***************************************************************************************/ + +#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY + +/**************************************************************************************** + * experimental API (static linking only) + **************************************************************************************** + * The following symbols and constants + * are not planned to join "stable API" status in the near future. + * They can still change in future versions. + * Some of them are planned to remain in the static_only section indefinitely. + * Some of them might be removed in the future (especially when redundant with existing stable functions) + * ***************************************************************************************/ + +#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ +#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ +#define ZSTD_SKIPPABLEHEADERSIZE 8 + +/* compression parameter bounds */ +#define ZSTD_WINDOWLOG_MAX_32 30 +#define ZSTD_WINDOWLOG_MAX_64 31 +#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX_32 29 +#define ZSTD_CHAINLOG_MAX_64 30 +#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? 
ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ +#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ +#define ZSTD_STRATEGY_MIN ZSTD_fast +#define ZSTD_STRATEGY_MAX ZSTD_btultra2 + + +#define ZSTD_OVERLAPLOG_MIN 0 +#define ZSTD_OVERLAPLOG_MAX 9 + +#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame + * requiring larger than (1< 0: + * If litLength != 0: + * rep == 1 --> offset == repeat_offset_1 + * rep == 2 --> offset == repeat_offset_2 + * rep == 3 --> offset == repeat_offset_3 + * If litLength == 0: + * rep == 1 --> offset == repeat_offset_2 + * rep == 2 --> offset == repeat_offset_3 + * rep == 3 --> offset == repeat_offset_1 - 1 + * + * Note: This field is optional. ZSTD_generateSequences() will calculate the value of + * 'rep', but repeat offsets do not necessarily need to be calculated from an external + * sequence provider's perspective. For example, ZSTD_compressSequences() does not + * use this 'rep' field at all (as of now). 
+ */ +} ZSTD_Sequence; + +typedef struct { + unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /**< dispatch table : larger == faster, more memory */ + unsigned searchLog; /**< nb of searches : larger == more compression, slower */ + unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ + unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ +} ZSTD_compressionParameters; + +typedef struct { + int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ + int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ + int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ +} ZSTD_frameParameters; + +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; + +typedef enum { + ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ + ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ +} ZSTD_dictContentType_e; + +typedef enum { + ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ + ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. 
*/ +} ZSTD_dictLoadMethod_e; + +typedef enum { + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. + * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. 
+ * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; + +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ +} ZSTD_literalCompressionMode_e; + + +/*************************************** +* Frame size functions +***************************************/ + +/*! ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. 
there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. 
+ * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. + * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: + * upper-bound = # blocks * min(128 KB, Window_Size) + */ +ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); + +/*! ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); + +typedef enum { + ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ + ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ +} ZSTD_sequenceFormat_e; + +/*! ZSTD_generateSequences() : + * Generate sequences using ZSTD_compress2, given a source buffer. + * + * Each block will end with a dummy sequence + * with offset == 0, matchLength == 0, and litLength == length of last literals. + * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. + * + * zc can be used to insert custom compression params. + * This function invokes ZSTD_compress2 + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters + * @return : number of sequences generated + */ + +ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); + +/*! ZSTD_mergeBlockDelimiters() : + * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals + * by merging them into into the literals of the next sequence. 
+ * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters + * @return : number of sequences left after merging + */ +ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); + +/*! ZSTD_compressSequences() : + * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. + * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * The entire source is compressed into a single frame. + * + * The compression behavior changes based on cctx params. In particular: + * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on + * the block size derived from the cctx, and sequences may be split. This is the default setting. + * + * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, there are other important cctx params. + * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. 
It has a minimum value of ZSTD_MINMATCH_MIN. + * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression. + * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset + * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md + * + * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. + * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, + * and cannot emit an RLE block that disagrees with the repcode history + * @return : final compressed size or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); + + +/*! ZSTD_writeSkippableFrame() : + * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number, + * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + * + * @return : number of bytes written or a ZSTD error. 
+ */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + +/*************************************** +* Memory management +***************************************/ + +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. 
+ * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. + * Note : CStream size estimation is only correct for single-threaded compression. + * ZSTD_DStream memory budget depends on window Size. + * This information can be passed manually, using ZSTD_estimateDStreamSize, + * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), + * an internal ?Dict will be created, which additional size is not estimated here. + * In this case, get total size by adding ZSTD_estimate?DictSize */ +ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); + +/*! ZSTD_estimate?DictSize() : + * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). + * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). + * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. 
+ */ +ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); + +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). + * Note 2 : there is no corresponding "free" function. + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. 
+ */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ + +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ + +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); + +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); + + +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. 
+ */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static +#ifdef __GNUC__ +__attribute__((__unused__)) +#endif +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ + +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); + +/* ! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + + +/* + * This API is temporary and is expected to change or disappear in the future! 
+ */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + + +/*************************************** +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. 
+ * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ +ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now REDUNDANT. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning in some future version */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); + + +/*! 
ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ + + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. 
+ * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 + +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 + +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 + +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 + +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_t enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. + * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior! 
+ * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. 
+ * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 + +/* ZSTD_c_stableInBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the compressor, and + * compression will fail if it ever changes. This means the only flush + * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end + * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos) + * MUST not be modified during compression or you will get data corruption. + * + * When this flag is enabled zstd won't allocate an input window buffer, + * because the user guarantees it can reference the ZSTD_inBuffer until + * the frame is complete. But, it will still allocate an output buffer + * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also + * avoid the memcpy() from the input buffer to the input window buffer. + * + * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used. + * That means this flag cannot be used with ZSTD_compressStream(). 
+ * + * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, compression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST + * not be modified during compression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_inBuffer to find + * matches. Normally zstd maintains its own window buffer for this purpose, + * but passing this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9 + +/* ZSTD_c_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_outBuffer will not be resized between + * calls. Specifically: (out.size - out.pos) will never grow. This gives the + * compressor the freedom to say: If the compressed data doesn't fit in the + * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to + * always decompress directly into the output buffer, instead of decompressing + * into an internal buffer and copying to the output buffer. + * + * When this flag is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer. It will still allocate the + * input window buffer (see ZSTD_c_stableInBuffer). + * + * Zstd will check that (out.size - out.pos) never grows and return an error + * if it does. While not strictly necessary, this should prevent surprises. + */ +#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10 + +/* ZSTD_c_blockDelimiters + * Default is 0 == ZSTD_sf_noBlockDelimiters. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * + * Designates whether or not the given array of ZSTD_Sequence contains block delimiters + * and last literals, which are defined as sequences with offset == 0 and matchLength == 0. 
+ * See the definition of ZSTD_Sequence for more specifics. + */ +#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11 + +/* ZSTD_c_validateSequences + * Default is 0 == disabled. Set to 1 to enable sequence validation. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * Designates whether or not we validate sequences provided to ZSTD_compressSequences() + * during function execution. + * + * Without validation, providing a sequence that does not conform to the zstd spec will cause + * undefined behavior, and may produce a corrupted block. + * + * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and + * return an error. + * + */ +#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 + +/*! ZSTD_CCtx_getParameter() : + * Get the requested compression parameter value, selected by enum ZSTD_cParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); + + +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression. 
+ */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); + +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + +/*! ZSTD_CCtxParams_setParameter() : + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. 
+ * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); + + +/*************************************** +* Advanced decompression functions +***************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. 
+ * This saves memory if `dict` remains around. + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); + +/*! ZSTD_DCtx_getParameter() : + * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). 
+ */ +ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); + +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats + */ +#define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. + * + * When this flag is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer. 
+ */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 + +/* ZSTD_d_forceIgnoreChecksum + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * Tells the decompressor to skip checksum validation during decompression, regardless + * of whether checksumming was specified during compression. This offers some + * slight performance benefits, and may be useful for debugging. + * Param has values of type ZSTD_forceIgnoreChecksum_e + */ +#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 + +/* ZSTD_d_refMultipleDDicts + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * If enabled and dctx is allocated on the heap, then additional memory will be allocated + * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict() + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate dictID is selected + * from the set of DDicts based on the dictID in the frame. + * + * Usage is simply calling ZSTD_refDDict() on multiple dict buffers. + * + * Param has values of type ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. + * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. + */ +#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 + + +/*! ZSTD_DCtx_setFormat() : + * Instruct the decoder context about what kind of data to decode next. + * This instruction is mandatory to decode data without a fully-formed header, + * such as ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). 
*/ +ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); + +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); + + +/******************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ + +/*===== Advanced Streaming compression functions =====*/ + +/*! ZSTD_initCStream_srcSize() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); + +/*! 
ZSTD_initCStream_usingDict() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates of an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); + +/*! ZSTD_initCStream_advanced() : + * This function is deprecated, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); + +/*! 
ZSTD_initCStream_usingCDict() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. + * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); + +/*! ZSTD_resetCStream() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. 
+ * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. 
+ * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); + + +/*===== Advanced Streaming decompression functions =====*/ + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*! 
+ * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, + or ZSTD_compressBegin_advanced(), for finer parameter control. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). 
+ ZSTD_compressContinue() doesn't guarantee recover after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). + It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. +*/ + +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. 
If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/** + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. + As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. 
+ + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. + For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! 
ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + + + +/* ============================ */ +/** Block level API */ +/* ============================ */ + +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). 
+ But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. 
+*/ + +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif +/**** ended inlining ../zstd.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: huf.h ****/ +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#endif +/**** start inlining xxhash.h ****/ +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (c) 2012-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. 
+ +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bits version, named XXH64, is available since r35. +It offers much better speed, but for 64-bits applications only. +Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + + +/* **************************** +* Definitions +******************************/ +/**** skipping file: zstd_deps.h ****/ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + +/* **************************** +* API modifier +******************************/ +/** XXH_PRIVATE_API +* This is useful if you want to include xxhash functions in `static` mode +* in order to inline them, and remove their symbol from the public list. +* Methodology : +* #define XXH_PRIVATE_API +* #include "xxhash.h" +* `xxhash.c` is automatically included. +* It's not useful to compile and link it as a separate module anymore. 
+*/ +#ifdef XXH_PRIVATE_API +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif /* XXH_PRIVATE_API */ + +/*!XXH_NAMESPACE, aka Namespace Emulation : + +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, + +you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library +with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). + +Note that no change is required within the calling program as long as it includes `xxhash.h` : +regular symbol name will be automatically translated by this header. 
+*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** +* Simple Hash Functions +******************************/ +typedef unsigned int XXH32_hash_t; +typedef unsigned long long XXH64_hash_t; + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); 
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*! +XXH32() : + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ + + +/* **************************** +* Streaming Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ + +/*! State allocation, compatible with dynamic libraries */ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + + +/* hash streaming */ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/* +These functions generate the xxHash of an input provided in multiple segments. +Note that, for small input, they are slower than single-call functions, due to state management. 
+For small input, prefer `XXH32()` and `XXH64()` . + +XXH state must first be allocated, using XXH*_createState() . + +Start a new hash by initializing state with a seed, using XXH*_reset(). + +Then, feed the hash state by calling XXH*_update() as many times as necessary. +Obviously, input must be allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, a hash value can be produced anytime, by using XXH*_digest(). +This function returns the nn-bits hash as an int or long long. + +It's still possible to continue inserting input into the hash state after a digest, +and generate some new hashes later on, by calling again XXH*_digest(). + +When done, free XXH state space if it was allocated dynamically. +*/ + + +/* ************************** +* Utils +****************************/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ +# define restrict /* disable restrict */ +#endif + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); + + +/* ************************** +* Canonical representation +****************************/ +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. +* The canonical representation uses human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. 
+*/ +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +#endif /* XXHASH_H_5627135585666179 */ + + + +/* ================================================================================================ + This section contains definitions which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + They shall only be used with static linking. + Never use these definitions in association with dynamic linking ! +=================================================================================================== */ +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) +#define XXH_STATIC_H_3543687687345 + +/* These definitions are only meant to allow allocation of XXH state + statically, on stack, or in a struct for example. + Do not use members directly. 
*/ + + struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned memsize; + unsigned reserved; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH32_state_t */ + + struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned memsize; + unsigned reserved[2]; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH64_state_t */ + + +# ifdef XXH_PRIVATE_API +/**** start inlining xxhash.c ****/ +/* + * xxHash - Fast Hash algorithm + * Copyright (c) 2012-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash homepage: http://www.xxhash.com + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). 
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ + defined(__ICCARM__) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. 
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for ZSTD_malloc(), ZSTD_free() */ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); } +static void XXH_free (void* p) { ZSTD_free(p); } +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); } + +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +#endif +/**** skipping file: xxhash.h ****/ + + +/* ************************************* +* Compiler Specific Options +***************************************/ +/**** skipping file: compiler.h ****/ + + +/* ************************************* +* Basic Types +***************************************/ +/**** skipping file: mem.h ****/ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. 
Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +#if defined(__ICCARM__) +# include +# define XXH_rotl32(x,r) __ROR(x,(32 - r)) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + 
((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? 
*(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************* +* Constants +***************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; + +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ************************** +* Utils +****************************/ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + + +/* *************************** +* Simple Hash Functions +*****************************/ + +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + +FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, 
XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p+4<=bEnd) { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_CREATESTATE_STATIC(state); + XXH32_reset(state, seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return 
XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, 
good for code maintenance, but unfortunately slow for small inputs */ + XXH64_CREATESTATE_STATIC(state); + XXH64_reset(state, seed); + XXH64_update(state, input, len); + return XXH64_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +/* ************************************************** +* Advanced Hash Functions +****************************************************/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + + +/*** Hash feed ***/ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + 
return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); 
p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem32; + const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; + U32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } + + h32 += state->total_len_32; + + while (p+4<=bEnd) { + h32 += XXH_readLE32(p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} + + + +/* **** XXH64 **** */ + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) 
return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + if (input != NULL) { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + } + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem64; + const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; + U64 h64; + + if 
(state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 + PRIME64_5; + } + + h64 += (U64) state->total_len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + +/* ************************** +* Canonical representation +****************************/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. 
+*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} +/**** ended inlining xxhash.c ****/ +# endif + +#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ + + +#if defined (__cplusplus) +} +#endif +/**** ended inlining xxhash.h ****/ + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ---- static assert (debug) --- */ +#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError + + +/*-************************************* +* shared macros +***************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) + +/** + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR +void _force_has_format_string(const char *format, ...) 
{ + (void)format; +} + +/** + * Ignore: this is an internal helper. + * + * We want to force this function invocation to be syntactically correct, but + * we don't want to force runtime evaluation of its arguments. + */ +#define _FORCE_HAS_FORMAT_STRING(...) \ + if (0) { \ + _force_has_format_string(__VA_ARGS__); \ + } + +/** + * Return the specified error if the condition evaluates to true. + * + * In debug modes, prints additional information. + * In order to do that (particularly, printing the conditional that failed), + * this can't just wrap RETURN_ERROR(). + */ +#define RETURN_ERROR_IF(cond, err, ...) \ + if (cond) { \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } + +/** + * Unconditionally return the specified error. + * + * In debug modes, prints additional information. + */ +#define RETURN_ERROR(err, ...) \ + do { \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } while(0); + +/** + * If the provided expression evaluates to an error code, returns that error code. + * + * In debug modes, prints additional information. + */ +#define FORWARD_IF_ERROR(err, ...) 
\ + do { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while(0); + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1<<12) + +#define ZSTD_REP_NUM 3 /* number of repcodes */ +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; + +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ + +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; + +#define ZSTD_FRAMECHECKSUMSIZE 4 + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 + +#define Litbits 8 +#define MaxLit ((1<= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); + + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { + /* Handle short offset copies. 
*/ + do { + COPY8(op, ip) + } while (op < oend); + } else { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first COPY16() call because the copy length is + * almost certain to be short, so the branches have different + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. + */ +#ifdef __aarch64__ + do { + COPY16(op, ip); + } + while (op < oend); +#else + ZSTD_copy16(op, ip); + if (16 >= length) return; + op += 16; + ip += 16; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); +#endif + } +} + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) { + ZSTD_memcpy(dst, src, length); + } + return length; +} + +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 + +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 + +/* Controls whether the input/output buffer is buffered or stable. 
*/ +typedef enum { + ZSTD_bm_buffered = 0, /* Buffer the input/output */ + ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ +} ZSTD_bufferMode_e; + + +/*-******************************************* +* Private declarations +*********************************************/ +typedef struct seqDef_s { + U32 offset; /* Offset code of the sequence */ + U16 litLength; + U16 matchLength; +} seqDef; + +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; /* ptr to end of sequences */ + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; + + /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. + */ + U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */ + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ +} seqStore_t; + +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; + +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->matchLength + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthID == 1) { + seqLen.litLength += 0xFFFF; + } + if (seqStore->longLengthID == 2) { + seqLen.matchLength += 0xFFFF; + } + } + return seqLen; +} + +/** + * Contains the compressed frame size and an upper-bound for the decompressed frame size. 
+ * Note: before using `compressedSize`, check for errors using ZSTD_isError(). + * similarly, before using `decompressedBound`, check for errors using: + * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` + */ +typedef struct { + size_t compressedSize; + unsigned long long decompressedBound; +} ZSTD_frameSizeInfo; /* decompress & legacy */ + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ + +/* custom memory allocation functions */ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); + + +MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val)^31; +# else + unsigned long r=0; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; +# endif + } +} + + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! 
*/ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ + + +typedef struct { + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; /* declared here for decompress and fullbench */ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr); + +/*! ZSTD_decodeSeqHeaders() : + * decode sequence header from src */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CCOMMON_H_MODULE */ +/**** ended inlining zstd_internal.h ****/ +/**** start inlining pool.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef POOL_H +#define POOL_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +/**** skipping file: zstd_deps.h ****/ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ +/**** skipping file: ../zstd.h ****/ + +typedef struct POOL_ctx_s POOL_ctx; + +/*! POOL_create() : + * Create a thread pool with at most `numThreads` threads. + * `numThreads` must be at least 1. + * The maximum number of queued jobs before blocking is `queueSize`. + * @return : POOL_ctx pointer on success, else NULL. 
+*/ +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem); + +/*! POOL_free() : + * Free a thread pool returned by POOL_create(). + */ +void POOL_free(POOL_ctx* ctx); + +/*! POOL_resize() : + * Expands or shrinks pool's number of threads. + * This is more efficient than releasing + creating a new context, + * since it tries to preserve and re-use existing threads. + * `numThreads` must be at least 1. + * @return : 0 when resize was successful, + * !0 (typically 1) if there is an error. + * note : only numThreads can be resized, queueSize remains unchanged. + */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads); + +/*! POOL_sizeof() : + * @return threadpool memory usage + * note : compatible with NULL (returns 0 in this case) + */ +size_t POOL_sizeof(POOL_ctx* ctx); + +/*! POOL_function : + * The function type that can be added to a thread pool. + */ +typedef void (*POOL_function)(void*); + +/*! POOL_add() : + * Add the job `function(opaque)` to the thread pool. `ctx` must be valid. + * Possibly blocks until there is room in the queue. + * Note : The function may be executed asynchronously, + * therefore, `opaque` must live until function has been completed. + */ +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); + + +/*! POOL_tryAdd() : + * Add the job `function(opaque)` to thread pool _if_ a worker is available. + * Returns immediately even if not (does not block). + * @return : 1 if successful, 0 if not. 
+ */ +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); + + +#if defined (__cplusplus) +} +#endif + +#endif +/**** ended inlining pool.h ****/ + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +#ifdef ZSTD_MULTITHREAD + +/**** skipping file: threading.h ****/ + +/* A job is a function and an opaque argument */ +typedef struct POOL_job_s { + POOL_function function; + void *opaque; +} POOL_job; + +struct POOL_ctx_s { + ZSTD_customMem customMem; + /* Keep track of the threads */ + ZSTD_pthread_t* threads; + size_t threadCapacity; + size_t threadLimit; + + /* The queue is a circular buffer */ + POOL_job *queue; + size_t queueHead; + size_t queueTail; + size_t queueSize; + + /* The number of threads working on jobs */ + size_t numThreadsBusy; + /* Indicates if the queue is empty */ + int queueEmpty; + + /* The mutex protects the queue */ + ZSTD_pthread_mutex_t queueMutex; + /* Condition variable for pushers to wait on when the queue is full */ + ZSTD_pthread_cond_t queuePushCond; + /* Condition variables for poppers to wait on when the queue is empty */ + ZSTD_pthread_cond_t queuePopCond; + /* Indicates if the queue is shutting down */ + int shutdown; +}; + +/* POOL_thread() : + * Work thread for the thread pool. + * Waits for jobs and executes them. + * @returns : NULL on failure else non-null. 
+ */ +static void* POOL_thread(void* opaque) { + POOL_ctx* const ctx = (POOL_ctx*)opaque; + if (!ctx) { return NULL; } + for (;;) { + /* Lock the mutex and wait for a non-empty queue or until shutdown */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + + while ( ctx->queueEmpty + || (ctx->numThreadsBusy >= ctx->threadLimit) ) { + if (ctx->shutdown) { + /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit), + * a few threads will be shutdown while !queueEmpty, + * but enough threads will remain active to finish the queue */ + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return opaque; + } + ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); + } + /* Pop a job off the queue */ + { POOL_job const job = ctx->queue[ctx->queueHead]; + ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; + ctx->numThreadsBusy++; + ctx->queueEmpty = ctx->queueHead == ctx->queueTail; + /* Unlock the mutex, signal a pusher, and run the job */ + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + + job.function(job.opaque); + + /* If the intended queue size was 0, signal after finishing job */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->numThreadsBusy--; + if (ctx->queueSize == 1) { + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + } + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + } + } /* for (;;) */ + assert(0); /* Unreachable */ +} + +POOL_ctx* ZSTD_createThreadPool(size_t numThreads) { + return POOL_create (numThreads, 0); +} + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem) { + POOL_ctx* ctx; + /* Check parameters */ + if (!numThreads) { return NULL; } + /* Allocate the context and zero initialize */ + ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem); + if (!ctx) { return NULL; } + /* Initialize the job 
queue. + * It needs one extra space since one space is wasted to differentiate + * empty and full queues. + */ + ctx->queueSize = queueSize + 1; + ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem); + ctx->queueHead = 0; + ctx->queueTail = 0; + ctx->numThreadsBusy = 0; + ctx->queueEmpty = 1; + { + int error = 0; + error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); + if (error) { POOL_free(ctx); return NULL; } + } + ctx->shutdown = 0; + /* Allocate space for the thread handles */ + ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem); + ctx->threadCapacity = 0; + ctx->customMem = customMem; + /* Check for errors */ + if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } + /* Initialize the threads */ + { size_t i; + for (i = 0; i < numThreads; ++i) { + if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = i; + POOL_free(ctx); + return NULL; + } } + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + } + return ctx; +} + +/*! POOL_join() : + Shutdown the queue, wake any sleeping threads, and join all of the threads. 
+*/ +static void POOL_join(POOL_ctx* ctx) { + /* Shut down the queue */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->shutdown = 1; + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + /* Wake up sleeping threads */ + ZSTD_pthread_cond_broadcast(&ctx->queuePushCond); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + /* Join all of the threads */ + { size_t i; + for (i = 0; i < ctx->threadCapacity; ++i) { + ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */ + } } +} + +void POOL_free(POOL_ctx *ctx) { + if (!ctx) { return; } + POOL_join(ctx); + ZSTD_pthread_mutex_destroy(&ctx->queueMutex); + ZSTD_pthread_cond_destroy(&ctx->queuePushCond); + ZSTD_pthread_cond_destroy(&ctx->queuePopCond); + ZSTD_customFree(ctx->queue, ctx->customMem); + ZSTD_customFree(ctx->threads, ctx->customMem); + ZSTD_customFree(ctx, ctx->customMem); +} + +void ZSTD_freeThreadPool (ZSTD_threadPool* pool) { + POOL_free (pool); +} + +size_t POOL_sizeof(POOL_ctx *ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + return sizeof(*ctx) + + ctx->queueSize * sizeof(POOL_job) + + ctx->threadCapacity * sizeof(ZSTD_pthread_t); +} + + +/* @return : 0 on success, 1 on error */ +static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) +{ + if (numThreads <= ctx->threadCapacity) { + if (!numThreads) return 1; + ctx->threadLimit = numThreads; + return 0; + } + /* numThreads > threadCapacity */ + { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); + if (!threadPool) return 1; + /* replace existing thread pool */ + ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); + ZSTD_customFree(ctx->threads, ctx->customMem); + ctx->threads = threadPool; + /* Initialize additional threads */ + { size_t threadId; + for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) { + if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity 
= threadId; + return 1; + } } + } } + /* successfully expanded */ + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + return 0; +} + +/* @return : 0 on success, 1 on error */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads) +{ + int result; + if (ctx==NULL) return 1; + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + result = POOL_resize_internal(ctx, numThreads); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return result; +} + +/** + * Returns 1 if the queue is full and 0 otherwise. + * + * When queueSize is 1 (pool was created with an intended queueSize of 0), + * then a queue is empty if there is a thread free _and_ no job is waiting. + */ +static int isQueueFull(POOL_ctx const* ctx) { + if (ctx->queueSize > 1) { + return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize); + } else { + return (ctx->numThreadsBusy == ctx->threadLimit) || + !ctx->queueEmpty; + } +} + + +static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) +{ + POOL_job const job = {function, opaque}; + assert(ctx != NULL); + if (ctx->shutdown) return; + + ctx->queueEmpty = 0; + ctx->queue[ctx->queueTail] = job; + ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize; + ZSTD_pthread_cond_signal(&ctx->queuePopCond); +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + /* Wait until there is space in the queue for the new job */ + while (isQueueFull(ctx) && (!ctx->shutdown)) { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + if (isQueueFull(ctx)) { + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 0; + } + POOL_add_internal(ctx, function, 
opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 1; +} + + +#else /* ZSTD_MULTITHREAD not defined */ + +/* ========================== */ +/* No multi-threading support */ +/* ========================== */ + + +/* We don't need any data, but if it is empty, malloc() might return NULL. */ +struct POOL_ctx_s { + int dummy; +}; +static POOL_ctx g_poolCtx; + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) { + (void)numThreads; + (void)queueSize; + (void)customMem; + return &g_poolCtx; +} + +void POOL_free(POOL_ctx* ctx) { + assert(!ctx || ctx == &g_poolCtx); + (void)ctx; +} + +int POOL_resize(POOL_ctx* ctx, size_t numThreads) { + (void)ctx; (void)numThreads; + return 0; +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); +} + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); + return 1; +} + +size_t POOL_sizeof(POOL_ctx* ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + assert(ctx == &g_poolCtx); + return sizeof(*ctx); +} + +#endif /* ZSTD_MULTITHREAD */ +/**** ended inlining common/pool.c ****/ +/**** start inlining common/zstd_common.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + + +/*-************************************* +* Dependencies +***************************************/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +/**** skipping file: error_private.h ****/ +/**** skipping file: zstd_internal.h ****/ + + +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } + +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +#undef ZSTD_isError /* defined within zstd_internal.h */ +/*! ZSTD_isError() : + * tells if a return value is an error code + * symbol is required for external callers */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + +/*! ZSTD_getErrorName() : + * provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + +/*! ZSTD_getError() : + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } + +/*! 
ZSTD_getErrorString() : + * provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } + + + +/*=************************************************************** +* Custom allocator +****************************************************************/ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return ZSTD_malloc(size); +} + +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + ZSTD_memset(ptr, 0, size); + return ptr; + } + return ZSTD_calloc(1, size); +} + +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + ZSTD_free(ptr); + } +} +/**** ended inlining common/zstd_common.c ****/ + +/**** start inlining compress/fse_compress.c ****/ +/* ****************************************************************** + * FSE : Finite State Entropy encoder + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* ************************************************************** +* Includes +****************************************************************/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/debug.h ****/ +/**** start inlining hist.h ****/ +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* --- dependencies --- */ +/**** skipping file: ../common/zstd_deps.h ****/ + + +/* --- simple histogram functions --- */ + +/*! HIST_count(): + * Provides the precise count of each byte within a table 'count'. + * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + * Updates *maxSymbolValuePtr with actual largest symbol value detected. + * @return : count of the most frequent symbol (which isn't identified). + * or an error code, which can be tested using HIST_isError(). + * note : if return == srcSize, there is only one symbol. 
+ */ +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ + + +/* --- advanced histogram functions --- */ + +#define HIST_WKSP_SIZE_U32 1024 +#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) +/** HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * Benefit is this function will use very little stack space. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/** HIST_countFast() : + * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. + * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` + */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +/** HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/*! HIST_count_simple() : + * Same as HIST_countFast(), this function is unsafe, + * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. + * It is also a bit slower for large inputs. + * However, it does not need any additional memory (not even on stack). + * @return : count of the most frequent symbol. + * Note this function doesn't produce any error (i.e. it must succeed). 
+ */ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); +/**** ended inlining hist.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +/**** skipping file: ../common/error_private.h ****/ +#define ZSTD_DEPS_NEED_MALLOC +#define ZSTD_DEPS_NEED_MATH64 +/**** skipping file: ../common/zstd_deps.h ****/ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). 
+ * wkspSize should be sized to handle worst case situation, which is `1<>1 : 1) ; + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + U32 const step = FSE_TABLESTEP(tableSize); + + U32* cumul = (U32*)workSpace; + FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2)); + + U32 highThreshold = tableSize-1; + + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */ + if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge); + /* CTable header */ + tableU16[-2] = (U16) tableLog; + tableU16[-1] = (U16) maxSymbolValue; + assert(tableLog < 16); /* required for threshold strategy to work */ + + /* For explanations on how to distribute symbol values over the table : + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + #ifdef __clang_analyzer__ + ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ + #endif + + /* symbol start positions */ + { U32 u; + cumul[0] = 0; + for (u=1; u <= maxSymbolValue+1; u++) { + if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ + cumul[u] = cumul[u-1] + 1; + tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); + } else { + cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + } } + cumul[maxSymbolValue+1] = tableSize+1; + } + + /* Spread symbols */ + { U32 position = 0; + U32 symbol; + for (symbol=0; symbol<=maxSymbolValue; symbol++) { + int nbOccurrences; + int const freq = normalizedCounter[symbol]; + for (nbOccurrences=0; nbOccurrences highThreshold) + position = (position + step) & tableMask; /* Low proba area */ + } } + + assert(position==0); /* Must have initialized all positions */ + } + + /* Build table */ + { U32 u; for (u=0; u> 3) + 3; + return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? 
use default */ +} + +static size_t +FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE* const ostart = (BYTE*) header; + BYTE* out = ostart; + BYTE* const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream = 0; + int bitCount = 0; + unsigned symbol = 0; + unsigned const alphabetSize = maxSymbolValue + 1; + int previousIs0 = 0; + + /* Table Size */ + bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; + + /* Init */ + remaining = tableSize+1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = tableLog+1; + + while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ + if (previousIs0) { + unsigned start = symbol; + while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; + if (symbol == alphabetSize) break; /* incorrect distribution */ + while (symbol >= start+24) { + start+=24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend-2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE) bitStream; + out[1] = (BYTE)(bitStream>>8); + out+=2; + bitStream>>=16; + } + while (symbol >= start+3) { + start+=3; + bitStream += 3 << bitCount; + bitCount += 2; + } + bitStream += (symbol-start) << bitCount; + bitCount += 2; + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + { int count = normalizedCounter[symbol++]; + int const max = (2*threshold-1) - remaining; + remaining -= count < 0 ? -count : count; + count++; /* +1 for extra accuracy */ + if (count>=threshold) + count += max; /* [0..max[ [max..threshold[ (...) 
[threshold+max 2*threshold[ */ + bitStream += count << bitCount; + bitCount += nbBits; + bitCount -= (count>=1; } + } + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + + if (remaining != 1) + return ERROR(GENERIC); /* incorrect normalized distribution */ + assert(symbol <= alphabetSize); + + /* flush remaining bitStream */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out+= (bitCount+7) /8; + + return (out-ostart); +} + + +size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + + if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); + + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); +} + + +/*-************************************************************** +* FSE Compression Code +****************************************************************/ + +FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return (FSE_CTable*)ZSTD_malloc(size); +} + +void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); } + +/* provides the minimum logSize to safely represent a distribution */ +static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) 
+{ + U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + return minBits; +} + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) +{ + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 tableLog = maxTableLog; + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; + if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; + return tableLog; +} + +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); +} + +/* Secondary normalization method. + To be used when primary method fails. 
*/ + +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount) +{ + short const NOT_YET_ASSIGNED = -2; + U32 s; + U32 distributed = 0; + U32 ToDistribute; + + /* Init */ + U32 const lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == 0) { + norm[s]=0; + continue; + } + if (count[s] <= lowThreshold) { + norm[s] = lowProbCount; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + + norm[s]=NOT_YET_ASSIGNED; + } + ToDistribute = (1 << tableLog) - distributed; + + if (ToDistribute == 0) + return 0; + + if ((total / ToDistribute) > lowOne) { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s=0; s<=maxSymbolValue; s++) { + if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } } + ToDistribute = (1 << tableLog) - distributed; + } + + if (distributed == maxSymbolValue+1) { + /* all values are pretty poor; + probably incompressible data (should have already been detected); + find max, then give all remaining points to max */ + U32 maxV = 0, maxC = 0; + for (s=0; s<=maxSymbolValue; s++) + if (count[s] > maxC) { maxV=s; maxC=count[s]; } + norm[maxV] += (short)ToDistribute; + return 0; + } + + if (total == 0) { + /* all of the symbols were low enough for the lowOne or lowThreshold */ + for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) + if (norm[s] > 0) { ToDistribute--; norm[s]++; } + return 0; + } + + { U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog-1)) - 1; + U64 const rStep = ZSTD_div64((((U64)1<> vStepLog); + U32 const sEnd = (U32)(end >> vStepLog); + U32 const weight = sEnd - sStart; + if (weight < 1) + return ERROR(GENERIC); + norm[s] = 
(short)weight; + tmpTotal = end; + } } } + + return 0; +} + +size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t total, + unsigned maxSymbolValue, unsigned useLowProbCount) +{ + /* Sanity checks */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ + + { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + short const lowProbCount = useLowProbCount ? -1 : 1; + U64 const scale = 62 - tableLog; + U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! */ + U64 const vStep = 1ULL<<(scale-20); + int stillToDistribute = 1<> tableLog); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == total) return 0; /* rle special case */ + if (count[s] == 0) { normalizedCounter[s]=0; continue; } + if (count[s] <= lowThreshold) { + normalizedCounter[s] = lowProbCount; + stillToDistribute--; + } else { + short proba = (short)((count[s]*step) >> scale); + if (proba<8) { + U64 restToBeat = vStep * rtbTable[proba]; + proba += (count[s]*step) - ((U64)proba< restToBeat; + } + if (proba > largestP) { largestP=proba; largest=s; } + normalizedCounter[s] = proba; + stillToDistribute -= proba; + } } + if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { + /* corner case, need another normalization method */ + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount); + if (FSE_isError(errorCode)) return errorCode; + } + else normalizedCounter[largest] += (short)stillToDistribute; + } + +#if 0 + { /* Print Table (debug) */ + U32 s; + U32 nTotal = 0; + for (s=0; s<=maxSymbolValue; s++) + RAWLOG(2, 
"%3i: %4i \n", s, normalizedCounter[s]); + for (s=0; s<=maxSymbolValue; s++) + nTotal += abs(normalizedCounter[s]); + if (nTotal != (1U<>1); /* assumption : tableLog >= 1 */ + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* header */ + tableU16[-2] = (U16) nbBits; + tableU16[-1] = (U16) maxSymbolValue; + + /* Build table */ + for (s=0; s FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } + + /* 2 or 4 encoding per loop */ + while ( ip>istart ) { + + FSE_encodeSymbol(&bitC, &CState2, *--ip); + + if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ + FSE_FLUSHBITS(&bitC); + + FSE_encodeSymbol(&bitC, &CState1, *--ip); + + if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + } + + FSE_FLUSHBITS(&bitC); + } + + FSE_flushCState(&bitC, &CState2); + FSE_flushCState(&bitC, &CState1); + return BIT_closeCStream(&bitC); +} + +size_t FSE_compress_usingCTable (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct) +{ + unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + + if (fast) + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); + else + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); +} + + +size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). 
+ * `wkspSize` size must be `(1< not compressible */ + if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ + } + + tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) ); + + /* Write table description header */ + { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); + op += nc_err; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + /* check compressibility */ + if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; + + return op-ostart; +} + +typedef struct { + FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + union { + U32 hist_wksp[HIST_WKSP_SIZE_U32]; + BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; + } workspace; +} fseWkspMax_t; + +size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) +{ + fseWkspMax_t scratchBuffer; + DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); +} + +size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); +} +#endif + +#endif /* FSE_COMMONDEFS_ONLY */ +/**** ended inlining compress/fse_compress.c ****/ +/**** start inlining 
compress/hist.c ****/ +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* --- dependencies --- */ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/debug.h ****/ +/**** skipping file: ../common/error_private.h ****/ +/**** skipping file: hist.h ****/ + + +/* --- Error management --- */ +unsigned HIST_isError(size_t code) { return ERR_isError(code); } + +/*-************************************************************** + * Histogram functions + ****************************************************************/ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned largestCount=0; + + ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); + if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } + + while (ip largestCount) largestCount = count[s]; + } + + return largestCount; +} + +typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; + +/* HIST_count_parallel_wksp() : + * store histogram into 4 intermediate tables, recombined at the end. 
+ * this design makes better use of OoO cpus, + * and is noticeably faster when some values are heavily repeated. + * But it needs some additional workspace for intermediate tables. + * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32. + * @return : largest histogram frequency, + * or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */ +static size_t HIST_count_parallel_wksp( + unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + HIST_checkInput_e check, + U32* const workSpace) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count); + unsigned max=0; + U32* const Counting1 = workSpace; + U32* const Counting2 = Counting1 + 256; + U32* const Counting3 = Counting2 + 256; + U32* const Counting4 = Counting3 + 256; + + /* safety checks */ + assert(*maxSymbolValuePtr <= 255); + if (!sourceSize) { + ZSTD_memset(count, 0, countSize); + *maxSymbolValuePtr = 0; + return 0; + } + ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned)); + + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + } + ip-=4; + } + + /* finish last symbols */ + while (ip max) max = Counting1[s]; + } } + + { unsigned maxSymbolValue = 255; + while 
(!Counting1[maxSymbolValue]) maxSymbolValue--; + if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall); + *maxSymbolValuePtr = maxSymbolValue; + ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */ + } + return (size_t)max; +} + +/* HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if (sourceSize < 1500) /* heuristic threshold */ + return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); +} + +/* HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. 
+ * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + if (*maxSymbolValuePtr < 255) + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); + *maxSymbolValuePtr = 255; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); +} + +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); +} +#endif +/**** ended inlining compress/hist.c ****/ +/**** start inlining compress/huf_compress.c ****/ +/* ****************************************************************** + * Huffman encoder, part of New Generation Entropy library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. 
+ * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +/**** skipping file: hist.h ****/ +#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/error_private.h ****/ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError +#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Utils +****************************************************************/ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return 
FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); +} + + +/* ******************************************************* +* HUF : Huffman block compression +*********************************************************/ +/* HUF_compressWeights() : + * Same as FSE_compress(), but dedicated to huff0's weights compression. + * The use case needs much less stack memory. + * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. + */ +#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 +static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + unsigned maxSymbolValue = HUF_TABLELOG_MAX; + U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; + + FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; + U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)]; + + unsigned count[HUF_TABLELOG_MAX+1]; + S16 norm[HUF_TABLELOG_MAX+1]; + + /* init conditions */ + if (wtSize <= 1) return 0; /* Not compressible */ + + /* Scan input and build symbol stats */ + { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */ + if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ + if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ + } + + tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) ); + + /* Write table description header */ + { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) ); + op += hSize; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); + { CHECK_V_F(cSize, 
FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + return (size_t)(op-ostart); +} + + +/*! HUF_writeCTable() : + `CTable` : Huffman tree to save, using huf representation. + @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, + const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) +{ + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; + BYTE* op = (BYTE*)dst; + U32 n; + + /* check conditions */ + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + + /* convert to weight */ + bitsToWeight[0] = 0; + for (n=1; n1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ + op[0] = (BYTE)hSize; + return hSize+1; + } } + + /* write raw values as 4-bits (max : 15) */ + if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ + if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ + op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); + huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ + for (n=0; n 0); + + /* check result */ + if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); + + /* Prepare base value per rank */ + { U32 n, nextRankStart = 0; + for (n=1; n<=tableLog; n++) { + U32 curr = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = curr; + } } + + /* fill nbBits */ + { U32 n; for (n=0; nn=tableLog+1 */ + U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; + { U32 n; for (n=0; n0; n--) { /* start at n=tablelog <-> w=1 */ + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + 
} } + /* assign value within rank, symbol order */ + { U32 n; for (n=0; n maxNbBits to be maxNbBits. Then it adjusts + * the tree to so that it is a valid canonical Huffman tree. + * + * @pre The sum of the ranks of each symbol == 2^largestBits, + * where largestBits == huffNode[lastNonNull].nbBits. + * @post The sum of the ranks of each symbol == 2^largestBits, + * where largestBits is the return value <= maxNbBits. + * + * @param huffNode The Huffman tree modified in place to enforce maxNbBits. + * @param lastNonNull The symbol with the lowest count in the Huffman tree. + * @param maxNbBits The maximum allowed number of bits, which the Huffman tree + * may not respect. After this function the Huffman tree will + * respect maxNbBits. + * @return The maximum number of bits of the Huffman tree after adjustment, + * necessarily no more than maxNbBits. + */ +static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) +{ + const U32 largestBits = huffNode[lastNonNull].nbBits; + /* early exit : no elt > maxNbBits, so the tree is already valid. */ + if (largestBits <= maxNbBits) return largestBits; + + /* there are several too large elements (at least >= 2) */ + { int totalCost = 0; + const U32 baseCost = 1 << (largestBits - maxNbBits); + int n = (int)lastNonNull; + + /* Adjust any ranks > maxNbBits to maxNbBits. + * Compute totalCost, which is how far the sum of the ranks is + * we are over 2^largestBits after adjust the offending ranks. 
+ */ + while (huffNode[n].nbBits > maxNbBits) { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); + huffNode[n].nbBits = (BYTE)maxNbBits; + n--; + } + /* n stops at huffNode[n].nbBits <= maxNbBits */ + assert(huffNode[n].nbBits <= maxNbBits); + /* n end at index of smallest symbol using < maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) --n; + + /* renorm totalCost from 2^largestBits to 2^maxNbBits + * note : totalCost is necessarily a multiple of baseCost */ + assert((totalCost & (baseCost - 1)) == 0); + totalCost >>= (largestBits - maxNbBits); + assert(totalCost > 0); + + /* repay normalized cost */ + { U32 const noSymbol = 0xF0F0F0F0; + U32 rankLast[HUF_TABLELOG_MAX+2]; + + /* Get pos of last (smallest = lowest cum. count) symbol per rank */ + ZSTD_memset(rankLast, 0xF0, sizeof(rankLast)); + { U32 currentNbBits = maxNbBits; + int pos; + for (pos=n ; pos >= 0; pos--) { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ + rankLast[maxNbBits-currentNbBits] = (U32)pos; + } } + + while (totalCost > 0) { + /* Try to reduce the next power of 2 above totalCost because we + * gain back half the rank. + */ + U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; + for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { + U32 const highPos = rankLast[nBitsToDecrease]; + U32 const lowPos = rankLast[nBitsToDecrease-1]; + if (highPos == noSymbol) continue; + /* Decrease highPos if no symbols of lowPos or if it is + * not cheaper to remove 2 lowPos than highPos. + */ + if (lowPos == noSymbol) break; + { U32 const highTotal = huffNode[highPos].count; + U32 const lowTotal = 2 * huffNode[lowPos].count; + if (highTotal <= lowTotal) break; + } } + /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) 
*/ + assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1); + /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) + nBitsToDecrease++; + assert(rankLast[nBitsToDecrease] != noSymbol); + /* Increase the number of bits to gain back half the rank cost. */ + totalCost -= 1 << (nBitsToDecrease-1); + huffNode[rankLast[nBitsToDecrease]].nbBits++; + + /* Fix up the new rank. + * If the new rank was empty, this symbol is now its smallest. + * Otherwise, this symbol will be the largest in the new rank so no adjustment. + */ + if (rankLast[nBitsToDecrease-1] == noSymbol) + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; + /* Fix up the old rank. + * If the symbol was at position 0, meaning it was the highest weight symbol in the tree, + * it must be the only symbol in its rank, so the old rank now has no symbols. + * Otherwise, since the Huffman nodes are sorted by count, the previous position is now + * the smallest node in the rank. If the previous position belongs to a different rank, + * then the rank is now empty. + */ + if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ + rankLast[nBitsToDecrease] = noSymbol; + else { + rankLast[nBitsToDecrease]--; + if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ + } + } /* while (totalCost > 0) */ + + /* If we've removed too much weight, then we have to add it back. + * To avoid overshooting again, we only adjust the smallest rank. + * We take the largest nodes from the lowest rank 0 and move them + * to rank 1. There's guaranteed to be enough rank 0 symbols because + * TODO. + */ + while (totalCost < 0) { /* Sometimes, cost correction overshoot */ + /* special case : no rank 1 symbol (using maxNbBits-1); + * let's create one from largest rank 0 (using maxNbBits). 
+ */ + if (rankLast[1] == noSymbol) { + while (huffNode[n].nbBits == maxNbBits) n--; + huffNode[n+1].nbBits--; + assert(n >= 0); + rankLast[1] = (U32)(n+1); + totalCost++; + continue; + } + huffNode[ rankLast[1] + 1 ].nbBits--; + rankLast[1]++; + totalCost ++; + } + } /* repay normalized cost */ + } /* there are several too large elements (at least >= 2) */ + + return maxNbBits; +} + +typedef struct { + U32 base; + U32 curr; +} rankPos; + +typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; + +#define RANK_POSITION_TABLE_SIZE 32 + +typedef struct { + huffNodeTable huffNodeTbl; + rankPos rankPosition[RANK_POSITION_TABLE_SIZE]; +} HUF_buildCTable_wksp_tables; + +/** + * HUF_sort(): + * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order. + * + * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled. + * Must have (maxSymbolValue + 1) entries. + * @param[in] count Histogram of the symbols. + * @param[in] maxSymbolValue Maximum symbol value. + * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries. + */ +static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) +{ + int n; + int const maxSymbolValue1 = (int)maxSymbolValue + 1; + + /* Compute base and set curr to base. + * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1. + * Then 2^lowerRank <= count[n]+1 <= 2^rank. + * We attribute each symbol to lowerRank's base value, because we want to know where + * each rank begins in the output, so for rank R we want to count ranks R+1 and above. 
+ */ + ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); + for (n = 0; n < maxSymbolValue1; ++n) { + U32 lowerRank = BIT_highbit32(count[n] + 1); + rankPosition[lowerRank].base++; + } + assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0); + for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) { + rankPosition[n-1].base += rankPosition[n].base; + rankPosition[n-1].curr = rankPosition[n-1].base; + } + /* Sort */ + for (n = 0; n < maxSymbolValue1; ++n) { + U32 const c = count[n]; + U32 const r = BIT_highbit32(c+1) + 1; + U32 pos = rankPosition[r].curr++; + /* Insert into the correct position in the rank. + * We have at most 256 symbols, so this insertion should be fine. + */ + while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { + huffNode[pos] = huffNode[pos-1]; + pos--; + } + huffNode[pos].count = c; + huffNode[pos].byte = (BYTE)n; + } +} + + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). + */ +#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) + +/* HUF_buildTree(): + * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree. + * + * @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array. + * @param maxSymbolValue The maximum symbol value. + * @return The smallest node in the Huffman tree (by count). 
+ */ +static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue) +{ + nodeElt* const huffNode0 = huffNode - 1; + int nonNullRank; + int lowS, lowN; + int nodeNb = STARTNODE; + int n, nodeRoot; + /* init for parents */ + nonNullRank = (int)maxSymbolValue; + while(huffNode[nonNullRank].count == 0) nonNullRank--; + lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; + huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; + huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb; + nodeNb++; lowS-=2; + for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); + huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ + + /* create parents */ + while (nodeNb <= nodeRoot) { + int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; + huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb; + nodeNb++; + } + + /* distribute weights (unlimited tree height) */ + huffNode[nodeRoot].nbBits = 0; + for (n=nodeRoot-1; n>=STARTNODE; n--) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + for (n=0; n<=nonNullRank; n++) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + + return nonNullRank; +} + +/** + * HUF_buildCTableFromTree(): + * Build the CTable given the Huffman tree in huffNode. + * + * @param[out] CTable The output Huffman CTable. + * @param huffNode The Huffman tree. + * @param nonNullRank The last and smallest node in the Huffman tree. + * @param maxSymbolValue The maximum symbol value. + * @param maxNbBits The exact maximum number of bits used in the Huffman tree. 
+ */ +static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits) +{ + /* fill result into ctable (val, nbBits) */ + int n; + U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + int const alphabetSize = (int)(maxSymbolValue + 1); + for (n=0; n<=nonNullRank; n++) + nbPerRank[huffNode[n].nbBits]++; + /* determine starting value per rank */ + { U16 min = 0; + for (n=(int)maxNbBits; n>0; n--) { + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + for (n=0; nhuffNodeTbl; + nodeElt* const huffNode = huffNode0+1; + int nonNullRank; + + /* safety checks */ + if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) + return ERROR(workSpace_tooSmall); + if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) + return ERROR(maxSymbolValue_tooLarge); + ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable)); + + /* sort, decreasing order */ + HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); + + /* build tree */ + nonNullRank = HUF_buildTree(huffNode, maxSymbolValue); + + /* enforce maxTableLog */ + maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); + if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ + + HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits); + + return maxNbBits; +} + +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) +{ + size_t nbBits = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + nbBits += CTable[s].nbBits * count[s]; + } + return nbBits >> 3; +} + +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { + int bad = 0; + int s; + for (s = 0; s <= 
(int)maxSymbolValue; ++s) { + bad |= (count[s] != 0) & (CTable[s].nbBits == 0); + } + return !bad; +} + +size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } + +FORCE_INLINE_TEMPLATE void +HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} + +#define HUF_FLUSHBITS(s) BIT_flushBits(s) + +#define HUF_FLUSHBITS_1(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) + +#define HUF_FLUSHBITS_2(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) + +FORCE_INLINE_TEMPLATE size_t +HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t n; + BIT_CStream_t bitC; + + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); + if (HUF_isError(initErr)) return 0; } + + n = srcSize & ~3; /* join to mod 4 */ + switch (srcSize & 3) + { + case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); + HUF_FLUSHBITS_2(&bitC); + /* fall-through */ + case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); + HUF_FLUSHBITS_1(&bitC); + /* fall-through */ + case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); + HUF_FLUSHBITS(&bitC); + /* fall-through */ + case 0 : /* fall-through */ + default: break; + } + + for (; n>0; n-=4) { /* note : n&3==0 at this stage */ + HUF_encodeSymbol(&bitC, ip[n- 1], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 2], CTable); + HUF_FLUSHBITS_2(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 3], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 4], CTable); + HUF_FLUSHBITS(&bitC); + } + + return BIT_closeCStream(&bitC); +} + +#if 
DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + if (bmi2) { + return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); + } + return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); +} + +#else + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + (void)bmi2; + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +#endif + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + + +static size_t +HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, int bmi2) +{ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving possible : too small input */ + op += 6; /* jumpTable */ + + assert(op <= 
oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + assert(ip <= iend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); + if (cSize==0) return 0; + op += cSize; + } + + return (size_t)(op-ostart); +} + +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + +typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; + +static size_t HUF_compressCTable_internal( + BYTE* const ostart, BYTE* op, BYTE* const oend, + const void* src, size_t srcSize, + HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) +{ + size_t const cSize = (nbStreams==HUF_singleStream) ? 
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : + HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); + if (HUF_isError(cSize)) { return cSize; } + if (cSize==0) { return 0; } /* uncompressible */ + op += cSize; + /* check compressibility */ + assert(op >= ostart); + if ((size_t)(op-ostart) >= srcSize-1) { return 0; } + return (size_t)(op-ostart); +} + +typedef struct { + unsigned count[HUF_SYMBOLVALUE_MAX + 1]; + HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; + HUF_buildCTable_wksp_tables buildCTable_wksp; +} HUF_compress_tables_t; + +/* HUF_compress_internal() : + * `workSpace_align4` must be aligned on 4-bytes boundaries, + * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */ +static size_t +HUF_compress_internal (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + HUF_nbStreams_e nbStreams, + void* workSpace_align4, size_t wkspSize, + HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, + const int bmi2) +{ + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); + assert(((size_t)workSpace_align4 & 3) == 0); /* must be aligned on 4-bytes boundaries */ + + /* checks & inits */ + if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); + if (!srcSize) return 0; /* Uncompressed */ + if (!dstSize) return 0; /* cannot fit anything within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; + + /* 
Heuristic : If old table is valid, use it for small inputs */ + if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + + /* Scan input and build symbol stats */ + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) ); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ + } + + /* Check validity of previous table */ + if ( repeat + && *repeat == HUF_repeat_check + && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { + *repeat = HUF_repeat_none; + } + /* Heuristic : use existing table for small inputs */ + if (preferRepeat && repeat && *repeat != HUF_repeat_none) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + + /* Build Huffman Tree */ + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, + maxSymbolValue, huffLog, + &table->buildCTable_wksp, sizeof(table->buildCTable_wksp)); + CHECK_F(maxBits); + huffLog = (U32)maxBits; + /* Zero unused symbols in CTable, so we can check it for validity */ + ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0, + sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); + } + + /* Write table description header */ + { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); + /* Check if using previous huffman table is beneficial */ + if (repeat && *repeat != HUF_repeat_none) { + size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); + size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); + if (oldSize <= hSize + newSize 
|| hSize + 12 >= srcSize) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } } + + /* Use the new huffman table */ + if (hSize + 12ul >= srcSize) { return 0; } + op += hSize; + if (repeat) { *repeat = HUF_repeat_none; } + if (oldHufTable) + ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ + } + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, table->CTable, bmi2); +} + + +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +size_t HUF_compress1X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, hufTable, + repeat, preferRepeat, bmi2); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * provide workspace to generate compression tables */ +size_t HUF_compress4X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. 
+ * re-use an existing huffman compression table */ +size_t HUF_compress4X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + hufTable, repeat, preferRepeat, bmi2); +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/** HUF_buildCTable() : + * @return : maxNbBits + * Note : count is used before tree is written, so they can safely overlap + */ +size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) +{ + HUF_buildCTable_wksp_tables workspace; + return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace)); +} + +size_t HUF_compress1X (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + +size_t HUF_compress2 (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + +size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); +} +#endif +/**** ended inlining compress/huf_compress.c ****/ +/**** start inlining compress/zstd_compress_literals.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +/**** start inlining zstd_compress_literals.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_LITERALS_H +#define ZSTD_COMPRESS_LITERALS_H + +/**** start inlining zstd_compress_internal.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This header contains definitions + * that shall **only** be used by modules within lib/compress. + */ + +#ifndef ZSTD_COMPRESS_H +#define ZSTD_COMPRESS_H + +/*-************************************* +* Dependencies +***************************************/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** start inlining ../common/zstd_trace.h ****/ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_TRACE_H +#define ZSTD_TRACE_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include + +/* weak symbol support */ +#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \ + !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \ + !defined(__CYGWIN__) +# define ZSTD_HAVE_WEAK_SYMBOLS 1 +#else +# define ZSTD_HAVE_WEAK_SYMBOLS 0 +#endif +#if ZSTD_HAVE_WEAK_SYMBOLS +# define ZSTD_WEAK_ATTR __attribute__((__weak__)) +#else +# define ZSTD_WEAK_ATTR +#endif + +/* Only enable tracing when weak symbols are available. */ +#ifndef ZSTD_TRACE +# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS +#endif + +#if ZSTD_TRACE + +struct ZSTD_CCtx_s; +struct ZSTD_DCtx_s; +struct ZSTD_CCtx_params_s; + +typedef struct { + /** + * ZSTD_VERSION_NUMBER + * + * This is guaranteed to be the first member of ZSTD_trace. + * Otherwise, this struct is not stable between versions. If + * the version number does not match your expectation, you + * should not interpret the rest of the struct. + */ + unsigned version; + /** + * Non-zero if streaming (de)compression is used. + */ + unsigned streaming; + /** + * The dictionary ID. + */ + unsigned dictionaryID; + /** + * Is the dictionary cold? + * Only set on decompression. + */ + unsigned dictionaryIsCold; + /** + * The dictionary size or zero if no dictionary. + */ + size_t dictionarySize; + /** + * The uncompressed size of the data. + */ + size_t uncompressedSize; + /** + * The compressed size of the data. + */ + size_t compressedSize; + /** + * The fully resolved CCtx parameters (NULL on decompression). + */ + struct ZSTD_CCtx_params_s const* params; + /** + * The ZSTD_CCtx pointer (NULL on decompression). 
+ */ + struct ZSTD_CCtx_s const* cctx; + /** + * The ZSTD_DCtx pointer (NULL on compression). + */ + struct ZSTD_DCtx_s const* dctx; +} ZSTD_Trace; + +/** + * A tracing context. It must be 0 when tracing is disabled. + * Otherwise, any non-zero value returned by a tracing begin() + * function is presented to any subsequent calls to end(). + * + * Any non-zero value is treated as tracing is enabled and not + * interpreted by the library. + * + * Two possible uses are: + * * A timestamp for when the begin() function was called. + * * A unique key identifying the (de)compression, like the + * address of the [dc]ctx pointer if you need to track + * more information than just a timestamp. + */ +typedef unsigned long long ZSTD_TraceCtx; + +/** + * Trace the beginning of a compression call. + * @param cctx The dctx pointer for the compression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx); + +/** + * Trace the end of a compression call. + * @param ctx The return value of ZSTD_trace_compress_begin(). + * @param trace The zstd tracing info. + */ +void ZSTD_trace_compress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +/** + * Trace the beginning of a decompression call. + * @param dctx The dctx pointer for the decompression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_TraceCtx ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx); + +/** + * Trace the end of a decompression call. + * @param ctx The return value of ZSTD_trace_decompress_begin(). + * @param trace The zstd tracing info. 
+ */ +void ZSTD_trace_decompress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +#endif /* ZSTD_TRACE */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_TRACE_H */ +/**** ended inlining ../common/zstd_trace.h ****/ +/**** start inlining zstd_cwksp.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CWKSP_H +#define ZSTD_CWKSP_H + +/*-************************************* +* Dependencies +***************************************/ +/**** skipping file: ../common/zstd_internal.h ****/ + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ + +/* Since the workspace is effectively its own little malloc implementation / + * arena, when we run under ASAN, we should similarly insert redzones between + * each internal element of the workspace, so ASAN will catch overruns that + * reach outside an object but that stay inside the workspace. + * + * This defines the size of that redzone. + */ +#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE +#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 +#endif + +/*-************************************* +* Structures +***************************************/ +typedef enum { + ZSTD_cwksp_alloc_objects, + ZSTD_cwksp_alloc_buffers, + ZSTD_cwksp_alloc_aligned +} ZSTD_cwksp_alloc_phase_e; + +/** + * Used to describe whether the workspace is statically allocated (and will not + * necessarily ever be freed), or if it's dynamically allocated and we can + * expect a well-formed caller to free this. 
+ */ +typedef enum { + ZSTD_cwksp_dynamic_alloc, + ZSTD_cwksp_static_alloc +} ZSTD_cwksp_static_alloc_e; + +/** + * Zstd fits all its internal datastructures into a single continuous buffer, + * so that it only needs to perform a single OS allocation (or so that a buffer + * can be provided to it and it can perform no allocations at all). This buffer + * is called the workspace. + * + * Several optimizations complicate that process of allocating memory ranges + * from this workspace for each internal datastructure: + * + * - These different internal datastructures have different setup requirements: + * + * - The static objects need to be cleared once and can then be trivially + * reused for each compression. + * + * - Various buffers don't need to be initialized at all--they are always + * written into before they're read. + * + * - The matchstate tables have a unique requirement that they don't need + * their memory to be totally cleared, but they do need the memory to have + * some bound, i.e., a guarantee that all values in the memory they've been + * allocated is less than some maximum value (which is the starting value + * for the indices that they will then use for compression). When this + * guarantee is provided to them, they can use the memory without any setup + * work. When it can't, they have to clear the area. + * + * - These buffers also have different alignment requirements. + * + * - We would like to reuse the objects in the workspace for multiple + * compressions without having to perform any expensive reallocation or + * reinitialization work. + * + * - We would like to be able to efficiently reuse the workspace across + * multiple compressions **even when the compression parameters change** and + * we need to resize some of the objects (where possible). + * + * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp + * abstraction was created. It works as follows: + * + * Workspace Layout: + * + * [ ... workspace ... 
] + * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] + * + * The various objects that live in the workspace are divided into the + * following categories, and are allocated separately: + * + * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, + * so that literally everything fits in a single buffer. Note: if present, + * this must be the first object in the workspace, since ZSTD_customFree{CCtx, + * CDict}() rely on a pointer comparison to see whether one or two frees are + * required. + * + * - Fixed size objects: these are fixed-size, fixed-count objects that are + * nonetheless "dynamically" allocated in the workspace so that we can + * control how they're initialized separately from the broader ZSTD_CCtx. + * Examples: + * - Entropy Workspace + * - 2 x ZSTD_compressedBlockState_t + * - CDict dictionary contents + * + * - Tables: these are any of several different datastructures (hash tables, + * chain tables, binary trees) that all respect a common format: they are + * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). + * Their sizes depend on the cparams. + * + * - Aligned: these buffers are used for various purposes that require 4 byte + * alignment, but don't require any initialization before they're used. + * + * - Buffers: these buffers are used for various purposes that don't require + * any alignment or initialization before they're used. This means they can + * be moved around at no cost for a new compression. + * + * Allocating Memory: + * + * The various types of objects must be allocated in order, so they can be + * correctly packed into the workspace buffer. That order is: + * + * 1. Objects + * 2. Buffers + * 3. Aligned + * 4. Tables + * + * Attempts to reserve objects of different types out of order will fail. 
+ */ +typedef struct { + void* workspace; + void* workspaceEnd; + + void* objectEnd; + void* tableEnd; + void* tableValidEnd; + void* allocStart; + + BYTE allocFailed; + int workspaceOversizedDuration; + ZSTD_cwksp_alloc_phase_e phase; + ZSTD_cwksp_static_alloc_e isStatic; +} ZSTD_cwksp; + +/*-************************************* +* Functions +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); + +MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { + (void)ws; + assert(ws->workspace <= ws->objectEnd); + assert(ws->objectEnd <= ws->tableEnd); + assert(ws->objectEnd <= ws->tableValidEnd); + assert(ws->tableEnd <= ws->allocStart); + assert(ws->tableValidEnd <= ws->allocStart); + assert(ws->allocStart <= ws->workspaceEnd); +} + +/** + * Align must be a power of 2. + */ +MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { + size_t const mask = align - 1; + assert((align & mask) == 0); + return (size + mask) & ~mask; +} + +/** + * Use this to determine how much space in the workspace we will consume to + * allocate this object. (Normally it should be exactly the size of the object, + * but under special conditions, like ASAN, where we pad each object, it might + * be larger.) + * + * Since tables aren't currently redzoned, you don't need to call through this + * to figure out how much space you need for the matchState tables. Everything + * else is though. 
+ */ +MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { + if (size == 0) + return 0; +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#else + return size; +#endif +} + +MEM_STATIC void ZSTD_cwksp_internal_advance_phase( + ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { + assert(phase >= ws->phase); + if (phase > ws->phase) { + if (ws->phase < ZSTD_cwksp_alloc_buffers && + phase >= ZSTD_cwksp_alloc_buffers) { + ws->tableValidEnd = ws->objectEnd; + } + if (ws->phase < ZSTD_cwksp_alloc_aligned && + phase >= ZSTD_cwksp_alloc_aligned) { + /* If unaligned allocations down from a too-large top have left us + * unaligned, we need to realign our alloc ptr. Technically, this + * can consume space that is unaccounted for in the neededSpace + * calculation. However, I believe this can only happen when the + * workspace is too large, and specifically when it is too large + * by a larger margin than the space that will be consumed. */ + /* TODO: cleaner, compiler warning friendly way to do this??? */ + ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); + if (ws->allocStart < ws->tableValidEnd) { + ws->tableValidEnd = ws->allocStart; + } + } + ws->phase = phase; + } +} + +/** + * Returns whether this object/buffer/etc was allocated in this workspace. + */ +MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { + return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); +} + +/** + * Internal function. Do not use directly. 
+ */ +MEM_STATIC void* ZSTD_cwksp_reserve_internal( + ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { + void* alloc; + void* bottom = ws->tableEnd; + ZSTD_cwksp_internal_advance_phase(ws, phase); + alloc = (BYTE *)ws->allocStart - bytes; + + if (bytes == 0) + return NULL; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif + + DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(alloc >= bottom); + if (alloc < bottom) { + DEBUGLOG(4, "cwksp: alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + if (alloc < ws->tableValidEnd) { + ws->tableValidEnd = alloc; + } + ws->allocStart = alloc; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on + * either size. */ + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } +#endif + + return alloc; +} + +/** + * Reserves and returns unaligned memory. + */ +MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { + return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); +} + +/** + * Reserves and returns memory sized on and aligned on sizeof(unsigned). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { + assert((bytes & (sizeof(U32)-1)) == 0); + return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); +} + +/** + * Aligned on sizeof(unsigned). These buffers have the special property that + * their values remain constrained, allowing us to re-use them without + * memset()-ing them. 
+ */ +MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { + const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; + void* alloc = ws->tableEnd; + void* end = (BYTE *)alloc + bytes; + void* top = ws->allocStart; + + DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + assert((bytes & (sizeof(U32)-1)) == 0); + ZSTD_cwksp_internal_advance_phase(ws, phase); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(end <= top); + if (end > top) { + DEBUGLOG(4, "cwksp: table alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->tableEnd = end; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } +#endif + + return alloc; +} + +/** + * Aligned on sizeof(void*). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { + size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); + void* alloc = ws->objectEnd; + void* end = (BYTE*)alloc + roundedBytes; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif + + DEBUGLOG(5, + "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", + alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); + assert(((size_t)alloc & (sizeof(void*)-1)) == 0); + assert((bytes & (sizeof(void*)-1)) == 0); + ZSTD_cwksp_assert_internal_consistency(ws); + /* we must be in the first phase, no advance is possible */ + if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { + DEBUGLOG(4, "cwksp: object alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->objectEnd = end; + ws->tableEnd = end; + ws->tableValidEnd = end; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so 
there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on + * either size. */ + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } +#endif + + return alloc; +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. */ + { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + assert(__msan_test_shadow(ws->objectEnd, size) == -1); + __msan_poison(ws->objectEnd, size); + } +#endif + + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + ws->tableValidEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ws->tableValidEnd = ws->tableEnd; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Zero the part of the allocated tables not already marked clean. + */ +MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); + } + ZSTD_cwksp_mark_tables_clean(ws); +} + +/** + * Invalidates table allocations. + * All other allocations remain valid. 
+ */ +MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing tables!"); + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* We don't do this when the workspace is statically allocated, because + * when that is the case, we have no capability to hook into the end of the + * workspace's lifecycle to unpoison the memory. + */ + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } +#endif + + ws->tableEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Invalidates all buffer, aligned, and table allocations. + * Object allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing!"); + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the context re-use logic is sound, and that we don't + * access stuff that this compression hasn't initialized, we re-"poison" + * the workspace (or at least the non-static, non-table parts of it) + * every time we start a new compression. */ + { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd; + __msan_poison(ws->tableValidEnd, size); + } +#endif + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* We don't do this when the workspace is statically allocated, because + * when that is the case, we have no capability to hook into the end of the + * workspace's lifecycle to unpoison the memory. 
+ */ + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } +#endif + + ws->tableEnd = ws->objectEnd; + ws->allocStart = ws->workspaceEnd; + ws->allocFailed = 0; + if (ws->phase > ZSTD_cwksp_alloc_buffers) { + ws->phase = ZSTD_cwksp_alloc_buffers; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * The provided workspace takes ownership of the buffer [start, start+size). + * Any existing values in the workspace are ignored (the previously managed + * buffer, if present, must be separately freed). + */ +MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) { + DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); + assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ + ws->workspace = start; + ws->workspaceEnd = (BYTE*)start + size; + ws->objectEnd = ws->workspace; + ws->tableValidEnd = ws->objectEnd; + ws->phase = ZSTD_cwksp_alloc_objects; + ws->isStatic = isStatic; + ZSTD_cwksp_clear(ws); + ws->workspaceOversizedDuration = 0; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { + void* workspace = ZSTD_customMalloc(size, customMem); + DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); + RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); + ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc); + return 0; +} + +MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { + void *ptr = ws->workspace; + DEBUGLOG(4, "cwksp: freeing workspace"); + ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp)); + ZSTD_customFree(ptr, customMem); +} + +/** + * Moves the management of a workspace from one cwksp to another. The src cwksp + * is left in an invalid state (src must be re-init()'ed before it's used again). 
+ */ +MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { + *dst = *src; + ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); +} + +MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); +} + +MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) + + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); +} + +MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { + return ws->allocFailed; +} + +/*-************************************* +* Functions Checking Free Space +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); +} + +MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; +} + +MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_available( + ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); +} + +MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) + && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( + ZSTD_cwksp* ws, size_t additionalNeededSpace) { + if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { + ws->workspaceOversizedDuration++; + } else { + ws->workspaceOversizedDuration = 0; + } +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CWKSP_H */ +/**** ended inlining zstd_cwksp.h ****/ +#ifdef ZSTD_MULTITHREAD +/**** start inlining zstdmt_compress.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + #ifndef ZSTDMT_COMPRESS_H + #define ZSTDMT_COMPRESS_H + + #if defined (__cplusplus) + extern "C" { + #endif + + +/* Note : This is an internal API. + * These APIs used to be exposed with ZSTDLIB_API, + * because it used to be the only way to invoke MT compression. + * Now, you must use ZSTD_compress2 and ZSTD_compressStream2() instead. + * + * This API requires ZSTD_MULTITHREAD to be defined during compilation, + * otherwise ZSTDMT_createCCtx*() will fail. + */ + +/* === Dependencies === */ +/**** skipping file: ../common/zstd_deps.h ****/ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ +/**** skipping file: ../zstd.h ****/ + + +/* === Constants === */ +#ifndef ZSTDMT_NBWORKERS_MAX +# define ZSTDMT_NBWORKERS_MAX 200 +#endif +#ifndef ZSTDMT_JOBSIZE_MIN +# define ZSTDMT_JOBSIZE_MIN (1 MB) +#endif +#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30) +#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB)) + + +/* ======================================================== + * === Private interface, for use by ZSTD_compress.c === + * === Not exposed in libzstd. Never invoke directly === + * ======================================================== */ + +/* === Memory management === */ +typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; +/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ +ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, + ZSTD_customMem cMem, + ZSTD_threadPool *pool); +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); + +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); + +/* === Streaming functions === */ + +size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx); + +/*! 
ZSTDMT_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); + +/*! ZSTDMT_compressStream_generic() : + * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream() + * depending on flush directive. + * @return : minimum amount of data still to be flushed + * 0 if fully flushed + * or an error code + * note : needs to be init using any ZSTD_initCStream*() variant */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + /*! ZSTDMT_toFlushNow() + * Tell how many bytes are ready to be flushed immediately. + * Probe the oldest active job (not yet entirely flushed) and check its output buffer. + * If return 0, it means there is no active job, + * or, it means oldest job is still active, but everything produced has been flushed so far, + * therefore flushing is limited by speed of oldest job. */ +size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx); + +/*! ZSTDMT_updateCParams_whileCompressing() : + * Updates only a selected set of compression parameters, to remain compatible with current frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams); + +/*! ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. 
+ */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDMT_COMPRESS_H */ +/**** ended inlining zstdmt_compress.h ****/ +#endif + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ +#define kSearchStrength 8 +#define HASH_READ_SIZE 8 +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". + It could be confused for a real successor at index "1", if sorted as larger than its predecessor. + It's not a big deal though : candidate will just be sorted again. + Additionally, candidate position 1 will be lost. + But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. + This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ + + +/*-************************************* +* Context memory management +***************************************/ +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; + +typedef struct ZSTD_prefixDict_s { + const void* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; +} ZSTD_prefixDict; + +typedef struct { + void* dictBuffer; + void const* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; + ZSTD_CDict* cdict; +} ZSTD_localDict; + +typedef struct { + HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_repeat repeatMode; +} ZSTD_hufCTables_t; + +typedef struct { + FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + FSE_repeat offcode_repeatMode; + 
FSE_repeat matchlength_repeatMode; + FSE_repeat litlength_repeatMode; +} ZSTD_fseCTables_t; + +typedef struct { + ZSTD_hufCTables_t huf; + ZSTD_fseCTables_t fse; +} ZSTD_entropyCTables_t; + +typedef struct { + U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */ + U32 len; /* Raw length of match */ +} ZSTD_match_t; + +typedef struct { + U32 offset; /* Offset of sequence */ + U32 litLength; /* Length of literals prior to match */ + U32 matchLength; /* Raw length of match */ +} rawSeq; + +typedef struct { + rawSeq* seq; /* The start of the sequences */ + size_t pos; /* The index in seq where reading stopped. pos <= size. */ + size_t posInSequence; /* The position within the sequence at seq[pos] where reading + stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */ + size_t size; /* The number of sequences. <= capacity. */ + size_t capacity; /* The capacity starting from `seq` pointer */ +} rawSeqStore_t; + +UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; + +typedef struct { + int price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_optimal_t; + +typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; + +typedef struct { + /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ + unsigned* litFreq; /* table of literals statistics, of size 256 */ + unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ + unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ + unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ + + U32 litSum; /* nb of literals */ + U32 litLengthSum; /* nb of litLength codes */ + U32 matchLengthSum; /* nb of matchLength codes */ + U32 offCodeSum; /* nb of offset codes */ + U32 
litSumBasePrice; /* to compare to log2(litfreq) */ + U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ + U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ + U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ + ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ + const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ + ZSTD_literalCompressionMode_e literalCompressionMode; +} optState_t; + +typedef struct { + ZSTD_entropyCTables_t entropy; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_compressedBlockState_t; + +typedef struct { + BYTE const* nextSrc; /* next block here to continue on current prefix */ + BYTE const* base; /* All regular indexes relative to this position */ + BYTE const* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more valid data */ +} ZSTD_window_t; + +typedef struct ZSTD_matchState_t ZSTD_matchState_t; +struct ZSTD_matchState_t { + ZSTD_window_t window; /* State for window round buffer management */ + U32 loadedDictEnd; /* index of end of dictionary, within context's referential. + * When loadedDictEnd != 0, a dictionary is in use, and still valid. + * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. + * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). + * When dict referential is copied into active context (i.e. not attached), + * loadedDictEnd == dictSize, since referential starts from zero. + */ + U32 nextToUpdate; /* index from which to continue table update */ + U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ + U32* hashTable; + U32* hashTable3; + U32* chainTable; + int dedicatedDictSearch; /* Indicates whether this matchState is using the + * dedicated dictionary search structure. 
+ */ + optState_t opt; /* optimal parser state */ + const ZSTD_matchState_t* dictMatchState; + ZSTD_compressionParameters cParams; + const rawSeqStore_t* ldmSeqStore; +}; + +typedef struct { + ZSTD_compressedBlockState_t* prevCBlock; + ZSTD_compressedBlockState_t* nextCBlock; + ZSTD_matchState_t matchState; +} ZSTD_blockState_t; + +typedef struct { + U32 offset; + U32 checksum; +} ldmEntry_t; + +typedef struct { + BYTE const* split; + U32 hash; + U32 checksum; + ldmEntry_t* bucket; +} ldmMatchCandidate_t; + +#define LDM_BATCH_SIZE 64 + +typedef struct { + ZSTD_window_t window; /* State for the window round buffer management */ + ldmEntry_t* hashTable; + U32 loadedDictEnd; + BYTE* bucketOffsets; /* Next position in bucket to insert entry */ + size_t splitIndices[LDM_BATCH_SIZE]; + ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE]; +} ldmState_t; + +typedef struct { + U32 enableLdm; /* 1 if enable long distance matching */ + U32 hashLog; /* Log size of hashTable */ + U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ + U32 minMatchLength; /* Minimum match length */ + U32 hashRateLog; /* Log number of entries to skip */ + U32 windowLog; /* Window log for the LDM */ +} ldmParams_t; + +typedef struct { + int collectSequences; + ZSTD_Sequence* seqStart; + size_t seqIndex; + size_t maxSequences; +} SeqCollector; + +struct ZSTD_CCtx_params_s { + ZSTD_format_e format; + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; + + int compressionLevel; + int forceWindow; /* force back-references to respect limit of + * 1< 63) ? 
ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
}

/* ZSTD_MLcode() :
 * note : mlBase = matchLength - MINMATCH;
 * because it's the format it's stored in seqStore->sequences */
MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
{
    /* Direct lookup for small values; for mlBase > 127 the code is derived
     * from the highest set bit plus a fixed delta (same scheme as LL codes). */
    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
    static const U32 ML_deltaCode = 36;
    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}

/* History of the 3 most recent offsets (zstd repcode mechanism). */
typedef struct repcodes_s {
    U32 rep[3];
} repcodes_t;

/* Compute the repcode history that results from emitting a sequence with
 * offset code `offset` when the previous history is `rep`.
 * `ll0` is 1 when the sequence has litLength == 0 (shifts repcode meaning). */
MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
{
    repcodes_t newReps;
    if (offset >= ZSTD_REP_NUM) {  /* full offset */
        /* A real offset pushes the history down one slot. */
        newReps.rep[2] = rep[1];
        newReps.rep[1] = rep[0];
        newReps.rep[0] = offset - ZSTD_REP_MOVE;
    } else {   /* repcode */
        U32 const repCode = offset + ll0;
        if (repCode > 0) {  /* note : if repCode==0, no change */
            /* repCode == ZSTD_REP_NUM encodes rep[0]-1 (only reachable when ll0). */
            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            newReps.rep[2] = (repCode >= 2) ?
rep[1] : rep[2];
            newReps.rep[1] = rep[0];
            newReps.rep[0] = currentOffset;
        } else {   /* repCode == 0 */
            ZSTD_memcpy(&newReps, rep, sizeof(newReps));
        }
    }
    return newReps;
}

/* ZSTD_cParam_withinBounds:
 * @return 1 if value is within cParam bounds,
 * 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    /* An errored bounds query is treated as "out of bounds". */
    if (ZSTD_isError(bounds.error)) return 0;
    if (value < bounds.lowerBound) return 0;
    if (value > bounds.upperBound) return 0;
    return 1;
}

/* ZSTD_noCompressBlock() :
 * Writes uncompressed block to dst buffer from given src.
 * Returns the size of the block */
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{
    /* 24-bit little-endian block header:
     * bit 0 = lastBlock flag, bits 1-2 = block type (bt_raw), bits 3.. = srcSize. */
    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
                    dstSize_tooSmall, "dst buf too small for uncompressed block");
    MEM_writeLE24(dst, cBlockHeader24);
    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
    return ZSTD_blockHeaderSize + srcSize;
}

/* Writes an RLE block: the 3-byte header (type bt_rle, size srcSize) followed
 * by the single byte to repeat. Output is always exactly 4 bytes. */
MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{
    BYTE* const op = (BYTE*)dst;
    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
    MEM_writeLE24(op, cBlockHeader);
    op[3] = src;
    return 4;
}


/* ZSTD_minGain() :
 * minimum compression required
 * to generate a compress block or a compressed literals section.
 * note : use same formula for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
    U32 const minlog = (strat>=ZSTD_btultra) ?
(U32)(strat) - 1 : 6;
    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
    /* Require a saving of at least srcSize >> minlog (+2) bytes;
     * stronger strategies (>= btultra) demand proportionally less gain. */
    return (srcSize >> minlog) + 2;
}

/* Decide whether literals compression is disabled for this context.
 * Returns 1 = disabled, 0 = enabled. In auto mode, literals compression is
 * skipped only for the "fast without huffman" configuration
 * (strategy == ZSTD_fast with a non-zero targetLength). */
MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
{
    switch (cctxParams->literalCompressionMode) {
    case ZSTD_lcm_huffman:
        return 0;
    case ZSTD_lcm_uncompressed:
        return 1;
    default:
        assert(0 /* impossible: pre-validated */);
        /* fall-through */
    case ZSTD_lcm_auto:
        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
    }
}

/*! ZSTD_safecopyLiterals() :
 *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
 *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
 *  large copies.
 */
static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
    assert(iend > ilimit_w);
    if (ip <= ilimit_w) {
        /* Fast wildcopy for the portion that is safely overreadable... */
        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
        op += ilimit_w - ip;
        ip = ilimit_w;
    }
    /* ...then an exact byte copy for the tail near the input end. */
    while (ip < iend) *op++ = *ip++;
}

/*! ZSTD_storeSeq() :
 *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
 *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
 *  `mlBase` : matchLength - MINMATCH
 *  Allowed to overread literals up to litLimit.
*/
HINT_INLINE UNUSED_ATTR
void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
{
    /* litLimit_w marks the last position from which a wildcopy may start
     * without reading past litLimit. */
    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
    BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
    }
#endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
    assert(literals + litLength <= litLimit);
    if (litEnd <= litLimit_w) {
        /* Common case we can use wildcopy.
         * First copy 16 bytes, because literals are likely short.
         */
        assert(WILDCOPY_OVERLENGTH >= 16);
        ZSTD_copy16(seqStorePtr->lit, literals);
        if (litLength > 16) {
            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
        }
    } else {
        /* Sequence ends close to litLimit: take the overread-safe path. */
        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
    }
    seqStorePtr->lit += litLength;

    /* literal Length */
    /* Lengths are stored in 16 bits; a larger value is recorded once per
     * block via the longLength side-channel (ID 1 = literal length). */
    if (litLength>0xFFFF) {
        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
        seqStorePtr->longLengthID = 1;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
    seqStorePtr->sequences[0].offset = offCode + 1;

    /* match Length */
    /* Same 16-bit overflow handling as litLength (ID 2 = match length). */
    if (mlBase>0xFFFF) {
        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
        seqStorePtr->longLengthID = 2;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].matchLength = (U16)mlBase;

    seqStorePtr->sequences++;
}


/*-*************************************
*  Match length counter
***************************************/
/* Number of equal leading bytes encoded in `val` (val = XOR of two words),
 * i.e. count of trailing zero bytes on little-endian, leading zero bytes on
 * big-endian. Uses hardware bit-scan where available, De Bruijn fallback
 * otherwise. */
static unsigned ZSTD_NbCommonBytes (size_t val)
{
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
#           if STATIC_BMI2
                return _tzcnt_u64(val) >> 3;
#           else
                unsigned long r = 0;
                return _BitScanForward64( &r, (U64)val ) ?
(unsigned)(r >> 3) : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 + return _lzcnt_u64(val) >> 3; +# else + unsigned long r = 0; + return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + return _BitScanReverse( &r, (unsigned long)val ) ? 
(unsigned)(r >> 3) : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) +{ + const BYTE* const pStart = pIn; + const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + + if (pIn < pInLoopLimit) { + { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (diff) return ZSTD_NbCommonBytes(diff); } + pIn+=sizeof(size_t); pMatch+=sizeof(size_t); + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } } + if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn> (32-h) ; } +MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ + +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static 
size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }

/* Dispatch to the hash routine matching the configured minimum match length.
 * Any `mls` outside 5..8 (including 4) uses the 4-byte hash, exactly as the
 * previous switch's default case did. */
MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
    if (mls == 5) return ZSTD_hash5Ptr(p, hBits);
    if (mls == 6) return ZSTD_hash6Ptr(p, hBits);
    if (mls == 7) return ZSTD_hash7Ptr(p, hBits);
    if (mls == 8) return ZSTD_hash8Ptr(p, hBits);
    return ZSTD_hash4Ptr(p, hBits);   /* mls == 4, and the default */
}

/** ZSTD_ipow() :
 *  Integer exponentiation by squaring : returns base^exponent (mod 2^64). */
static U64 ZSTD_ipow(U64 base, U64 exponent)
{
    U64 result = 1;
    for (; exponent != 0; exponent >>= 1) {
        if (exponent & 1) result *= base;
        base *= base;
    }
    return result;
}

#define ZSTD_ROLL_HASH_CHAR_OFFSET 10

/** ZSTD_rollingHash_append() :
 *  Fold `size` bytes of `buf` into the running hash value `hash`
 *  (polynomial hash with multiplier prime8bytes, each byte biased by
 *  ZSTD_ROLL_HASH_CHAR_OFFSET). */
static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
{
    BYTE const* const bytes = (BYTE const*)buf;
    BYTE const* const end = bytes + size;
    BYTE const* cur;
    for (cur = bytes; cur != end; ++cur) {
        hash = hash * prime8bytes + (*cur + ZSTD_ROLL_HASH_CHAR_OFFSET);
    }
    return hash;
}

/** ZSTD_rollingHash_compute() :
 *  Hash of an entire buffer : an append starting from the empty (0) state. */
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{
    return ZSTD_rollingHash_append(0, buf, size);
}

/** ZSTD_rollingHash_primePower() :
 *  Multiplier needed by ZSTD_rollingHash_rotate() to remove the oldest byte
 *  from a window of `length` bytes : prime8bytes^(length-1). */
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{
    return ZSTD_ipow(prime8bytes, length - 1);
}

/** ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte.
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
    /* Remove the contribution of the oldest byte (scaled by primePower =
     * prime8bytes^(window-1)), then shift and append the new byte. */
    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
    hash *= prime8bytes;
    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
    return hash;
}

/*-*************************************
*  Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX                                                     \
    ( ((U32)-1)                  /* Maximum ending current index */            \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */

/**
 * ZSTD_window_clear():
 * Clears the window containing the history by simply setting it to empty.
 */
MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
{
    /* Raising both limits to the current end index leaves no valid history. */
    size_t const endT = (size_t)(window->nextSrc - window->base);
    U32 const end = (U32)endT;

    window->lowLimit = end;
    window->dictLimit = end;
}

/**
 * ZSTD_window_hasExtDict():
 * Returns non-zero if the window has a non-empty extDict.
 */
MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
{
    /* The extDict is the index range [lowLimit, dictLimit). */
    return window.lowLimit < window.dictLimit;
}

/**
 * ZSTD_matchState_dictMode():
 * Inspects the provided matchState and figures out what dictMode should be
 * passed to the compressor.
 */
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
{
    return ZSTD_window_hasExtDict(ms->window) ?
        ZSTD_extDict :
        ms->dictMatchState != NULL ?
            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
            ZSTD_noDict;
}

/**
 * ZSTD_window_needOverflowCorrection():
 * Returns non-zero if the indices are getting too large and need overflow
 * protection.
+ */ +MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, + void const* srcEnd) +{ + U32 const curr = (U32)((BYTE const*)srcEnd - window.base); + return curr > ZSTD_CURRENT_MAX; +} + +/** + * ZSTD_window_correctOverflow(): + * Reduces the indices to protect from index overflow. + * Returns the correction made to the indices, which must be applied to every + * stored index. + * + * The least significant cycleLog bits of the indices must remain the same, + * which may be 0. Every index up to maxDist in the past must be valid. + * NOTE: (maxDist & cycleMask) must be zero. + */ +MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, + U32 maxDist, void const* src) +{ + /* preemptive overflow correction: + * 1. correction is large enough: + * lowLimit > (3<<29) ==> current > 3<<29 + 1< (3<<29 + 1< (3<<29) - (1< (3<<29) - (1<<30) (NOTE: chainLog <= 30) + * > 1<<29 + * + * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: + * After correction, current is less than (1<base < 1<<32. + * 3. (cctx->lowLimit + 1< 3<<29 + 1<base); + U32 const currentCycle0 = curr & cycleMask; + /* Exclude zero so that newCurrent - maxDist >= 1. */ + U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0; + U32 const newCurrent = currentCycle1 + maxDist; + U32 const correction = curr - newCurrent; + assert((maxDist & cycleMask) == 0); + assert(curr > newCurrent); + /* Loose bound, should be around 1<<29 (see above) */ + assert(correction > 1<<28); + + window->base += correction; + window->dictBase += correction; + if (window->lowLimit <= correction) window->lowLimit = 1; + else window->lowLimit -= correction; + if (window->dictLimit <= correction) window->dictLimit = 1; + else window->dictLimit -= correction; + + /* Ensure we can still reference the full window. */ + assert(newCurrent >= maxDist); + assert(newCurrent - maxDist >= 1); + /* Ensure that lowLimit and dictLimit didn't underflow. 
 */
    assert(window->lowLimit <= newCurrent);
    assert(window->dictLimit <= newCurrent);

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}

/**
 * ZSTD_window_enforceMaxDist():
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
 * It ensures index is valid as long as index >= lowLimit.
 * This must be called before a block compression call.
 *
 * loadedDictEnd is only defined if a dictionary is in use for current compression.
 * As the name implies, loadedDictEnd represents the index at end of dictionary.
 * The value lies within context's referential, it can be directly compared to blockEndIdx.
 *
 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
 * This is because dictionaries are allowed to be referenced fully
 * as long as the last byte of the dictionary is in the window.
 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
 *
 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
 * In dictMatchState mode, lowLimit and dictLimit are the same,
 * and the dictionary is below them.
 * forceWindow and dictMatchState are therefore incompatible.
 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_matchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}

/* Similar to ZSTD_window_enforceMaxDist(),
 * but only invalidates dictionary
 * when input progresses beyond window size.
 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
 *              loadedDictEnd uses same referential as window->base
 *              maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
                       const void* blockEnd,
                       U32 maxDist,
                       U32* loadedDictEndPtr,
                       const ZSTD_matchState_t** dictMatchStatePtr)
{
    assert(loadedDictEndPtr != NULL);
    assert(dictMatchStatePtr != NULL);
    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
        U32 const loadedDictEnd = *loadedDictEndPtr;
        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
        assert(blockEndIdx >= loadedDictEnd);

        if (blockEndIdx > loadedDictEnd + maxDist) {
            /* On reaching window size, dictionaries are invalidated.
             * For simplification, if window size is reached anywhere within next block,
             * the dictionary is invalidated for the full block.
             */
            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
            *loadedDictEndPtr = 0;
            *dictMatchStatePtr = NULL;
        } else {
            if (*loadedDictEndPtr != 0) {
                DEBUGLOG(6, "dictionary considered valid for current block");
    }   }   }
}

/* Reset a window to a deterministic empty state.
 * base/dictBase point at a valid (empty-string) address so pointer
 * arithmetic stays defined even before any input is loaded. */
MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
    ZSTD_memset(window, 0, sizeof(*window));
    window->base = (BYTE const*)"";
    window->dictBase = (BYTE const*)"";
    window->dictLimit = 1;    /* start from 1, so that 1st position is valid */
    window->lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
    window->nextSrc = window->base + 1;   /* see issue #1241 */
}

/**
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.
 * If it is not contiguous, the current prefix becomes the extDict, and we
 * forget about the extDict. Handles overlap of the prefix and extDict.
 * Returns non-zero if the segment is contiguous.
 */
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
                                  void const* src, size_t srcSize)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    if (srcSize == 0)
        return contiguous;
    assert(window->base != NULL);
    assert(window->dictBase != NULL);
    /* Check if blocks follow each other */
    if (src != window->nextSrc) {
        /* not contiguous : the old prefix becomes the new extDict,
         * and the window referential is rebased onto the new segment. */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;
        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        window->base = ip - distanceFromBase;
        /* ms->nextToUpdate = window->dictLimit; */
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    /* note : intentional non-short-circuit `&` between the two range tests */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}

/**
 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.lowLimit;
    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
     */
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}

/**
 * Returns the lowest allowed match index in the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.dictLimit;
    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    /* When computing the lowest prefix index we need to take the dictionary into account to handle
     * the edge case where the dictionary and the source are contiguous in memory.
     */
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}



/* debug functions */
#if (DEBUGLEVEL>=2)

/* Fixed-point approximation of log2(rawStat+1), returned as a double.
 * Used only for debug statistics display. */
MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
    U32 const fp_accuracy = 8;
    U32 const fp_multiplier = (1 << fp_accuracy);
    U32 const newStat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(newStat);
    U32 const BWeight = hb * fp_multiplier;
    U32 const FWeight = (newStat << fp_accuracy) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + fp_accuracy < 31);
    return (double)weight / fp_multiplier;
}

/* display a table content,
 * listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
    unsigned u, sum;
    for (u=0, sum=0; u<=max; u++) sum += table[u];
    DEBUGLOG(2, "total nb elts: %u", sum);
    for (u=0; u<=max; u++) {
        DEBUGLOG(2, "%2u: %5u  (%.2f)",
                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
    }
}

#endif


#if defined (__cplusplus)
}
#endif

/* ===============================================================
 * Shared internal declarations
 * These prototypes may be called from sources not in lib/compress
 * =============================================================== */

/* ZSTD_loadCEntropy() :
 * dict : must point at beginning of a valid zstd dictionary.
+ * return : size of dictionary header (size of magic number + dict ID + entropy tables) + * assumptions : magic number supposed already checked + * and dictSize >= 8 */ +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + const void* const dict, size_t dictSize); + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); + +/* ============================================================== + * Private declarations + * These prototypes shall only be called from within lib/compress + * ============================================================== */ + +/* ZSTD_getCParamsFromCCtxParams() : + * cParams are built depending on compressionLevel, src size hints, + * LDM and manually set compression parameters. + * Note: srcSizeHint == 0 means 0! + */ +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +/*! ZSTD_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); + +void ZSTD_resetSeqStore(seqStore_t* ssPtr); + +/*! ZSTD_getCParamsFromCDict() : + * as the name implies */ +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); + +/* ZSTD_compressBegin_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize); + +/* ZSTD_compress_advanced_internal() : + * Private use only. 
To be called from zstdmt_compress.c. */ +size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + const ZSTD_CCtx_params* params); + + +/* ZSTD_writeLastEmptyBlock() : + * output an empty Block with end-of-frame mark to complete a frame + * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) + * or an error code if `dstCapacity` is too small ( 1 */ +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); + +/** ZSTD_CCtx_trace() : + * Trace the end of a compression call. + */ +void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); + +#endif /* ZSTD_COMPRESS_H */ +/**** ended inlining zstd_compress_internal.h ****/ + + +size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, + const int bmi2); + +#endif /* ZSTD_COMPRESS_LITERALS_H */ +/**** ended inlining zstd_compress_literals.h ****/ + +size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE*)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : 
flSize is {1,2,3} */ + assert(0); + } + + ZSTD_memcpy(ostart + flSize, src, srcSize); + DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); + return srcSize + flSize; +} + +size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE*)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } + + ostart[flSize] = *(const BYTE*)src; + DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); + return flSize+1; +} + +size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, + const int bmi2) +{ + size_t const minGain = ZSTD_minGain(srcSize, strategy); + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + symbolEncodingType_e hType = set_compressed; + size_t cLitSize; + + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", + disableLiteralCompression, (U32)srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + + /* small ? 
don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); + { HUF_repeat repeat = prevHuf->repeatMode; + int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + cLitSize = singleStream ? + HUF_compress1X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : + HUF_compress4X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + if (repeat != HUF_repeat_none) { + /* reused the existing table */ + DEBUGLOG(5, "Reusing previous huffman table"); + hType = set_repeat; + } + } + + if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + if (cLitSize==1) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + } + + if (hType == set_compressed) { + /* using a newly constructed table */ + nextHuf->repeatMode = HUF_repeat_check; + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + 
MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); + return lhSize+cLitSize; +} +/**** ended inlining compress/zstd_compress_literals.c ****/ +/**** start inlining compress/zstd_compress_sequences.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +/**** start inlining zstd_compress_sequences.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_COMPRESS_SEQUENCES_H +#define ZSTD_COMPRESS_SEQUENCES_H + +/**** skipping file: ../common/fse.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ + +typedef enum { + ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy); + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize); + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); + +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max); + +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max); +#endif /* ZSTD_COMPRESS_SEQUENCES_H */ +/**** ended inlining zstd_compress_sequences.h ****/ + +/** + * -log2(x / 256) lookup table for x in [0, 256). 
+ * If x == 0: Return 0 + * Else: Return floor(-log2(x / 256) * 256) + */ +static unsigned const kInverseProbabilityLog256[256] = { + 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, + 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, + 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, + 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, + 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, + 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, + 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, + 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, + 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, + 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, + 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, + 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, + 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, + 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, + 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, + 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, + 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, + 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, + 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, + 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, + 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, + 5, 4, 2, 1, +}; + +static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { + void const* ptr = ctable; + U16 const* u16ptr = (U16 const*)ptr; + U32 const maxSymbolValue = MEM_read16(u16ptr + 1); + return maxSymbolValue; +} + +/** + * Returns true if we should use ncount=-1 else we should + * use ncount=1 for low probability symbols instead. + */ +static unsigned ZSTD_useLowProbCount(size_t const nbSeq) +{ + /* Heuristic: This should cover most blocks <= 16K and + * start to fade out after 16K to about 32K depending on + * comprssibility. 
+ */ + return nbSeq >= 2048; +} + +/** + * Returns the cost in bytes of encoding the normalized count header. + * Returns an error if any of the helper functions return an error. + */ +static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, + size_t const nbSeq, unsigned const FSELog) +{ + BYTE wksp[FSE_NCOUNTBOUND]; + S16 norm[MaxSeq + 1]; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), ""); + return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); +} + +/** + * Returns the cost in bits of encoding the distribution described by count + * using the entropy bound. + */ +static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) +{ + unsigned cost = 0; + unsigned s; + for (s = 0; s <= max; ++s) { + unsigned norm = (unsigned)((256 * count[s]) / total); + if (count[s] != 0 && norm == 0) + norm = 1; + assert(count[s] < total); + cost += count[s] * kInverseProbabilityLog256[norm]; + } + return cost >> 8; +} + +/** + * Returns the cost in bits of encoding the distribution in count using ctable. + * Returns an error if ctable cannot represent all the symbols in count. 
+ */ +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max) +{ + unsigned const kAccuracyLog = 8; + size_t cost = 0; + unsigned s; + FSE_CState_t cstate; + FSE_initCState(&cstate, ctable); + if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { + DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", + ZSTD_getFSEMaxSymbolValue(ctable), max); + return ERROR(GENERIC); + } + for (s = 0; s <= max; ++s) { + unsigned const tableLog = cstate.stateLog; + unsigned const badCost = (tableLog + 1) << kAccuracyLog; + unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); + if (count[s] == 0) + continue; + if (bitCost >= badCost) { + DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); + return ERROR(GENERIC); + } + cost += (size_t)count[s] * bitCost; + } + return cost >> kAccuracyLog; +} + +/** + * Returns the cost in bits of encoding the distribution in count using the + * table described by norm. The max symbol support by norm is assumed >= max. + * norm must be valid for every symbol with non-zero probability in count. + */ +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) +{ + unsigned const shift = 8 - accuracyLog; + size_t cost = 0; + unsigned s; + assert(accuracyLog <= 8); + for (s = 0; s <= max; ++s) { + unsigned const normAcc = (norm[s] != -1) ? 
(unsigned)norm[s] : 1; + unsigned const norm256 = normAcc << shift; + assert(norm256 > 0); + assert(norm256 < 256); + cost += count[s] * kInverseProbabilityLog256[norm256]; + } + return cost >> 8; +} + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy) +{ + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if (mostFrequent == nbSeq) { + *repeatMode = FSE_repeat_none; + if (isDefaultAllowed && nbSeq <= 2) { + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. + */ + DEBUGLOG(5, "Selected set_basic"); + return set_basic; + } + DEBUGLOG(5, "Selected set_rle"); + return set_rle; + } + if (strategy < ZSTD_lazy) { + if (isDefaultAllowed) { + size_t const staticFse_nbSeq_max = 1000; + size_t const mult = 10 - strategy; + size_t const baseLog = 3; + size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ + assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ + assert(mult <= 9 && mult >= 7); + if ( (*repeatMode == FSE_repeat_valid) + && (nbSeq < staticFse_nbSeq_max) ) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ( (nbSeq < dynamicFse_nbSeq_min) + || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. 
When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. + */ + *repeatMode = FSE_repeat_none; + return set_basic; + } + } + } else { + size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); + size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); + size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); + size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); + + if (isDefaultAllowed) { + assert(!ZSTD_isError(basicCost)); + assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); + } + assert(!ZSTD_isError(NCountCost)); + assert(compressedCost < ERROR(maxCode)); + DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", + (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); + if (basicCost <= repeatCost && basicCost <= compressedCost) { + DEBUGLOG(5, "Selected set_basic"); + assert(isDefaultAllowed); + *repeatMode = FSE_repeat_none; + return set_basic; + } + if (repeatCost <= compressedCost) { + DEBUGLOG(5, "Selected set_repeat"); + assert(!ZSTD_isError(repeatCost)); + return set_repeat; + } + assert(compressedCost < basicCost && compressedCost < repeatCost); + } + DEBUGLOG(5, "Selected set_compressed"); + *repeatMode = FSE_repeat_check; + return set_compressed; +} + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize) +{ + BYTE* op = (BYTE*)dst; + const BYTE* const oend = op + dstCapacity; + DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); + + switch 
(type) { + case set_rle: + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), ""); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space"); + *op = codeTable[0]; + return 1; + case set_repeat: + ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize); + return 0; + case set_basic: + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */ + return 0; + case set_compressed: { + S16 norm[MaxSeq + 1]; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + if (count[codeTable[nbSeq-1]] > 1) { + count[codeTable[nbSeq-1]]--; + nbSeq_1--; + } + assert(nbSeq_1 > 1); + assert(entropyWorkspaceSize >= FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog)); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), ""); + { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ + FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), ""); + return NCountSize; + } + } + default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach"); + } +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_encodeSequences_body( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + RETURN_ERROR_IF( + ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), + dstSize_tooSmall, "not enough space remaining"); + DEBUGLOG(6, "available space for bitstream : %i 
(dstCapacity=%u)", + (int)(blockStream.endPtr - blockStream.startPtr), + (unsigned)dstCapacity); + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + if (longOffsets) { + U32 const ofBits = ofCodeTable[nbSeq-1]; + unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); + BIT_flushBits(&blockStream); + } + BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, + ofBits - extraBits); + } else { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + } + BIT_flushBits(&blockStream); + + { size_t n; + for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].litLength, llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); + if (longOffsets) { + unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[n].offset, extraBits); + BIT_flushBits(&blockStream); /* (7)*/ + } + BIT_addBits(&blockStream, sequences[n].offset >> extraBits, + ofBits - extraBits); /* 31 */ + } else { + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + } + BIT_flushBits(&blockStream); /* (7)*/ + DEBUGLOG(7, "remaining space : %i", 
(int)(blockStream.endPtr - blockStream.ptr)); + } } + + DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); + FSE_flushCState(&blockStream, &stateMatchLength); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); + FSE_flushCState(&blockStream, &stateOffsetBits); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); + return streamSize; + } +} + +static size_t +ZSTD_encodeSequences_default( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_encodeSequences_bmi2( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + +#endif + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* 
CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) +{ + DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); +#if DYNAMIC_BMI2 + if (bmi2) { + return ZSTD_encodeSequences_bmi2(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); + } +#endif + (void)bmi2; + return ZSTD_encodeSequences_default(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} +/**** ended inlining compress/zstd_compress_sequences.c ****/ +/**** start inlining compress/zstd_compress_superblock.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +/**** start inlining zstd_compress_superblock.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_COMPRESS_ADVANCED_H +#define ZSTD_COMPRESS_ADVANCED_H + +/*-************************************* +* Dependencies +***************************************/ + +/**** skipping file: ../zstd.h ****/ + +/*-************************************* +* Target Compressed Block Size +***************************************/ + +/* ZSTD_compressSuperBlock() : + * Used to compress a super block when targetCBlockSize is being used. + * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock); + +#endif /* ZSTD_COMPRESS_ADVANCED_H */ +/**** ended inlining zstd_compress_superblock.h ****/ + +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: hist.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_compress_sequences.h ****/ +/**** skipping file: zstd_compress_literals.h ****/ + +/*-************************************* +* Superblock entropy buffer structs +***************************************/ +/** ZSTD_hufCTablesMetadata_t : + * Stores Literals Block Type for a super-block in hType, and + * huffman tree description in hufDesBuffer. + * hufDesSize refers to the size of huffman tree description in bytes. + * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ +typedef struct { + symbolEncodingType_e hType; + BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/** ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. 
+ * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; + size_t fseTablesSize; + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + + +/** ZSTD_buildSuperBlockEntropy_literal() : + * Builds entropy for the super-block literals. + * Stores literals block type (raw, rle, compressed, repeat) and + * huffman description table to hufMetadata. + * @return : size of huffman description table or error code */ +static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + const int disableLiteralsCompression, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); + BYTE* const nodeWksp = countWkspStart + countWkspSize; + const size_t nodeWkspSize = wkspEnd-nodeWksp; + unsigned maxSymbolValue = 255; + unsigned huffLog = HUF_TABLELOG_DEFAULT; + HUF_repeat repeat = prevHuf->repeatMode; + + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralsCompression) { + DEBUGLOG(5, "set_basic - disabled"); + hufMetadata->hType = set_basic; + return 0; + } + + /* small ? 
don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + + /* Build Huffman Tree */ + ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); + 
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } + } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } + } +} + +/** ZSTD_buildSuperBlockEntropy_sequences() : + * Builds entropy for the super-block sequences. + * Stores symbol compression modes and fse table to fseMetadata. + * @return : size of fse tables or error code */ +static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); + BYTE* const cTableWksp = countWkspStart + countWkspSize; + const size_t cTableWkspSize = wkspEnd-cTableWksp; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + + assert(cTableWkspSize >= (1 << MaxFSELog) * 
sizeof(FSE_FUNCTION_TYPE)); + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); + ZSTD_memset(workspace, 0, wkspSize); + + fseMetadata->lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { U32 LLtype; + unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); + if (LLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->llType = (symbolEncodingType_e) LLtype; + } } + /* build CTable for Offsets */ + { U32 Offtype; + unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWksp, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); + if (Offtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->ofType = (symbolEncodingType_e) Offtype; + } } + /* build CTable for MatchLengths */ + { U32 MLtype; + unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), 
+ cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); + if (MLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->mlType = (symbolEncodingType_e) MLtype; + } } + assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); + return op-ostart; +} + + +/** ZSTD_buildSuperBlockEntropy() : + * Builds entropy for the super-block. + * @return : 0 on success or error code */ +static size_t +ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) +{ + size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + ZSTD_disableLiteralsCompression(cctxParams), + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); + return 0; +} + +/** ZSTD_compressSubBlock_literal() : + * Compresses literals section for a sub-block. + * When we have to write the Huffman table we will sometimes choose a header + * size larger than necessary. This is because we have to pick the header size + * before we know the table size + compressed size, so we have a bound on the + * table size. If we guessed incorrectly, we fall back to uncompressed literals. 
+ * + * We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded + * in writing the header, otherwise it is set to 0. + * + * hufMetadata->hType has literals block type info. + * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block. + * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block. + * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block + * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block + * and the following sub-blocks' literals sections will be Treeless_Literals_Block. + * @return : compressed size of literals section of a sub-block + * Or 0 if it unable to compress. + * Or error code */ +static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + const BYTE* literals, size_t litSize, + void* dst, size_t dstSize, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + size_t const header = writeEntropy ? 200 : 0; + size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart + lhSize; + U32 const singleStream = lhSize == 3; + symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; + size_t cLitSize = 0; + + (void)bmi2; /* TODO bmi2... 
*/ + + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); + + *entropyWritten = 0; + if (litSize == 0 || hufMetadata->hType == set_basic) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } else if (hufMetadata->hType == set_rle) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal"); + return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize); + } + + assert(litSize > 0); + assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat); + + if (writeEntropy && hufMetadata->hType == set_compressed) { + ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); + op += hufMetadata->hufDesSize; + cLitSize += hufMetadata->hufDesSize; + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); + } + + /* TODO bmi2 */ + { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) + : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); + op += cSize; + cLitSize += cSize; + if (cSize == 0 || ERR_isError(cSize)) { + DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize)); + return 0; + } + /* If we expand and we aren't writing a header then emit uncompressed */ + if (!writeEntropy && cLitSize >= litSize) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + /* If we are writing headers then allow expansion that doesn't change our header size. 
*/ + if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) { + assert(cLitSize > litSize); + DEBUGLOG(5, "Literals expanded beyond allowed header size"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize); + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + *entropyWritten = 1; + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); + return op-ostart; +} + +static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { + const seqDef* const sstart = sequences; + const seqDef* const send = sequences + nbSeq; + const seqDef* sp = sstart; + size_t matchLengthSum = 0; + size_t litLengthSum = 0; + (void)litLengthSum; + while (send-sp > 0) { + ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); + litLengthSum += seqLen.litLength; + matchLengthSum += seqLen.matchLength; + sp++; + } + assert(litLengthSum <= litSize); + if (!lastSequence) { + assert(litLengthSum == litSize); + } + return matchLengthSum + litSize; +} + +/** ZSTD_compressSubBlock_sequences() : + * Compresses sequences section for a sub-block. + * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have + * symbol compression modes for the super-block. 
+ * The first successfully compressed block will have these in its header. + * We set entropyWritten=1 when we succeed in compressing the sequences. + * The following sub-blocks will always have repeat mode. + * @return : compressed size of sequences section of a sub-block + * Or 0 if it is unable to compress + * Or error code. */ +static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + BYTE* seqHead; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets); + + *entropyWritten = 0; + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, ""); + if (nbSeq < 0x7F) + *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) + op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else + op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) { + return op - ostart; + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart)); + + if (writeEntropy) { + const U32 LLtype = fseMetadata->llType; + const U32 Offtype = fseMetadata->ofType; + const U32 MLtype = fseMetadata->mlType; + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); + op += 
fseMetadata->fseTablesSize; + } else { + const U32 repeat = set_repeat; + *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2)); + } + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, oend - op, + fseTables->matchlengthCTable, mlCode, + fseTables->offcodeCTable, ofCode, + fseTables->litlengthCTable, llCode, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(fseMetadata->lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } +#endif + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); + } + + /* zstd versions <= 1.4.0 mistakenly report error when + * sequences section body size is less than 3 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1664. + * This can happen when the previous sequences section block is compressed + * with rle mode and the current block's sequences section is compressed + * with repeat mode where sequences section body size can be 1 byte. 
+ */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (op-seqHead < 4) { + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " + "an uncompressed block when sequences are < 4 bytes"); + return 0; + } +#endif + + *entropyWritten = 1; + return op - ostart; +} + +/** ZSTD_compressSubBlock() : + * Compresses a single sub-block. + * @return : compressed size of the sub-block + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* literals, size_t litSize, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, + int writeLitEntropy, int writeSeqEntropy, + int* litEntropyWritten, int* seqEntropyWritten, + U32 lastBlock) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart + ZSTD_blockHeaderSize; + DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)", + litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); + { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, + &entropyMetadata->hufMetadata, literals, litSize, + op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); + FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); + if (cLitSize == 0) return 0; + op += cLitSize; + } + { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, + &entropyMetadata->fseMetadata, + sequences, nbSeq, + llCode, mlCode, ofCode, + cctxParams, + op, oend-op, + bmi2, writeSeqEntropy, seqEntropyWritten); + FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); + if (cSeqSize == 0) return 0; + op += cSeqSize; + } + /* Write block header */ + { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize; + U32 const cBlockHeader24 
= lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(ostart, cBlockHeader24); + } + return op-ostart; +} + +static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = 255; + size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, unsigned maxCode, + size_t nbSeq, const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. */ + assert(max <= defaultMax); + cSymbolTypeSizeEstimateInBits = max <= defaultMax + ? 
ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max) + : ERROR(GENERIC); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits / 8; +} + +static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, + nbSeq, fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, + nbSeq, fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, + nbSeq, fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, 
+ const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t cSizeEstimate = 0; + cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return cSizeEstimate + ZSTD_blockHeaderSize; +} + +static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) +{ + if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) + return 1; + if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle) + return 1; + if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle) + return 1; + return 0; +} + +/** ZSTD_compressSubBlock_multi() : + * Breaks super-block into multiple sub-blocks and compresses them. + * Entropy will be written to the first block. + * The following blocks will use repeat mode to compress. + * All sub-blocks are compressed blocks (no raw or rle blocks). + * @return : compressed size of the super block (which is multiple ZSTD blocks) + * Or 0 if it failed to compress. 
*/ +static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, + const ZSTD_compressedBlockState_t* prevCBlock, + ZSTD_compressedBlockState_t* nextCBlock, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const int bmi2, U32 lastBlock, + void* workspace, size_t wkspSize) +{ + const seqDef* const sstart = seqStorePtr->sequencesStart; + const seqDef* const send = seqStorePtr->sequences; + const seqDef* sp = sstart; + const BYTE* const lstart = seqStorePtr->litStart; + const BYTE* const lend = seqStorePtr->lit; + const BYTE* lp = lstart; + BYTE const* ip = (BYTE const*)src; + BYTE const* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + const BYTE* llCodePtr = seqStorePtr->llCode; + const BYTE* mlCodePtr = seqStorePtr->mlCode; + const BYTE* ofCodePtr = seqStorePtr->ofCode; + size_t targetCBlockSize = cctxParams->targetCBlockSize; + size_t litSize, seqCount; + int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; + int writeSeqEntropy = 1; + int lastSequence = 0; + + DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", + (unsigned)(lend-lp), (unsigned)(send-sstart)); + + litSize = 0; + seqCount = 0; + do { + size_t cBlockSizeEstimate = 0; + if (sstart == send) { + lastSequence = 1; + } else { + const seqDef* const sequence = sp + seqCount; + lastSequence = sequence == send - 1; + litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; + seqCount++; + } + if (lastSequence) { + assert(lp <= lend); + assert(litSize <= (size_t)(lend - lp)); + litSize = (size_t)(lend - lp); + } + /* I think there is an optimization opportunity here. + * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful + * since it recalculates estimate from scratch. 
+ * For example, it would recount literal distribution and symbol codes everytime. + */ + cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, + &nextCBlock->entropy, entropyMetadata, + workspace, wkspSize, writeLitEntropy, writeSeqEntropy); + if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { + int litEntropyWritten = 0; + int seqEntropyWritten = 0; + const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); + const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, oend-op, + bmi2, writeLitEntropy, writeSeqEntropy, + &litEntropyWritten, &seqEntropyWritten, + lastBlock && lastSequence); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + if (cSize > 0 && cSize < decompressedSize) { + DEBUGLOG(5, "Committed the sub-block"); + assert(ip + decompressedSize <= iend); + ip += decompressedSize; + sp += seqCount; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + litSize = 0; + seqCount = 0; + /* Entropy only needs to be written once */ + if (litEntropyWritten) { + writeLitEntropy = 0; + } + if (seqEntropyWritten) { + writeSeqEntropy = 0; + } + } + } + } while (!lastSequence); + if (writeLitEntropy) { + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); + ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); + } + if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { + /* If we haven't written our entropy tables, then we've violated our contract and + * must emit an uncompressed block. 
+ */ + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); + return 0; + } + if (ip < iend) { + size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); + DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + assert(cSize != 0); + op += cSize; + /* We have to regenerate the repcodes because we've skipped some sequences */ + if (sp < send) { + seqDef const* seq; + repcodes_t rep; + ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); + for (seq = sstart; seq < sp; ++seq) { + rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); + } + ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); + } + } + DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); + return op-ostart; +} + +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; + + FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + + return ZSTD_compressSubBlock_multi(&zc->seqStore, + zc->blockState.prevCBlock, + zc->blockState.nextCBlock, + &entropyMetadata, + &zc->appliedParams, + dst, dstCapacity, + src, srcSize, + zc->bmi2, lastBlock, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */); +} +/**** ended inlining compress/zstd_compress_superblock.c ****/ +/**** start inlining compress/zstd_compress.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/*-************************************* +* Dependencies +***************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** start inlining ../common/cpu.h ****/ +/* + * Copyright (c) 2018-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMMON_CPU_H +#define ZSTD_COMMON_CPU_H + +/** + * Implementation taken from folly/CpuId.h + * https://github.com/facebook/folly/blob/master/folly/CpuId.h + */ + +/**** skipping file: mem.h ****/ + +#ifdef _MSC_VER +#include +#endif + +typedef struct { + U32 f1c; + U32 f1d; + U32 f7b; + U32 f7c; +} ZSTD_cpuid_t; + +MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { + U32 f1c = 0; + U32 f1d = 0; + U32 f7b = 0; + U32 f7c = 0; +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + int reg[4]; + __cpuid((int*)reg, 0); + { + int const n = reg[0]; + if (n >= 1) { + __cpuid((int*)reg, 1); + f1c = (U32)reg[2]; + f1d = (U32)reg[3]; + } + if (n >= 7) { + __cpuidex((int*)reg, 7, 0); + f7b = (U32)reg[1]; + f7c = (U32)reg[2]; + } + } +#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) + /* The following block like the normal cpuid branch below, but gcc + * reserves ebx for use of its pic register so we must specially + * handle the save and restore to avoid clobbering the register + */ + U32 n; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl 
%%ebx\n\t" + : "=a"(n) + : "a"(0) + : "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(f1a), "=c"(f1c), "=d"(f1d) + : "a"(1)); + } + if (n >= 7) { + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "movl %%ebx, %%eax\n\t" + "popl %%ebx" + : "=a"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) + U32 n; + __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); + } + if (n >= 7) { + U32 f7a; + __asm__("cpuid" + : "=a"(f7a), "=b"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#endif + { + ZSTD_cpuid_t cpuid; + cpuid.f1c = f1c; + cpuid.f1d = f1d; + cpuid.f7b = f7b; + cpuid.f7c = f7c; + return cpuid; + } +} + +#define X(name, r, bit) \ + MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ + return ((cpuid.r) & (1U << bit)) != 0; \ + } + +/* cpuid(1): Processor Info and Feature Bits. */ +#define C(name, bit) X(name, f1c, bit) + C(sse3, 0) + C(pclmuldq, 1) + C(dtes64, 2) + C(monitor, 3) + C(dscpl, 4) + C(vmx, 5) + C(smx, 6) + C(eist, 7) + C(tm2, 8) + C(ssse3, 9) + C(cnxtid, 10) + C(fma, 12) + C(cx16, 13) + C(xtpr, 14) + C(pdcm, 15) + C(pcid, 17) + C(dca, 18) + C(sse41, 19) + C(sse42, 20) + C(x2apic, 21) + C(movbe, 22) + C(popcnt, 23) + C(tscdeadline, 24) + C(aes, 25) + C(xsave, 26) + C(osxsave, 27) + C(avx, 28) + C(f16c, 29) + C(rdrand, 30) +#undef C +#define D(name, bit) X(name, f1d, bit) + D(fpu, 0) + D(vme, 1) + D(de, 2) + D(pse, 3) + D(tsc, 4) + D(msr, 5) + D(pae, 6) + D(mce, 7) + D(cx8, 8) + D(apic, 9) + D(sep, 11) + D(mtrr, 12) + D(pge, 13) + D(mca, 14) + D(cmov, 15) + D(pat, 16) + D(pse36, 17) + D(psn, 18) + D(clfsh, 19) + D(ds, 21) + D(acpi, 22) + D(mmx, 23) + D(fxsr, 24) + D(sse, 25) + D(sse2, 26) + D(ss, 27) + D(htt, 28) + D(tm, 29) + D(pbe, 31) +#undef D + +/* cpuid(7): Extended Features. 
*/ +#define B(name, bit) X(name, f7b, bit) + B(bmi1, 3) + B(hle, 4) + B(avx2, 5) + B(smep, 7) + B(bmi2, 8) + B(erms, 9) + B(invpcid, 10) + B(rtm, 11) + B(mpx, 14) + B(avx512f, 16) + B(avx512dq, 17) + B(rdseed, 18) + B(adx, 19) + B(smap, 20) + B(avx512ifma, 21) + B(pcommit, 22) + B(clflushopt, 23) + B(clwb, 24) + B(avx512pf, 26) + B(avx512er, 27) + B(avx512cd, 28) + B(sha, 29) + B(avx512bw, 30) + B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) + C(prefetchwt1, 0) + C(avx512vbmi, 1) +#undef C + +#undef X + +#endif /* ZSTD_COMMON_CPU_H */ +/**** ended inlining ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/zstd_trace.h ****/ +/**** skipping file: hist.h ****/ +#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_compress_sequences.h ****/ +/**** skipping file: zstd_compress_literals.h ****/ +/**** start inlining zstd_fast.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_FAST_H +#define ZSTD_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ +/**** ended inlining zstd_fast.h ****/ +/**** start inlining zstd_double_fast.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_DOUBLE_FAST_H +#define ZSTD_DOUBLE_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_DOUBLE_FAST_H */ +/**** ended inlining zstd_double_fast.h ****/ +/**** start inlining zstd_lazy.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LAZY_H +#define ZSTD_LAZY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: zstd_compress_internal.h ****/ + +/** + * Dedicated Dictionary Search Structure bucket log. In the + * ZSTD_dedicatedDictSearch mode, the hashTable has + * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just + * one. 
+ */ +#define ZSTD_LAZY_DDSS_BUCKET_LOG 2 + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); + +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); + +void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, 
seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LAZY_H */ +/**** ended inlining zstd_lazy.h ****/ +/**** start inlining zstd_opt.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_OPT_H +#define ZSTD_OPT_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: zstd_compress_internal.h ****/ + +/* used in ZSTD_loadDictionaryContent() */ +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); + +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + /* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_OPT_H */ +/**** ended inlining zstd_opt.h ****/ +/**** start inlining zstd_ldm.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LDM_H +#define ZSTD_LDM_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: ../zstd.h ****/ + +/*-************************************* +* Long distance matching +***************************************/ + +#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT + +void ZSTD_ldm_fillHashTable( + ldmState_t* state, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params); + +/** + * ZSTD_ldm_generateSequences(): + * + * Generates the sequences using the long distance match finder. + * Generates long range matching sequences in `sequences`, which parse a prefix + * of the source. `sequences` must be large enough to store every sequence, + * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. + * @returns 0 or an error code. + * + * NOTE: The user must have called ZSTD_window_update() for all of the input + * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. + * NOTE: This function returns an error if it runs out of space to store + * sequences. + */ +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldms, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize); + +/** + * ZSTD_ldm_blockCompress(): + * + * Compresses a block using the predefined sequences, along with a secondary + * block compressor. The literals section of every sequence is passed to the + * secondary block compressor, and those sequences are interspersed with the + * predefined sequences. Returns the length of the last literals. + * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. + * `rawSeqStore.seq` may also be updated to split the last sequence between two + * blocks. + * @return The length of the last literals. 
+ * + * NOTE: The source must be at most the maximum block size, but the predefined + * sequences can be any size, and may be longer than the block. In the case that + * they are longer than the block, the last sequences may need to be split into + * two. We handle that case correctly, and update `rawSeqStore` appropriately. + * NOTE: This function does not return any errors. + */ +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +/** + * ZSTD_ldm_skipSequences(): + * + * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. + * Avoids emitting matches less than `minMatch` bytes. + * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, + U32 const minMatch); + +/* ZSTD_ldm_skipRawSeqStoreBytes(): + * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'. + * Not to be used in conjunction with ZSTD_ldm_skipSequences(). + * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes); + +/** ZSTD_ldm_getTableSize() : + * Estimate the space needed for long distance matching tables or 0 if LDM is + * disabled. + */ +size_t ZSTD_ldm_getTableSize(ldmParams_t params); + +/** ZSTD_ldm_getSeqSpace() : + * Return an upper bound on the number of sequences that can be produced by + * the long distance matcher, or 0 if LDM is disabled. + */ +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); + +/** ZSTD_ldm_adjustParameters() : + * If the params->hashRateLog is not set, set it to its default value based on + * windowLog and params->hashLog. + * + * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to + * params->hashLog if it is not). 
+ * + * Ensures that the minMatchLength >= targetLength during optimal parsing. + */ +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ +/**** ended inlining zstd_ldm.h ****/ +/**** skipping file: zstd_compress_superblock.h ****/ + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * COMPRESS_HEAPMODE : + * Select how default decompression function ZSTD_compress() allocates its context, + * on stack (0, default), or into heap (1). + * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected. + */ +#ifndef ZSTD_COMPRESS_HEAPMODE +# define ZSTD_COMPRESS_HEAPMODE 0 +#endif + + +/*-************************************* +* Helper functions +***************************************/ +/* ZSTD_compressBound() + * Note that the result from this function is only compatible with the "normal" + * full-block strategy. + * When there are a lot of small blocks due to frequent flush in streaming mode + * the overhead of headers can make the compressed data to be larger than the + * return value of ZSTD_compressBound(). 
+ */ +size_t ZSTD_compressBound(size_t srcSize) { + return ZSTD_COMPRESSBOUND(srcSize); +} + + +/*-************************************* +* Context memory management +***************************************/ +struct ZSTD_CDict_s { + const void* dictContent; + size_t dictContentSize; + ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */ + U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ + ZSTD_cwksp workspace; + ZSTD_matchState_t matchState; + ZSTD_compressedBlockState_t cBlockState; + ZSTD_customMem customMem; + U32 dictID; + int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ +}; /* typedef'd to ZSTD_CDict within "zstd.h" */ + +ZSTD_CCtx* ZSTD_createCCtx(void) +{ + return ZSTD_createCCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) +{ + assert(cctx != NULL); + ZSTD_memset(cctx, 0, sizeof(*cctx)); + cctx->customMem = memManager; + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_STATIC_ASSERT(zcss_init==0); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem); + if (!cctx) return NULL; + ZSTD_initCCtx(cctx, customMem); + return cctx; + } +} + +ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) +{ + ZSTD_cwksp ws; + ZSTD_CCtx* cctx; + if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ + if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); + + cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); + if (cctx == NULL) 
return NULL; + + ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx)); + ZSTD_cwksp_move(&cctx->workspace, &ws); + cctx->staticSize = workspaceSize; + + /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ + if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; + cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE); + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + return cctx; +} + +/** + * Clears and frees all of the dictionaries in the CCtx. + */ +static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) +{ + ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem); + ZSTD_freeCDict(cctx->localDict.cdict); + ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict)); + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); + cctx->cdict = NULL; +} + +static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) +{ + size_t const bufferSize = dict.dictBuffer != NULL ? 
dict.dictSize : 0; + size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); + return bufferSize + cdictSize; +} + +static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) +{ + assert(cctx != NULL); + assert(cctx->staticSize == 0); + ZSTD_clearAllDicts(cctx); +#ifdef ZSTD_MULTITHREAD + ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; +#endif + ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); +} + +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "not compatible with static CCtx"); + { + int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); + ZSTD_freeCCtxContent(cctx); + if (!cctxInWorkspace) { + ZSTD_customFree(cctx, cctx->customMem); + } + } + return 0; +} + + +static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + return ZSTDMT_sizeof_CCtx(cctx->mtctx); +#else + (void)cctx; + return 0; +#endif +} + + +size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support sizeof on NULL */ + /* cctx may be in the workspace */ + return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) + + ZSTD_cwksp_sizeof(&cctx->workspace) + + ZSTD_sizeof_localDict(cctx->localDict) + + ZSTD_sizeof_mtctx(cctx); +} + +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) +{ + return ZSTD_sizeof_CCtx(zcs); /* same object */ +} + +/* private API call, for dictBuilder only */ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } + +/* Returns 1 if compression parameters are such that we should + * enable long distance matching (wlog >= 27, strategy >= btopt). + * Returns 0 otherwise. 
+ */ +static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) { + return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27; +} + +static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( + ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params cctxParams; + /* should not matter, as all cParams are presumed properly defined */ + ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT); + cctxParams.cParams = cParams; + + if (ZSTD_CParams_shouldEnableLdm(&cParams)) { + DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params"); + cctxParams.ldmParams.enableLdm = 1; + /* LDM is enabled by default for optimal parser and window size >= 128MB */ + ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams); + assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog); + assert(cctxParams.ldmParams.hashRateLog < 32); + } + + assert(!ZSTD_checkCParams(cParams)); + return cctxParams; +} + +static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params* params; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + params = (ZSTD_CCtx_params*)ZSTD_customCalloc( + sizeof(ZSTD_CCtx_params), customMem); + if (!params) { return NULL; } + ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); + params->customMem = customMem; + return params; +} + +ZSTD_CCtx_params* ZSTD_createCCtxParams(void) +{ + return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem); +} + +size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) +{ + if (params == NULL) { return 0; } + ZSTD_customFree(params, params->customMem); + return 0; +} + +size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) +{ + return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); +} + +size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + 
cctxParams->compressionLevel = compressionLevel; + cctxParams->fParams.contentSizeFlag = 1; + return 0; +} + +#define ZSTD_NO_CLEVEL 0 + +/** + * Initializes the cctxParams from params and compressionLevel. + * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. + */ +static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel) +{ + assert(!ZSTD_checkCParams(params->cParams)); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. + */ + cctxParams->compressionLevel = compressionLevel; +} + +size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) +{ + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + ZSTD_CCtxParams_init_internal(cctxParams, ¶ms, ZSTD_NO_CLEVEL); + return 0; +} + +/** + * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone. + * @param param Validated zstd parameters. + */ +static void ZSTD_CCtxParams_setZstdParams( + ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) +{ + assert(!ZSTD_checkCParams(params->cParams)); + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. 
+ */ + cctxParams->compressionLevel = ZSTD_NO_CLEVEL; +} + +ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + + switch(param) + { + case ZSTD_c_compressionLevel: + bounds.lowerBound = ZSTD_minCLevel(); + bounds.upperBound = ZSTD_maxCLevel(); + return bounds; + + case ZSTD_c_windowLog: + bounds.lowerBound = ZSTD_WINDOWLOG_MIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + + case ZSTD_c_hashLog: + bounds.lowerBound = ZSTD_HASHLOG_MIN; + bounds.upperBound = ZSTD_HASHLOG_MAX; + return bounds; + + case ZSTD_c_chainLog: + bounds.lowerBound = ZSTD_CHAINLOG_MIN; + bounds.upperBound = ZSTD_CHAINLOG_MAX; + return bounds; + + case ZSTD_c_searchLog: + bounds.lowerBound = ZSTD_SEARCHLOG_MIN; + bounds.upperBound = ZSTD_SEARCHLOG_MAX; + return bounds; + + case ZSTD_c_minMatch: + bounds.lowerBound = ZSTD_MINMATCH_MIN; + bounds.upperBound = ZSTD_MINMATCH_MAX; + return bounds; + + case ZSTD_c_targetLength: + bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; + bounds.upperBound = ZSTD_TARGETLENGTH_MAX; + return bounds; + + case ZSTD_c_strategy: + bounds.lowerBound = ZSTD_STRATEGY_MIN; + bounds.upperBound = ZSTD_STRATEGY_MAX; + return bounds; + + case ZSTD_c_contentSizeFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_checksumFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_dictIDFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_nbWorkers: + bounds.lowerBound = 0; +#ifdef ZSTD_MULTITHREAD + bounds.upperBound = ZSTDMT_NBWORKERS_MAX; +#else + bounds.upperBound = 0; +#endif + return bounds; + + case ZSTD_c_jobSize: + bounds.lowerBound = 0; +#ifdef ZSTD_MULTITHREAD + bounds.upperBound = ZSTDMT_JOBSIZE_MAX; +#else + bounds.upperBound = 0; +#endif + return bounds; + + case ZSTD_c_overlapLog: +#ifdef ZSTD_MULTITHREAD + bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; + bounds.upperBound = ZSTD_OVERLAPLOG_MAX; +#else + 
bounds.lowerBound = 0; + bounds.upperBound = 0; +#endif + return bounds; + + case ZSTD_c_enableDedicatedDictSearch: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_enableLongDistanceMatching: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_ldmHashLog: + bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; + return bounds; + + case ZSTD_c_ldmMinMatch: + bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; + bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; + return bounds; + + case ZSTD_c_ldmBucketSizeLog: + bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; + bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; + return bounds; + + case ZSTD_c_ldmHashRateLog: + bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; + return bounds; + + /* experimental parameters */ + case ZSTD_c_rsyncable: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_forceMaxWindow : + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_format: + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + bounds.lowerBound = ZSTD_f_zstd1; + bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; + + case ZSTD_c_forceAttachDict: + ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad); + bounds.lowerBound = ZSTD_dictDefaultAttach; + bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? 
*/ + return bounds; + + case ZSTD_c_literalCompressionMode: + ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); + bounds.lowerBound = ZSTD_lcm_auto; + bounds.upperBound = ZSTD_lcm_uncompressed; + return bounds; + + case ZSTD_c_targetCBlockSize: + bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; + bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; + return bounds; + + case ZSTD_c_srcSizeHint: + bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; + bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; + return bounds; + + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + + case ZSTD_c_blockDelimiters: + bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters; + bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters; + return bounds; + + case ZSTD_c_validateSequences: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + default: + bounds.error = ERROR(parameter_unsupported); + return bounds; + } +} + +/* ZSTD_cParam_clampBounds: + * Clamps the value into the bounded range. 
+ */ +static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return bounds.error; + if (*value < bounds.lowerBound) *value = bounds.lowerBound; + if (*value > bounds.upperBound) *value = bounds.upperBound; + return 0; +} + +#define BOUNDCHECK(cParam, val) { \ + RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ + parameter_outOfBound, "Param out of bounds"); \ +} + + +static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) +{ + switch(param) + { + case ZSTD_c_compressionLevel: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + return 1; + + case ZSTD_c_format: + case ZSTD_c_windowLog: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow : + case ZSTD_c_nbWorkers: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + default: + return 0; + } +} + +size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value); + if (cctx->streamStage != zcss_init) { + if (ZSTD_isUpdateAuthorized(param)) { + cctx->cParamsChanged = 1; + } else { + RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); + } } + + switch(param) + { + case ZSTD_c_nbWorkers: + RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, + "MT not 
compatible with static alloc"); + break; + + case ZSTD_c_compressionLevel: + case ZSTD_c_windowLog: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_format: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + break; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, + ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); + switch(param) + { + case ZSTD_c_format : + BOUNDCHECK(ZSTD_c_format, value); + CCtxParams->format = (ZSTD_format_e)value; + return (size_t)CCtxParams->format; + + case ZSTD_c_compressionLevel : { + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + if (value == 0) + CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + else + CCtxParams->compressionLevel = value; + if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; + return 0; /* return type (size_t) cannot represent negative values */ + } + + case ZSTD_c_windowLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_windowLog, value); + CCtxParams->cParams.windowLog = (U32)value; + return 
CCtxParams->cParams.windowLog; + + case ZSTD_c_hashLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_hashLog, value); + CCtxParams->cParams.hashLog = (U32)value; + return CCtxParams->cParams.hashLog; + + case ZSTD_c_chainLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_chainLog, value); + CCtxParams->cParams.chainLog = (U32)value; + return CCtxParams->cParams.chainLog; + + case ZSTD_c_searchLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_searchLog, value); + CCtxParams->cParams.searchLog = (U32)value; + return (size_t)value; + + case ZSTD_c_minMatch : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_minMatch, value); + CCtxParams->cParams.minMatch = value; + return CCtxParams->cParams.minMatch; + + case ZSTD_c_targetLength : + BOUNDCHECK(ZSTD_c_targetLength, value); + CCtxParams->cParams.targetLength = value; + return CCtxParams->cParams.targetLength; + + case ZSTD_c_strategy : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_strategy, value); + CCtxParams->cParams.strategy = (ZSTD_strategy)value; + return (size_t)CCtxParams->cParams.strategy; + + case ZSTD_c_contentSizeFlag : + /* Content size written in frame header _when known_ (default:1) */ + DEBUGLOG(4, "set content size flag = %u", (value!=0)); + CCtxParams->fParams.contentSizeFlag = value != 0; + return CCtxParams->fParams.contentSizeFlag; + + case ZSTD_c_checksumFlag : + /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ + CCtxParams->fParams.checksumFlag = value != 0; + return CCtxParams->fParams.checksumFlag; + + case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ + DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); + CCtxParams->fParams.noDictIDFlag = !value; + return !CCtxParams->fParams.noDictIDFlag; + + case ZSTD_c_forceMaxWindow : + CCtxParams->forceWindow = (value != 0); + return CCtxParams->forceWindow; + + case ZSTD_c_forceAttachDict : { 
+ const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; + BOUNDCHECK(ZSTD_c_forceAttachDict, pref); + CCtxParams->attachDictPref = pref; + return CCtxParams->attachDictPref; + } + + case ZSTD_c_literalCompressionMode : { + const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; + BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); + CCtxParams->literalCompressionMode = lcm; + return CCtxParams->literalCompressionMode; + } + + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + CCtxParams->nbWorkers = value; + return CCtxParams->nbWorkers; +#endif + + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + /* Adjust to the minimum non-default value. */ + if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) + value = ZSTDMT_JOBSIZE_MIN; + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + assert(value >= 0); + CCtxParams->jobSize = value; + return CCtxParams->jobSize; +#endif + + case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); + CCtxParams->overlapLog = value; + return CCtxParams->overlapLog; +#endif + + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); + CCtxParams->rsyncable = value; + return CCtxParams->rsyncable; +#endif + + case ZSTD_c_enableDedicatedDictSearch : + CCtxParams->enableDedicatedDictSearch = (value!=0); + return CCtxParams->enableDedicatedDictSearch; + + case 
ZSTD_c_enableLongDistanceMatching : + CCtxParams->ldmParams.enableLdm = (value!=0); + return CCtxParams->ldmParams.enableLdm; + + case ZSTD_c_ldmHashLog : + if (value!=0) /* 0 ==> auto */ + BOUNDCHECK(ZSTD_c_ldmHashLog, value); + CCtxParams->ldmParams.hashLog = value; + return CCtxParams->ldmParams.hashLog; + + case ZSTD_c_ldmMinMatch : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmMinMatch, value); + CCtxParams->ldmParams.minMatchLength = value; + return CCtxParams->ldmParams.minMatchLength; + + case ZSTD_c_ldmBucketSizeLog : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); + CCtxParams->ldmParams.bucketSizeLog = value; + return CCtxParams->ldmParams.bucketSizeLog; + + case ZSTD_c_ldmHashRateLog : + RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, + parameter_outOfBound, "Param out of bounds!"); + CCtxParams->ldmParams.hashRateLog = value; + return CCtxParams->ldmParams.hashRateLog; + + case ZSTD_c_targetCBlockSize : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_targetCBlockSize, value); + CCtxParams->targetCBlockSize = value; + return CCtxParams->targetCBlockSize; + + case ZSTD_c_srcSizeHint : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_srcSizeHint, value); + CCtxParams->srcSizeHint = value; + return CCtxParams->srcSizeHint; + + case ZSTD_c_stableInBuffer: + BOUNDCHECK(ZSTD_c_stableInBuffer, value); + CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->inBufferMode; + + case ZSTD_c_stableOutBuffer: + BOUNDCHECK(ZSTD_c_stableOutBuffer, value); + CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->outBufferMode; + + case ZSTD_c_blockDelimiters: + BOUNDCHECK(ZSTD_c_blockDelimiters, value); + CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; + return CCtxParams->blockDelimiters; + + case ZSTD_c_validateSequences: + BOUNDCHECK(ZSTD_c_validateSequences, value); + CCtxParams->validateSequences = value; + return 
CCtxParams->validateSequences; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } +} + +size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value) +{ + return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_getParameter( + ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value) +{ + switch(param) + { + case ZSTD_c_format : + *value = CCtxParams->format; + break; + case ZSTD_c_compressionLevel : + *value = CCtxParams->compressionLevel; + break; + case ZSTD_c_windowLog : + *value = (int)CCtxParams->cParams.windowLog; + break; + case ZSTD_c_hashLog : + *value = (int)CCtxParams->cParams.hashLog; + break; + case ZSTD_c_chainLog : + *value = (int)CCtxParams->cParams.chainLog; + break; + case ZSTD_c_searchLog : + *value = CCtxParams->cParams.searchLog; + break; + case ZSTD_c_minMatch : + *value = CCtxParams->cParams.minMatch; + break; + case ZSTD_c_targetLength : + *value = CCtxParams->cParams.targetLength; + break; + case ZSTD_c_strategy : + *value = (unsigned)CCtxParams->cParams.strategy; + break; + case ZSTD_c_contentSizeFlag : + *value = CCtxParams->fParams.contentSizeFlag; + break; + case ZSTD_c_checksumFlag : + *value = CCtxParams->fParams.checksumFlag; + break; + case ZSTD_c_dictIDFlag : + *value = !CCtxParams->fParams.noDictIDFlag; + break; + case ZSTD_c_forceMaxWindow : + *value = CCtxParams->forceWindow; + break; + case ZSTD_c_forceAttachDict : + *value = CCtxParams->attachDictPref; + break; + case ZSTD_c_literalCompressionMode : + *value = CCtxParams->literalCompressionMode; + break; + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + assert(CCtxParams->nbWorkers == 0); +#endif + *value = CCtxParams->nbWorkers; + break; + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + assert(CCtxParams->jobSize <= INT_MAX); + *value = (int)CCtxParams->jobSize; + break; +#endif 
+ case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->overlapLog; + break; +#endif + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->rsyncable; + break; +#endif + case ZSTD_c_enableDedicatedDictSearch : + *value = CCtxParams->enableDedicatedDictSearch; + break; + case ZSTD_c_enableLongDistanceMatching : + *value = CCtxParams->ldmParams.enableLdm; + break; + case ZSTD_c_ldmHashLog : + *value = CCtxParams->ldmParams.hashLog; + break; + case ZSTD_c_ldmMinMatch : + *value = CCtxParams->ldmParams.minMatchLength; + break; + case ZSTD_c_ldmBucketSizeLog : + *value = CCtxParams->ldmParams.bucketSizeLog; + break; + case ZSTD_c_ldmHashRateLog : + *value = CCtxParams->ldmParams.hashRateLog; + break; + case ZSTD_c_targetCBlockSize : + *value = (int)CCtxParams->targetCBlockSize; + break; + case ZSTD_c_srcSizeHint : + *value = (int)CCtxParams->srcSizeHint; + break; + case ZSTD_c_stableInBuffer : + *value = (int)CCtxParams->inBufferMode; + break; + case ZSTD_c_stableOutBuffer : + *value = (int)CCtxParams->outBufferMode; + break; + case ZSTD_c_blockDelimiters : + *value = (int)CCtxParams->blockDelimiters; + break; + case ZSTD_c_validateSequences : + *value = (int)CCtxParams->validateSequences; + break; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return 0; +} + +/** ZSTD_CCtx_setParametersUsingCCtxParams() : + * just applies `params` into `cctx` + * no action is performed, parameters are merely stored. + * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. + * This is possible even if a compression is ongoing. + * In which case, new parameters will be applied on the fly, starting with next compression job. 
+ */ +size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "The context is in the wrong stage!"); + RETURN_ERROR_IF(cctx->cdict, stage_wrong, + "Can't override parameters with cdict attached (some must " + "be inherited from the cdict)."); + + cctx->requestedParams = *params; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't set pledgedSrcSize when not in init stage."); + cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; + return 0; +} + +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams( + int const compressionLevel, + size_t const dictSize); +static int ZSTD_dedicatedDictSearch_isSupported( + const ZSTD_compressionParameters* cParams); +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams); + +/** + * Initializes the local dict using the requested parameters. + * NOTE: This does not use the pledged src size, because it may be used for more + * than one compression. + */ +static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) +{ + ZSTD_localDict* const dl = &cctx->localDict; + if (dl->dict == NULL) { + /* No local dictionary. */ + assert(dl->dictBuffer == NULL); + assert(dl->cdict == NULL); + assert(dl->dictSize == 0); + return 0; + } + if (dl->cdict != NULL) { + assert(cctx->cdict == dl->cdict); + /* Local dictionary already initialized. 
*/ + return 0; + } + assert(dl->dictSize > 0); + assert(cctx->cdict == NULL); + assert(cctx->prefixDict.dict == NULL); + + dl->cdict = ZSTD_createCDict_advanced2( + dl->dict, + dl->dictSize, + ZSTD_dlm_byRef, + dl->dictContentType, + &cctx->requestedParams, + cctx->customMem); + RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); + cctx->cdict = dl->cdict; + return 0; +} + +size_t ZSTD_CCtx_loadDictionary_advanced( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't load a dictionary when ctx is not in init stage."); + DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); + ZSTD_clearAllDicts(cctx); /* in case one already exists */ + if (dict == NULL || dictSize == 0) /* no dictionary mode */ + return 0; + if (dictLoadMethod == ZSTD_dlm_byRef) { + cctx->localDict.dict = dict; + } else { + void* dictBuffer; + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "no malloc for static CCtx"); + dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem); + RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); + ZSTD_memcpy(dictBuffer, dict, dictSize); + cctx->localDict.dictBuffer = dictBuffer; + cctx->localDict.dict = dictBuffer; + } + cctx->localDict.dictSize = dictSize; + cctx->localDict.dictContentType = dictContentType; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + + +size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + 
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a dict when ctx not in init stage."); + /* Free the existing local cdict (if any) to save memory. */ + ZSTD_clearAllDicts(cctx); + cctx->cdict = cdict; + return 0; +} + +size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a pool when ctx not in init stage."); + cctx->pool = pool; + return 0; +} + +size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + +size_t ZSTD_CCtx_refPrefix_advanced( + ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a prefix when ctx not in init stage."); + ZSTD_clearAllDicts(cctx); + if (prefix != NULL && prefixSize > 0) { + cctx->prefixDict.dict = prefix; + cctx->prefixDict.dictSize = prefixSize; + cctx->prefixDict.dictContentType = dictContentType; + } + return 0; +} + +/*! ZSTD_CCtx_reset() : + * Also dumps dictionary */ +size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + cctx->streamStage = zcss_init; + cctx->pledgedSrcSizePlusOne = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't reset parameters only when not in init stage."); + ZSTD_clearAllDicts(cctx); + return ZSTD_CCtxParams_reset(&cctx->requestedParams); + } + return 0; +} + + +/** ZSTD_checkCParams() : + control CParam values remain within authorized range. 
+ @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +{ + BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); + BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); + BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); + BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); + BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); + BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); + BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); + return 0; +} + +/** ZSTD_clampCParams() : + * make CParam values within valid range. + * @return : valid CParams */ +static ZSTD_compressionParameters +ZSTD_clampCParams(ZSTD_compressionParameters cParams) +{ +# define CLAMP_TYPE(cParam, val, type) { \ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ + if ((int)valbounds.upperBound) val=(type)bounds.upperBound; \ + } +# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) + CLAMP(ZSTD_c_windowLog, cParams.windowLog); + CLAMP(ZSTD_c_chainLog, cParams.chainLog); + CLAMP(ZSTD_c_hashLog, cParams.hashLog); + CLAMP(ZSTD_c_searchLog, cParams.searchLog); + CLAMP(ZSTD_c_minMatch, cParams.minMatch); + CLAMP(ZSTD_c_targetLength,cParams.targetLength); + CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy); + return cParams; +} + +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +{ + U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); + return hashLog - btScale; +} + +/** ZSTD_dictAndWindowLog() : + * Returns an adjusted window log that is large enough to fit the source and the dictionary. + * The zstd format says that the entire dictionary is valid if one byte of the dictionary + * is within the window. So the hashLog and chainLog should be large enough to reference both + * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing + * the hashLog and windowLog. 
+ * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN. + */ +static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize) +{ + const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX; + /* No dictionary ==> No change */ + if (dictSize == 0) { + return windowLog; + } + assert(windowLog <= ZSTD_WINDOWLOG_MAX); + assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */ + { + U64 const windowSize = 1ULL << windowLog; + U64 const dictAndWindowSize = dictSize + windowSize; + /* If the window size is already large enough to fit both the source and the dictionary + * then just use the window size. Otherwise adjust so that it fits the dictionary and + * the window. + */ + if (windowSize >= dictSize + srcSize) { + return windowLog; /* Window size large enough already */ + } else if (dictAndWindowSize >= maxWindowSize) { + return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */ + } else { + return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1; + } + } +} + +/** ZSTD_adjustCParams_internal() : + * optimize `cPar` for a specified input (`srcSize` and `dictSize`). + * mostly downsize to reduce memory consumption and initialization latency. + * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. + * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`. + * note : `srcSize==0` means 0! + * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ +static ZSTD_compressionParameters +ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize, + ZSTD_cParamMode_e mode) +{ + const U64 minSrcSize = 513; /* (1<<9) + 1 */ + const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); + assert(ZSTD_checkCParams(cPar)==0); + + switch (mode) { + case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + /* If we don't know the source size, don't make any + * assumptions about it. 
We will already have selected + * smaller parameters if a dictionary is in use. + */ + break; + case ZSTD_cpm_createCDict: + /* Assume a small source size when creating a dictionary + * with an unkown source size. + */ + if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) + srcSize = minSrcSize; + break; + case ZSTD_cpm_attachDict: + /* Dictionary has its own dedicated parameters which have + * already been selected. We are selecting parameters + * for only the source. + */ + dictSize = 0; + break; + default: + assert(0); + break; + } + + /* resize windowLog if input is small enough, to use less memory */ + if ( (srcSize < maxWindowResize) + && (dictSize < maxWindowResize) ) { + U32 const tSize = (U32)(srcSize + dictSize); + static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; + U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : + ZSTD_highbit32(tSize-1) + 1; + if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; + } + if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); + U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); + if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1; + if (cycleLog > dictAndWindowLog) + cPar.chainLog -= (cycleLog - dictAndWindowLog); + } + + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) + cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ + + return cPar; +} + +ZSTD_compressionParameters +ZSTD_adjustCParams(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize) +{ + cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ + if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown); +} + +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t 
dictSize, ZSTD_cParamMode_e mode); +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +static void ZSTD_overrideCParams( + ZSTD_compressionParameters* cParams, + const ZSTD_compressionParameters* overrides) +{ + if (overrides->windowLog) cParams->windowLog = overrides->windowLog; + if (overrides->hashLog) cParams->hashLog = overrides->hashLog; + if (overrides->chainLog) cParams->chainLog = overrides->chainLog; + if (overrides->searchLog) cParams->searchLog = overrides->searchLog; + if (overrides->minMatch) cParams->minMatch = overrides->minMatch; + if (overrides->targetLength) cParams->targetLength = overrides->targetLength; + if (overrides->strategy) cParams->strategy = overrides->strategy; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + ZSTD_compressionParameters cParams; + if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { + srcSizeHint = CCtxParams->srcSizeHint; + } + cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); + if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); + assert(!ZSTD_checkCParams(cParams)); + /* srcSizeHint == 0 means 0 */ + return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode); +} + +static size_t +ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + const U32 forCCtx) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = hashLog3 ? 
((size_t)1) << hashLog3 : 0; + /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't + * surrounded by redzones in ASAN. */ + size_t const tableSpace = chainSize * sizeof(U32) + + hSize * sizeof(U32) + + h3Size * sizeof(U32); + size_t const optPotentialSpace = + ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32)) + + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32)) + + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32)) + + ZSTD_cwksp_alloc_size((1<strategy >= ZSTD_btopt)) + ? optPotentialSpace + : 0; + DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", + (U32)chainSize, (U32)hSize, (U32)h3Size); + return tableSpace + optSpace; +} + +static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( + const ZSTD_compressionParameters* cParams, + const ldmParams_t* ldmParams, + const int isStatic, + const size_t buffInSize, + const size_t buffOutSize, + const U64 pledgedSrcSize) +{ + size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (cParams->minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) + + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); + size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); + size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); + size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1); + + size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams); + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize); + size_t const ldmSeqSpace = ldmParams->enableLdm ? 
+ ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; + + + size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + + ZSTD_cwksp_alloc_size(buffOutSize); + + size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; + + size_t const neededSpace = + cctxSpace + + entropySpace + + blockStateSpace + + ldmSpace + + ldmSeqSpace + + matchStateSize + + tokenSpace + + bufferSpace; + + DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); + return neededSpace; +} + +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + /* estimateCCtxSize is for one-shot compression. So no buffers should + * be needed. However, we still allocate two 0-sized buffers, which can + * take space under ASAN. 
*/ + return ZSTD_estimateCCtxSize_usingCCtxParams_internal( + &cParams, ¶ms->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); +} + +size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms); +} + +static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + return ZSTD_estimateCCtxSize_usingCParams(cParams); +} + +size_t ZSTD_estimateCCtxSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCCtxSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; +} + +size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + { ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); + size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered) + ? ((size_t)1 << cParams.windowLog) + blockSize + : 0; + size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered) + ? 
ZSTD_compressBound(blockSize) + 1 + : 0; + + return ZSTD_estimateCCtxSize_usingCCtxParams_internal( + &cParams, ¶ms->ldmParams, 1, inBuffSize, outBuffSize, + ZSTD_CONTENTSIZE_UNKNOWN); + } +} + +size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); +} + +static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + return ZSTD_estimateCStreamSize_usingCParams(cParams); +} + +size_t ZSTD_estimateCStreamSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCStreamSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; +} + +/* ZSTD_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads (non-blocking mode). + */ +ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_getFrameProgression(cctx->mtctx); + } +#endif + { ZSTD_frameProgression fp; + size_t const buffered = (cctx->inBuff == NULL) ? 0 : + cctx->inBuffPos - cctx->inToCompress; + if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); + assert(buffered <= ZSTD_BLOCKSIZE_MAX); + fp.ingested = cctx->consumedSrcSize + buffered; + fp.consumed = cctx->consumedSrcSize; + fp.produced = cctx->producedCSize; + fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */ + fp.currentJobID = 0; + fp.nbActiveWorkers = 0; + return fp; +} } + +/*! 
ZSTD_toFlushNow() + * Only useful for multithreading scenarios currently (nbWorkers >= 1). + */ +size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_toFlushNow(cctx->mtctx); + } +#endif + (void)cctx; + return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ +} + +static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, + ZSTD_compressionParameters cParams2) +{ + (void)cParams1; + (void)cParams2; + assert(cParams1.windowLog == cParams2.windowLog); + assert(cParams1.chainLog == cParams2.chainLog); + assert(cParams1.hashLog == cParams2.hashLog); + assert(cParams1.searchLog == cParams2.searchLog); + assert(cParams1.minMatch == cParams2.minMatch); + assert(cParams1.targetLength == cParams2.targetLength); + assert(cParams1.strategy == cParams2.strategy); +} + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +{ + int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + bs->rep[i] = repStartValue[i]; + bs->entropy.huf.repeatMode = HUF_repeat_none; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; +} + +/*! ZSTD_invalidateMatchState() + * Invalidate all the matches in the match finder tables. + * Requires nextSrc and base to be set (can be NULL). 
+ */ +static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) +{ + ZSTD_window_clear(&ms->window); + + ms->nextToUpdate = ms->window.dictLimit; + ms->loadedDictEnd = 0; + ms->opt.litLengthSum = 0; /* force reset of btopt stats */ + ms->dictMatchState = NULL; +} + +/** + * Controls, for this matchState reset, whether the tables need to be cleared / + * prepared for the coming compression (ZSTDcrp_makeClean), or whether the + * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a + * subsequent operation will overwrite the table space anyways (e.g., copying + * the matchState contents in from a CDict). + */ +typedef enum { + ZSTDcrp_makeClean, + ZSTDcrp_leaveDirty +} ZSTD_compResetPolicy_e; + +/** + * Controls, for this matchState reset, whether indexing can continue where it + * left off (ZSTDirp_continue), or whether it needs to be restarted from zero + * (ZSTDirp_reset). + */ +typedef enum { + ZSTDirp_continue, + ZSTDirp_reset +} ZSTD_indexResetPolicy_e; + +typedef enum { + ZSTD_resetTarget_CDict, + ZSTD_resetTarget_CCtx +} ZSTD_resetTarget_e; + +static size_t +ZSTD_reset_matchState(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + const ZSTD_compressionParameters* cParams, + const ZSTD_compResetPolicy_e crp, + const ZSTD_indexResetPolicy_e forceResetIndex, + const ZSTD_resetTarget_e forWho) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = hashLog3 ? 
((size_t)1) << hashLog3 : 0; + + DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); + if (forceResetIndex == ZSTDirp_reset) { + ZSTD_window_init(&ms->window); + ZSTD_cwksp_mark_tables_dirty(ws); + } + + ms->hashLog3 = hashLog3; + + ZSTD_invalidateMatchState(ms); + + assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ + + ZSTD_cwksp_clear_tables(ws); + + DEBUGLOG(5, "reserving table space"); + /* table Space */ + ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32)); + ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32)); + ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32)); + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); + + DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty); + if (crp!=ZSTDcrp_leaveDirty) { + /* reset tables only */ + ZSTD_cwksp_clean_tables(ws); + } + + /* opt parser space */ + if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { + DEBUGLOG(4, "reserving optimal parser space"); + ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); + ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); + ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); + ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); + ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); + } + + ms->cParams = *cParams; + + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); + + return 0; +} + +/* ZSTD_indexTooCloseToMax() : + * minor optimization : prefer memset() rather than 
reduceIndex() + * which is measurably slow in some circumstances (reported for Visual Studio). + * Works when re-using a context for a lot of smallish inputs : + * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, + * memset() will be triggered before reduceIndex(). + */ +#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) +static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) +{ + return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); +} + +/*! ZSTD_resetCCtx_internal() : + note : `params` are assumed fully validated at this stage */ +static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, + ZSTD_CCtx_params params, + U64 const pledgedSrcSize, + ZSTD_compResetPolicy_e const crp, + ZSTD_buffered_policy_e const zbuff) +{ + ZSTD_cwksp* const ws = &zc->workspace; + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", + (U32)pledgedSrcSize, params.cParams.windowLog); + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + + zc->isFirstBlock = 1; + + if (params.ldmParams.enableLdm) { + /* Adjust long distance matching parameters */ + ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); + assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); + assert(params.ldmParams.hashRateLog < 32); + } + + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (params.cParams.minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered) + ? ZSTD_compressBound(blockSize) + 1 + : 0; + size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered) + ? 
windowSize + blockSize + : 0; + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); + + int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window); + ZSTD_indexResetPolicy_e needsIndexReset = + (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset; + + size_t const neededSpace = + ZSTD_estimateCCtxSize_usingCCtxParams_internal( + ¶ms.cParams, ¶ms.ldmParams, zc->staticSize != 0, + buffInSize, buffOutSize, pledgedSrcSize); + FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); + + if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); + + /* Check if workspace is large enough, alloc a new one if needed */ + { + int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; + int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); + + DEBUGLOG(4, "Need %zu B workspace", neededSpace); + DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); + + if (workspaceTooSmall || workspaceWasteful) { + DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", + ZSTD_cwksp_sizeof(ws) >> 10, + neededSpace >> 10); + + RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); + + needsIndexReset = ZSTDirp_reset; + + ZSTD_cwksp_free(ws, zc->customMem); + FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), ""); + + DEBUGLOG(5, "reserving object space"); + /* Statically sized space. 
+ * entropyWorkspace never moves, + * though prev/next block swap places */ + assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); + zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); + zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); + zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); + } } + + ZSTD_cwksp_clear(ws); + + /* init params */ + zc->appliedParams = params; + zc->blockState.matchState.cParams = params.cParams; + zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; + zc->consumedSrcSize = 0; + zc->producedCSize = 0; + if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) + zc->appliedParams.fParams.contentSizeFlag = 0; + DEBUGLOG(4, "pledged content size : %u ; flag : %u", + (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); + zc->blockSize = blockSize; + + XXH64_reset(&zc->xxhState, 0); + zc->stage = ZSTDcs_init; + zc->dictID = 0; + zc->dictContentSize = 0; + + ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); + + /* ZSTD_wildcopy() is used to copy into the literals buffer, + * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. 
+ */ + zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); + zc->seqStore.maxNbLit = blockSize; + + /* buffers */ + zc->bufferedPolicy = zbuff; + zc->inBuffSize = buffInSize; + zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); + zc->outBuffSize = buffOutSize; + zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); + + /* ldm bucketOffsets table */ + if (params.ldmParams.enableLdm) { + /* TODO: avoid memset? */ + size_t const numBuckets = + ((size_t)1) << (params.ldmParams.hashLog - + params.ldmParams.bucketSizeLog); + zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets); + ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets); + } + + /* sequences storage */ + ZSTD_referenceExternalSequences(zc, NULL, 0); + zc->seqStore.maxNbSeq = maxNbSeq; + zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); + + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &zc->blockState.matchState, + ws, + ¶ms.cParams, + crp, + needsIndexReset, + ZSTD_resetTarget_CCtx), ""); + + /* ldm hash table */ + if (params.ldmParams.enableLdm) { + /* TODO: avoid memset? 
*/ + size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; + zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); + ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); + zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); + zc->maxNbLdmSequences = maxNbLdmSeq; + + ZSTD_window_init(&zc->ldmState.window); + ZSTD_window_clear(&zc->ldmState.window); + zc->ldmState.loadedDictEnd = 0; + } + + /* Due to alignment, when reusing a workspace, we can actually consume + * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h + */ + assert(ZSTD_cwksp_used(ws) >= neededSpace && + ZSTD_cwksp_used(ws) <= neededSpace + 3); + + DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); + zc->initialized = 1; + + return 0; + } +} + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { + int i; + for (i=0; iblockState.prevCBlock->rep[i] = 0; + assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); +} + +/* These are the approximate sizes for each strategy past which copying the + * dictionary tables into the working context is faster than using them + * in-place. 
+ */ +static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { + 8 KB, /* unused */ + 8 KB, /* ZSTD_fast */ + 16 KB, /* ZSTD_dfast */ + 32 KB, /* ZSTD_greedy */ + 32 KB, /* ZSTD_lazy */ + 32 KB, /* ZSTD_lazy2 */ + 32 KB, /* ZSTD_btlazy2 */ + 32 KB, /* ZSTD_btopt */ + 8 KB, /* ZSTD_btultra */ + 8 KB /* ZSTD_btultra2 */ +}; + +static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize) +{ + size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; + int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch; + return dedicatedDictSearch + || ( ( pledgedSrcSize <= cutoff + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || params->attachDictPref == ZSTD_dictForceAttach ) + && params->attachDictPref != ZSTD_dictForceCopy + && !params->forceWindow ); /* dictMatchState isn't correctly + * handled in _enforceMaxDist */ +} + +static size_t +ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + { + ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; + unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Resize working context table params for input only, since the dict + * has its own tables. */ + /* pledgedSrcSize == 0 means 0! 
*/ + + if (cdict->matchState.dedicatedDictSearch) { + ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams); + } + + params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, + cdict->dictContentSize, ZSTD_cpm_attachDict); + params.cParams.windowLog = windowLog; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_makeClean, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); + } + + { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; + if (cdictLen == 0) { + /* don't even attach dictionaries with no contents */ + DEBUGLOG(4, "skipping attaching empty dictionary"); + } else { + DEBUGLOG(4, "attaching dictionary into context"); + cctx->blockState.matchState.dictMatchState = &cdict->matchState; + + /* prep working match state so dict matches never have negative indices + * when they are translated to the working context's index space. 
*/ + if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { + cctx->blockState.matchState.window.nextSrc = + cctx->blockState.matchState.window.base + cdictEnd; + ZSTD_window_clear(&cctx->blockState.matchState.window); + } + /* loadedDictEnd is expressed within the referential of the active context */ + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; + } } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + + assert(!cdict->matchState.dedicatedDictSearch); + + DEBUGLOG(4, "copying dictionary into context"); + + { unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Copy only compression parameters related to tables. */ + params.cParams = *cdict_cParams; + params.cParams.windowLog = windowLog; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_leaveDirty, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); + assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); + assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); + } + + ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); + + /* copy tables */ + { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 
0 : ((size_t)1 << cdict_cParams->chainLog); + size_t const hSize = (size_t)1 << cdict_cParams->hashLog; + + ZSTD_memcpy(cctx->blockState.matchState.hashTable, + cdict->matchState.hashTable, + hSize * sizeof(U32)); + ZSTD_memcpy(cctx->blockState.matchState.chainTable, + cdict->matchState.chainTable, + chainSize * sizeof(U32)); + } + + /* Zero the hashTable3, since the cdict never fills it */ + { int const h3log = cctx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; + assert(cdict->matchState.hashLog3 == 0); + ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&cctx->workspace); + + /* copy dictionary offsets */ + { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +/* We have a choice between copying the dictionary context into the working + * context, or referencing the dictionary context from the working context + * in-place. We decide here which strategy to use. 
*/ +static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + + DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", + (unsigned)pledgedSrcSize); + + if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { + return ZSTD_resetCCtx_byAttachingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } else { + return ZSTD_resetCCtx_byCopyingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } +} + +/*! ZSTD_copyCCtx_internal() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * The "context", in this case, refers to the hash and chain tables, + * entropy tables, and dictionary references. + * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. + * @return : 0, or an error code */ +static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, + const ZSTD_CCtx* srcCCtx, + ZSTD_frameParameters fParams, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); + RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, + "Can't copy a ctx that's not in init stage."); + + ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + { ZSTD_CCtx_params params = dstCCtx->requestedParams; + /* Copy only compression parameters related to tables. 
*/ + params.cParams = srcCCtx->appliedParams.cParams; + params.fParams = fParams; + ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, + ZSTDcrp_leaveDirty, zbuff); + assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); + assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); + assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); + assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); + assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); + } + + ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); + + /* copy tables */ + { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); + size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; + int const h3log = srcCCtx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? 
((size_t)1 << h3log) : 0; + + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable, + srcCCtx->blockState.matchState.hashTable, + hSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable, + srcCCtx->blockState.matchState.chainTable, + chainSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3, + srcCCtx->blockState.matchState.hashTable3, + h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); + + /* copy dictionary offsets */ + { + const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; + ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + dstCCtx->dictID = srcCCtx->dictID; + dstCCtx->dictContentSize = srcCCtx->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); + + return 0; +} + +/*! ZSTD_copyCCtx() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * pledgedSrcSize==0 means "unknown". +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) +{ + ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy; + ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); + + return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, + fParams, pledgedSrcSize, + zbuff); +} + + +#define ZSTD_ROWSIZE 16 +/*! 
ZSTD_reduceTable() : + * reduce table indexes by `reducerValue`, or squash to zero. + * PreserveMark preserves "unsorted mark" for btlazy2 strategy. + * It must be set to a clear 0/1 value, to remove branch during inlining. + * Presume table size is a multiple of ZSTD_ROWSIZE + * to help auto-vectorization */ +FORCE_INLINE_TEMPLATE void +ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) +{ + int const nbRows = (int)size / ZSTD_ROWSIZE; + int cellNb = 0; + int rowNb; + assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ + assert(size < (1U<<31)); /* can be casted to int */ + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. + * + * This function however is intended to operate on those dirty tables and + * re-clean them. So when this function is used correctly, we can unpoison + * the memory it operated on. This introduces a blind spot though, since + * if we now try to operate on __actually__ poisoned memory, we will not + * detect that. 
*/ + __msan_unpoison(table, size * sizeof(U32)); +#endif + + for (rowNb=0 ; rowNb < nbRows ; rowNb++) { + int column; + for (column=0; columncParams.hashLog; + ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); + } + + if (params->cParams.strategy != ZSTD_fast) { + U32 const chainSize = (U32)1 << params->cParams.chainLog; + if (params->cParams.strategy == ZSTD_btlazy2) + ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); + else + ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); + } + + if (ms->hashLog3) { + U32 const h3Size = (U32)1 << ms->hashLog3; + ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); + } +} + + +/*-******************************************************* +* Block entropic compression +*********************************************************/ + +/* See doc/zstd_compression_format.md for detailed format description */ + +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) +{ + const seqDef* const sequences = seqStorePtr->sequencesStart; + BYTE* const llCodeTable = seqStorePtr->llCode; + BYTE* const ofCodeTable = seqStorePtr->ofCode; + BYTE* const mlCodeTable = seqStorePtr->mlCode; + U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + U32 u; + assert(nbSeq <= seqStorePtr->maxNbSeq); + for (u=0; ulongLengthID==1) + llCodeTable[seqStorePtr->longLengthPos] = MaxLL; + if (seqStorePtr->longLengthID==2) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; +} + +/* ZSTD_useTargetCBlockSize(): + * Returns if target compressed block size param is being used. + * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. + * Returns 1 if true, 0 otherwise. 
*/ +static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); + return (cctxParams->targetCBlockSize != 0); +} + +/* ZSTD_entropyCompressSequences_internal(): + * actually compresses both literals and sequences */ +MEM_STATIC size_t +ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + void* entropyWorkspace, size_t entropyWkspSize, + const int bmi2) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + unsigned* count = (unsigned*)entropyWorkspace; + FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + const seqDef* const sequences = seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + BYTE* seqHead; + BYTE* lastNCount = NULL; + + entropyWorkspace = count + (MaxSeq + 1); + entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); + + DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq); + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= HUF_WORKSPACE_SIZE); + + /* Compress literals */ + { const BYTE* const literals = seqStorePtr->litStart; + size_t const litSize = (size_t)(seqStorePtr->lit - literals); + size_t const cSize = ZSTD_compressLiterals( + 
&prevEntropy->huf, &nextEntropy->huf, + cctxParams->cParams.strategy, + ZSTD_disableLiteralsCompression(cctxParams), + op, dstCapacity, + literals, litSize, + entropyWorkspace, entropyWkspSize, + bmi2); + FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); + assert(cSize <= dstCapacity); + op += cSize; + } + + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, "Can't fit seq hdr in output buf!"); + if (nbSeq < 128) { + *op++ = (BYTE)nbSeq; + } else if (nbSeq < LONGNBSEQ) { + op[0] = (BYTE)((nbSeq>>8) + 0x80); + op[1] = (BYTE)nbSeq; + op+=2; + } else { + op[0]=0xFF; + MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); + op+=3; + } + assert(op <= oend); + if (nbSeq==0) { + /* Copy the old tables over as if we repeated them */ + ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); + return (size_t)(op - ostart); + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + assert(op <= oend); + + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, + count, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->fse.litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + count, max, llCodeTable, nbSeq, + LL_defaultNorm, 
LL_defaultNormLog, MaxLL, + prevEntropy->fse.litlengthCTable, + sizeof(prevEntropy->fse.litlengthCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); + if (LLtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + /* build CTable for Offsets */ + { unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, + count, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->fse.offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + count, max, ofCodeTable, nbSeq, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->fse.offcodeCTable, + sizeof(prevEntropy->fse.offcodeCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); + if (Offtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + /* build CTable for MatchLengths */ + { unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + 
nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, + count, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->fse.matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + count, max, mlCodeTable, nbSeq, + ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->fse.matchlengthCTable, + sizeof(prevEntropy->fse.matchlengthCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); + if (MLtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, (size_t)(oend - op), + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + assert(op <= oend); + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. 
+ */ + if (lastNCount && (op - lastNCount) < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(op - lastNCount == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } + } + + DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); + return (size_t)(op - ostart); +} + +MEM_STATIC size_t +ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + size_t srcSize, + void* entropyWorkspace, size_t entropyWkspSize, + int bmi2) +{ + size_t const cSize = ZSTD_entropyCompressSequences_internal( + seqStorePtr, prevEntropy, nextEntropy, cctxParams, + dst, dstCapacity, + entropyWorkspace, entropyWkspSize, bmi2); + if (cSize == 0) return 0; + /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. + * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. 
+ */ + if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) + return 0; /* block not compressed */ + FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed"); + + /* Check compressibility */ + { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); + if (cSize >= maxCSize) return 0; /* block not compressed */ + } + DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize); + return cSize; +} + +/* ZSTD_selectBlockCompressor() : + * Not static, but internal use only (used by long distance matcher) + * assumption : strat is a valid strategy */ +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) +{ + static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { + { ZSTD_compressBlock_fast /* default for 0 */, + ZSTD_compressBlock_fast, + ZSTD_compressBlock_doubleFast, + ZSTD_compressBlock_greedy, + ZSTD_compressBlock_lazy, + ZSTD_compressBlock_lazy2, + ZSTD_compressBlock_btlazy2, + ZSTD_compressBlock_btopt, + ZSTD_compressBlock_btultra, + ZSTD_compressBlock_btultra2 }, + { ZSTD_compressBlock_fast_extDict /* default for 0 */, + ZSTD_compressBlock_fast_extDict, + ZSTD_compressBlock_doubleFast_extDict, + ZSTD_compressBlock_greedy_extDict, + ZSTD_compressBlock_lazy_extDict, + ZSTD_compressBlock_lazy2_extDict, + ZSTD_compressBlock_btlazy2_extDict, + ZSTD_compressBlock_btopt_extDict, + ZSTD_compressBlock_btultra_extDict, + ZSTD_compressBlock_btultra_extDict }, + { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, + ZSTD_compressBlock_fast_dictMatchState, + ZSTD_compressBlock_doubleFast_dictMatchState, + ZSTD_compressBlock_greedy_dictMatchState, + ZSTD_compressBlock_lazy_dictMatchState, + ZSTD_compressBlock_lazy2_dictMatchState, + ZSTD_compressBlock_btlazy2_dictMatchState, + ZSTD_compressBlock_btopt_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState }, + { NULL /* default for 0 */, 
+ NULL, + NULL, + ZSTD_compressBlock_greedy_dedicatedDictSearch, + ZSTD_compressBlock_lazy_dedicatedDictSearch, + ZSTD_compressBlock_lazy2_dedicatedDictSearch, + NULL, + NULL, + NULL, + NULL } + }; + ZSTD_blockCompressor selectedCompressor; + ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); + + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + assert(selectedCompressor != NULL); + return selectedCompressor; +} + +static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, + const BYTE* anchor, size_t lastLLSize) +{ + ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; +} + +void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthID = 0; +} + +typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; + +static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) +{ + ZSTD_matchState_t* const ms = &zc->blockState.matchState; + DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + /* Assert that we have correctly flushed the ctx params into the ms's copy */ + ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { + ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); + } else { + ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); + } + return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ + } + ZSTD_resetSeqStore(&(zc->seqStore)); + /* required for optimal parser to read stats from dictionary */ + ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; + /* tell the optimal parser how we expect to compress literals */ + ms->opt.literalCompressionMode = 
zc->appliedParams.literalCompressionMode; + /* a gap between an attached dict and the current window is not safe, + * they must remain adjacent, + * and when that stops being the case, the dict must be unset */ + assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); + + /* limited update after a very long match */ + { const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const U32 curr = (U32)(istart-base); + if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ + if (curr > ms->nextToUpdate + 384) + ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384)); + } + + /* select and store sequences */ + { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); + size_t lastLLSize; + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } + if (zc->externSeqStore.pos < zc->externSeqStore.size) { + assert(!zc->appliedParams.ldmParams.enableLdm); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&zc->externSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + src, srcSize); + assert(zc->externSeqStore.pos <= zc->externSeqStore.size); + } else if (zc->appliedParams.ldmParams.enableLdm) { + rawSeqStore_t ldmSeqStore = kNullRawSeqStore; + + ldmSeqStore.seq = zc->ldmSequences; + ldmSeqStore.capacity = zc->maxNbLdmSequences; + /* Updates ldmSeqStore.size */ + FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, + &zc->appliedParams.ldmParams, + src, srcSize), ""); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&ldmSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + src, srcSize); + assert(ldmSeqStore.pos == ldmSeqStore.size); + } else { /* not long range mode */ + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); + 
ms->ldmSeqStore = NULL; + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } + { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); + } } + return ZSTDbss_compress; +} + +static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) +{ + const seqStore_t* seqStore = ZSTD_getSeqStore(zc); + const seqDef* seqStoreSeqs = seqStore->sequencesStart; + size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; + size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); + size_t literalsRead = 0; + size_t lastLLSize; + + ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; + size_t i; + repcodes_t updatedRepcodes; + + assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); + /* Ensure we have enough space for last literals "sequence" */ + assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); + ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (i = 0; i < seqStoreSeqSize; ++i) { + U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM; + outSeqs[i].litLength = seqStoreSeqs[i].litLength; + outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH; + outSeqs[i].rep = 0; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthID == 1) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthID == 2) { + outSeqs[i].matchLength += 0x10000; + } + } + + if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) { + /* Derive the correct offset corresponding to a repcode */ + outSeqs[i].rep = seqStoreSeqs[i].offset; + if (outSeqs[i].litLength != 0) { + rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; + } else { + if (outSeqs[i].rep == 3) { + rawOffset = updatedRepcodes.rep[0] - 1; + } else { + rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; + } + } + } + outSeqs[i].offset = rawOffset; + /* seqStoreSeqs[i].offset == offCode+1, and 
ZSTD_updateRep() expects offCode + so we provide seqStoreSeqs[i].offset - 1 */ + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, + seqStoreSeqs[i].offset - 1, + seqStoreSeqs[i].litLength == 0); + literalsRead += outSeqs[i].litLength; + } + /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. + * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker + * for the block boundary, according to the API. + */ + assert(seqStoreLiteralsSize >= literalsRead); + lastLLSize = seqStoreLiteralsSize - literalsRead; + outSeqs[i].litLength = (U32)lastLLSize; + outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0; + seqStoreSeqSize++; + zc->seqCollector.seqIndex += seqStoreSeqSize; +} + +size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize) +{ + const size_t dstCapacity = ZSTD_compressBound(srcSize); + void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); + SeqCollector seqCollector; + + RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); + + seqCollector.collectSequences = 1; + seqCollector.seqStart = outSeqs; + seqCollector.seqIndex = 0; + seqCollector.maxSequences = outSeqsSize; + zc->seqCollector = seqCollector; + + ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); + ZSTD_customFree(dst, ZSTD_defaultCMem); + return zc->seqCollector.seqIndex; +} + +size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) { + size_t in = 0; + size_t out = 0; + for (; in < seqsSize; ++in) { + if (sequences[in].offset == 0 && sequences[in].matchLength == 0) { + if (in != seqsSize - 1) { + sequences[in+1].litLength += sequences[in].litLength; + } + } else { + sequences[out] = sequences[in]; + ++out; + } + } + return out; +} + +/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. 
*/ +static int ZSTD_isRLE(const BYTE* src, size_t length) { + const BYTE* ip = src; + const BYTE value = ip[0]; + const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL); + const size_t unrollSize = sizeof(size_t) * 4; + const size_t unrollMask = unrollSize - 1; + const size_t prefixLength = length & unrollMask; + size_t i; + size_t u; + if (length == 1) return 1; + /* Check if prefix is RLE first before using unrolled loop */ + if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) { + return 0; + } + for (i = prefixLength; i != length; i += unrollSize) { + for (u = 0; u < unrollSize; u += sizeof(size_t)) { + if (MEM_readST(ip + i + u) != valueST) { + return 0; + } + } + } + return 1; +} + +/* Returns true if the given block may be RLE. + * This is just a heuristic based on the compressibility. + * It may return both false positives and false negatives. + */ +static int ZSTD_maybeRLE(seqStore_t const* seqStore) +{ + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); + size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); + + return nbSeqs < 4 && nbLits < 10; +} + +static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) +{ + ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; + zc->blockState.prevCBlock = zc->blockState.nextCBlock; + zc->blockState.nextCBlock = tmp; +} + +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 frame) +{ + /* This the upper bound for the length of an rle block. + * This isn't the actual upper bound. Finding the real threshold + * needs further investigation. 
+ */ + const U32 rleMaxLength = 25; + size_t cSize; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } + } + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + ZSTD_confirmRepcodesAndEntropyTables(zc); + return 0; + } + + /* encode sequences and literals */ + cSize = ZSTD_entropyCompressSequences(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + dst, dstCapacity, + srcSize, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + return 0; + } + + + if (frame && + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + cSize < rleMaxLength && + ZSTD_isRLE(ip, srcSize)) + { + cSize = 1; + op[0] = ip[0]; + } + +out: + if (!ZSTD_isError(cSize) && cSize > 1) { + ZSTD_confirmRepcodesAndEntropyTables(zc); + } + /* We check that dictionaries have offset codes available for the first + * block. After the first block, the offcode table might not have large + * enough codes to represent the offsets in the data. 
+ */ + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const size_t bss, U32 lastBlock) +{ + DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); + if (bss == ZSTDbss_compress) { + if (/* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + ZSTD_maybeRLE(&zc->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) + { + return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); + } + /* Attempt superblock compression. + * + * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the + * standard ZSTD_compressBound(). This is a problem, because even if we have + * space now, taking an extra byte now could cause us to run out of space later + * and violate ZSTD_compressBound(). + * + * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. + * + * In order to respect ZSTD_compressBound() we must attempt to emit a raw + * uncompressed block in these cases: + * * cSize == 0: Return code for an uncompressed block. + * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). + * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of + * output space. + * * cSize >= blockBound(srcSize): We have expanded the block too much so + * emit an uncompressed block. 
+ */ + { + size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); + if (cSize != ERROR(dstSize_tooSmall)) { + size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); + if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { + ZSTD_confirmRepcodesAndEntropyTables(zc); + return cSize; + } + } + } + } + + DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); + /* Superblock compression failed, attempt to emit a single no compress block. + * The decoder will be able to stream this block since it is uncompressed. + */ + return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); +} + +static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock) +{ + size_t cSize = 0; + const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + + cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + void const* ip, + void const* iend) +{ + if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { + U32 const maxDist = (U32)1 << params->cParams.windowLog; + U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, 
params->cParams.strategy); + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); + ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + ZSTD_cwksp_mark_tables_dirty(ws); + ZSTD_reduceIndex(ms, params, correction); + ZSTD_cwksp_mark_tables_clean(ws); + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= correction; + /* invalidate dictionaries on overflow correction */ + ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; + } +} + +/*! ZSTD_compress_frameChunk() : +* Compress a chunk of data into one or multiple blocks. +* All blocks will be terminated, all input will be consumed. +* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. +* Frame is supposed already started (header already produced) +* @return : compressed size, or an error code +*/ +static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastFrameChunk) +{ + size_t blockSize = cctx->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + + assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); + + DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); + if (cctx->appliedParams.fParams.checksumFlag && srcSize) + XXH64_update(&cctx->xxhState, src, srcSize); + + while (remaining) { + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); + + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, + dstSize_tooSmall, + "not enough space to store compressed block"); + if (remaining < blockSize) blockSize = remaining; + + ZSTD_overflowCorrectIfNeeded( + ms, 
&cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); + ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + + /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ + if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; + + { size_t cSize; + if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); + assert(cSize > 0); + assert(cSize <= blockSize + ZSTD_blockHeaderSize); + } else { + cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize, 1 /* frame */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); + + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + } else { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + cSize += ZSTD_blockHeaderSize; + } + } + + + ip += blockSize; + assert(remaining >= blockSize); + remaining -= blockSize; + op += cSize; + assert(dstCapacity >= cSize); + dstCapacity -= cSize; + cctx->isFirstBlock = 0; + DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + (unsigned)cSize); + } } + + if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; + return (size_t)(op-ostart); +} + + +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) +{ BYTE* const op = (BYTE*)dst; + U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 
0 : dictIDSizeCodeLength; /* 0-3 */ + U32 const checksumFlag = params->fParams.checksumFlag>0; + U32 const windowSize = (U32)1 << params->cParams.windowLog; + U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); + BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + U32 const fcsCode = params->fParams.contentSizeFlag ? + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ + BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); + size_t pos=0; + + assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); + RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, + "dst buf is too small to fit worst-case frame header size."); + DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", + !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); + if (params->format == ZSTD_f_zstd1) { + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); + pos = 4; + } + op[pos++] = frameHeaderDescriptionByte; + if (!singleSegment) op[pos++] = windowLogByte; + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : op[pos] = (BYTE)(dictID); pos++; break; + case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; + case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; + } + switch(fcsCode) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; + case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; + case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; + case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; + } + return pos; +} + +/* ZSTD_writeSkippableFrame_advanced() : + * Writes out a skippable frame with the specified magic number variant (16 are 
supported), + * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data. + * + * Returns the total number of bytes written, or a ZSTD error code. + */ +size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant) { + BYTE* op = (BYTE*)dst; + RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */, + dstSize_tooSmall, "Not enough room for skippable frame"); + RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame"); + RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported"); + + MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant)); + MEM_writeLE32(op+4, (U32)srcSize); + ZSTD_memcpy(op+8, src, srcSize); + return srcSize + ZSTD_SKIPPABLEHEADERSIZE; +} + +/* ZSTD_writeLastEmptyBlock() : + * output an empty Block with end-of-frame mark to complete a frame + * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) + * or an error code if `dstCapacity` is too small (stage != ZSTDcs_init, stage_wrong, + "wrong cctx stage"); + RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, + parameter_unsupported, + "incompatible with ldm"); + cctx->externSeqStore.seq = seq; + cctx->externSeqStore.size = nbSeq; + cctx->externSeqStore.capacity = nbSeq; + cctx->externSeqStore.pos = 0; + cctx->externSeqStore.posInSequence = 0; + return 0; +} + + +static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 frame, U32 lastFrameChunk) +{ + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + size_t fhSize = 0; + + DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", + cctx->stage, (unsigned)srcSize); + RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, + "missing init 
(ZSTD_compressBegin)"); + + if (frame && (cctx->stage==ZSTDcs_init)) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, + cctx->pledgedSrcSizePlusOne-1, cctx->dictID); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); + assert(fhSize <= dstCapacity); + dstCapacity -= fhSize; + dst = (char*)dst + fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (!srcSize) return fhSize; /* do not generate an empty block if no input */ + + if (!ZSTD_window_update(&ms->window, src, srcSize)) { + ms->nextToUpdate = ms->window.dictLimit; + } + if (cctx->appliedParams.ldmParams.enableLdm) { + ZSTD_window_update(&cctx->ldmState.window, src, srcSize); + } + + if (!frame) { + /* overflow check and correction for block mode */ + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, + src, (BYTE const*)src + srcSize); + } + + DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); + { size_t const cSize = frame ? + ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : + ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); + FORWARD_IF_ERROR(cSize, "%s", frame ? 
"ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); + cctx->consumedSrcSize += srcSize; + cctx->producedCSize += (cSize + fhSize); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + RETURN_ERROR_IF( + cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize >= %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + return cSize + fhSize; + } +} + +size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); +} + + +size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) +{ + ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; + assert(!ZSTD_checkCParams(cParams)); + return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); +} + +size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); + { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } + + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); +} + +/*! 
ZSTD_loadDictionaryContent() : + * @return : 0, or an error code + */ +static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* src, size_t srcSize, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + ZSTD_window_update(&ms->window, src, srcSize); + ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); + + if (params->ldmParams.enableLdm && ls != NULL) { + ZSTD_window_update(&ls->window, src, srcSize); + ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base); + } + + /* Assert that we the ms params match the params we're being given */ + ZSTD_assertEqualCParams(params->cParams, ms->cParams); + + if (srcSize <= HASH_READ_SIZE) return 0; + + while (iend - ip > HASH_READ_SIZE) { + size_t const remaining = (size_t)(iend - ip); + size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); + const BYTE* const ichunk = ip + chunk; + + ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); + + if (params->ldmParams.enableLdm && ls != NULL) + ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams); + + switch(params->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, ichunk, dtlm); + break; + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) { + assert(chunk == remaining); /* must load everything in one go */ + ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); + } else if (chunk >= HASH_READ_SIZE) { + ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); + } + break; + + case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + if (chunk >= HASH_READ_SIZE) + ZSTD_updateTree(ms, 
ichunk-HASH_READ_SIZE, ichunk); + break; + + default: + assert(0); /* not possible : not a valid strategy id */ + } + + ip = ichunk; + } + + ms->nextToUpdate = (U32)(iend - ms->window.base); + return 0; +} + + +/* Dictionaries that assign zero probability to symbols that show up causes problems + * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check + * and only dictionaries with 100% valid symbols can be assumed valid. + */ +static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) +{ + U32 s; + if (dictMaxSymbolValue < maxSymbolValue) { + return FSE_repeat_check; + } + for (s = 0; s <= maxSymbolValue; ++s) { + if (normalizedCounter[s] == 0) { + return FSE_repeat_check; + } + } + return FSE_repeat_valid; +} + +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + const void* const dict, size_t dictSize) +{ + short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff; + const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ + const BYTE* const dictEnd = dictPtr + dictSize; + dictPtr += 8; + bs->entropy.huf.repeatMode = HUF_repeat_check; + + { unsigned maxSymbolValue = 255; + unsigned hasZeroWeights = 1; + size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, + dictEnd-dictPtr, &hasZeroWeights); + + /* We only set the loaded table as valid if it contains all non-zero + * weights. 
Otherwise, we set it to check */ + if (!hasZeroWeights) + bs->entropy.huf.repeatMode = HUF_repeat_valid; + + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); + dictPtr += hufHeaderSize; + } + + { unsigned offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + /* fill all offset symbols to avoid garbage at end of table */ + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.offcodeCTable, + offcodeNCount, MaxOff, offcodeLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.matchlengthCTable, + matchlengthNCount, matchlengthMaxValue, matchlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), 
dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.litlengthCTable, + litlengthNCount, litlengthMaxValue, litlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL); + dictPtr += litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + bs->rep[0] = MEM_readLE32(dictPtr+0); + bs->rep[1] = MEM_readLE32(dictPtr+4); + bs->rep[2] = MEM_readLE32(dictPtr+8); + dictPtr += 12; + + { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + U32 offcodeMax = MaxOff; + if (dictContentSize <= ((U32)-1) - 128 KB) { + U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ + offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ + } + /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */ + bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)); + + /* All repCodes must be <= dictContentSize and != 0 */ + { U32 u; + for (u=0; u<3; u++) { + RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); + RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); + } } } + + return dictPtr - (const BYTE*)dict; +} + +/* Dictionary format : + * See : + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format + */ +/*! 
ZSTD_loadZstdDictionary() : + * @return : dictID, or an error code + * assumptions : magic number supposed already checked + * dictSize supposed >= 8 + */ +static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* dict, size_t dictSize, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + size_t dictID; + size_t eSize; + + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + + dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); + eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize); + FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed"); + dictPtr += eSize; + + { + size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( + ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); + } + return dictID; +} + +/** ZSTD_compress_insertDictionary() : +* @return : dictID, or an error code */ +static size_t +ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + const ZSTD_CCtx_params* params, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); + if ((dict==NULL) || (dictSize<8)) { + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); + return 0; + } + + ZSTD_reset_compressedBlockState(bs); + + /* dict restricted modes */ + if (dictContentType == ZSTD_dct_rawContent) + return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); + + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_auto) { + DEBUGLOG(4, "raw content 
dictionary detected"); + return ZSTD_loadDictionaryContent( + ms, ls, ws, params, dict, dictSize, dtlm); + } + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); + assert(0); /* impossible */ + } + + /* dict as full zstd dictionary */ + return ZSTD_loadZstdDictionary( + bs, ms, ws, params, dict, dictSize, dtlm, workspace); +} + +#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) +#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL) + +/*! ZSTD_compressBegin_internal() : + * @return : 0, or an error code */ +static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ +#if ZSTD_TRACE + cctx->traceCtx = ZSTD_trace_compress_begin(cctx); +#endif + DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if ( (cdict) + && (cdict->dictContentSize > 0) + && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0) + && (params->attachDictPref != ZSTD_dictForceLoad) ) { + return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); + } + + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, + ZSTDcrp_makeClean, zbuff) , ""); + { size_t const dictID = cdict ? 
+ ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, + cdict->dictContentSize, cdict->dictContentType, dtlm, + cctx->entropyWorkspace) + : ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, + dictContentType, dtlm, cctx->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= UINT_MAX); + cctx->dictID = (U32)dictID; + cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize; + } + return 0; +} + +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); + /* compression parameters verification and optimization */ + FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); + return ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictContentType, dtlm, + cdict, + params, pledgedSrcSize, + ZSTDb_not_buffered); +} + +/*! 
ZSTD_compressBegin_advanced() : +* @return : 0, or an error code */ +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, ZSTD_NO_CLEVEL); + return ZSTD_compressBegin_advanced_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, + NULL /*cdict*/, + &cctxParams, pledgedSrcSize); +} + +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_CCtx_params cctxParams; + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel); + } + DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); +} + +size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) +{ + return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); +} + + +/*! ZSTD_writeEpilogue() : +* Ends a frame. 
+* @return : nb of bytes written into dst (or an error code) */ +static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + size_t fhSize = 0; + + DEBUGLOG(4, "ZSTD_writeEpilogue"); + RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); + + /* special case : empty frame */ + if (cctx->stage == ZSTDcs_init) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); + dstCapacity -= fhSize; + op += fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (cctx->stage != ZSTDcs_ending) { + /* write one last empty block, make it the "last" block */ + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + } + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); + MEM_writeLE32(op, checksum); + op += 4; + } + + cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ + return op-ostart; +} + +void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize) +{ +#if ZSTD_TRACE + if (cctx->traceCtx) { + int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0; + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + trace.dictionaryID = cctx->dictID; + trace.dictionarySize = cctx->dictContentSize; + trace.uncompressedSize = cctx->consumedSrcSize; + trace.compressedSize = cctx->producedCSize + extraCSize; + trace.params = &cctx->appliedParams; + trace.cctx 
= cctx; + ZSTD_trace_compress_end(cctx->traceCtx, &trace); + } + cctx->traceCtx = 0; +#else + (void)cctx; + (void)extraCSize; +#endif +} + +size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t endResult; + size_t const cSize = ZSTD_compressContinue_internal(cctx, + dst, dstCapacity, src, srcSize, + 1 /* frame mode */, 1 /* last chunk */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed"); + endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); + FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed"); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + DEBUGLOG(4, "end of frame : controlling src size"); + RETURN_ERROR_IF( + cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize = %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + ZSTD_CCtx_trace(cctx, endResult); + return cSize + endResult; +} + +size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + ZSTD_CCtx_params cctxParams; + DEBUGLOG(4, "ZSTD_compress_advanced"); + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, ZSTD_NO_CLEVEL); + return ZSTD_compress_advanced_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + &cctxParams); +} + +/* Internal */ +size_t ZSTD_compress_advanced_internal( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", 
(unsigned)srcSize); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + params, srcSize, ZSTDb_not_buffered) , ""); + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel) +{ + ZSTD_CCtx_params cctxParams; + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); + assert(params.fParams.contentSizeFlag == 1); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel); + } + DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); +} + +size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize); + assert(cctx != NULL); + return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); +} + +size_t ZSTD_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + size_t result; +#if ZSTD_COMPRESS_HEAPMODE + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed"); + result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtx(cctx); +#else + ZSTD_CCtx ctxBody; + ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem); + result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ +#endif + return result; +} + + +/* ===== Dictionary API ===== */ + 
+/*! ZSTD_estimateCDictSize_advanced() : + * Estimate amount of memory that will be needed to create a dictionary with following arguments */ +size_t ZSTD_estimateCDictSize_advanced( + size_t dictSize, ZSTD_compressionParameters cParams, + ZSTD_dictLoadMethod_e dictLoadMethod) +{ + DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); + return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); +} + +size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); +} + +size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support sizeof on NULL */ + DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); + /* cdict may be in the workspace */ + return (cdict->workspace.workspace == cdict ? 
0 : sizeof(*cdict)) + + ZSTD_cwksp_sizeof(&cdict->workspace); +} + +static size_t ZSTD_initCDict_internal( + ZSTD_CDict* cdict, + const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_CCtx_params params) +{ + DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); + assert(!ZSTD_checkCParams(params.cParams)); + cdict->matchState.cParams = params.cParams; + cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; + if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) { + cdict->matchState.dedicatedDictSearch = 0; + } + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { + cdict->dictContent = dictBuffer; + } else { + void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); + RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!"); + cdict->dictContent = internalBuffer; + ZSTD_memcpy(internalBuffer, dictBuffer, dictSize); + } + cdict->dictContentSize = dictSize; + cdict->dictContentType = dictContentType; + + cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); + + + /* Reset the state to no dictionary */ + ZSTD_reset_compressedBlockState(&cdict->cBlockState); + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &cdict->matchState, + &cdict->workspace, + ¶ms.cParams, + ZSTDcrp_makeClean, + ZSTDirp_reset, + ZSTD_resetTarget_CDict), ""); + /* (Maybe) load the dictionary + * Skips loading the dictionary if it is < 8 bytes. 
+ */ + { params.compressionLevel = ZSTD_CLEVEL_DEFAULT; + params.fParams.contentSizeFlag = 1; + { size_t const dictID = ZSTD_compress_insertDictionary( + &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, + ¶ms, cdict->dictContent, cdict->dictContentSize, + dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= (size_t)(U32)-1); + cdict->dictID = (U32)dictID; + } + } + + return 0; +} + +static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_compressionParameters cParams, ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { size_t const workspaceSize = + ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); + void* const workspace = ZSTD_customMalloc(workspaceSize, customMem); + ZSTD_cwksp ws; + ZSTD_CDict* cdict; + + if (!workspace) { + ZSTD_customFree(workspace, customMem); + return NULL; + } + + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc); + + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + assert(cdict != NULL); + ZSTD_cwksp_move(&cdict->workspace, &ws); + cdict->customMem = customMem; + cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */ + + return cdict; + } +} + +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_memset(&cctxParams, 0, sizeof(cctxParams)); + ZSTD_CCtxParams_init(&cctxParams, 0); + cctxParams.cParams = cParams; + cctxParams.customMem = 
customMem; + return ZSTD_createCDict_advanced2( + dictBuffer, dictSize, + dictLoadMethod, dictContentType, + &cctxParams, customMem); +} + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* originalCctxParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams = *originalCctxParams; + ZSTD_compressionParameters cParams; + ZSTD_CDict* cdict; + + DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + if (cctxParams.enableDedicatedDictSearch) { + cParams = ZSTD_dedicatedDictSearch_getCParams( + cctxParams.compressionLevel, dictSize); + ZSTD_overrideCParams(&cParams, &cctxParams.cParams); + } else { + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) { + /* Fall back to non-DDSS params */ + cctxParams.enableDedicatedDictSearch = 0; + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + cctxParams.cParams = cParams; + + cdict = ZSTD_createCDict_advanced_internal(dictSize, + dictLoadMethod, cctxParams.cParams, + customMem); + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + cctxParams) )) { + ZSTD_freeCDict(cdict); + return NULL; + } + + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 
0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} + +size_t ZSTD_freeCDict(ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = cdict->customMem; + int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); + ZSTD_cwksp_free(&cdict->workspace, cMem); + if (!cdictInWorkspace) { + ZSTD_customFree(cdict, cMem); + } + return 0; + } +} + +/*! ZSTD_initStaticCDict_advanced() : + * Generate a digested dictionary in provided memory area. + * workspace: The memory area to emplace the dictionary into. + * Provided pointer must 8-bytes aligned. + * It must outlive dictionary usage. + * workspaceSize: Use ZSTD_estimateCDictSize() + * to determine how large workspace must be. + * cParams : use ZSTD_getCParams() to transform a compression level + * into its relevants cParams. + * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) + * Note : there is no corresponding "free" function. + * Since workspace was allocated externally, it must be freed externally. 
+ */ +const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams) +{ + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + matchStateSize; + ZSTD_CDict* cdict; + ZSTD_CCtx_params params; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + + { + ZSTD_cwksp ws; + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + if (cdict == NULL) return NULL; + ZSTD_cwksp_move(&cdict->workspace, &ws); + } + + DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", + (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); + if (workspaceSize < neededSize) return NULL; + + ZSTD_CCtxParams_init(¶ms, 0); + params.cParams = cParams; + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + params) )) + return NULL; + + return cdict; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) +{ + assert(cdict != NULL); + return cdict->matchState.cParams; +} + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. 
*/ +unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; + return cdict->dictID; +} + + +/* ZSTD_compressBegin_usingCDict_advanced() : + * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict_advanced( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); + RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); + /* Initialize the cctxParams from the cdict */ + { + ZSTD_parameters params; + params.fParams = fParams; + params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0 ) ? + ZSTD_getCParamsFromCDict(cdict) + : ZSTD_getCParams(cdict->compressionLevel, + pledgedSrcSize, + cdict->dictContentSize); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, cdict->compressionLevel); + } + /* Increase window log to fit the entire dictionary and source if the + * source size is known. Limit the increase to 19, which is the + * window log for compression level 1 with the largest source size. + */ + if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); + U32 const limitedSrcLog = limitedSrcSize > 1 ? 
ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; + cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog); + } + return ZSTD_compressBegin_internal(cctx, + NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, + cdict, + &cctxParams, pledgedSrcSize, + ZSTDb_not_buffered); +} + +/* ZSTD_compressBegin_usingCDict() : + * pledgedSrcSize=0 means "unknown" + * if pledgedSrcSize>0, it will enable contentSizeFlag */ +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); + return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); +} + +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. 
+ * Note that compression parameters are decided at CDict creation time + * while frame parameters are hardcoded */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + + + +/* ****************************************************************** +* Streaming +********************************************************************/ + +ZSTD_CStream* ZSTD_createCStream(void) +{ + DEBUGLOG(3, "ZSTD_createCStream"); + return ZSTD_createCStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticCCtx(workspace, workspaceSize); +} + +ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +{ /* CStream and CCtx are now same object */ + return ZSTD_createCCtx_advanced(customMem); +} + +size_t ZSTD_freeCStream(ZSTD_CStream* zcs) +{ + return ZSTD_freeCCtx(zcs); /* same object */ +} + + + +/*====== Initialization ======*/ + +size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_CStreamOutSize(void) +{ + return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; +} + +static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) +{ + if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) + return ZSTD_cpm_attachDict; + else + return ZSTD_cpm_noAttachDict; +} + +/* ZSTD_resetCStream(): + * pledgedSrcSize == 0 means "unknown" */ +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. 
+ * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +/*! ZSTD_initCStream_internal() : + * Note : for lib/compress only. Used by zstdmt_compress.c. + * Assumption 1 : params are valid + * Assumption 2 : either dict, or cdict, is defined, not both */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_internal"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + zcs->requestedParams = *params; + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if (dict) { + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + } else { + /* Dictionary is cleared if !cdict */ + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + } + return 0; +} + +/* ZSTD_initCStream_usingCDict_advanced() : + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + zcs->requestedParams.fParams = fParams; + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} + +/* note : cdict must outlive compression session 
*/ +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} + + +/* ZSTD_initCStream_advanced() : + * pledgedSrcSize must be exact. + * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pss) +{ + /* for compatibility with older programs relying on this behavior. + * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. + * This line will be removed in the future. + */ + U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, ¶ms); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} + +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} + +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. 
+ * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_srcSize"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + return 0; +} + +/*====== Compression ======*/ + +static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) +{ + size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; + if (hintInSize==0) hintInSize = cctx->blockSize; + return hintInSize; +} + +/** ZSTD_compressStream_generic(): + * internal function for all *compressStream*() variants + * non-static, because can be called from zstdmt_compress.c + * @return : hint size for next input */ +static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode) +{ + const char* const istart = (const char*)input->src; + const char* const iend = input->size != 0 ? istart + input->size : istart; + const char* ip = input->pos != 0 ? istart + input->pos : istart; + char* const ostart = (char*)output->dst; + char* const oend = output->size != 0 ? ostart + output->size : ostart; + char* op = output->pos != 0 ? 
ostart + output->pos : ostart; + U32 someMoreWork = 1; + + /* check expectations */ + DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + assert(zcs->inBuff != NULL); + assert(zcs->inBuffSize > 0); + } + if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) { + assert(zcs->outBuff != NULL); + assert(zcs->outBuffSize > 0); + } + assert(output->pos <= output->size); + assert(input->pos <= input->size); + assert((U32)flushMode <= (U32)ZSTD_e_end); + + while (someMoreWork) { + switch(zcs->streamStage) + { + case zcss_init: + RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); + + case zcss_load: + if ( (flushMode == ZSTD_e_end) + && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */ + || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ + && (zcs->inBuffPos == 0) ) { + /* shortcut to compression pass directly into output buffer */ + size_t const cSize = ZSTD_compressEnd(zcs, + op, oend-op, ip, iend-ip); + DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); + ip = iend; + op += cSize; + zcs->frameEnded = 1; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + someMoreWork = 0; break; + } + /* complete loading into inBuffer in buffered mode */ + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; + size_t const loaded = ZSTD_limitCopy( + zcs->inBuff + zcs->inBuffPos, toLoad, + ip, iend-ip); + zcs->inBuffPos += loaded; + if (loaded != 0) + ip += loaded; + if ( (flushMode == ZSTD_e_continue) + && (zcs->inBuffPos < zcs->inBuffTarget) ) { + /* not enough input to fill full block : stop here */ + someMoreWork = 0; break; + } + if ( (flushMode == ZSTD_e_flush) + && (zcs->inBuffPos == zcs->inToCompress) ) { + /* empty */ + someMoreWork = 0; break; + } + } + /* compress current 
block (note : this stage cannot be stopped in the middle) */ + DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); + { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered); + void* cDst; + size_t cSize; + size_t oSize = oend-op; + size_t const iSize = inputBuffered + ? zcs->inBuffPos - zcs->inToCompress + : MIN((size_t)(iend - ip), zcs->blockSize); + if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) + cDst = op; /* compress into output buffer, to skip flush stage */ + else + cDst = zcs->outBuff, oSize = zcs->outBuffSize; + if (inputBuffered) { + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize); + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + /* prepare next block */ + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + if (zcs->inBuffTarget > zcs->inBuffSize) + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; + DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", + (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); + if (!lastBlock) + assert(zcs->inBuffTarget <= zcs->inBuffSize); + zcs->inToCompress = zcs->inBuffPos; + } else { + unsigned const lastBlock = (ip + iSize == iend); + assert(flushMode == ZSTD_e_end /* Already validated */); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize); + /* Consume the input prior to error checking to mirror buffered mode. */ + if (iSize > 0) + ip += iSize; + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? 
"ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + if (lastBlock) + assert(ip == iend); + } + if (cDst == op) { /* no need to flush */ + op += cSize; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed directly in outBuffer"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + } + break; + } + zcs->outBuffContentSize = cSize; + zcs->outBuffFlushedSize = 0; + zcs->streamStage = zcss_flush; /* pass-through to flush stage */ + } + /* fall-through */ + case zcss_flush: + DEBUGLOG(5, "flush stage"); + assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered); + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), + zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", + (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); + if (flushed) + op += flushed; + zcs->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { + /* flush not fully completed, presumably because dst is too small */ + assert(op==oend); + someMoreWork = 0; + break; + } + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed on flush"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + break; + } + zcs->streamStage = zcss_load; + break; + } + + default: /* impossible */ + assert(0); + } + } + + input->pos = ip - istart; + output->pos = op - ostart; + if (zcs->frameEnded) return 0; + return ZSTD_nextInputSizeHint(zcs); +} + +static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers >= 1) { + assert(cctx->mtctx != NULL); + return ZSTDMT_nextInputSizeHint(cctx->mtctx); + } +#endif + return ZSTD_nextInputSizeHint(cctx); + +} + +size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + FORWARD_IF_ERROR( 
ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); + return ZSTD_nextInputSizeHint_MTorST(zcs); +} + +/* After a compression call set the expected input/output buffer. + * This is validated at the start of the next compression call. + */ +static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + cctx->expectedInBuffer = *input; + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + cctx->expectedOutBufferSize = output->size - output->pos; + } +} + +/* Validate that the input/output buffers match the expectations set by + * ZSTD_setBufferExpectations. + */ +static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx, + ZSTD_outBuffer const* output, + ZSTD_inBuffer const* input, + ZSTD_EndDirective endOp) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + ZSTD_inBuffer const expect = cctx->expectedInBuffer; + if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size) + RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!"); + if (endOp != ZSTD_e_end) + RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!"); + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + size_t const outBufferSize = output->size - output->pos; + if (cctx->expectedOutBufferSize != outBufferSize) + RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!"); + } + return 0; +} + +static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, + ZSTD_EndDirective endOp, + size_t inSize) { + ZSTD_CCtx_params params = cctx->requestedParams; + ZSTD_prefixDict const prefixDict = cctx->prefixDict; + FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. 
*/ + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ + assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ + if (cctx->cdict) + params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */ + DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ + { + size_t const dictSize = prefixDict.dict + ? prefixDict.dictSize + : (cctx->cdict ? cctx->cdict->dictContentSize : 0); + ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1); + params.cParams = ZSTD_getCParamsFromCCtxParams( + ¶ms, cctx->pledgedSrcSizePlusOne-1, + dictSize, mode); + } + + if (ZSTD_CParams_shouldEnableLdm(¶ms.cParams)) { + /* Enable LDM by default for optimal parser and window size >= 128MB */ + DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)"); + params.ldmParams.enableLdm = 1; + } + +#ifdef ZSTD_MULTITHREAD + if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { + params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ + } + if (params.nbWorkers > 0) { +#if ZSTD_TRACE + cctx->traceCtx = ZSTD_trace_compress_begin(cctx); +#endif + /* mt context creation */ + if (cctx->mtctx == NULL) { + DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", + params.nbWorkers); + cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool); + RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!"); + } + /* mt compression */ + DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); + FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( + cctx->mtctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, + cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); + cctx->dictID = cctx->cdict 
? cctx->cdict->dictID : 0; + cctx->dictContentSize = cctx->cdict ? cctx->cdict->dictContentSize : prefixDict.dictSize; + cctx->consumedSrcSize = 0; + cctx->producedCSize = 0; + cctx->streamStage = zcss_load; + cctx->appliedParams = params; + } else +#endif + { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast, + cctx->cdict, + ¶ms, pledgedSrcSize, + ZSTDb_buffered) , ""); + assert(cctx->appliedParams.nbWorkers == 0); + cctx->inToCompress = 0; + cctx->inBuffPos = 0; + if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) { + /* for small input: avoid automatic flush on reaching end of block, since + * it would require to add a 3-bytes null block to end frame + */ + cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize); + } else { + cctx->inBuffTarget = 0; + } + cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; + cctx->streamStage = zcss_load; + cctx->frameEnded = 0; + } + return 0; +} + +size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); + /* check conditions */ + RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer"); + RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer"); + RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective"); + assert(cctx != NULL); + + /* transparent initialization stage */ + if (cctx->streamStage == zcss_init) { + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed"); + ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ + } + /* end of transparent 
initialization stage */ + + FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers"); + /* compression stage */ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + size_t flushMin; + if (cctx->cParamsChanged) { + ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); + cctx->cParamsChanged = 0; + } + for (;;) { + size_t const ipos = input->pos; + size_t const opos = output->pos; + flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + cctx->consumedSrcSize += (U64)(input->pos - ipos); + cctx->producedCSize += (U64)(output->pos - opos); + if ( ZSTD_isError(flushMin) + || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ + if (flushMin == 0) + ZSTD_CCtx_trace(cctx, 0); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + } + FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); + + if (endOp == ZSTD_e_continue) { + /* We only require some progress with ZSTD_e_continue, not maximal progress. + * We're done if we've consumed or produced any bytes, or either buffer is + * full. + */ + if (input->pos != ipos || output->pos != opos || input->pos == input->size || output->pos == output->size) + break; + } else { + assert(endOp == ZSTD_e_flush || endOp == ZSTD_e_end); + /* We require maximal progress. We're done when the flush is complete or the + * output buffer is full. + */ + if (flushMin == 0 || output->pos == output->size) + break; + } + } + DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); + /* Either we don't require maximum forward progress, we've finished the + * flush, or we are out of output space. 
+ */ + assert(endOp == ZSTD_e_continue || flushMin == 0 || output->pos == output->size); + ZSTD_setBufferExpectations(cctx, output, input); + return flushMin; + } +#endif + FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , ""); + DEBUGLOG(5, "completed ZSTD_compressStream2"); + ZSTD_setBufferExpectations(cctx, output, input); + return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ +} + +size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} + +size_t ZSTD_compress2(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode; + ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode; + DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + /* Enable stable input/output buffers. */ + cctx->requestedParams.inBufferMode = ZSTD_bm_stable; + cctx->requestedParams.outBufferMode = ZSTD_bm_stable; + { size_t oPos = 0; + size_t iPos = 0; + size_t const result = ZSTD_compressStream2_simpleArgs(cctx, + dst, dstCapacity, &oPos, + src, srcSize, &iPos, + ZSTD_e_end); + /* Reset to the original values. 
*/ + cctx->requestedParams.inBufferMode = originalInBufferMode; + cctx->requestedParams.outBufferMode = originalOutBufferMode; + FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); + if (result != 0) { /* compression not completed, due to lack of output space */ + assert(oPos == dstCapacity); + RETURN_ERROR(dstSize_tooSmall, ""); + } + assert(iPos == srcSize); /* all input is expected consumed */ + return oPos; + } +} + +typedef struct { + U32 idx; /* Index in array of ZSTD_Sequence */ + U32 posInSequence; /* Position within sequence at idx */ + size_t posInSrc; /* Number of bytes given by sequences provided so far */ +} ZSTD_sequencePosition; + +/* Returns a ZSTD error code if sequence is not valid */ +static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength, + size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) { + size_t offsetBound; + U32 windowSize = 1 << windowLog; + /* posInSrc represents the amount of data the the decoder would decode up to this point. + * As long as the amount of data decoded is less than or equal to window size, offsets may be + * larger than the total length of output decoded in order to reference the dict, even larger than + * window size. After output surpasses windowSize, we're limited to windowSize offsets again. + */ + offsetBound = posInSrc > windowSize ? 
(size_t)windowSize : posInSrc + (size_t)dictSize; + RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!"); + RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small"); + return 0; +} + +/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ +static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) { + U32 offCode = rawOffset + ZSTD_REP_MOVE; + U32 repCode = 0; + + if (!ll0 && rawOffset == rep[0]) { + repCode = 1; + } else if (rawOffset == rep[1]) { + repCode = 2 - ll0; + } else if (rawOffset == rep[2]) { + repCode = 3 - ll0; + } else if (ll0 && rawOffset == rep[0] - 1) { + repCode = 3; + } + if (repCode) { + /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */ + offCode = repCode - 1; + } + return offCode; +} + +/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of + * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. 
+ */ +static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + BYTE const* ip = (BYTE const*)(src); + const BYTE* const iend = ip + blockSize; + repcodes_t updatedRepcodes; + U32 dictSize; + U32 litLength; + U32 matchLength; + U32 ll0; + U32 offCode; + + if (cctx->cdict) { + dictSize = (U32)cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = (U32)cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { + litLength = inSeqs[idx].litLength; + matchLength = inSeqs[idx].matchLength; + ll0 = litLength == 0; + offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + if (inSeqs[idx].litLength) { + DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength); + ip += inSeqs[idx].litLength; + seqPos->posInSrc += inSeqs[idx].litLength; + } + RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!"); + seqPos->idx = idx+1; + return 0; +} + +/* Returns the number of bytes to move the current read position back by. Only non-zero + * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something + * went wrong. + * + * This function will attempt to scan through blockSize bytes represented by the sequences + * in inSeqs, storing any (partial) sequences. + * + * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to + * avoid splitting a match, or to avoid splitting a match such that it would produce a match + * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. 
+ */ +static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + U32 startPosInSequence = seqPos->posInSequence; + U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; + size_t dictSize; + BYTE const* ip = (BYTE const*)(src); + BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ + repcodes_t updatedRepcodes; + U32 bytesAdjustment = 0; + U32 finalMatchSplit = 0; + U32 litLength; + U32 matchLength; + U32 rawOffset; + U32 offCode; + + if (cctx->cdict) { + dictSize = cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); + DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { + const ZSTD_Sequence currSeq = inSeqs[idx]; + litLength = currSeq.litLength; + matchLength = currSeq.matchLength; + rawOffset = currSeq.offset; + + /* Modify the sequence depending on where endPosInSequence lies */ + if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { + if (startPosInSequence >= litLength) { + startPosInSequence -= litLength; + litLength = 0; + matchLength -= startPosInSequence; + } else { + litLength -= startPosInSequence; + } + /* Move to the next sequence */ + endPosInSequence -= currSeq.litLength + currSeq.matchLength; + startPosInSequence = 0; + idx++; + } else { + /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence + does not reach the end of the match. 
So, we have to split the sequence */ + DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", + currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence); + if (endPosInSequence > litLength) { + U32 firstHalfMatchLength; + litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence; + firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength; + if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) { + /* Only ever split the match if it is larger than the block size */ + U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence; + if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) { + /* Move the endPosInSequence backward so that it creates match of minMatch length */ + endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + firstHalfMatchLength -= bytesAdjustment; + } + matchLength = firstHalfMatchLength; + /* Flag that we split the last match - after storing the sequence, exit the loop, + but keep the value of endPosInSequence */ + finalMatchSplit = 1; + } else { + /* Move the position in sequence backwards so that we don't split match, and break to store + * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence + * should go. 
We prefer to do this whenever it is not necessary to split the match, or if doing so + * would cause the first half of the match to be too small + */ + bytesAdjustment = endPosInSequence - currSeq.litLength; + endPosInSequence = currSeq.litLength; + break; + } + } else { + /* This sequence ends inside the literals, break to store the last literals */ + break; + } + } + /* Check if this offset can be represented with a repcode */ + { U32 ll0 = (litLength == 0); + offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + } + + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); + seqPos->idx = idx; + seqPos->posInSequence = endPosInSequence; + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + iend -= bytesAdjustment; + if (ip != iend) { + /* Store any last literals */ + U32 lastLLSize = (U32)(iend - ip); + assert(ip <= iend); + DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize); + seqPos->posInSrc += lastLLSize; + } + + return bytesAdjustment; +} + +typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); +static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) { + ZSTD_sequenceCopier sequenceCopier = NULL; + assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); + if (mode == ZSTD_sf_explicitBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreExplicitBlockDelim; + } else if (mode == ZSTD_sf_noBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreNoBlockDelim; + } + assert(sequenceCopier != NULL); + return sequenceCopier; +} + +/* Compress, block-by-block, all of the sequences given. + * + * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error. 
 */
/* Compresses `src` into `dst` block by block, using the caller-provided
 * sequences `inSeqs` instead of running a match finder.
 * NOTE(review): reconstructed from a whitespace-mangled diff paste; tokens
 * are unchanged from the vendored zstd source. */
static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
                                              void* dst, size_t dstCapacity,
                                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                                              const void* src, size_t srcSize) {
    size_t cSize = 0;
    U32 lastBlock;
    size_t blockSize;
    size_t compressedSeqsSize;
    size_t remaining = srcSize;
    ZSTD_sequencePosition seqPos = {0, 0, 0};

    BYTE const* ip = (BYTE const*)src;
    BYTE* op = (BYTE*)dst;
    ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);

    DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
    /* Special case: empty frame */
    if (remaining == 0) {
        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
        MEM_writeLE32(op, cBlockHeader24);
        op += ZSTD_blockHeaderSize;
        dstCapacity -= ZSTD_blockHeaderSize;
        cSize += ZSTD_blockHeaderSize;
    }

    while (remaining) {
        size_t cBlockSize;
        size_t additionalByteAdjustment;
        lastBlock = remaining <= cctx->blockSize;
        blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
        ZSTD_resetSeqStore(&cctx->seqStore);
        DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);

        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
        FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
        blockSize -= additionalByteAdjustment;

        /* If blocks are too small, emit as a nocompress block */
        if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
            DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
            cSize += cBlockSize;
            ip += blockSize;
            op += cBlockSize;
            remaining -= blockSize;
            dstCapacity -= cBlockSize;
            continue;
        }

        compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore,
                                &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
                                &cctx->appliedParams,
                                op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
                                blockSize,
                                cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
                                cctx->bmi2);
        FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
        DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);

        if (!cctx->isFirstBlock &&
            ZSTD_maybeRLE(&cctx->seqStore) &&
            ZSTD_isRLE((BYTE const*)src, srcSize)) {
            /* We don't want to emit our first block as a RLE even if it qualifies because
             * doing so will cause the decoder (cli only) to throw a "should consume all input error."
             * This is only an issue for zstd <= v1.4.3
             */
            compressedSeqsSize = 1;
        }

        if (compressedSeqsSize == 0) {
            /* ZSTD_noCompressBlock writes the block header as well */
            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
            DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
        } else if (compressedSeqsSize == 1) {
            cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
            FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
            DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
        } else {
            U32 cBlockHeader;
            /* Error checking and repcodes update */
            ZSTD_confirmRepcodesAndEntropyTables(cctx);
            if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

            /* Write block header into beginning of block*/
            cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
            MEM_writeLE24(op, cBlockHeader);
            cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
            DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
        }

        cSize += cBlockSize;
        DEBUGLOG(4, "cSize running total: %zu", cSize);

        if (lastBlock) {
            break;
        } else {
            ip += blockSize;
            op += cBlockSize;
            remaining -= blockSize;
            dstCapacity -= cBlockSize;
            cctx->isFirstBlock = 0;
        }
    }

    return cSize;
}

/* Public entry: writes the frame header, delegates block compression to
 * ZSTD_compressSequences_internal(), then appends the optional checksum. */
size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                              const void* src, size_t srcSize) {
    BYTE* op = (BYTE*)dst;
    size_t cSize = 0;
    size_t compressedBlocksSize = 0;
    size_t frameHeaderSize = 0;

    /* Transparent initialization stage, same as compressStream2() */
    DEBUGLOG(3, "ZSTD_compressSequences()");
    assert(cctx != NULL);
    FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
    /* Begin writing output, starting with frame header */
    frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
    op += frameHeaderSize;
    dstCapacity -= frameHeaderSize;
    cSize += frameHeaderSize;
    if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
        XXH64_update(&cctx->xxhState, src, srcSize);
    }
    /* cSize includes block header size and compressed sequences size */
    compressedBlocksSize = ZSTD_compressSequences_internal(cctx,
                                                           op, dstCapacity,
                                                           inSeqs, inSeqsSize,
                                                           src, srcSize);
    FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
    cSize += compressedBlocksSize;
    dstCapacity -= compressedBlocksSize;

    if (cctx->appliedParams.fParams.checksumFlag) {
        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
        DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32((char*)dst + cSize, checksum);
        cSize += 4;
    }

    DEBUGLOG(3, "Final compressed size: %zu", cSize);
    return cSize;
}

/*======   Finalize   ======*/

/*! ZSTD_flushStream() :
 * @return : amount of data remaining to flush */
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    ZSTD_inBuffer input = { NULL, 0, 0 };
    return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
}


size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    ZSTD_inBuffer input = { NULL, 0, 0 };
    size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
    FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed");
    if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
    /* single thread mode : attempt to calculate remaining to flush more precisely */
    {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
        size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
        size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
        DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
        return toFlush;
    }
}


/*-=====  Pre-defined compression levels  =====-*/

#define ZSTD_MAX_CLEVEL     22
int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }

/* One row per compression level (row 0 is the baseline for negative levels);
 * one table per source-size bracket, selected in ZSTD_getCParams_internal(). */
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{   /* "default" - for any srcSize > 256 KB */
    /* W,  C,  H,  S,  L, TL, strat */
    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
    { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
    { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
    { 21, 16, 17,  1,  5,  0, ZSTD_dfast   },  /* level  3 */
    { 21, 18, 18,  1,  5,  0, ZSTD_dfast   },  /* level  4 */
    { 21, 18, 19,  2,  5,  2, ZSTD_greedy  },  /* level  5 */
    { 21, 19, 19,  3,  5,  4, ZSTD_greedy  },  /* level  6 */
    { 21, 19, 19,  3,  5,  8, ZSTD_lazy    },  /* level  7 */
    { 21, 19, 19,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
    { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
    { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
    { 22, 21, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
    { 22, 21, 22,  5,  5, 32, ZSTD_btlazy2 },  /* level 13 */
    { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
    { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
    { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
    { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
    { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
    { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
    { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
    { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
    { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
},
{   /* for srcSize <= 256 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
    { 18, 14, 14,  1,  5,  0, ZSTD_dfast   },  /* level  2 */
    { 18, 16, 16,  1,  4,  0, ZSTD_dfast   },  /* level  3 */
    { 18, 16, 17,  2,  5,  2, ZSTD_greedy  },  /* level  4.*/
    { 18, 18, 18,  3,  5,  2, ZSTD_greedy  },  /* level  5.*/
    { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
    { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
    { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
    { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
    { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
    { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
    { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
    { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
    { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
    { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
    { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
},
{   /* for srcSize <= 128 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
    { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
    { 17, 15, 16,  2,  5,  0, ZSTD_dfast   },  /* level  3 */
    { 17, 17, 17,  2,  4,  0, ZSTD_dfast   },  /* level  4 */
    { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
    { 17, 17, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
    { 17, 17, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
    { 17, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 17, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 17, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
    { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
    { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
    { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
    { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
    { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
    { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
    { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
    { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
},
{   /* for srcSize <= 16 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
    { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
    { 14, 14, 15,  2,  4,  0, ZSTD_dfast   },  /* level  3 */
    { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
    { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
    { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
    { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
    { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
    { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
    { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
    { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
    { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
    { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
    { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
    { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
    { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
    { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
    { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
},
};

static ZSTD_compressionParameters
ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict); + switch (cParams.strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } + return cParams; +} + +static int ZSTD_dedicatedDictSearch_isSupported( + ZSTD_compressionParameters const* cParams) +{ + return (cParams->strategy >= ZSTD_greedy) && (cParams->strategy <= ZSTD_lazy2); +} + +/** + * Reverses the adjustment applied to cparams when enabling dedicated dict + * search. This is used to recover the params set to be used in the working + * context. (Otherwise, those tables would also grow.) + */ +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams) { + switch (cParams->strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } +} + +static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + switch (mode) { + case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + case ZSTD_cpm_createCDict: + break; + case ZSTD_cpm_attachDict: + dictSize = 0; + break; + default: + assert(0); + break; + } + { int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; + size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; + return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; + } +} + +/*! ZSTD_getCParams_internal() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. 
+ * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. + * Use dictSize == 0 for unknown or unused. + * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */ +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode); + U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); + int row; + DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel); + + /* row */ + if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + else if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ + else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; + else row = compressionLevel; + + { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + /* acceleration factor */ + if (compressionLevel < 0) { + int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel); + cp.targetLength = (unsigned)(-clampedCompressionLevel); + } + /* refine parameters based on srcSize & dictSize */ + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode); + } +} + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. + * Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) +{ + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} + +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). 
+ * Fields of `ZSTD_frameParameters` are set to default values */ +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { + ZSTD_parameters params; + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); + DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); + ZSTD_memset(¶ms, 0, sizeof(params)); + params.cParams = cParams; + params.fParams.contentSizeFlag = 1; + return params; +} + +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} +/**** ended inlining compress/zstd_compress.c ****/ +/**** start inlining compress/zstd_double_fast.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
 */

/**** skipping file: zstd_compress_internal.h ****/
/**** skipping file: zstd_double_fast.h ****/


/* Seeds both the long-match (8-byte) and short-match (minMatch) hash tables
 * from the window up to `end`, inserting every fastHashFillStep-th position. */
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0)
                hashSmall[smHash] = curr + i;
            if (i == 0 || hashLarge[lgHash] == 0)
                hashLarge[lgHash] = curr + i;
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}


/* Core double-fast match finder, templated on minMatch length `mls` and
 * `dictMode` (ZSTD_noDict or ZSTD_dictMatchState). Emits sequences into
 * `seqStore` and returns the size of the trailing literals. */
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_doubleFast_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved = 0;

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams =
                                      dictMode == ZSTD_dictMatchState ?
                                      &dms->cParams : NULL;
    const U32* const dictHashLong  = dictMode == ZSTD_dictMatchState ?
                                     dms->hashTable : NULL;
    const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
                                     dms->chainTable : NULL;
    const U32 dictStartIndex       = dictMode == ZSTD_dictMatchState ?
                                     dms->window.dictLimit : 0;
    const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
                                     dms->window.base : NULL;
    const BYTE* const dictStart    = dictMode == ZSTD_dictMatchState ?
                                     dictBase + dictStartIndex : NULL;
    const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
                                     dms->window.nextSrc : NULL;
    const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
                                     0;
    const U32 dictHBitsL           = dictMode == ZSTD_dictMatchState ?
                                     dictCParams->hashLog : hBitsL;
    const U32 dictHBitsS           = dictMode == ZSTD_dictMatchState ?
                                     dictCParams->chainLog : hBitsS;
    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");

    assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);

    /* if a dictionary is attached, it must be within window range */
    if (dictMode == ZSTD_dictMatchState) {
        assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
    }

    /* init */
    ip += (dictAndPrefixLength == 0);
    if (dictMode == ZSTD_noDict) {
        U32 const curr = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
        U32 const maxRep = curr - windowLow;
        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
    }
    if (dictMode == ZSTD_dictMatchState) {
        /* dictMatchState repCode checks don't currently handle repCode == 0
         * disabling. */
        assert(offset_1 <= dictAndPrefixLength);
        assert(offset_2 <= dictAndPrefixLength);
    }

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 offset;
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
        size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
        size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
        U32 const curr = (U32)(ip-base);
        U32 const matchIndexL = hashLong[h2];
        U32 matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const BYTE* match = base + matchIndexS;
        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
                            && repIndex < prefixLowestIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */

        /* check dictMatchState repcode */
        if (dictMode == ZSTD_dictMatchState
            && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
            goto _match_stored;
        }

        /* check noDict repcode */
        if ( dictMode == ZSTD_noDict
          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
            goto _match_stored;
        }

        if (matchIndexL > prefixLowestIndex) {
            /* check prefix long match */
            if (MEM_read64(matchLong) == MEM_read64(ip)) {
                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
                offset = (U32)(ip-matchLong);
                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                goto _match_found;
            }
        } else if (dictMode == ZSTD_dictMatchState) {
            /* check dictMatchState long match */
            U32 const dictMatchIndexL = dictHashLong[dictHL];
            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
            assert(dictMatchL < dictEnd);

            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
                goto _match_found;
        }   }

        if (matchIndexS > prefixLowestIndex) {
            /* check prefix short match */
            if (MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
            }
        } else if (dictMode == ZSTD_dictMatchState) {
            /* check dictMatchState short match */
            U32 const dictMatchIndexS = dictHashSmall[dictHS];
            match = dictBase + dictMatchIndexS;
            matchIndexS = dictMatchIndexS + dictIndexDelta;

            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
        }   }

        ip += ((ip-anchor) >> kSearchStrength) + 1;
#if defined(__aarch64__)
        PREFETCH_L1(ip+256);
#endif
        continue;

_search_next_long:

        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
            size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            const BYTE* matchL3 = base + matchIndexL3;
            hashLong[hl3] = curr + 1;

            /* check prefix long +1 match */
            if (matchIndexL3 > prefixLowestIndex) {
                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                    ip++;
                    offset = (U32)(ip-matchL3);
                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                    goto _match_found;
                }
            } else if (dictMode == ZSTD_dictMatchState) {
                /* check dict long +1 match */
                U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
                assert(dictMatchL3 < dictEnd);
                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
                    ip++;
                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
                    goto _match_found;
        }   }   }

        /* if no long +1 match, explore the short match we found */
        if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
            offset = (U32)(curr - matchIndexS);
            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        } else {
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            offset = (U32)(ip - match);
            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        }

        /* fall-through */

_match_found:
        offset_2 = offset_1;
        offset_1 = offset;

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            if (dictMode == ZSTD_dictMatchState) {
                while (ip <= ilimit) {
                    U32 const current2 = (U32)(ip-base);
                    U32 const repIndex2 = current2 - offset_2;
                    const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
                        && repIndex2 < prefixLowestIndex ?
                            dictBase + repIndex2 - dictIndexDelta :
                            base + repIndex2;
                    if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
                       && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                        const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                        size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                        U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                        ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                        hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                        hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                        ip += repLength2;
                        anchor = ip;
                        continue;
                    }
                    break;
            }   }

            if (dictMode == ZSTD_noDict) {
                while ( (ip <= ilimit)
                     && ( (offset_2>0)
                        & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                    /* store sequence */
                    size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                    U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
                    ip += rLength;
                    anchor = ip;
                    continue;   /* faster when present ... (?) */
        }   }   }
    }   /* while (ip < ilimit) */

    /* save reps for next block */
    rep[0] = offset_1 ? offset_1 : offsetSaved;
    rep[1] = offset_2 ? offset_2 : offsetSaved;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}


/* Dispatches to the mls-specialized template instance (no dictionary). */
size_t ZSTD_compressBlock_doubleFast(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
    case 5 :
        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
    case 6 :
        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
    case 7 :
        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
    }
}


/* Same dispatch, for the attached-dictionary (dictMatchState) mode. */
size_t ZSTD_compressBlock_doubleFast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
    case 5 :
        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
    case 6 :
        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
    case 7 :
        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
    }
}


/* Double-fast variant for the external-dictionary (extDict) window layout. */
static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip
                   = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
    const U32   dictStartIndex = lowLimit;
    const U32   dictLimit = ms->window.dictLimit;
    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 curr = (U32)(ip-base);
        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        size_t mLength;
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */

        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
            & (repIndex > dictStartIndex))
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
        } else {
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
                offset = curr - matchLongIndex;
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
                const BYTE* match3 = match3Base + matchIndex3;
                U32 offset;
                hashLong[h3] = curr + 1;
                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
                    ip++;
                    offset = curr+1 - matchIndex3;
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                    offset = curr - matchIndex;
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

            } else {
                ip += ((ip-anchor) >> kSearchStrength) + 1;
                continue;
        }   }

        /* move to next sequence start */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
                    & (repIndex2 > dictStartIndex))
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}


/* Dispatches to the mls-specialized extDict template instance. */
size_t ZSTD_compressBlock_doubleFast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
    }
}
/**** ended inlining compress/zstd_double_fast.c ****/
/**** start inlining compress/zstd_fast.c ****/
/*
 * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
+ */ + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_fast.h ****/ + + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + const void* const end, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hBits = cParams->hashLog; + U32 const mls = cParams->minMatch; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash table. + * Insert the other positions if their hash entry is empty. + */ + for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); + hashTable[hash0] = curr; + if (dtlm == ZSTD_dtlm_fast) continue; + /* Only load extra positions for ZSTD_dtlm_full */ + { U32 p; + for (p = 1; p < fastHashFillStep; ++p) { + size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); + if (hashTable[hash] == 0) { /* not yet filled */ + hashTable[hash] = curr + p; + } } } } +} + + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_fast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 prefixStartIndex = 
ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); + ip0 += (ip0 == prefixStart); + ip1 = ip0 + 1; + { U32 const curr = (U32)(ip0 - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ +#ifdef __INTEL_COMPILER + /* From intel 'The vector pragma indicates that the loop should be + * vectorized if it is legal to do so'. Can be used together with + * #pragma ivdep (but have opted to exclude that because intel + * warns against using it).*/ + #pragma vector always +#endif + while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ + size_t mLength; + BYTE const* ip2 = ip0 + 2; + size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); + U32 const val0 = MEM_read32(ip0); + size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); + U32 const val1 = MEM_read32(ip1); + U32 const current0 = (U32)(ip0-base); + U32 const current1 = (U32)(ip1-base); + U32 const matchIndex0 = hashTable[h0]; + U32 const matchIndex1 = hashTable[h1]; + BYTE const* repMatch = ip2 - offset_1; + const BYTE* match0 = base + matchIndex0; + const BYTE* match1 = base + matchIndex1; + U32 offcode; + +#if defined(__aarch64__) + PREFETCH_L1(ip0+256); +#endif + + hashTable[h0] = current0; /* update hash table */ + hashTable[h1] = current1; /* update hash table */ + + assert(ip0 + 1 == ip1); + + if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { + mLength = (ip2[-1] == repMatch[-1]) ? 
1 : 0; + ip0 = ip2 - mLength; + match0 = repMatch - mLength; + mLength += 4; + offcode = 0; + goto _match; + } + if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { + /* found a regular match */ + goto _offset; + } + if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { + /* found a regular match after one literal */ + ip0 = ip1; + match0 = match1; + goto _offset; + } + { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; + assert(step >= 2); + ip0 += step; + ip1 += step; + continue; + } +_offset: /* Requires: ip0, match0 */ + /* Compute the offset code */ + offset_2 = offset_1; + offset_1 = (U32)(ip0-match0); + offcode = offset_1 + ZSTD_REP_MOVE; + mLength = 4; + /* Count the backwards match length */ + while (((ip0>anchor) & (match0>prefixStart)) + && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ + +_match: /* Requires: ip0, match0, offcode */ + /* Count the forward length */ + mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); + ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); + /* match found */ + ip0 += mLength; + anchor = ip0; + + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); + anchor 
= ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } } } + ip1 = ip0 + 1; + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState == NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_fast_dictMatchState_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; + const U32* const dictHashTable = dms->hashTable; 
+ const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); + const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); + const U32 dictHLog = dictCParams->hashLog; + + /* if a dictionary is still attached, it necessarily means that + * it is within window size. So we just check it. */ + const U32 maxDistance = 1U << cParams->windowLog; + const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); + assert(endIndex - prefixStartIndex <= maxDistance); + (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ + + /* ensure there will be no underflow + * when translating a dict index into a local index */ + assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); + ip += (dictAndPrefixLength == 0); + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h = ZSTD_hashPtr(ip, hlog, mls); + U32 const curr = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + const U32 repIndex = curr + 1 - offset_1; + const BYTE* repMatch = (repIndex < prefixStartIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashTable[h] = curr; /* update hash table */ + + if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + } else if ( (matchIndex <= prefixStartIndex) ) { + size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); + U32 const dictMatchIndex = dictHashTable[dictHash]; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex <= dictStartIndex || + MEM_read32(dictMatch) != MEM_read32(ip)) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a dict match */ + U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta); + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; + while (((ip>anchor) & (dictMatch>dictStart)) + && (ip[-1] == dictMatch[-1])) { + ip--; dictMatch--; mLength++; + } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + } else if (MEM_read32(match) != MEM_read32(ip)) { + /* it's not a match, and we're not going to check the dictionary */ + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a regular match */ + U32 const offset = (U32)(ip-match); + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + while (((ip>anchor) & (match>prefixStart)) + && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + assert(base+curr+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + + 
/* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState != NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + + +static size_t ZSTD_compressBlock_fast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const 
ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const BYTE* const dictStart = dictBase + dictStartIndex; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); + + /* switch to "regular" variant if extDict is invalidated due to maxDistance */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hlog, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; + const BYTE* const repBase = repIndex < prefixStartIndex ? 
dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + hashTable[h] = curr; /* update hash table */ + DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); + assert(offset_1 <= curr +1); /* check repIndex */ + + if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + } else { + if ( (matchIndex < dictStartIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + U32 const offset = curr - matchIndex; + size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = offset; /* update offset history */ + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ip += mLength; + anchor = ip; + } } + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? 
dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } +} +/**** ended inlining compress/zstd_fast.c ****/ +/**** start inlining compress/zstd_lazy.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_lazy.h ****/ + + +/*-************************************* +* Binary Tree search +***************************************/ + +static void +ZSTD_updateDUBT(ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iend, + U32 mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + if (idx != target) + DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", + idx, target, ms->window.dictLimit); + assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ + (void)iend; + + assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ + for ( ; idx < target ; idx++) { + size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ + U32 const matchIndex = hashTable[h]; + + U32* const nextCandidatePtr = bt + 2*(idx&btMask); + U32* const sortMarkPtr = nextCandidatePtr + 1; + + DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); + hashTable[h] = idx; /* Update Hash Table */ + *nextCandidatePtr = matchIndex; /* update BT like a chain */ + *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; + } + ms->nextToUpdate = target; +} + + +/** ZSTD_insertDUBT1() : + * sort one already inserted but unsorted position + * assumption : curr >= btlow == (curr - btmask) + * doesn't fail */ +static void +ZSTD_insertDUBT1(ZSTD_matchState_t* ms, + U32 curr, const BYTE* inputEnd, + U32 nbCompares, U32 btLow, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + size_t commonLengthSmaller=0, 
commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr; + const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = smallerPtr + 1; + U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ + U32 dummy32; /* to be nullified at the end */ + U32 const windowValid = ms->window.lowLimit; + U32 const maxDistance = 1U << cParams->windowLog; + U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid; + + + DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", + curr, dictLimit, windowLow); + assert(curr >= btLow); + assert(ip < iend); /* condition for ZSTD_count */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < curr); + /* note : all candidates are now supposed sorted, + * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK + * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ + + if ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ + || (curr < dictLimit) /* both in extDict */) { + const BYTE* const mBase = ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit)) ? 
+ base : dictBase; + assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ + || (curr < dictLimit) ); + match = mBase + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* preparation for next read of match[matchLength] */ + } + + DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", + curr, matchIndex, (U32)matchLength); + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", + matchIndex, btLow, nextPtr[1]); + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", + matchIndex, btLow, nextPtr[0]); + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; +} + + +static size_t +ZSTD_DUBT_findBetterDictMatch ( + ZSTD_matchState_t* ms, + 
const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + size_t bestLength, + U32 nbCompares, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_matchState_t * const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; + const U32 * const dictHashTable = dms->hashTable; + U32 const hashLog = dmsCParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 dictMatchIndex = dictHashTable[h]; + + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + U32 const curr = (U32)(ip-base); + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictEnd = dms->window.nextSrc; + U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); + U32 const dictLowLimit = dms->window.lowLimit; + U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; + + U32* const dictBt = dms->chainTable; + U32 const btLog = dmsCParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? 
dictLowLimit : dictHighLimit - btMask; + + size_t commonLengthSmaller=0, commonLengthLarger=0; + + (void)dictMode; + assert(dictMode == ZSTD_dictMatchState); + + while (nbCompares-- && (dictMatchIndex > dictLowLimit)) { + U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dictBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (dictMatchIndex+matchLength >= dictHighLimit) + match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + U32 matchIndex = dictMatchIndex + dictIndexDelta; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { + DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", + curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex); + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + } + if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthLarger = matchLength; + 
dictMatchIndex = nextPtr[0]; + } + } + + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + +} + + +static size_t +ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + + const BYTE* const base = ms->window.base; + U32 const curr = (U32)(ip-base); + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= curr) ? 
0 : curr - btMask; + U32 const unsortLimit = MAX(btLow, windowLow); + + U32* nextCandidate = bt + 2*(matchIndex&btMask); + U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; + U32 nbCompares = 1U << cParams->searchLog; + U32 nbCandidates = nbCompares; + U32 previousCandidate = 0; + + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr); + assert(ip <= iend-8); /* required for h calculation */ + assert(dictMode != ZSTD_dedicatedDictSearch); + + /* reach end of unsorted candidates list */ + while ( (matchIndex > unsortLimit) + && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) + && (nbCandidates > 1) ) { + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", + matchIndex); + *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ + previousCandidate = matchIndex; + matchIndex = *nextCandidate; + nextCandidate = bt + 2*(matchIndex&btMask); + unsortedMark = bt + 2*(matchIndex&btMask) + 1; + nbCandidates --; + } + + /* nullify last candidate if it's still unsorted + * simplification, detrimental to compression ratio, beneficial for speed */ + if ( (matchIndex > unsortLimit) + && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", + matchIndex); + *nextCandidate = *unsortedMark = 0; + } + + /* batch sort stacked candidates */ + matchIndex = previousCandidate; + while (matchIndex) { /* will end on matchIndex == 0 */ + U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; + U32 const nextCandidateIdx = *nextCandidateIdxPtr; + ZSTD_insertDUBT1(ms, matchIndex, iend, + nbCandidates, unsortLimit, dictMode); + matchIndex = nextCandidateIdx; + nbCandidates++; + } + + /* find longest match */ + { size_t commonLengthSmaller = 0, commonLengthLarger = 0; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart 
= base + dictLimit; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr + 8 + 1; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + matchIndex = hashTable[h]; + hashTable[h] = curr; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + if (dictMode == ZSTD_dictMatchState) { + nbCompares = 0; /* in addition to avoiding checking any + * further in this loop, make sure we + * skip checking in the dictionary. 
*/ + } + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + if (dictMode == ZSTD_dictMatchState && nbCompares) { + bestLength = ZSTD_DUBT_findBetterDictMatch( + ms, ip, iend, + offsetPtr, bestLength, nbCompares, + mls, dictMode); + } + + assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + } +} + + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +FORCE_INLINE_TEMPLATE size_t +ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls /* template */, + const ZSTD_dictMode_e dictMode) +{ + DEBUGLOG(7, "ZSTD_BtFindBestMatch"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateDUBT(ms, ip, iLimit, mls); + return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, 
offsetPtr, mls, dictMode); +} + + +static size_t +ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. 
not within extDict) */ +FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, + const ZSTD_compressionParameters* const cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { + const ZSTD_compressionParameters* const cParams = &ms->cParams; + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); +} + +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32* const hashTable = ms->hashTable; + U32* const chainTable = ms->chainTable; + U32 const chainSize = 1 << ms->cParams.chainLog; + U32 idx = ms->nextToUpdate; + U32 const minChain = chainSize < target ? target - chainSize : idx; + U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const cacheSize = bucketSize - 1; + U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize; + U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts; + + /* We know the hashtable is oversized by a factor of `bucketSize`. + * We are going to temporarily pretend `bucketSize == 1`, keeping only a + * single entry. We will use the rest of the space to construct a temporary + * chaintable. 
+ */ + U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + U32* const tmpHashTable = hashTable; + U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog); + U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; + U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx; + + U32 hashIdx; + + assert(ms->cParams.chainLog <= 24); + assert(ms->cParams.hashLog >= ms->cParams.chainLog); + assert(idx != 0); + assert(tmpMinChain <= minChain); + + /* fill conventional hash table and conventional chain table */ + for ( ; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch); + if (idx >= tmpMinChain) { + tmpChainTable[idx - tmpMinChain] = hashTable[h]; + } + tmpHashTable[h] = idx; + } + + /* sort chains into ddss chain table */ + { + U32 chainPos = 0; + for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) { + U32 count; + U32 countBeyondMinChain = 0; + U32 i = tmpHashTable[hashIdx]; + for (count = 0; i >= tmpMinChain && count < cacheSize; count++) { + /* skip through the chain to the first position that won't be + * in the hash cache bucket */ + if (i < minChain) { + countBeyondMinChain++; + } + i = tmpChainTable[i - tmpMinChain]; + } + if (count == cacheSize) { + for (count = 0; count < chainLimit;) { + if (i < minChain) { + if (!i || countBeyondMinChain++ > cacheSize) { + /* only allow pulling `cacheSize` number of entries + * into the cache or chainTable beyond `minChain`, + * to replace the entries pulled out of the + * chainTable into the cache. This lets us reach + * back further without increasing the total number + * of entries in the chainTable, guaranteeing the + * DDSS chain table will fit into the space + * allocated for the regular one. 
*/ + break; + } + } + chainTable[chainPos++] = i; + count++; + if (i < tmpMinChain) { + break; + } + i = tmpChainTable[i - tmpMinChain]; + } + } else { + count = 0; + } + if (count) { + tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count; + } else { + tmpHashTable[hashIdx] = 0; + } + } + assert(chainPos <= chainSize); /* I believe this is guaranteed... */ + } + + /* move chain pointers into the last entry of each hash bucket */ + for (hashIdx = (1 << hashLog); hashIdx; ) { + U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const chainPackedPointer = tmpHashTable[hashIdx]; + U32 i; + for (i = 0; i < cacheSize; i++) { + hashTable[bucketIdx + i] = 0; + } + hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer; + } + + /* fill the buckets of the hash table */ + for (idx = ms->nextToUpdate; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch) + << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 i; + /* Shift hash cache down 1. */ + for (i = cacheSize - 1; i; i--) + hashTable[h + i] = hashTable[h + i - 1]; + hashTable[h] = idx; + } + + ms->nextToUpdate = target; +} + + +/* inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const chainTable = ms->chainTable; + const U32 chainSize = (1 << cParams->chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 curr = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = 
ms->window.lowLimit; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; + const U32 minChain = curr > chainSize ? curr - chainSize : 0; + U32 nbAttempts = 1U << cParams->searchLog; + size_t ml=4-1; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch + ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch + ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + + U32 matchIndex; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32* entry = &dms->hashTable[ddsIdx]; + PREFETCH_L1(entry); + } + + /* HC4 match finder */ + matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + + for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32 
ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; + U32 ddsAttempt; + + for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 const chainIndex = chainPackedPointer >> 8; + + PREFETCH_L1(&dms->chainTable[chainIndex]); + } + + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (!matchIndex) { + return ml; + } + + /* guaranteed by table construction */ + (void)ddsLowestIndex; + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } + } + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 chainIndex = chainPackedPointer >> 8; + U32 const chainLength = chainPackedPointer & 0xFF; + U32 const chainAttempts = nbAttempts - ddsAttempt; + U32 const chainLimit = chainAttempts > chainLength ? 
chainLength : chainAttempts; + U32 chainAttempt; + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { + PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); + } + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[chainIndex]; + match = ddsBase + matchIndex; + + /* guaranteed by table construction */ + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + } else if (dictMode == ZSTD_dictMatchState) { + const U32* const dmsChainTable = dms->chainTable; + const U32 dmsChainSize = (1 << dms->cParams.chainLog); + const U32 dmsChainMask = dmsChainSize - 1; + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + const U32 dmsMinChain = dmsSize > dmsChainSize ? 
dmsSize - dmsChainSize : 0; + + matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; + + for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= dmsMinChain) break; + + matchIndex = dmsChainTable[matchIndex & dmsChainMask]; + } + } + + return ml; +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS ( + 
ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch); + } +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +typedef enum { search_hashChain, search_binaryTree } searchMethod_e; + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_lazy_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth, + ZSTD_dictMode_e const dictMode) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + + /** + * This table is indexed first by the four ZSTD_dictMode_e 
values, and then + * by the two searchMethod_e values. NULLs are placed for configurations + * that should never occur (extDict modes go to the other implementation + * below and there is no DDSS for binary tree search yet). + */ + const searchMax_f searchFuncs[4][2] = { + { + ZSTD_HcFindBestMatch_selectMLS, + ZSTD_BtFindBestMatch_selectMLS + }, + { + NULL, + NULL + }, + { + ZSTD_HcFindBestMatch_dictMatchState_selectMLS, + ZSTD_BtFindBestMatch_dictMatchState_selectMLS + }, + { + ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS, + NULL + } + }; + + searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree]; + U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; + + const int isDMS = dictMode == ZSTD_dictMatchState; + const int isDDS = dictMode == ZSTD_dedicatedDictSearch; + const int isDxS = isDMS || isDDS; + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0; + const BYTE* const dictBase = isDxS ? dms->window.base : NULL; + const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL; + const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL; + const U32 dictIndexDelta = isDxS ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); + + assert(searchMax != NULL); + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); + + /* init */ + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; + if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; + } + if (isDxS) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. 
*/ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if (isDxS) { + const U32 repIndex = (U32)(ip - base) + 1 - offset_1; + const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + if (depth==0) goto _storeSequence; + } + } + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) 
+ matchLength = mlRep, offset = 0, start = ip; + } + if (isDxS) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (isDxS) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* NOTE: + * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. + * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which + * overflows the pointer, which is undefined behavior. + */ + /* catch up */ + if (offset) { + if (dictMode == ZSTD_noDict) { + while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) + && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ + { start--; matchLength++; } + } + if (isDxS) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; + const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? 
dictLowest : prefixLowest; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + } + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + if (isDxS) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex = current2 - offset_2; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex : + base + repIndex; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( ((ip <= ilimit) & (offset_2>0)) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* Save reps for next block */ + rep[0] = offset_1 ? offset_1 : savedOffset; + rep[1] = offset_2 ? 
offset_2 : savedOffset; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); +} + +size_t 
ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); +} + + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ms->window.lowLimit; + const U32 windowLog = ms->cParams.windowLog; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* 
ip, const BYTE* iLimit, size_t* offsetPtr); + searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; + + U32 offset_1 = rep[0], offset_2 = rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); + + /* init */ + ip += (ip == prefixStart); + + /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 curr = (U32)(ip-base); + + /* check repCode */ + { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog); + const U32 repIndex = (U32)(curr+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? 
dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repCurrent = (U32)(ip-base); + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); + const U32 repIndex = repCurrent - offset_2; + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) 
*/ + } + break; + } } + + /* Save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); +} + +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); +} +/**** ended inlining compress/zstd_lazy.c ****/ +/**** start inlining compress/zstd_ldm.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/**** skipping file: zstd_ldm.h ****/ + +/**** skipping file: ../common/debug.h ****/ +/**** skipping file: ../common/xxhash.h ****/ +/**** skipping file: zstd_fast.h ****/ +/**** skipping file: zstd_double_fast.h ****/ +/**** start inlining zstd_ldm_geartab.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LDM_GEARTAB_H +#define ZSTD_LDM_GEARTAB_H + +static U64 ZSTD_ldm_gearTab[256] = { + 0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc, + 0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05, + 0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e, + 0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889, + 0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e, + 0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140, + 0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e, + 0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f, + 0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391, + 0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210, + 0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be, + 0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a, + 0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b, + 0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4, + 0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb, + 0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312, + 0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01, + 0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc, + 0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967, + 0x78580472b59b14a0, 0x273824c23b388577, 
0x66bf923ad45cb553, + 0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f, + 0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2, + 0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d, + 0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a, + 0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74, + 0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3, + 0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1, + 0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b, + 0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568, + 0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a, + 0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1, + 0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9, + 0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463, + 0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba, + 0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9, + 0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61, + 0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec, + 0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6, + 0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab, + 0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5, + 0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59, + 0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7, + 0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc, + 0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb, + 0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be, + 0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312, + 0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1, + 0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc, + 0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d, + 0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445, + 0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c, + 0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5, + 
0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5, + 0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28, + 0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a, + 0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9, + 0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15, + 0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef, + 0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2, + 0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375, + 0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3, + 0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595, + 0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389, + 0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4, + 0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756, + 0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc, + 0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45, + 0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea, + 0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f, + 0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc, + 0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c, + 0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a, + 0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17, + 0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3, + 0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4, + 0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91, + 0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40, + 0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741, + 0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f, + 0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4, + 0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad, + 0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047, + 0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2, + 0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e, + 0xeb8d0e25a65e3e58, 
0xbbcbd3df14e08580, 0xcf751f27ecdab2b, + 0x2b4da14f2613d8f4 +}; + +#endif /* ZSTD_LDM_GEARTAB_H */ +/**** ended inlining zstd_ldm_geartab.h ****/ + +#define LDM_BUCKET_SIZE_LOG 3 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_HASH_RLOG 7 + +typedef struct { + U64 rolling; + U64 stopMask; +} ldmRollingHashState_t; + +/** ZSTD_ldm_gear_init(): + * + * Initializes the rolling hash state such that it will honor the + * settings in params. */ +static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params) +{ + unsigned maxBitsInMask = MIN(params->minMatchLength, 64); + unsigned hashRateLog = params->hashRateLog; + + state->rolling = ~(U32)0; + + /* The choice of the splitting criterion is subject to two conditions: + * 1. it has to trigger on average every 2^(hashRateLog) bytes; + * 2. ideally, it has to depend on a window of minMatchLength bytes. + * + * In the gear hash algorithm, bit n depends on the last n bytes; + * so in order to obtain a good quality splitting criterion it is + * preferable to use bits with high weight. + * + * To match condition 1 we use a mask with hashRateLog bits set + * and, because of the previous remark, we make sure these bits + * have the highest possible weight while still respecting + * condition 2. + */ + if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) { + state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog); + } else { + /* In this degenerate case we simply honor the hash rate. */ + state->stopMask = ((U64)1 << hashRateLog) - 1; + } +} + +/** ZSTD_ldm_gear_feed(): + * + * Registers in the splits array all the split points found in the first + * size bytes following the data pointer. This function terminates when + * either all the data has been processed or LDM_BATCH_SIZE splits are + * present in the splits array. + * + * Precondition: The splits array must not be full. + * Returns: The number of bytes processed. 
*/ +static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state, + BYTE const* data, size_t size, + size_t* splits, unsigned* numSplits) +{ + size_t n; + U64 hash, mask; + + hash = state->rolling; + mask = state->stopMask; + n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + if (UNLIKELY((hash & mask) == 0)) { \ + splits[*numSplits] = n; \ + *numSplits += 1; \ + if (*numSplits == LDM_BATCH_SIZE) \ + goto done; \ + } \ + } while (0) + + while (n + 3 < size) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < size) { + GEAR_ITER_ONCE(); + } + +#undef GEAR_ITER_ONCE + +done: + state->rolling = hash; + return n; +} + +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams) +{ + params->windowLog = cParams->windowLog; + ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); + DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; + if (params->hashLog == 0) { + params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); + assert(params->hashLog <= ZSTD_HASHLOG_MAX); + } + if (params->hashRateLog == 0) { + params->hashRateLog = params->windowLog < params->hashLog + ? 0 + : params->windowLog - params->hashLog; + } + params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); +} + +size_t ZSTD_ldm_getTableSize(ldmParams_t params) +{ + size_t const ldmHSize = ((size_t)1) << params.hashLog; + size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); + size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); + size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) + + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); + return params.enableLdm ? 
totalSize : 0; +} + +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) +{ + return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; +} + +/** ZSTD_ldm_getBucket() : + * Returns a pointer to the start of the bucket associated with hash. */ +static ldmEntry_t* ZSTD_ldm_getBucket( + ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) +{ + return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); +} + +/** ZSTD_ldm_insertEntry() : + * Insert the entry with corresponding hash into the hash table */ +static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, + size_t const hash, const ldmEntry_t entry, + ldmParams_t const ldmParams) +{ + BYTE* const pOffset = ldmState->bucketOffsets + hash; + unsigned const offset = *pOffset; + + *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry; + *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1)); + +} + +/** ZSTD_ldm_countBackwardsMatch() : + * Returns the number of bytes that match backwards before pIn and pMatch. + * + * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ +static size_t ZSTD_ldm_countBackwardsMatch( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase) +{ + size_t matchLength = 0; + while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} + +/** ZSTD_ldm_countBackwardsMatch_2segments() : + * Returns the number of bytes that match backwards from pMatch, + * even with the backwards match spanning 2 different segments. 
+ * + * On reaching `pMatchBase`, start counting from mEnd */ +static size_t ZSTD_ldm_countBackwardsMatch_2segments( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase, + const BYTE* pExtDictStart, const BYTE* pExtDictEnd) +{ + size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase); + if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) { + /* If backwards match is entirely in the extDict or prefix, immediately return */ + return matchLength; + } + DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength); + matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart); + DEBUGLOG(7, "final backwards match length = %zu", matchLength); + return matchLength; +} + +/** ZSTD_ldm_fillFastTables() : + * + * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. + * This is similar to ZSTD_loadDictionaryContent. + * + * The tables for the other strategies are filled within their + * block compressors. 
*/ +static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, + void const* end) +{ + const BYTE* const iend = (const BYTE*)end; + + switch(ms->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + default: + assert(0); /* not possible : not a valid strategy id */ + } + + return 0; +} + +void ZSTD_ldm_fillHashTable( + ldmState_t* ldmState, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params) +{ + U32 const minMatchLength = params->minMatchLength; + U32 const hBits = params->hashLog - params->bucketSizeLog; + BYTE const* const base = ldmState->window.base; + BYTE const* const istart = ip; + ldmRollingHashState_t hashState; + size_t* const splits = ldmState->splitIndices; + unsigned numSplits; + + DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); + + ZSTD_ldm_gear_init(&hashState, params); + while (ip < iend) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + if (ip + splits[n] >= istart + minMatchLength) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + ldmEntry_t entry; + + entry.offset = (U32)(split - base); + entry.checksum = (U32)(xxhash >> 32); + ZSTD_ldm_insertEntry(ldmState, hash, entry, *params); + } + } + + ip += hashed; + } +} + + +/** ZSTD_ldm_limitTableUpdate() : + * + * Sets cctx->nextToUpdate to a position corresponding closer to anchor + * if it is far way + * (after a long match, only update tables a limited amount). 
*/ +static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) +{ + U32 const curr = (U32)(anchor - ms->window.base); + if (curr > ms->nextToUpdate + 1024) { + ms->nextToUpdate = + curr - MIN(512, curr - ms->nextToUpdate - 1024); + } +} + +static size_t ZSTD_ldm_generateSequences_internal( + ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + /* LDM parameters */ + int const extDict = ZSTD_window_hasExtDict(ldmState->window); + U32 const minMatchLength = params->minMatchLength; + U32 const entsPerBucket = 1U << params->bucketSizeLog; + U32 const hBits = params->hashLog - params->bucketSizeLog; + /* Prefix and extDict parameters */ + U32 const dictLimit = ldmState->window.dictLimit; + U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; + BYTE const* const base = ldmState->window.base; + BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; + BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; + BYTE const* const dictEnd = extDict ? 
dictBase + dictLimit : NULL; + BYTE const* const lowPrefixPtr = base + dictLimit; + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + BYTE const* const ilimit = iend - HASH_READ_SIZE; + /* Input positions */ + BYTE const* anchor = istart; + BYTE const* ip = istart; + /* Rolling hash state */ + ldmRollingHashState_t hashState; + /* Arrays for staged-processing */ + size_t* const splits = ldmState->splitIndices; + ldmMatchCandidate_t* const candidates = ldmState->matchCandidates; + unsigned numSplits; + + if (srcSize < minMatchLength) + return iend - anchor; + + /* Initialize the rolling hash state with the first minMatchLength bytes */ + ZSTD_ldm_gear_init(&hashState, params); + { + size_t n = 0; + + while (n < minMatchLength) { + numSplits = 0; + n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n, + splits, &numSplits); + } + ip += minMatchLength; + } + + while (ip < ilimit) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip, + splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + + candidates[n].split = split; + candidates[n].hash = hash; + candidates[n].checksum = (U32)(xxhash >> 32); + candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params); + PREFETCH_L1(candidates[n].bucket); + } + + for (n = 0; n < numSplits; n++) { + size_t forwardMatchLength = 0, backwardMatchLength = 0, + bestMatchLength = 0, mLength; + BYTE const* const split = candidates[n].split; + U32 const checksum = candidates[n].checksum; + U32 const hash = candidates[n].hash; + ldmEntry_t* const bucket = candidates[n].bucket; + ldmEntry_t const* cur; + ldmEntry_t const* bestEntry = NULL; + ldmEntry_t newEntry; + + newEntry.offset = (U32)(split - base); + newEntry.checksum = 
checksum; + + /* If a split point would generate a sequence overlapping with + * the previous one, we merely register it in the hash table and + * move on */ + if (split < anchor) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + for (cur = bucket; cur < bucket + entsPerBucket; cur++) { + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + if (extDict) { + BYTE const* const curMatchBase = + cur->offset < dictLimit ? dictBase : base; + BYTE const* const pMatch = curMatchBase + cur->offset; + BYTE const* const matchEnd = + cur->offset < dictLimit ? dictEnd : iend; + BYTE const* const lowMatchPtr = + cur->offset < dictLimit ? dictStart : lowPrefixPtr; + curForwardMatchLength = + ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments( + split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd); + } else { /* !extDict */ + BYTE const* const pMatch = base + cur->offset; + curForwardMatchLength = ZSTD_count(split, pMatch, iend); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr); + } + curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + + /* No match found -- insert an entry into the hash table + * and process the next candidate match */ + if (bestEntry == NULL) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + /* Match found */ + mLength = forwardMatchLength + backwardMatchLength; + { + U32 const offset = (U32)(split - base) - 
bestEntry->offset; + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; + + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(split - backwardMatchLength - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; + } + + /* Insert the current entry into the hash table --- it must be + * done after the previous block to avoid clobbering bestEntry */ + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + + anchor = split + forwardMatchLength; + } + + ip += hashed; + } + + return iend - anchor; +} + +/*! ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} + +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldmState, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + U32 const maxDist = 1U << params->windowLog; + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + size_t const kMaxChunkSize = 1 << 20; + size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); + size_t chunk; + size_t leftoverSize = 0; + + assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); + /* Check that ZSTD_window_update() has been called for this chunk prior + * to passing it to this function. + */ + assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); + /* The input could be very large (in zstdmt), so it must be broken up into + * chunks to enforce the maximum distance and handle overflow correction. 
+ */ + assert(sequences->pos <= sequences->size); + assert(sequences->size <= sequences->capacity); + for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { + BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; + size_t const remaining = (size_t)(iend - chunkStart); + BYTE const *const chunkEnd = + (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; + size_t const chunkSize = chunkEnd - chunkStart; + size_t newLeftoverSize; + size_t const prevSize = sequences->size; + + assert(chunkStart < iend); + /* 1. Perform overflow correction if necessary. */ + if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { + U32 const ldmHSize = 1U << params->hashLog; + U32 const correction = ZSTD_window_correctOverflow( + &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); + ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + /* invalidate dictionaries on overflow correction */ + ldmState->loadedDictEnd = 0; + } + /* 2. We enforce the maximum offset allowed. + * + * kMaxChunkSize should be small enough that we don't lose too much of + * the window through early invalidation. + * TODO: * Test the chunk size. + * * Try invalidation after the sequence generation and test the + * the offset against maxDist directly. + * + * NOTE: Because of dictionaries + sequence splitting we MUST make sure + * that any offset used is valid at the END of the sequence, since it may + * be split into two sequences. This condition holds when using + * ZSTD_window_enforceMaxDist(), but if we move to checking offsets + * against maxDist directly, we'll have to carefully handle that case. + */ + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); + /* 3. Generate the sequences for the chunk, and get newLeftoverSize. 
*/ + newLeftoverSize = ZSTD_ldm_generateSequences_internal( + ldmState, sequences, params, chunkStart, chunkSize); + if (ZSTD_isError(newLeftoverSize)) + return newLeftoverSize; + /* 4. We add the leftover literals from previous iterations to the first + * newly generated sequence, or add the `newLeftoverSize` if none are + * generated. + */ + /* Prepend the leftover literals from the last call */ + if (prevSize < sequences->size) { + sequences->seq[prevSize].litLength += (U32)leftoverSize; + leftoverSize = newLeftoverSize; + } else { + assert(newLeftoverSize == chunkSize); + leftoverSize += chunkSize; + } + } + return 0; +} + +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { + while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { + rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; + if (srcSize <= seq->litLength) { + /* Skip past srcSize literals */ + seq->litLength -= (U32)srcSize; + return; + } + srcSize -= seq->litLength; + seq->litLength = 0; + if (srcSize < seq->matchLength) { + /* Skip past the first srcSize of the match */ + seq->matchLength -= (U32)srcSize; + if (seq->matchLength < minMatch) { + /* The match is too short, omit it */ + if (rawSeqStore->pos + 1 < rawSeqStore->size) { + seq[1].litLength += seq[0].matchLength; + } + rawSeqStore->pos++; + } + return; + } + srcSize -= seq->matchLength; + seq->matchLength = 0; + rawSeqStore->pos++; + } +} + +/** + * If the sequence length is longer than remaining then the sequence is split + * between this block and the next. + * + * Returns the current sequence to handle, or if the rest of the block should + * be literals, it returns a sequence with offset == 0. 
+ */ +static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, + U32 const remaining, U32 const minMatch) +{ + rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; + assert(sequence.offset > 0); + /* Likely: No partial sequence */ + if (remaining >= sequence.litLength + sequence.matchLength) { + rawSeqStore->pos++; + return sequence; + } + /* Cut the sequence short (offset == 0 ==> rest is literals). */ + if (remaining <= sequence.litLength) { + sequence.offset = 0; + } else if (remaining < sequence.litLength + sequence.matchLength) { + sequence.matchLength = remaining - sequence.litLength; + if (sequence.matchLength < minMatch) { + sequence.offset = 0; + } + } + /* Skip past `remaining` bytes for the future sequences. */ + ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); + return sequence; +} + +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + unsigned const minMatch = cParams->minMatch; + ZSTD_blockCompressor const blockCompressor = + ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + /* Input positions */ + BYTE const* ip = istart; + + DEBUGLOG(5, "ZSTD_ldm_blockCompress: 
srcSize=%zu", srcSize); + /* If using opt parser, use LDMs only as candidates rather than always accepting them */ + if (cParams->strategy >= ZSTD_btopt) { + size_t lastLLSize; + ms->ldmSeqStore = rawSeqStore; + lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize); + ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize); + return lastLLSize; + } + + assert(rawSeqStore->pos <= rawSeqStore->size); + assert(rawSeqStore->size <= rawSeqStore->capacity); + /* Loop through each sequence and apply the block compressor to the literals */ + while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { + /* maybeSplitSequence updates rawSeqStore->pos */ + rawSeq const sequence = maybeSplitSequence(rawSeqStore, + (U32)(iend - ip), minMatch); + int i; + /* End signal */ + if (sequence.offset == 0) + break; + + assert(ip + sequence.litLength + sequence.matchLength <= iend); + + /* Fill tables for block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Run the block compressor */ + DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); + { + size_t const newLitLength = + blockCompressor(ms, seqStore, rep, ip, sequence.litLength); + ip += sequence.litLength; + /* Update the repcodes */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + rep[i] = rep[i-1]; + rep[0] = sequence.offset; + /* Store the sequence */ + ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, + sequence.offset + ZSTD_REP_MOVE, + sequence.matchLength - MINMATCH); + ip += sequence.matchLength; + } + } + /* Fill the tables for the block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Compress the last literals */ + return blockCompressor(ms, seqStore, rep, ip, iend - ip); +} +/**** ended inlining compress/zstd_ldm.c ****/ +/**** start inlining compress/zstd_opt.c ****/ +/* + * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. 
+ * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: hist.h ****/ +/**** skipping file: zstd_opt.h ****/ + + +#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ +#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ +#define ZSTD_MAX_PRICE (1<<30) + +#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ + + +/*-************************************* +* Price functions for optimal parser +***************************************/ + +#if 0 /* approximation at bit level */ +# define BITCOST_ACCURACY 0 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat)) +#elif 0 /* fractional bit accuracy */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) +#else /* opt==approx, ultra==accurate */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) (opt ? 
ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) +#endif + +MEM_STATIC U32 ZSTD_bitWeight(U32 stat) +{ + return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); +} + +MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) +{ + U32 const stat = rawStat + 1; + U32 const hb = ZSTD_highbit32(stat); + U32 const BWeight = hb * BITCOST_MULTIPLIER; + U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + BITCOST_ACCURACY < 31); + return weight; +} + +#if (DEBUGLEVEL>=2) +/* debugging function, + * @return price in bytes as fractional value + * for debug messages only */ +MEM_STATIC double ZSTD_fCost(U32 price) +{ + return (double)price / (BITCOST_MULTIPLIER*8); +} +#endif + +static int ZSTD_compressedLiterals(optState_t const* const optPtr) +{ + return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; +} + +static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) +{ + if (ZSTD_compressedLiterals(optPtr)) + optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); + optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); + optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); + optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); +} + + +/* ZSTD_downscaleStat() : + * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) + * return the resulting sum of elements */ +static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) +{ + U32 s, sum=0; + DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1); + assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); + for (s=0; s> (ZSTD_FREQ_DIV+malus)); + sum += table[s]; + } + return sum; +} + +/* ZSTD_rescaleFreqs() : + * if first block (detected by optPtr->litLengthSum == 0) : init statistics + * take hints from dictionary if there is one + * or init from zero, using src for literals stats, or flat 1 for match symbols + * otherwise downscale existing stats, to be used as seed 
for next block. + */ +static void +ZSTD_rescaleFreqs(optState_t* const optPtr, + const BYTE* const src, size_t const srcSize, + int const optLevel) +{ + int const compressedLiterals = ZSTD_compressedLiterals(optPtr); + DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); + optPtr->priceType = zop_dynamic; + + if (optPtr->litLengthSum == 0) { /* first block : init */ + if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ + DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); + optPtr->priceType = zop_predef; + } + + assert(optPtr->symbolCosts != NULL); + if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { + /* huffman table presumed generated by dictionary */ + optPtr->priceType = zop_dynamic; + + if (compressedLiterals) { + unsigned lit; + assert(optPtr->litFreq != NULL); + optPtr->litSum = 0; + for (lit=0; lit<=MaxLit; lit++) { + U32 const scaleLog = 11; /* scale to 2K */ + U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); + assert(bitCost <= scaleLog); + optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litSum += optPtr->litFreq[lit]; + } } + + { unsigned ll; + FSE_CState_t llstate; + FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); + optPtr->litLengthSum = 0; + for (ll=0; ll<=MaxLL; ll++) { + U32 const scaleLog = 10; /* scale to 1K */ + U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); + assert(bitCost < scaleLog); + optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litLengthSum += optPtr->litLengthFreq[ll]; + } } + + { unsigned ml; + FSE_CState_t mlstate; + FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); + optPtr->matchLengthSum = 0; + for (ml=0; ml<=MaxML; ml++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); + assert(bitCost < scaleLog); + optPtr->matchLengthFreq[ml] = bitCost ? 
1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; + } } + + { unsigned of; + FSE_CState_t ofstate; + FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); + optPtr->offCodeSum = 0; + for (of=0; of<=MaxOff; of++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); + assert(bitCost < scaleLog); + optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->offCodeSum += optPtr->offCodeFreq[of]; + } } + + } else { /* not a dictionary */ + + assert(optPtr->litFreq != NULL); + if (compressedLiterals) { + unsigned lit = MaxLit; + HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + } + + { unsigned ll; + for (ll=0; ll<=MaxLL; ll++) + optPtr->litLengthFreq[ll] = 1; + } + optPtr->litLengthSum = MaxLL+1; + + { unsigned ml; + for (ml=0; ml<=MaxML; ml++) + optPtr->matchLengthFreq[ml] = 1; + } + optPtr->matchLengthSum = MaxML+1; + + { unsigned of; + for (of=0; of<=MaxOff; of++) + optPtr->offCodeFreq[of] = 1; + } + optPtr->offCodeSum = MaxOff+1; + + } + + } else { /* new block : re-use previous statistics, scaled down */ + + if (compressedLiterals) + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); + } + + ZSTD_setBasePrices(optPtr, optLevel); +} + +/* ZSTD_rawLiteralsCost() : + * price of literals (only) in specified segment (which length can be 0). 
+ * does not include price of literalLength symbol */ +static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr, + int optLevel) +{ + if (litLength == 0) return 0; + + if (!ZSTD_compressedLiterals(optPtr)) + return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ + + if (optPtr->priceType == zop_predef) + return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ + + /* dynamic statistics */ + { U32 price = litLength * optPtr->litSumBasePrice; + U32 u; + for (u=0; u < litLength; u++) { + assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ + price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); + } + return price; + } +} + +/* ZSTD_litLengthPrice() : + * cost of literalLength symbol */ +static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) +{ + if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); + + /* dynamic statistics */ + { U32 const llCode = ZSTD_LLcode(litLength); + return (LL_bits[llCode] * BITCOST_MULTIPLIER) + + optPtr->litLengthSumBasePrice + - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); + } +} + +/* ZSTD_getMatchPrice() : + * Provides the cost of the match part (offset + matchLength) of a sequence + * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. 
+ * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ +FORCE_INLINE_TEMPLATE U32 +ZSTD_getMatchPrice(U32 const offset, + U32 const matchLength, + const optState_t* const optPtr, + int const optLevel) +{ + U32 price; + U32 const offCode = ZSTD_highbit32(offset+1); + U32 const mlBase = matchLength - MINMATCH; + assert(matchLength >= MINMATCH); + + if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ + return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); + + /* dynamic statistics */ + price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); + if ((optLevel<2) /*static*/ && offCode >= 20) + price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ + + /* match Length */ + { U32 const mlCode = ZSTD_MLcode(mlBase); + price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); + } + + price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ + + DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); + return price; +} + +/* ZSTD_updateStats() : + * assumption : literals + litLengtn <= iend */ +static void ZSTD_updateStats(optState_t* const optPtr, + U32 litLength, const BYTE* literals, + U32 offsetCode, U32 matchLength) +{ + /* literals */ + if (ZSTD_compressedLiterals(optPtr)) { + U32 u; + for (u=0; u < litLength; u++) + optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; + } + + /* literal Length */ + { U32 const llCode = ZSTD_LLcode(litLength); + optPtr->litLengthFreq[llCode]++; + optPtr->litLengthSum++; + } + + /* match offset code (0-2=>repCode; 3+=>offset+2) */ + { U32 const offCode = ZSTD_highbit32(offsetCode+1); + assert(offCode <= MaxOff); + 
optPtr->offCodeFreq[offCode]++; + optPtr->offCodeSum++; + } + + /* match Length */ + { U32 const mlBase = matchLength - MINMATCH; + U32 const mlCode = ZSTD_MLcode(mlBase); + optPtr->matchLengthFreq[mlCode]++; + optPtr->matchLengthSum++; + } +} + + +/* ZSTD_readMINMATCH() : + * function safe only for comparisons + * assumption : memPtr must be at least 4 bytes before end of buffer */ +MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip) +{ + U32* const hashTable3 = ms->hashTable3; + U32 const hashLog3 = ms->hashLog3; + const BYTE* const base = ms->window.base; + U32 idx = *nextToUpdate3; + U32 const target = (U32)(ip - base); + size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); + assert(hashLog3 > 0); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + *nextToUpdate3 = target; + return hashTable3[hash3]; +} + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. + * ip : assumed <= iend-8 . 
+ * @return : nb of positions added */ +static U32 ZSTD_insertBt1( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + U32 const mls, const int extDict) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + const U32 curr = (U32)(ip-base); + const U32 btLow = btMask >= curr ? 0 : curr - btMask; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = ms->window.lowLimit; + U32 matchEndIdx = curr+8+1; + size_t bestLength = 8; + U32 nbCompares = 1U << cParams->searchLog; +#ifdef ZSTD_C_PREDICT + U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ + + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr); + + assert(ip <= iend-8); /* required for h calculation */ + hashTable[h] = curr; /* Update Hash Table */ + + assert(windowLow > 0); + while (nbCompares-- && (matchIndex >= windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < curr); + +#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 
2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + + if (!extDict || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */ + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { 
smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + { U32 positions = 0; + if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + assert(matchEndIdx > curr + 8); + return MAX(positions, matchEndIdx - (curr + 8)); + } +} + +FORCE_INLINE_TEMPLATE +void ZSTD_updateTree_internal( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", + idx, target, dictMode); + + while(idx < target) { + U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + assert(idx < (U32)(idx + forward)); + idx += forward; + } + assert((size_t)(ip - base) <= (size_t)(U32)(-1)); + assert((size_t)(iend - base) <= (size_t)(U32)(-1)); + ms->nextToUpdate = target; +} + +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { + ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); +} + +FORCE_INLINE_TEMPLATE +U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, /* tells if associated 
literal length is 0 or not. This value must be 0 or 1 */ + const U32 lengthToBeat, + U32 const mls /* template */) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + const BYTE* const base = ms->window.base; + U32 const curr = (U32)(ip-base); + U32 const hashLog = cParams->hashLog; + U32 const minMatch = (mls==3) ? 3 : 4; + U32* const hashTable = ms->hashTable; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = ms->window.dictBase; + U32 const dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + U32 const matchLow = windowLow ? windowLow : 1; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */ + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + U32 nbCompares = 1U << cParams->searchLog; + + const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; + const ZSTD_compressionParameters* const dmsCParams = + dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; + const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; + const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; + U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; + U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? 
dms->window.lowLimit : 0; + U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0; + U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog; + U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog; + U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0; + U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit; + + size_t bestLength = lengthToBeat-1; + DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr); + + /* check repCode */ + assert(ll0 <= 1); /* necessarily 1 or 0 */ + { U32 const lastR = ZSTD_REP_NUM + ll0; + U32 repCode; + for (repCode = ll0; repCode < lastR; repCode++) { + U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + U32 const repIndex = curr - repOffset; + U32 repLen = 0; + assert(curr >= dictLimit); + if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */ + /* We must validate the repcode offset because when we're using a dictionary the + * valid offset range shrinks when the dictionary goes out of bounds. + */ + if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { + repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; + } + } else { /* repIndex < dictLimit || repIndex >= curr */ + const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? 
+ dmsBase + repIndex - dmsIndexDelta : + dictBase + repIndex; + assert(curr >= windowLow); + if ( dictMode == ZSTD_extDict + && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */ + & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; + } + if (dictMode == ZSTD_dictMatchState + && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */ + & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; + } } + /* save longer solution */ + if (repLen > bestLength) { + DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", + repCode, ll0, repOffset, repLen); + bestLength = repLen; + matches[mnum].off = repCode - ll0; + matches[mnum].len = (U32)repLen; + mnum++; + if ( (repLen > sufficient_len) + | (ip+repLen == iLimit) ) { /* best possible */ + return mnum; + } } } } + + /* HC3 match finder */ + if ((mls == 3) /*static*/ && (bestLength < mls)) { + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); + if ((matchIndex3 >= matchLow) + & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { + size_t mlen; + if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { + const BYTE* const match = base + matchIndex3; + mlen = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = 
dictBase + matchIndex3; + mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); + } + + /* save best solution */ + if (mlen >= mls /* == 3 > bestLength */) { + DEBUGLOG(8, "found small match with hlog3, of length %u", + (U32)mlen); + bestLength = mlen; + assert(curr > matchIndex3); + assert(mnum==0); /* no prior solution */ + matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE; + matches[0].len = (U32)mlen; + mnum = 1; + if ( (mlen > sufficient_len) | + (ip+mlen == iLimit) ) { /* best possible length */ + ms->nextToUpdate = curr+1; /* skip insertion */ + return 1; + } } } + /* no dictMatchState lookup: dicts don't have a populated HC3 table */ + } + + hashTable[h] = curr; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex >= matchLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + const BYTE* match; + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(curr > matchIndex); + + if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ + match = base + matchIndex; + if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); + } else { + match = dictBase + matchIndex; + assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* prepare for match[matchLength] read */ + } + + if (matchLength > bestLength) { + DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", + (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + 
assert(matchEndIdx > matchIndex); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ + break; /* drop, to preserve bt consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ + } else { + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + if (dictMode == ZSTD_dictMatchState && nbCompares) { + size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls); + U32 dictMatchIndex = dms->hashTable[dmsH]; + const U32* const dmsBt = dms->chainTable; + commonLengthSmaller = commonLengthLarger = 0; + while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) { + const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dmsBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, 
prefixStart); + if (dictMatchIndex+matchLength >= dmsHighLimit) + match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + matchIndex = dictMatchIndex + dmsIndexDelta; + DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", + (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ + if (match[matchLength] < ip[matchLength]) { + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + } + + assert(matchEndIdx > curr+8); + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + return mnum; +} + + +FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( + ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const matchLengthSearch = cParams->minMatch; + DEBUGLOG(8, "ZSTD_BtGetAllMatches"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + 
ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); + switch(matchLengthSearch) + { + case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); + default : + case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); + case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); + case 7 : + case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); + } +} + +/************************* +* LDM helper functions * +*************************/ + +/* Struct containing info needed to make decision about ldm inclusion */ +typedef struct { + rawSeqStore_t seqStore; /* External match candidates store for this block */ + U32 startPosInBlock; /* Start position of the current match candidate */ + U32 endPosInBlock; /* End position of the current match candidate */ + U32 offset; /* Offset of the match candidate */ +} ZSTD_optLdm_t; + +/* ZSTD_optLdm_skipRawSeqStoreBytes(): + * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'. + */ +static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +/* ZSTD_opt_getNextMatchAndUpdateSeqStore(): + * Calculates the beginning and end of the next match in the current block. 
+ * Updates 'pos' and 'posInSequence' of the ldmSeqStore. + */ +static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock, + U32 blockBytesRemaining) { + rawSeq currSeq; + U32 currBlockEndPos; + U32 literalsBytesRemaining; + U32 matchBytesRemaining; + + /* Setting match end position to MAX to ensure we never use an LDM during this block */ + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + return; + } + /* Calculate appropriate bytes left in matchLength and litLength after adjusting + based on ldmSeqStore->posInSequence */ + currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos]; + assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength); + currBlockEndPos = currPosInBlock + blockBytesRemaining; + literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ? + currSeq.litLength - (U32)optLdm->seqStore.posInSequence : + 0; + matchBytesRemaining = (literalsBytesRemaining == 0) ? + currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) : + currSeq.matchLength; + + /* If there are more literal bytes than bytes remaining in block, no ldm is possible */ + if (literalsBytesRemaining >= blockBytesRemaining) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining); + return; + } + + /* Matches may be < MINMATCH by this process. 
In that case, we will reject them + when we are deciding whether or not to add the ldm */ + optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining; + optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining; + optLdm->offset = currSeq.offset; + + if (optLdm->endPosInBlock > currBlockEndPos) { + /* Match ends after the block ends, we can't use the whole match */ + optLdm->endPosInBlock = currBlockEndPos; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock); + } else { + /* Consume nb of bytes equal to size of sequence left */ + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining); + } +} + +/* ZSTD_optLdm_maybeAddMatch(): + * Adds a match if it's long enough, based on it's 'matchStartPosInBlock' + * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches' + */ +static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, + ZSTD_optLdm_t* optLdm, U32 currPosInBlock) { + U32 posDiff = currPosInBlock - optLdm->startPosInBlock; + /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ + U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; + U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE; + + /* Ensure that current block position is not outside of the match */ + if (currPosInBlock < optLdm->startPosInBlock + || currPosInBlock >= optLdm->endPosInBlock + || candidateMatchLength < MINMATCH) { + return; + } + + if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { + DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", + candidateOffCode, candidateMatchLength, currPosInBlock); + matches[*nbMatches].len = candidateMatchLength; + matches[*nbMatches].off = candidateOffCode; + (*nbMatches)++; + } +} + +/* 
ZSTD_optLdm_processMatchCandidate(): + * Wrapper function to update ldm seq store and call ldm functions as necessary. + */ +static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches, + U32 currPosInBlock, U32 remainingBytes) { + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + return; + } + + if (currPosInBlock >= optLdm->endPosInBlock) { + if (currPosInBlock > optLdm->endPosInBlock) { + /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily + * at the end of a match from the ldm seq store, and will often be some bytes + * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots" + */ + U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot); + } + ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); + } + ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); +} + +/*-******************************* +* Optimal parser +*********************************/ + + +static U32 ZSTD_totalLen(ZSTD_optimal_t sol) +{ + return sol.litlen + sol.mlen; +} + +#if 0 /* debug */ + +static void +listStats(const U32* table, int lastEltID) +{ + int const nbElts = lastEltID + 1; + int enb; + for (enb=0; enb < nbElts; enb++) { + (void)table; + /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ + RAWLOG(2, "%4i,", table[enb]); + } + RAWLOG(2, " \n"); +} + +#endif + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const int optLevel, + const ZSTD_dictMode_e dictMode) +{ + optState_t* const optStatePtr = &ms->opt; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const 
base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; + U32 nextToUpdate3 = ms->nextToUpdate; + + ZSTD_optimal_t* const opt = optStatePtr->priceTable; + ZSTD_match_t* const matches = optStatePtr->matchTable; + ZSTD_optimal_t lastSequence; + ZSTD_optLdm_t optLdm; + + optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; + optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; + ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip)); + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", + (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); + assert(optLevel <= 2); + ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); + ip += (ip==prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, last_pos = 0; + + /* find first match */ + { U32 const litlen = (U32)(ip - anchor); + U32 const ll0 = !litlen; + U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(ip-istart), (U32)(iend - ip)); + if (!nbMatches) { ip++; continue; } + + /* initialize opt[0] */ + { U32 i ; for (i=0; i immediate encoding */ + { U32 const maxML = matches[nbMatches-1].len; + U32 const maxOffset = matches[nbMatches-1].off; + DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", + nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); + + if (maxML > sufficient_len) { + lastSequence.litlen = litlen; + lastSequence.mlen = maxML; + lastSequence.off = maxOffset; + DEBUGLOG(6, "large match (%u>%u), immediate encoding", + maxML, sufficient_len); + cur = 0; + last_pos = 
ZSTD_totalLen(lastSequence); + goto _shortestPath; + } } + + /* set prices for first matches starting position == 0 */ + { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 pos; + U32 matchNb; + for (pos = 1; pos < minMatch; pos++) { + opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ + } + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const end = matches[matchNb].len; + for ( ; pos <= end ; pos++ ) { + U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); + U32 const sequencePrice = literalsPrice + matchPrice; + DEBUGLOG(7, "rPos:%u => set initial price : %.2f", + pos, ZSTD_fCost(sequencePrice)); + opt[pos].mlen = pos; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = sequencePrice; + } } + last_pos = pos-1; + } + } + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + const BYTE* const inr = ip + cur; + assert(cur < ZSTD_OPT_NUM); + DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) + + /* Fix current position with one literal if cheaper */ + { U32 const litlen = (opt[cur-1].mlen == 0) ? 
opt[cur-1].litlen + 1 : 1; + int const price = opt[cur-1].price + + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) + + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) + - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); + assert(price < 1000000000); /* overflow check */ + if (price <= opt[cur].price) { + DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, + opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); + opt[cur].mlen = 0; + opt[cur].off = 0; + opt[cur].litlen = litlen; + opt[cur].price = price; + } else { + DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), + opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); + } + } + + /* Set the repcodes of the current position. We must do it here + * because we rely on the repcodes of the 2nd to last sequence being + * correct to set the next chunks repcodes during the backward + * traversal. + */ + ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); + assert(cur >= opt[cur].mlen); + if (opt[cur].mlen != 0) { + U32 const prev = cur - opt[cur].mlen; + repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); + ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); + } else { + ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); + } + + /* last match must start at a minimum distance of 8 from oend */ + if (inr > ilimit) continue; + + if (cur == last_pos) break; + + if ( (optLevel==0) /*static_test*/ + && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { + DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); + continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ + } + + { U32 const ll0 = (opt[cur].mlen != 0); + U32 const litlen = (opt[cur].mlen == 0) ? 
opt[cur].litlen : 0; + U32 const previousPrice = opt[cur].price; + U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); + U32 matchNb; + + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(inr-istart), (U32)(iend-inr)); + + if (!nbMatches) { + DEBUGLOG(7, "rPos:%u : no match found", cur); + continue; + } + + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", + inr-istart, cur, nbMatches, maxML); + + if ( (maxML > sufficient_len) + || (cur + maxML >= ZSTD_OPT_NUM) ) { + lastSequence.mlen = maxML; + lastSequence.off = matches[nbMatches-1].off; + lastSequence.litlen = litlen; + cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ + last_pos = cur + ZSTD_totalLen(lastSequence); + if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ + goto _shortestPath; + } } + + /* set prices using matches found at position == cur */ + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const lastML = matches[matchNb].len; + U32 const startML = (matchNb>0) ? 
matches[matchNb-1].len+1 : minMatch; + U32 mlen; + + DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", + matchNb, matches[matchNb].off, lastML, litlen); + + for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ + U32 const pos = cur + mlen; + int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + + if ((pos > last_pos) || (price < opt[pos].price)) { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ + opt[pos].mlen = mlen; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = price; + } else { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ + } + } } } + } /* for (cur = 1; cur <= last_pos; cur++) */ + + lastSequence = opt[last_pos]; + cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ + assert(cur < ZSTD_OPT_NUM); /* control overflow*/ + +_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ + assert(opt[0].mlen == 0); + + /* Set the next chunk's repcodes based on the repcodes of the beginning + * of the last match, and the last sequence. This avoids us having to + * update them while traversing the sequences. 
+ */ + if (lastSequence.mlen != 0) { + repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); + ZSTD_memcpy(rep, &reps, sizeof(reps)); + } else { + ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); + } + + { U32 const storeEnd = cur + 1; + U32 storeStart = storeEnd; + U32 seqPos = cur; + + DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", + last_pos, cur); (void)last_pos; + assert(storeEnd < ZSTD_OPT_NUM); + DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); + opt[storeEnd] = lastSequence; + while (seqPos > 0) { + U32 const backDist = ZSTD_totalLen(opt[seqPos]); + storeStart--; + DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); + opt[storeStart] = opt[seqPos]; + seqPos = (seqPos > backDist) ? seqPos - backDist : 0; + } + + /* save sequences */ + DEBUGLOG(6, "sending selected sequences into seqStore") + { U32 storePos; + for (storePos=storeStart; storePos <= storeEnd; storePos++) { + U32 const llen = opt[storePos].litlen; + U32 const mlen = opt[storePos].mlen; + U32 const offCode = opt[storePos].off; + U32 const advance = llen + mlen; + DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", + anchor - istart, (unsigned)llen, (unsigned)mlen); + + if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ + assert(storePos == storeEnd); /* must be last sequence */ + ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ + continue; /* will finish */ + } + + assert(anchor + llen <= iend); + ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); + ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); + anchor += advance; + ip = anchor; + } } + ZSTD_setBasePrices(optStatePtr, optLevel); + } + } /* while (ip < 
ilimit) */ + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btopt"); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); +} + + +/* used in 2-pass strategy */ +static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) +{ + U32 s, sum=0; + assert(ZSTD_FREQ_DIV+bonus >= 0); + for (s=0; slitSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); + optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); +} + +/* ZSTD_initStats_ultra(): + * make a first compression pass, just to seed stats with more accurate starting values. + * only works on first block, with no dictionary and no ldm. + * this function cannot error, hence its contract must be respected. 
+ */ +static void +ZSTD_initStats_ultra(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ + ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); + + DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); + assert(ms->opt.litLengthSum == 0); /* first block */ + assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ + assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ + assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ + + ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ + + /* invalidate first scan from history */ + ZSTD_resetSeqStore(seqStore); + ms->window.base -= srcSize; + ms->window.dictLimit += (U32)srcSize; + ms->window.lowLimit = ms->window.dictLimit; + ms->nextToUpdate = ms->window.dictLimit; + + /* re-inforce weight of collected statistics */ + ZSTD_upscaleStats(&ms->opt); +} + +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 const curr = (U32)((const BYTE*)src - ms->window.base); + DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); + + /* 2-pass strategy: + * this strategy makes a first pass over first block to collect statistics + * and seed next round's statistics with it. + * After 1st pass, function forgets everything, and starts a new block. 
+ * Consequently, this can only work if no data has been previously loaded in tables, + * aka, no dictionary, no prefix, no ldm preprocessing. + * The compression ratio gain is generally small (~0.5% on first block), + * the cost is 2x cpu time on first block. */ + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + if ( (ms->opt.litLengthSum==0) /* first block */ + && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ + && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ + && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ + && (srcSize > ZSTD_PREDEF_THRESHOLD) + ) { + ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); + } + + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); +} + +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); +} + +/* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * 
and is only specific for the first block (no prefix) */ +/**** ended inlining compress/zstd_opt.c ****/ +#ifdef ZSTD_MULTITHREAD +/**** start inlining compress/zstdmt_compress.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +/* ====== Constants ====== */ +#define ZSTDMT_OVERLAPLOG_DEFAULT 0 + + +/* ====== Dependencies ====== */ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/pool.h ****/ +/**** skipping file: ../common/threading.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_ldm.h ****/ +/**** skipping file: zstdmt_compress.h ****/ + +/* Guards code to support resizing the SeqPool. + * We will want to resize the SeqPool to save memory in the future. + * Until then, comment the code out since it is unused. 
+ */ +#define ZSTD_RESIZE_SEQPOOL 0 + +/* ====== Debug ====== */ +#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) \ + && !defined(_MSC_VER) \ + && !defined(__MINGW32__) + +# include +# include +# include + +# define DEBUG_PRINTHEX(l,p,n) { \ + unsigned debug_u; \ + for (debug_u=0; debug_u<(n); debug_u++) \ + RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \ + RAWLOG(l, " \n"); \ +} + +static unsigned long long GetCurrentClockTimeMicroseconds(void) +{ + static clock_t _ticksPerSecond = 0; + if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK); + + { struct tms junk; clock_t newTicks = (clock_t) times(&junk); + return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); +} } + +#define MUTEX_WAIT_TIME_DLEVEL 6 +#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \ + if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \ + unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \ + ZSTD_pthread_mutex_lock(mutex); \ + { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \ + unsigned long long const elapsedTime = (afterTime-beforeTime); \ + if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \ + DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \ + elapsedTime, #mutex); \ + } } \ + } else { \ + ZSTD_pthread_mutex_lock(mutex); \ + } \ +} + +#else + +# define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m) +# define DEBUG_PRINTHEX(l,p,n) {} + +#endif + + +/* ===== Buffer Pool ===== */ +/* a single Buffer Pool can be invoked from multiple threads in parallel */ + +typedef struct buffer_s { + void* start; + size_t capacity; +} buffer_t; + +static const buffer_t g_nullBuffer = { NULL, 0 }; + +typedef struct ZSTDMT_bufferPool_s { + ZSTD_pthread_mutex_t poolMutex; + size_t bufferSize; + unsigned totalBuffers; + unsigned nbBuffers; + ZSTD_customMem cMem; + buffer_t bTable[1]; /* variable size */ +} ZSTDMT_bufferPool; + +static ZSTDMT_bufferPool* 
ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem) +{ + unsigned const maxNbBuffers = 2*nbWorkers + 3; + ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc( + sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem); + if (bufPool==NULL) return NULL; + if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) { + ZSTD_customFree(bufPool, cMem); + return NULL; + } + bufPool->bufferSize = 64 KB; + bufPool->totalBuffers = maxNbBuffers; + bufPool->nbBuffers = 0; + bufPool->cMem = cMem; + return bufPool; +} + +static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool) +{ + unsigned u; + DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool); + if (!bufPool) return; /* compatibility with free on NULL */ + for (u=0; utotalBuffers; u++) { + DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start); + ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem); + } + ZSTD_pthread_mutex_destroy(&bufPool->poolMutex); + ZSTD_customFree(bufPool, bufPool->cMem); +} + +/* only works at initialization, not during compression */ +static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool) +{ + size_t const poolSize = sizeof(*bufPool) + + (bufPool->totalBuffers - 1) * sizeof(buffer_t); + unsigned u; + size_t totalBufferSize = 0; + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + for (u=0; utotalBuffers; u++) + totalBufferSize += bufPool->bTable[u].capacity; + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + + return poolSize + totalBufferSize; +} + +/* ZSTDMT_setBufferSize() : + * all future buffers provided by this buffer pool will have _at least_ this size + * note : it's better for all buffers to have same size, + * as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */ +static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize) +{ + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + DEBUGLOG(4, 
"ZSTDMT_setBufferSize: bSize = %u", (U32)bSize); + bufPool->bufferSize = bSize; + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); +} + + +static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers) +{ + unsigned const maxNbBuffers = 2*nbWorkers + 3; + if (srcBufPool==NULL) return NULL; + if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */ + return srcBufPool; + /* need a larger buffer pool */ + { ZSTD_customMem const cMem = srcBufPool->cMem; + size_t const bSize = srcBufPool->bufferSize; /* forward parameters */ + ZSTDMT_bufferPool* newBufPool; + ZSTDMT_freeBufferPool(srcBufPool); + newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + if (newBufPool==NULL) return newBufPool; + ZSTDMT_setBufferSize(newBufPool, bSize); + return newBufPool; + } +} + +/** ZSTDMT_getBuffer() : + * assumption : bufPool must be valid + * @return : a buffer, with start pointer and size + * note: allocation may fail, in this case, start==NULL and size==0 */ +static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool) +{ + size_t const bSize = bufPool->bufferSize; + DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize); + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + if (bufPool->nbBuffers) { /* try to use an existing buffer */ + buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)]; + size_t const availBufferSize = buf.capacity; + bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer; + if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) { + /* large enough, but not too much */ + DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u", + bufPool->nbBuffers, (U32)buf.capacity); + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + return buf; + } + /* size conditions not respected : scratch this buffer, create new one */ + DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing"); + ZSTD_customFree(buf.start, bufPool->cMem); + } + 
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + /* create new buffer */ + DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer"); + { buffer_t buffer; + void* const start = ZSTD_customMalloc(bSize, bufPool->cMem); + buffer.start = start; /* note : start can be NULL if malloc fails ! */ + buffer.capacity = (start==NULL) ? 0 : bSize; + if (start==NULL) { + DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!"); + } else { + DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize); + } + return buffer; + } +} + +#if ZSTD_RESIZE_SEQPOOL +/** ZSTDMT_resizeBuffer() : + * assumption : bufPool must be valid + * @return : a buffer that is at least the buffer pool buffer size. + * If a reallocation happens, the data in the input buffer is copied. + */ +static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer) +{ + size_t const bSize = bufPool->bufferSize; + if (buffer.capacity < bSize) { + void* const start = ZSTD_customMalloc(bSize, bufPool->cMem); + buffer_t newBuffer; + newBuffer.start = start; + newBuffer.capacity = start == NULL ? 
0 : bSize; + if (start != NULL) { + assert(newBuffer.capacity >= buffer.capacity); + ZSTD_memcpy(newBuffer.start, buffer.start, buffer.capacity); + DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize); + return newBuffer; + } + DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!"); + } + return buffer; +} +#endif + +/* store buffer for later re-use, up to pool capacity */ +static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf) +{ + DEBUGLOG(5, "ZSTDMT_releaseBuffer"); + if (buf.start == NULL) return; /* compatible with release on NULL */ + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + if (bufPool->nbBuffers < bufPool->totalBuffers) { + bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */ + DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u", + (U32)buf.capacity, (U32)(bufPool->nbBuffers-1)); + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + return; + } + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + /* Reached bufferPool capacity (should not happen) */ + DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing "); + ZSTD_customFree(buf.start, bufPool->cMem); +} + + +/* ===== Seq Pool Wrapper ====== */ + +typedef ZSTDMT_bufferPool ZSTDMT_seqPool; + +static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool) +{ + return ZSTDMT_sizeof_bufferPool(seqPool); +} + +static rawSeqStore_t bufferToSeq(buffer_t buffer) +{ + rawSeqStore_t seq = kNullRawSeqStore; + seq.seq = (rawSeq*)buffer.start; + seq.capacity = buffer.capacity / sizeof(rawSeq); + return seq; +} + +static buffer_t seqToBuffer(rawSeqStore_t seq) +{ + buffer_t buffer; + buffer.start = seq.seq; + buffer.capacity = seq.capacity * sizeof(rawSeq); + return buffer; +} + +static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool) +{ + if (seqPool->bufferSize == 0) { + return kNullRawSeqStore; + } + return bufferToSeq(ZSTDMT_getBuffer(seqPool)); +} + +#if ZSTD_RESIZE_SEQPOOL +static rawSeqStore_t 
ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq) +{ + return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq))); +} +#endif + +static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq) +{ + ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq)); +} + +static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq) +{ + ZSTDMT_setBufferSize(seqPool, nbSeq * sizeof(rawSeq)); +} + +static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem) +{ + ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + if (seqPool == NULL) return NULL; + ZSTDMT_setNbSeq(seqPool, 0); + return seqPool; +} + +static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool) +{ + ZSTDMT_freeBufferPool(seqPool); +} + +static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers) +{ + return ZSTDMT_expandBufferPool(pool, nbWorkers); +} + + +/* ===== CCtx Pool ===== */ +/* a single CCtx Pool can be invoked from multiple threads in parallel */ + +typedef struct { + ZSTD_pthread_mutex_t poolMutex; + int totalCCtx; + int availCCtx; + ZSTD_customMem cMem; + ZSTD_CCtx* cctx[1]; /* variable size */ +} ZSTDMT_CCtxPool; + +/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */ +static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) +{ + int cid; + for (cid=0; cidtotalCCtx; cid++) + ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */ + ZSTD_pthread_mutex_destroy(&pool->poolMutex); + ZSTD_customFree(pool, pool->cMem); +} + +/* ZSTDMT_createCCtxPool() : + * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */ +static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers, + ZSTD_customMem cMem) +{ + ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc( + sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem); + assert(nbWorkers > 0); + if (!cctxPool) return NULL; + if 
(ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) { + ZSTD_customFree(cctxPool, cMem); + return NULL; + } + cctxPool->cMem = cMem; + cctxPool->totalCCtx = nbWorkers; + cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */ + cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem); + if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; } + DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers); + return cctxPool; +} + +static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool, + int nbWorkers) +{ + if (srcPool==NULL) return NULL; + if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */ + /* need a larger cctx pool */ + { ZSTD_customMem const cMem = srcPool->cMem; + ZSTDMT_freeCCtxPool(srcPool); + return ZSTDMT_createCCtxPool(nbWorkers, cMem); + } +} + +/* only works during initialization phase, not during compression */ +static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool) +{ + ZSTD_pthread_mutex_lock(&cctxPool->poolMutex); + { unsigned const nbWorkers = cctxPool->totalCCtx; + size_t const poolSize = sizeof(*cctxPool) + + (nbWorkers-1) * sizeof(ZSTD_CCtx*); + unsigned u; + size_t totalCCtxSize = 0; + for (u=0; ucctx[u]); + } + ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); + assert(nbWorkers > 0); + return poolSize + totalCCtxSize; + } +} + +static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool) +{ + DEBUGLOG(5, "ZSTDMT_getCCtx"); + ZSTD_pthread_mutex_lock(&cctxPool->poolMutex); + if (cctxPool->availCCtx) { + cctxPool->availCCtx--; + { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx]; + ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); + return cctx; + } } + ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex); + DEBUGLOG(5, "create one more CCtx"); + return ZSTD_createCCtx_advanced(cctxPool->cMem); /* note : can be NULL, when creation fails ! 
*/ +} + +static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return; /* compatibility with release on NULL */ + ZSTD_pthread_mutex_lock(&pool->poolMutex); + if (pool->availCCtx < pool->totalCCtx) + pool->cctx[pool->availCCtx++] = cctx; + else { + /* pool overflow : should not happen, since totalCCtx==nbWorkers */ + DEBUGLOG(4, "CCtx pool overflow : free cctx"); + ZSTD_freeCCtx(cctx); + } + ZSTD_pthread_mutex_unlock(&pool->poolMutex); +} + +/* ==== Serial State ==== */ + +typedef struct { + void const* start; + size_t size; +} range_t; + +typedef struct { + /* All variables in the struct are protected by mutex. */ + ZSTD_pthread_mutex_t mutex; + ZSTD_pthread_cond_t cond; + ZSTD_CCtx_params params; + ldmState_t ldmState; + XXH64_state_t xxhState; + unsigned nextJobID; + /* Protects ldmWindow. + * Must be acquired after the main mutex when acquiring both. + */ + ZSTD_pthread_mutex_t ldmWindowMutex; + ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is updated */ + ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */ +} serialState_t; + +static int +ZSTDMT_serialState_reset(serialState_t* serialState, + ZSTDMT_seqPool* seqPool, + ZSTD_CCtx_params params, + size_t jobSize, + const void* dict, size_t const dictSize, + ZSTD_dictContentType_e dictContentType) +{ + /* Adjust parameters */ + if (params.ldmParams.enableLdm) { + DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10); + ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); + assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); + assert(params.ldmParams.hashRateLog < 32); + } else { + ZSTD_memset(¶ms.ldmParams, 0, sizeof(params.ldmParams)); + } + serialState->nextJobID = 0; + if (params.fParams.checksumFlag) + XXH64_reset(&serialState->xxhState, 0); + if (params.ldmParams.enableLdm) { + ZSTD_customMem cMem = params.customMem; + unsigned const hashLog = params.ldmParams.hashLog; + size_t const hashSize = 
((size_t)1 << hashLog) * sizeof(ldmEntry_t); + unsigned const bucketLog = + params.ldmParams.hashLog - params.ldmParams.bucketSizeLog; + unsigned const prevBucketLog = + serialState->params.ldmParams.hashLog - + serialState->params.ldmParams.bucketSizeLog; + size_t const numBuckets = (size_t)1 << bucketLog; + /* Size the seq pool tables */ + ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize)); + /* Reset the window */ + ZSTD_window_init(&serialState->ldmState.window); + /* Resize tables and output space if necessary. */ + if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) { + ZSTD_customFree(serialState->ldmState.hashTable, cMem); + serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_customMalloc(hashSize, cMem); + } + if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) { + ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem); + serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem); + } + if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets) + return 1; + /* Zero the tables */ + ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize); + ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets); + + /* Update window state and fill hash table with dict */ + serialState->ldmState.loadedDictEnd = 0; + if (dictSize > 0) { + if (dictContentType == ZSTD_dct_rawContent) { + BYTE const* const dictEnd = (const BYTE*)dict + dictSize; + ZSTD_window_update(&serialState->ldmState.window, dict, dictSize); + ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, ¶ms.ldmParams); + serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base); + } else { + /* don't even load anything */ + } + } + + /* Initialize serialState's copy of ldmWindow. 
*/ + serialState->ldmWindow = serialState->ldmState.window; + } + + serialState->params = params; + serialState->params.jobSize = (U32)jobSize; + return 0; +} + +static int ZSTDMT_serialState_init(serialState_t* serialState) +{ + int initError = 0; + ZSTD_memset(serialState, 0, sizeof(*serialState)); + initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL); + initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->ldmWindowCond, NULL); + return initError; +} + +static void ZSTDMT_serialState_free(serialState_t* serialState) +{ + ZSTD_customMem cMem = serialState->params.customMem; + ZSTD_pthread_mutex_destroy(&serialState->mutex); + ZSTD_pthread_cond_destroy(&serialState->cond); + ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex); + ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond); + ZSTD_customFree(serialState->ldmState.hashTable, cMem); + ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem); +} + +static void ZSTDMT_serialState_update(serialState_t* serialState, + ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore, + range_t src, unsigned jobID) +{ + /* Wait for our turn */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + while (serialState->nextJobID < jobID) { + DEBUGLOG(5, "wait for serialState->cond"); + ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex); + } + /* A future job may error and skip our job */ + if (serialState->nextJobID == jobID) { + /* It is now our turn, do any processing necessary */ + if (serialState->params.ldmParams.enableLdm) { + size_t error; + assert(seqStore.seq != NULL && seqStore.pos == 0 && + seqStore.size == 0 && seqStore.capacity > 0); + assert(src.size <= serialState->params.jobSize); + ZSTD_window_update(&serialState->ldmState.window, src.start, src.size); + error = ZSTD_ldm_generateSequences( + &serialState->ldmState, &seqStore, + 
&serialState->params.ldmParams, src.start, src.size); + /* We provide a large enough buffer to never fail. */ + assert(!ZSTD_isError(error)); (void)error; + /* Update ldmWindow to match the ldmState.window and signal the main + * thread if it is waiting for a buffer. + */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + serialState->ldmWindow = serialState->ldmState.window; + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + if (serialState->params.fParams.checksumFlag && src.size > 0) + XXH64_update(&serialState->xxhState, src.start, src.size); + } + /* Now it is the next jobs turn */ + serialState->nextJobID++; + ZSTD_pthread_cond_broadcast(&serialState->cond); + ZSTD_pthread_mutex_unlock(&serialState->mutex); + + if (seqStore.size > 0) { + size_t const err = ZSTD_referenceExternalSequences( + jobCCtx, seqStore.seq, seqStore.size); + assert(serialState->params.ldmParams.enableLdm); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState, + unsigned jobID, size_t cSize) +{ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + if (serialState->nextJobID <= jobID) { + assert(ZSTD_isError(cSize)); (void)cSize; + DEBUGLOG(5, "Skipping past job %u because of error", jobID); + serialState->nextJobID = jobID + 1; + ZSTD_pthread_cond_broadcast(&serialState->cond); + + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + ZSTD_window_clear(&serialState->ldmWindow); + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + ZSTD_pthread_mutex_unlock(&serialState->mutex); + +} + + +/* ------------------------------------------ */ +/* ===== Worker thread ===== */ +/* ------------------------------------------ */ + +static const range_t kNullRange = { NULL, 0 }; + +typedef struct { + size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx 
*/ + size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */ + ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */ + ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */ + ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */ + serialState_t* serial; /* Thread-safe - used by mtctx and (all) workers */ + buffer_t dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */ + range_t prefix; /* set by mtctx, then read by worker & mtctx => no barrier */ + range_t src; /* set by mtctx, then read by worker & mtctx => no barrier */ + unsigned jobID; /* set by mtctx, then read by worker => no barrier */ + unsigned firstJob; /* set by mtctx, then read by worker => no barrier */ + unsigned lastJob; /* set by mtctx, then read by worker => no barrier */ + ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */ + const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */ + unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */ + size_t dstFlushed; /* used only by mtctx */ + unsigned frameChecksumNeeded; /* used only by mtctx */ +} ZSTDMT_jobDescription; + +#define JOB_ERROR(e) { \ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \ + job->cSize = e; \ + ZSTD_pthread_mutex_unlock(&job->job_mutex); \ + goto _endJob; \ +} + +/* ZSTDMT_compressionJob() is a POOL_function type */ +static void ZSTDMT_compressionJob(void* jobDescription) +{ + ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; + ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! 
copy it, modify the copy */ + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool); + rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool); + buffer_t dstBuff = job->dstBuff; + size_t lastCBlockSize = 0; + + /* resources */ + if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation)); + if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */ + dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation)); + job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */ + } + if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL) + JOB_ERROR(ERROR(memory_allocation)); + + /* Don't compute the checksum for chunks, since we compute it externally, + * but write it in the header. + */ + if (job->jobID != 0) jobParams.fParams.checksumFlag = 0; + /* Don't run LDM for the chunks, since we handle it externally */ + jobParams.ldmParams.enableLdm = 0; + /* Correct nbWorkers to 0. */ + jobParams.nbWorkers = 0; + + + /* init */ + if (job->cdict) { + size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize); + assert(job->firstJob); /* only allowed for first job */ + if (ZSTD_isError(initError)) JOB_ERROR(initError); + } else { /* srcStart points at reloaded section */ + U64 const pledgedSrcSize = job->firstJob ? 
job->fullFrameSize : job->src.size; + { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob); + if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError); + } + { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, + job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ + ZSTD_dtlm_fast, + NULL, /*cdict*/ + &jobParams, pledgedSrcSize); + if (ZSTD_isError(initError)) JOB_ERROR(initError); + } } + + /* Perform serial step as early as possible, but after CCtx initialization */ + ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID); + + if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */ + size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0); + if (ZSTD_isError(hSize)) JOB_ERROR(hSize); + DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize); + ZSTD_invalidateRepCodes(cctx); + } + + /* compress */ + { size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX; + int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize); + const BYTE* ip = (const BYTE*) job->src.start; + BYTE* const ostart = (BYTE*)dstBuff.start; + BYTE* op = ostart; + BYTE* oend = op + dstBuff.capacity; + int chunkNb; + if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize); /* check overflow */ + DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks); + assert(job->cSize == 0); + for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) { + size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize); + if (ZSTD_isError(cSize)) JOB_ERROR(cSize); + ip += chunkSize; + op += cSize; assert(op < oend); + /* stats */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + job->cSize += cSize; + job->consumed = chunkSize * 
chunkNb; + DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)", + (U32)cSize, (U32)job->cSize); + ZSTD_pthread_cond_signal(&job->job_cond); /* warns some more data is ready to be flushed */ + ZSTD_pthread_mutex_unlock(&job->job_mutex); + } + /* last block */ + assert(chunkSize > 0); + assert((chunkSize & (chunkSize - 1)) == 0); /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */ + if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) { + size_t const lastBlockSize1 = job->src.size & (chunkSize-1); + size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1; + size_t const cSize = (job->lastJob) ? + ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) : + ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize); + if (ZSTD_isError(cSize)) JOB_ERROR(cSize); + lastCBlockSize = cSize; + } } + ZSTD_CCtx_trace(cctx, 0); + +_endJob: + ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize); + if (job->prefix.size > 0) + DEBUGLOG(5, "Finished with prefix: %zx", (size_t)job->prefix.start); + DEBUGLOG(5, "Finished with source: %zx", (size_t)job->src.start); + /* release resources */ + ZSTDMT_releaseSeq(job->seqPool, rawSeqStore); + ZSTDMT_releaseCCtx(job->cctxPool, cctx); + /* report */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + if (ZSTD_isError(job->cSize)) assert(lastCBlockSize == 0); + job->cSize += lastCBlockSize; + job->consumed = job->src.size; /* when job->consumed == job->src.size , compression job is presumed completed */ + ZSTD_pthread_cond_signal(&job->job_cond); + ZSTD_pthread_mutex_unlock(&job->job_mutex); +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +typedef struct { + range_t prefix; /* read-only non-owned prefix buffer */ + buffer_t buffer; + size_t filled; +} inBuff_t; + +typedef struct { + BYTE* buffer; /* The 
round input buffer. All jobs get references + * to pieces of the buffer. ZSTDMT_tryGetInputRange() + * handles handing out job input buffers, and makes + * sure it doesn't overlap with any pieces still in use. + */ + size_t capacity; /* The capacity of buffer. */ + size_t pos; /* The position of the current inBuff in the round + * buffer. Updated past the end if the inBuff once + * the inBuff is sent to the worker thread. + * pos <= capacity. + */ +} roundBuff_t; + +static const roundBuff_t kNullRoundBuff = {NULL, 0, 0}; + +#define RSYNC_LENGTH 32 + +typedef struct { + U64 hash; + U64 hitMask; + U64 primePower; +} rsyncState_t; + +struct ZSTDMT_CCtx_s { + POOL_ctx* factory; + ZSTDMT_jobDescription* jobs; + ZSTDMT_bufferPool* bufPool; + ZSTDMT_CCtxPool* cctxPool; + ZSTDMT_seqPool* seqPool; + ZSTD_CCtx_params params; + size_t targetSectionSize; + size_t targetPrefixSize; + int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */ + inBuff_t inBuff; + roundBuff_t roundBuff; + serialState_t serial; + rsyncState_t rsync; + unsigned jobIDMask; + unsigned doneJobID; + unsigned nextJobID; + unsigned frameEnded; + unsigned allJobsCompleted; + unsigned long long frameContentSize; + unsigned long long consumed; + unsigned long long produced; + ZSTD_customMem cMem; + ZSTD_CDict* cdictLocal; + const ZSTD_CDict* cdict; + unsigned providedFactory: 1; +}; + +static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem) +{ + U32 jobNb; + if (jobTable == NULL) return; + for (jobNb=0; jobNb mtctx->jobIDMask+1) { /* need more job capacity */ + ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); + mtctx->jobIDMask = 0; + mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem); + if (mtctx->jobs==NULL) return ERROR(memory_allocation); + assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0)); /* ensure nbJobs is a power of 2 */ + mtctx->jobIDMask = nbJobs - 1; + } + return 0; +} 
+ + +/* ZSTDMT_CCtxParam_setNbWorkers(): + * Internal use only */ +static size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers) +{ + return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers); +} + +MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool) +{ + ZSTDMT_CCtx* mtctx; + U32 nbJobs = nbWorkers + 2; + int initError; + DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers); + + if (nbWorkers < 1) return NULL; + nbWorkers = MIN(nbWorkers , ZSTDMT_NBWORKERS_MAX); + if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL)) + /* invalid custom allocator */ + return NULL; + + mtctx = (ZSTDMT_CCtx*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtx), cMem); + if (!mtctx) return NULL; + ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers); + mtctx->cMem = cMem; + mtctx->allJobsCompleted = 1; + if (pool != NULL) { + mtctx->factory = pool; + mtctx->providedFactory = 1; + } + else { + mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem); + mtctx->providedFactory = 0; + } + mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem); + assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */ + mtctx->jobIDMask = nbJobs - 1; + mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem); + mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem); + initError = ZSTDMT_serialState_init(&mtctx->serial); + mtctx->roundBuff = kNullRoundBuff; + if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool | !mtctx->seqPool | initError) { + ZSTDMT_freeCCtx(mtctx); + return NULL; + } + DEBUGLOG(3, "mt_cctx created, for %u threads", nbWorkers); + return mtctx; +} + +ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool) +{ +#ifdef ZSTD_MULTITHREAD + return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem, pool); 
+#else + (void)nbWorkers; + (void)cMem; + (void)pool; + return NULL; +#endif +} + + +/* ZSTDMT_releaseAllJobResources() : + * note : ensure all workers are killed first ! */ +static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) +{ + unsigned jobID; + DEBUGLOG(3, "ZSTDMT_releaseAllJobResources"); + for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { + /* Copy the mutex/cond out */ + ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex; + ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond; + + DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); + + /* Clear the job description, but keep the mutex/cond */ + ZSTD_memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID])); + mtctx->jobs[jobID].job_mutex = mutex; + mtctx->jobs[jobID].job_cond = cond; + } + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->allJobsCompleted = 1; +} + +static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx) +{ + DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted"); + while (mtctx->doneJobID < mtctx->nextJobID) { + unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask; + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex); + while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) { + DEBUGLOG(4, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */ + ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex); + mtctx->doneJobID++; + } +} + +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx==NULL) return 0; /* compatible with free on NULL */ + if (!mtctx->providedFactory) + POOL_free(mtctx->factory); /* stop and free worker threads */ + ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */ + 
ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); + ZSTDMT_freeBufferPool(mtctx->bufPool); + ZSTDMT_freeCCtxPool(mtctx->cctxPool); + ZSTDMT_freeSeqPool(mtctx->seqPool); + ZSTDMT_serialState_free(&mtctx->serial); + ZSTD_freeCDict(mtctx->cdictLocal); + if (mtctx->roundBuff.buffer) + ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem); + ZSTD_customFree(mtctx, mtctx->cMem); + return 0; +} + +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx == NULL) return 0; /* supports sizeof NULL */ + return sizeof(*mtctx) + + POOL_sizeof(mtctx->factory) + + ZSTDMT_sizeof_bufferPool(mtctx->bufPool) + + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription) + + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool) + + ZSTDMT_sizeof_seqPool(mtctx->seqPool) + + ZSTD_sizeof_CDict(mtctx->cdictLocal) + + mtctx->roundBuff.capacity; +} + + +/* ZSTDMT_resize() : + * @return : error code if fails, 0 on success */ +static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) +{ + if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation); + FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , ""); + mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers); + if (mtctx->bufPool == NULL) return ERROR(memory_allocation); + mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers); + if (mtctx->cctxPool == NULL) return ERROR(memory_allocation); + mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers); + if (mtctx->seqPool == NULL) return ERROR(memory_allocation); + ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers); + return 0; +} + + +/*! ZSTDMT_updateCParams_whileCompressing() : + * Updates a selected set of compression parameters, remaining compatible with currently active frame. + * New parameters will be applied to next compression job. 
*/ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams) +{ + U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */ + int const compressionLevel = cctxParams->compressionLevel; + DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)", + compressionLevel); + mtctx->params.compressionLevel = compressionLevel; + { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + cParams.windowLog = saved_wlog; + mtctx->params.cParams = cParams; + } +} + +/* ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + * Note : mutex will be acquired during statistics collection inside workers. */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx) +{ + ZSTD_frameProgression fps; + DEBUGLOG(5, "ZSTDMT_getFrameProgression"); + fps.ingested = mtctx->consumed + mtctx->inBuff.filled; + fps.consumed = mtctx->consumed; + fps.produced = fps.flushed = mtctx->produced; + fps.currentJobID = mtctx->nextJobID; + fps.nbActiveWorkers = 0; + { unsigned jobNb; + unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1); + DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)", + mtctx->doneJobID, lastJobNb, mtctx->jobReady) + for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) { + unsigned const wJobID = jobNb & mtctx->jobIDMask; + ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID]; + ZSTD_pthread_mutex_lock(&jobPtr->job_mutex); + { size_t const cResult = jobPtr->cSize; + size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; + size_t const flushed = ZSTD_isError(cResult) ? 
0 : jobPtr->dstFlushed; + assert(flushed <= produced); + fps.ingested += jobPtr->src.size; + fps.consumed += jobPtr->consumed; + fps.produced += produced; + fps.flushed += flushed; + fps.nbActiveWorkers += (jobPtr->consumed < jobPtr->src.size); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + } + return fps; +} + + +size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx) +{ + size_t toFlush; + unsigned const jobID = mtctx->doneJobID; + assert(jobID <= mtctx->nextJobID); + if (jobID == mtctx->nextJobID) return 0; /* no active job => nothing to flush */ + + /* look into oldest non-fully-flushed job */ + { unsigned const wJobID = jobID & mtctx->jobIDMask; + ZSTDMT_jobDescription* const jobPtr = &mtctx->jobs[wJobID]; + ZSTD_pthread_mutex_lock(&jobPtr->job_mutex); + { size_t const cResult = jobPtr->cSize; + size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; + size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; + assert(flushed <= produced); + assert(jobPtr->consumed <= jobPtr->src.size); + toFlush = produced - flushed; + /* if toFlush==0, nothing is available to flush. + * However, jobID is expected to still be active: + * if jobID was already completed and fully flushed, + * ZSTDMT_flushProduced() should have already moved onto next job. + * Therefore, some input has not yet been consumed. */ + if (toFlush==0) { + assert(jobPtr->consumed < jobPtr->src.size); + } + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + + return toFlush; +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params) +{ + unsigned jobLog; + if (params->ldmParams.enableLdm) { + /* In Long Range Mode, the windowLog is typically oversized. + * In which case, it's preferable to determine the jobSize + * based on cycleLog instead. 
*/ + jobLog = MAX(21, ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy) + 3); + } else { + jobLog = MAX(20, params->cParams.windowLog + 2); + } + return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); +} + +static int ZSTDMT_overlapLog_default(ZSTD_strategy strat) +{ + switch(strat) + { + case ZSTD_btultra2: + return 9; + case ZSTD_btultra: + case ZSTD_btopt: + return 8; + case ZSTD_btlazy2: + case ZSTD_lazy2: + return 7; + case ZSTD_lazy: + case ZSTD_greedy: + case ZSTD_dfast: + case ZSTD_fast: + default:; + } + return 6; +} + +static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat) +{ + assert(0 <= ovlog && ovlog <= 9); + if (ovlog == 0) return ZSTDMT_overlapLog_default(strat); + return ovlog; +} + +static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params) +{ + int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy); + int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog); + assert(0 <= overlapRLog && overlapRLog <= 8); + if (params->ldmParams.enableLdm) { + /* In Long Range Mode, the windowLog is typically oversized. + * In which case, it's preferable to determine the jobSize + * based on chainLog instead. + * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */ + ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) + - overlapRLog; + } + assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); + DEBUGLOG(4, "overlapLog : %i", params->overlapLog); + DEBUGLOG(4, "overlap size : %i", 1 << ovLog); + return (ovLog==0) ? 
0 : (size_t)1 << ovLog; +} + +/* ====================================== */ +/* ======= Streaming API ======= */ +/* ====================================== */ + +size_t ZSTDMT_initCStream_internal( + ZSTDMT_CCtx* mtctx, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, ZSTD_CCtx_params params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)", + (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx); + + /* params supposed partially fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + + /* init */ + if (params.nbWorkers != mtctx->params.nbWorkers) + FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , ""); + + if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN; + if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX; + + DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers); + + if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */ + ZSTDMT_waitForAllJobsCompleted(mtctx); + ZSTDMT_releaseAllJobResources(mtctx); + mtctx->allJobsCompleted = 1; + } + + mtctx->params = params; + mtctx->frameContentSize = pledgedSrcSize; + if (dict) { + ZSTD_freeCDict(mtctx->cdictLocal); + mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */ + params.cParams, mtctx->cMem); + mtctx->cdict = mtctx->cdictLocal; + if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation); + } else { + ZSTD_freeCDict(mtctx->cdictLocal); + mtctx->cdictLocal = NULL; + mtctx->cdict = cdict; + } + + mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(¶ms); + DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, 
(U32)(mtctx->targetPrefixSize>>10)); + mtctx->targetSectionSize = params.jobSize; + if (mtctx->targetSectionSize == 0) { + mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(¶ms); + } + assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX); + + if (params.rsyncable) { + /* Aim for the targetsectionSize as the average job size. */ + U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20); + U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20; + assert(jobSizeMB >= 1); + DEBUGLOG(4, "rsyncLog = %u", rsyncBits); + mtctx->rsync.hash = 0; + mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1; + mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH); + } + if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */ + DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize); + DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10)); + ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize)); + { + /* If ldm is enabled we need windowSize space. */ + size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0; + /* Two buffers of slack, plus extra space for the overlap + * This is the minimum slack that LDM works with. One extra because + * flush might waste up to targetSectionSize-1 bytes. Another extra + * for the overlap (if > 0), then one to fill which doesn't overlap + * with the LDM window. 
+ */ + size_t const nbSlackBuffers = 2 + (mtctx->targetPrefixSize > 0); + size_t const slackSize = mtctx->targetSectionSize * nbSlackBuffers; + /* Compute the total size, and always have enough slack */ + size_t const nbWorkers = MAX(mtctx->params.nbWorkers, 1); + size_t const sectionsSize = mtctx->targetSectionSize * nbWorkers; + size_t const capacity = MAX(windowSize, sectionsSize) + slackSize; + if (mtctx->roundBuff.capacity < capacity) { + if (mtctx->roundBuff.buffer) + ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem); + mtctx->roundBuff.buffer = (BYTE*)ZSTD_customMalloc(capacity, mtctx->cMem); + if (mtctx->roundBuff.buffer == NULL) { + mtctx->roundBuff.capacity = 0; + return ERROR(memory_allocation); + } + mtctx->roundBuff.capacity = capacity; + } + } + DEBUGLOG(4, "roundBuff capacity : %u KB", (U32)(mtctx->roundBuff.capacity>>10)); + mtctx->roundBuff.pos = 0; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->inBuff.prefix = kNullRange; + mtctx->doneJobID = 0; + mtctx->nextJobID = 0; + mtctx->frameEnded = 0; + mtctx->allJobsCompleted = 0; + mtctx->consumed = 0; + mtctx->produced = 0; + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize, + dict, dictSize, dictContentType)) + return ERROR(memory_allocation); + return 0; +} + + +/* ZSTDMT_writeLastEmptyBlock() + * Write a single empty block with an end-of-frame to finish a frame. + * Job must be created from streaming variant. + * This function is always successful if expected conditions are fulfilled. 
+ */ +static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job) +{ + assert(job->lastJob == 1); + assert(job->src.size == 0); /* last job is empty -> will be simplified into a last empty block */ + assert(job->firstJob == 0); /* cannot be first job, as it also needs to create frame header */ + assert(job->dstBuff.start == NULL); /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */ + job->dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (job->dstBuff.start == NULL) { + job->cSize = ERROR(memory_allocation); + return; + } + assert(job->dstBuff.capacity >= ZSTD_blockHeaderSize); /* no buffer should ever be that small */ + job->src = kNullRange; + job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity); + assert(!ZSTD_isError(job->cSize)); + assert(job->consumed == 0); +} + +static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp) +{ + unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask; + int const endFrame = (endOp == ZSTD_e_end); + + if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full"); + assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask)); + return 0; + } + + if (!mtctx->jobReady) { + BYTE const* src = (BYTE const*)mtctx->inBuff.buffer.start; + DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ", + mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefix.size); + mtctx->jobs[jobID].src.start = src; + mtctx->jobs[jobID].src.size = srcSize; + assert(mtctx->inBuff.filled >= srcSize); + mtctx->jobs[jobID].prefix = mtctx->inBuff.prefix; + mtctx->jobs[jobID].consumed = 0; + mtctx->jobs[jobID].cSize = 0; + mtctx->jobs[jobID].params = mtctx->params; + mtctx->jobs[jobID].cdict = mtctx->nextJobID==0 ? 
mtctx->cdict : NULL; + mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize; + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cctxPool = mtctx->cctxPool; + mtctx->jobs[jobID].bufPool = mtctx->bufPool; + mtctx->jobs[jobID].seqPool = mtctx->seqPool; + mtctx->jobs[jobID].serial = &mtctx->serial; + mtctx->jobs[jobID].jobID = mtctx->nextJobID; + mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0); + mtctx->jobs[jobID].lastJob = endFrame; + mtctx->jobs[jobID].frameChecksumNeeded = mtctx->params.fParams.checksumFlag && endFrame && (mtctx->nextJobID>0); + mtctx->jobs[jobID].dstFlushed = 0; + + /* Update the round buffer pos and clear the input buffer to be reset */ + mtctx->roundBuff.pos += srcSize; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + /* Set the prefix */ + if (!endFrame) { + size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize); + mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize; + mtctx->inBuff.prefix.size = newPrefixSize; + } else { /* endFrame==1 => no need for another input buffer */ + mtctx->inBuff.prefix = kNullRange; + mtctx->frameEnded = endFrame; + if (mtctx->nextJobID == 0) { + /* single job exception : checksum is already calculated directly within worker thread */ + mtctx->params.fParams.checksumFlag = 0; + } } + + if ( (srcSize == 0) + && (mtctx->nextJobID>0)/*single job must also write frame header*/ ) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame"); + assert(endOp == ZSTD_e_end); /* only possible case : need to end the frame with an empty last block */ + ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID); + mtctx->nextJobID++; + return 0; + } + } + + DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u, jobNb == %u (mod:%u))", + mtctx->nextJobID, + (U32)mtctx->jobs[jobID].src.size, + mtctx->jobs[jobID].lastJob, + mtctx->nextJobID, + jobID); + if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) { 
+ mtctx->nextJobID++; + mtctx->jobReady = 0; + } else { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID); + mtctx->jobReady = 1; + } + return 0; +} + + +/*! ZSTDMT_flushProduced() : + * flush whatever data has been produced but not yet flushed in current job. + * move to next job if current one is fully flushed. + * `output` : `pos` will be updated with amount of data flushed . + * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush . + * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */ +static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end) +{ + unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask; + DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)", + blockToFlush, mtctx->doneJobID, mtctx->nextJobID); + assert(output->size >= output->pos); + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + if ( blockToFlush + && (mtctx->doneJobID < mtctx->nextJobID) ) { + assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize); + while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) { /* nothing to flush */ + if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].src.size) { + DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].src.size); + break; + } + DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex); /* block when nothing to flush but some to come */ + } } + + /* try to flush something */ + { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */ + size_t const srcConsumed = 
mtctx->jobs[wJobID].consumed; /* shared */ + size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */ + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + if (ZSTD_isError(cSize)) { + DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s", + mtctx->doneJobID, ZSTD_getErrorName(cSize)); + ZSTDMT_waitForAllJobsCompleted(mtctx); + ZSTDMT_releaseAllJobResources(mtctx); + return cSize; + } + /* add frame checksum if necessary (can only happen once) */ + assert(srcConsumed <= srcSize); + if ( (srcConsumed == srcSize) /* job completed -> worker no longer active */ + && mtctx->jobs[wJobID].frameChecksumNeeded ) { + U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState); + DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum); + cSize += 4; + mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */ + mtctx->jobs[wJobID].frameChecksumNeeded = 0; + } + + if (cSize > 0) { /* compression is ongoing or completed */ + size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos); + DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)", + (U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize); + assert(mtctx->doneJobID < mtctx->nextJobID); + assert(cSize >= mtctx->jobs[wJobID].dstFlushed); + assert(mtctx->jobs[wJobID].dstBuff.start != NULL); + if (toFlush > 0) { + ZSTD_memcpy((char*)output->dst + output->pos, + (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, + toFlush); + } + output->pos += toFlush; + mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */ + + if ( (srcConsumed == srcSize) /* job is completed */ + && 
(mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */ + DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff); + DEBUGLOG(5, "dstBuffer released"); + mtctx->jobs[wJobID].dstBuff = g_nullBuffer; + mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */ + mtctx->consumed += srcSize; + mtctx->produced += cSize; + mtctx->doneJobID++; + } } + + /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */ + if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed); + if (srcSize > srcConsumed) return 1; /* current job not completely compressed */ + } + if (mtctx->doneJobID < mtctx->nextJobID) return 1; /* some more jobs ongoing */ + if (mtctx->jobReady) return 1; /* one job is ready to push, just not yet in the list */ + if (mtctx->inBuff.filled > 0) return 1; /* input is not empty, and still needs to be converted into a job */ + mtctx->allJobsCompleted = mtctx->frameEnded; /* all jobs are entirely flushed => if this one is last one, frame is completed */ + if (end == ZSTD_e_end) return !mtctx->frameEnded; /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */ + return 0; /* internal buffers fully flushed */ +} + +/** + * Returns the range of data used by the earliest job that is not yet complete. + * If the data of the first job is broken up into two segments, we cover both + * sections. 
+ */ +static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx) +{ + unsigned const firstJobID = mtctx->doneJobID; + unsigned const lastJobID = mtctx->nextJobID; + unsigned jobID; + + for (jobID = firstJobID; jobID < lastJobID; ++jobID) { + unsigned const wJobID = jobID & mtctx->jobIDMask; + size_t consumed; + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + consumed = mtctx->jobs[wJobID].consumed; + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + + if (consumed < mtctx->jobs[wJobID].src.size) { + range_t range = mtctx->jobs[wJobID].prefix; + if (range.size == 0) { + /* Empty prefix */ + range = mtctx->jobs[wJobID].src; + } + /* Job source in multiple segments not supported yet */ + assert(range.start <= mtctx->jobs[wJobID].src.start); + return range; + } + } + return kNullRange; +} + +/** + * Returns non-zero iff buffer and range overlap. + */ +static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range) +{ + BYTE const* const bufferStart = (BYTE const*)buffer.start; + BYTE const* const bufferEnd = bufferStart + buffer.capacity; + BYTE const* const rangeStart = (BYTE const*)range.start; + BYTE const* const rangeEnd = range.size != 0 ? 
rangeStart + range.size : rangeStart; + + if (rangeStart == NULL || bufferStart == NULL) + return 0; + /* Empty ranges cannot overlap */ + if (bufferStart == bufferEnd || rangeStart == rangeEnd) + return 0; + + return bufferStart < rangeEnd && rangeStart < bufferEnd; +} + +static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window) +{ + range_t extDict; + range_t prefix; + + DEBUGLOG(5, "ZSTDMT_doesOverlapWindow"); + extDict.start = window.dictBase + window.lowLimit; + extDict.size = window.dictLimit - window.lowLimit; + + prefix.start = window.base + window.dictLimit; + prefix.size = window.nextSrc - (window.base + window.dictLimit); + DEBUGLOG(5, "extDict [0x%zx, 0x%zx)", + (size_t)extDict.start, + (size_t)extDict.start + extDict.size); + DEBUGLOG(5, "prefix [0x%zx, 0x%zx)", + (size_t)prefix.start, + (size_t)prefix.start + prefix.size); + + return ZSTDMT_isOverlapped(buffer, extDict) + || ZSTDMT_isOverlapped(buffer, prefix); +} + +static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer) +{ + if (mtctx->params.ldmParams.enableLdm) { + ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex; + DEBUGLOG(5, "ZSTDMT_waitForLdmComplete"); + DEBUGLOG(5, "source [0x%zx, 0x%zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + ZSTD_PTHREAD_MUTEX_LOCK(mutex); + while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) { + DEBUGLOG(5, "Waiting for LDM to finish..."); + ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex); + } + DEBUGLOG(6, "Done waiting for LDM to finish"); + ZSTD_pthread_mutex_unlock(mutex); + } +} + +/** + * Attempts to set the inBuff to the next section to fill. + * If any part of the new section is still in use we give up. + * Returns non-zero if the buffer is filled. 
+ */ +static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx) +{ + range_t const inUse = ZSTDMT_getInputDataInUse(mtctx); + size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos; + size_t const target = mtctx->targetSectionSize; + buffer_t buffer; + + DEBUGLOG(5, "ZSTDMT_tryGetInputRange"); + assert(mtctx->inBuff.buffer.start == NULL); + assert(mtctx->roundBuff.capacity >= target); + + if (spaceLeft < target) { + /* ZSTD_invalidateRepCodes() doesn't work for extDict variants. + * Simply copy the prefix to the beginning in that case. + */ + BYTE* const start = (BYTE*)mtctx->roundBuff.buffer; + size_t const prefixSize = mtctx->inBuff.prefix.size; + + buffer.start = start; + buffer.capacity = prefixSize; + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(5, "Waiting for buffer..."); + return 0; + } + ZSTDMT_waitForLdmComplete(mtctx, buffer); + ZSTD_memmove(start, mtctx->inBuff.prefix.start, prefixSize); + mtctx->inBuff.prefix.start = start; + mtctx->roundBuff.pos = prefixSize; + } + buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos; + buffer.capacity = target; + + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(5, "Waiting for buffer..."); + return 0; + } + assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix)); + + ZSTDMT_waitForLdmComplete(mtctx, buffer); + + DEBUGLOG(5, "Using prefix range [%zx, %zx)", + (size_t)mtctx->inBuff.prefix.start, + (size_t)mtctx->inBuff.prefix.start + mtctx->inBuff.prefix.size); + DEBUGLOG(5, "Using source range [%zx, %zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + + + mtctx->inBuff.buffer = buffer; + mtctx->inBuff.filled = 0; + assert(mtctx->roundBuff.pos + buffer.capacity <= mtctx->roundBuff.capacity); + return 1; +} + +typedef struct { + size_t toLoad; /* The number of bytes to load from the input. */ + int flush; /* Boolean declaring if we must flush because we found a synchronization point. 
*/ +} syncPoint_t; + +/** + * Searches through the input for a synchronization point. If one is found, we + * will instruct the caller to flush, and return the number of bytes to load. + * Otherwise, we will load as many bytes as possible and instruct the caller + * to continue as normal. + */ +static syncPoint_t +findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input) +{ + BYTE const* const istart = (BYTE const*)input.src + input.pos; + U64 const primePower = mtctx->rsync.primePower; + U64 const hitMask = mtctx->rsync.hitMask; + + syncPoint_t syncPoint; + U64 hash; + BYTE const* prev; + size_t pos; + + syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled); + syncPoint.flush = 0; + if (!mtctx->params.rsyncable) + /* Rsync is disabled. */ + return syncPoint; + if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH) + /* Not enough to compute the hash. + * We will miss any synchronization points in this RSYNC_LENGTH byte + * window. However, since it depends only in the internal buffers, if the + * state is already synchronized, we will remain synchronized. + * Additionally, the probability that we miss a synchronization point is + * low: RSYNC_LENGTH / targetSectionSize. + */ + return syncPoint; + /* Initialize the loop variables. */ + if (mtctx->inBuff.filled >= RSYNC_LENGTH) { + /* We have enough bytes buffered to initialize the hash. + * Start scanning at the beginning of the input. + */ + pos = 0; + prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH; + hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH); + if ((hash & hitMask) == hitMask) { + /* We're already at a sync point so don't load any more until + * we're able to flush this sync point. + * This likely happened because the job table was full so we + * couldn't add our job. 
+ */ + syncPoint.toLoad = 0; + syncPoint.flush = 1; + return syncPoint; + } + } else { + /* We don't have enough bytes buffered to initialize the hash, but + * we know we have at least RSYNC_LENGTH bytes total. + * Start scanning after the first RSYNC_LENGTH bytes less the bytes + * already buffered. + */ + pos = RSYNC_LENGTH - mtctx->inBuff.filled; + prev = (BYTE const*)mtctx->inBuff.buffer.start - pos; + hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled); + hash = ZSTD_rollingHash_append(hash, istart, pos); + } + /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll + * through the input. If we hit a synchronization point, then cut the + * job off, and tell the compressor to flush the job. Otherwise, load + * all the bytes and continue as normal. + * If we go too long without a synchronization point (targetSectionSize) + * then a block will be emitted anyways, but this is okay, since if we + * are already synchronized we will remain synchronized. + */ + for (; pos < syncPoint.toLoad; ++pos) { + BYTE const toRemove = pos < RSYNC_LENGTH ? 
prev[pos] : istart[pos - RSYNC_LENGTH]; + /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */ + hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower); + if ((hash & hitMask) == hitMask) { + syncPoint.toLoad = pos + 1; + syncPoint.flush = 1; + break; + } + } + return syncPoint; +} + +size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx) +{ + size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled; + if (hintInSize==0) hintInSize = mtctx->targetSectionSize; + return hintInSize; +} + +/** ZSTDMT_compressStream_generic() : + * internal use only - exposed to be invoked from zstd_compress.c + * assumption : output and input are valid (pos <= size) + * @return : minimum amount of data remaining to flush, 0 if none */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + unsigned forwardInputProgress = 0; + DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)", + (U32)endOp, (U32)(input->size - input->pos)); + assert(output->pos <= output->size); + assert(input->pos <= input->size); + + if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { + /* current frame being ended. Only flush/end are allowed */ + return ERROR(stage_wrong); + } + + /* fill input buffer */ + if ( (!mtctx->jobReady) + && (input->size > input->pos) ) { /* support NULL input */ + if (mtctx->inBuff.buffer.start == NULL) { + assert(mtctx->inBuff.filled == 0); /* Can't fill an empty buffer */ + if (!ZSTDMT_tryGetInputRange(mtctx)) { + /* It is only possible for this operation to fail if there are + * still compression jobs ongoing. 
+ */ + DEBUGLOG(5, "ZSTDMT_tryGetInputRange failed"); + assert(mtctx->doneJobID != mtctx->nextJobID); + } else + DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start); + } + if (mtctx->inBuff.buffer.start != NULL) { + syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input); + if (syncPoint.flush && endOp == ZSTD_e_continue) { + endOp = ZSTD_e_flush; + } + assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize); + DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u", + (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize); + ZSTD_memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad); + input->pos += syncPoint.toLoad; + mtctx->inBuff.filled += syncPoint.toLoad; + forwardInputProgress = syncPoint.toLoad>0; + } + } + if ((input->pos < input->size) && (endOp == ZSTD_e_end)) { + /* Can't end yet because the input is not fully consumed. + * We are in one of these cases: + * - mtctx->inBuff is NULL & empty: we couldn't get an input buffer so don't create a new job. + * - We filled the input buffer: flush this job but don't end the frame. + * - We hit a synchronization point: flush this job but don't end the frame. 
+ */ + assert(mtctx->inBuff.filled == 0 || mtctx->inBuff.filled == mtctx->targetSectionSize || mtctx->params.rsyncable); + endOp = ZSTD_e_flush; + } + + if ( (mtctx->jobReady) + || (mtctx->inBuff.filled >= mtctx->targetSectionSize) /* filled enough : let's compress */ + || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0)) /* something to flush : let's go */ + || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ + size_t const jobSize = mtctx->inBuff.filled; + assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); + FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , ""); + } + + /* check for potential compressed data ready to be flushed */ + { size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */ + if (input->pos < input->size) return MAX(remainingToFlush, 1); /* input not consumed : do not end flush yet */ + DEBUGLOG(5, "end of ZSTDMT_compressStream_generic: remainingToFlush = %u", (U32)remainingToFlush); + return remainingToFlush; + } +} +/**** ended inlining compress/zstdmt_compress.c ****/ +#endif + +/**** start inlining decompress/huf_decompress.c ****/ +/* ****************************************************************** + * huff0 huffman decoder, + * part of Finite State Entropy library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* ************************************************************** +* Dependencies +****************************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/error_private.h ****/ + +/* ************************************************************** +* Macros +****************************************************************/ + +/* These two optional macros force the use one way or another of the two + * Huffman decompression implementations. You can't force in both directions + * at the same time. + */ +#if defined(HUF_FORCE_DECOMPRESS_X1) && \ + defined(HUF_FORCE_DECOMPRESS_X2) +#error "Cannot force the use of the X1 and X2 decoders at the same time!" 
+#endif + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError + + +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) + + +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +#if DYNAMIC_BMI2 + +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#else + +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#endif + + +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; + +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + ZSTD_memcpy(&dtd, table, sizeof(dtd)); + 
return dtd; +} + + +#ifndef HUF_FORCE_DECOMPRESS_X2 + +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ + +/** + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. + */ +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + D4 = symbol + (nbBits << 8); + } else { + D4 = (symbol << 8) + nbBits; + } + D4 *= 0x0001000100010001ULL; + return D4; +} + +typedef struct { + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; + + +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +{ + return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; + + DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); + if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); + + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... 
*/ + + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); + if (HUF_isError(iSize)) return iSize; + + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + } + + /* Compute symbols and rankStart given rankVal: + * + * rankVal already contains the number of values of each weight. + * + * symbols contains the symbols ordered by weight. First are the rankVal[0] + * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. + * symbols[0] is filled (but unused) to avoid a branch. + * + * rankStart contains the offset where each rank belongs in the DTable. + * rankStart[0] is not filled because there are no entries in the table for + * weight 0. + */ + { + int n; + int nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int)nbSymbols - unroll + 1; + for (n=0; n<(int)tableLog+1; n++) { + U32 const curr = nextRankStart; + nextRankStart += wksp->rankVal[n]; + wksp->rankStart[n] = curr; + } + for (n=0; n < nLimit; n += unroll) { + int u; + for (u=0; u < unroll; ++u) { + size_t const w = wksp->huffWeight[n+u]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); + } + } + for (; n < (int)nbSymbols; ++n) { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; + } + } + + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outter loop. + * We can switch based on the length to a different inner loop which is + * optimized for that particular case. 
+ */ + { + U32 w; + int symbol=wksp->rankVal[0]; + int rankStart=0; + for (w=1; wrankVal[w]; + int const length = (1 << w) >> 1; + int uStart = rankStart; + BYTE const nbBits = (BYTE)(tableLog + 1 - w); + int s; + int u; + switch (length) { + case 1: + for (s=0; ssymbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart] = D; + uStart += 1; + } + break; + case 2: + for (s=0; ssymbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart+0] = D; + dt[uStart+1] = D; + uStart += 2; + } + break; + case 4: + for (s=0; ssymbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + uStart += 4; + } + break; + case 8: + for (s=0; ssymbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + MEM_write64(dt + uStart + 4, D4); + uStart += 8; + } + break; + default: + for (s=0; ssymbols[symbol + s], nbBits); + for (u=0; u < length; u += 16) { + MEM_write64(dt + uStart + u + 0, D4); + MEM_write64(dt + uStart + u + 4, D4); + MEM_write64(dt + uStart + u + 8, D4); + MEM_write64(dt + uStart + u + 12, D4); + } + assert(u == length); + uStart += length; + } + break; + } + symbol += symbolCount; + rankStart += symbolCount * length; + } + } + return iSize; +} + +FORCE_INLINE_TEMPLATE BYTE +HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + BYTE const c = dt[val].byte; + BIT_skipBits(Dstream, dt[val].nbBits); + return c; +} + +#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ + *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) + +#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) + +HINT_INLINE size_t +HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 4 symbols at a time 
*/ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_1(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + } + + /* [0-3] symbols remaining */ + if (MEM_32bits()) + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + /* no more data to retrieve from bitstream, no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + return pEnd-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + dstSize; + const void* dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + BIT_DStream_t bitD; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); + + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + return dstSize; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + /* Check */ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; + const void* const dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); 
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ + for ( ; (endSignal) & (op4 < olimit) ; ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } + + /* 
check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. + * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + + +typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); + +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) +HUF_DGEN(HUF_decompress4X1_usingDTable_internal) + + + +size_t HUF_decompress1X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return 
HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X2 */ + + +#ifndef HUF_FORCE_DECOMPRESS_X1 + +/* *************************/ +/* double-symbols decoding */ +/* *************************/ + +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; + + +/* HUF_fillDTableX2Level2() : + * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ +static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, + const U32* rankValOrigin, const int minWeight, + const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, + 
U32 nbBitsBaseline, U16 baseSeq) +{ + HUF_DEltX2 DElt; + U32 rankVal[HUF_TABLELOG_MAX + 1]; + + /* get pre-calculated rankVal */ + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill skipped values */ + if (minWeight>1) { + U32 i, skipSize = rankVal[minWeight]; + MEM_writeLE16(&(DElt.sequence), baseSeq); + DElt.nbBits = (BYTE)(consumed); + DElt.length = 1; + for (i = 0; i < skipSize; i++) + DTable[i] = DElt; + } + + /* fill DTable */ + { U32 s; for (s=0; s= 1 */ + + rankVal[weight] += length; + } } +} + + +static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, + const sortedSymbol_t* sortedList, const U32 sortedListSize, + const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, + const U32 nbBitsBaseline) +{ + U32 rankVal[HUF_TABLELOG_MAX + 1]; + const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ + const U32 minBits = nbBitsBaseline - maxWeight; + U32 s; + + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill DTable */ + for (s=0; s= minBits) { /* enough room for a second symbol */ + U32 sortedRank; + int minWeight = nbBits + scaleLog; + if (minWeight < 1) minWeight = 1; + sortedRank = rankStart[minWeight]; + HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, + rankValOrigin[nbBits], minWeight, + sortedList+sortedRank, sortedListSize-sortedRank, + nbBitsBaseline, symbol); + } else { + HUF_DEltX2 DElt; + MEM_writeLE16(&(DElt.sequence), symbol); + DElt.nbBits = (BYTE)(nbBits); + DElt.length = 1; + { U32 const end = start + length; + U32 u; + for (u = start; u < end; u++) DTable[u] = DElt; + } } + rankVal[weight] += length; + } +} + +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + U32 tableLog, maxW, sizeOfSort, nbSymbols; + DTableDesc dtd = HUF_getDTableDesc(DTable); + U32 const maxTableLog = dtd.maxTableLog; + size_t iSize; + void* dtPtr = DTable+1; /* force compiler to 
avoid strict-aliasing */ + HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; + U32 *rankStart; + + rankValCol_t* rankVal; + U32* rankStats; + U32* rankStart0; + sortedSymbol_t* sortedSymbol; + BYTE* weightList; + size_t spaceUsed32 = 0; + + rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; + rankStats = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 1; + rankStart0 = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 2; + sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); + spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; + weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; + + if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); + + rankStart = rankStart0 + 1; + ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); + + DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ + if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
*/ + + iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* check result */ + if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + + /* find maxWeight */ + for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ + + /* Get start index of each weight */ + { U32 w, nextRankStart = 0; + for (w=1; w> consumed; + } } } } + + HUF_fillDTableX2(dt, maxTableLog, + sortedSymbol, sizeOfSort, + rankStart0, rankVal, maxW, + tableLog+1); + + dtd.tableLog = (BYTE)maxTableLog; + dtd.tableType = 1; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + return iSize; +} + + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + ZSTD_memcpy(op, dt+val, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + ZSTD_memcpy(op, dt+val, 1); + if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); + else { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } } + return 1; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +HINT_INLINE size_t +HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, + const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; + + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, 
dtd.tableLog); + } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); + const void* const dtPtr = DTable+1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = 1; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + for ( ; 
(endSignal) & (op4 < olimit); ) { +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = (U32)LIKELY( + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#endif + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > 
opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) +HUF_DGEN(HUF_decompress4X2_usingDTable_internal) + +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t 
HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X1 */ + + +/* ***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + + +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + 
{{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ +}; +#endif + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + assert(dstSize > 0); + assert(dstSize <= 128*1024); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dstSize; + (void)cSrcSize; + return 0; +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dstSize; + (void)cSrcSize; + return 1; +#else + /* decoder timing evaluation */ + { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; + } +#endif +} + + +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#else + return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#endif + } +} + +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#endif + } +} + + +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} +#endif + +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#endif + } +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX1_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 
+size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX2_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); + +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t 
cSrcSize) +{ +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) + static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#endif + + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); +#else + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); +#endif + } +} + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#else + return algoNb ? 
HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#endif + } +} + +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +#endif +/**** ended inlining decompress/huf_decompress.c ****/ +/**** start inlining decompress/zstd_ddict.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_ddict.c : + * concentrates all logic that needs to know the internals of ZSTD_DDict object */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** start inlining zstd_decompress_internal.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* zstd_decompress_internal: + * objects and definitions shared within lib/decompress modules */ + + #ifndef ZSTD_DECOMPRESS_INTERNAL_H + #define ZSTD_DECOMPRESS_INTERNAL_H + + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: ../common/zstd_trace.h ****/ + + + +/*-******************************************************* + * Constants + *********************************************************/ +static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + +static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; + +static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + +static UNUSED_ATTR const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + 
+/*-******************************************************* + * Decompression types + *********************************************************/ + typedef struct { + U32 fastMode; + U32 tableLog; + } ZSTD_seqSymbol_header; + + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; + + #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) + +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; +} ZSTD_entropyDTables_t; + +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; + +typedef enum { zdss_init=0, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; + +typedef enum { + ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ + ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ + ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ +} ZSTD_dictUses_e; + +/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ +typedef struct { + const ZSTD_DDict** ddictPtrTable; + size_t ddictPtrTableSize; + size_t ddictPtrCount; +} 
ZSTD_DDictHashSet; + +struct ZSTD_DCtx_s +{ + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameHeader fParams; + U64 processedCSize; + U64 decodedSize; + blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + ZSTD_format_e format; + ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ + + /* dictionary */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + ZSTD_dictUses_e dictUses; + ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ + ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. 
Default == 0 (disabled) */ + + /* streaming */ + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; + int noForwardProgress; + ZSTD_bufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; + + /* workspace */ + BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + + size_t oversizedDuration; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif + + /* Tracing */ +#if ZSTD_TRACE + ZSTD_TraceCtx traceCtx; +#endif +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + + +/*-******************************************************* + * Shared internal functions + *********************************************************/ + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ +size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize); + +/*! ZSTD_checkContinuity() : + * check if next `dst` follows previous position, where decompression ended. + * If yes, do nothing (continue on current segment). + * If not, classify previous segment as "external dictionary", and start a new segment. + * This function cannot fail. */ +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); + + +#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ +/**** ended inlining zstd_decompress_internal.h ****/ +/**** start inlining zstd_ddict.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef ZSTD_DDICT_H +#define ZSTD_DDICT_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ + + +/*-******************************************************* + * Interface + *********************************************************/ + +/* note: several prototypes are already published in `zstd.h` : + * ZSTD_createDDict() + * ZSTD_createDDict_byReference() + * ZSTD_createDDict_advanced() + * ZSTD_freeDDict() + * ZSTD_initStaticDDict() + * ZSTD_sizeof_DDict() + * ZSTD_estimateDDictSize() + * ZSTD_getDictID_fromDict() + */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + + + +#endif /* ZSTD_DDICT_H */ +/**** ended inlining zstd_ddict.h ****/ + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +/**** start inlining ../legacy/zstd_legacy.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_LEGACY_H +#define ZSTD_LEGACY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/error_private.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ + +#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0) +# undef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 8 +#endif + +#if (ZSTD_LEGACY_SUPPORT <= 1) +/**** start inlining zstd_v01.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V01_H_28739879432 +#define ZSTD_V01_H_28739879432 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error +*/ +unsigned ZSTDv01_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx; +ZSTDv01_Dctx* ZSTDv01_createDCtx(void); +size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx); +size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv01_magicNumber 0xFD2FB51E /* Big Endian version */ +#define ZSTDv01_magicNumberLE 0x1EB52FFD /* Little Endian version */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V01_H_28739879432 */ +/**** ended inlining zstd_v01.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) +/**** start inlining zstd_v02.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V02_H_4174539423 +#define ZSTD_V02_H_4174539423 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv02_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.2.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error +*/ +unsigned ZSTDv02_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx; +ZSTDv02_Dctx* ZSTDv02_createDCtx(void); +size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx); + +size_t ZSTDv02_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx); + +size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx); +size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv02_magicNumber 0xFD2FB522 /* v0.2 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V02_H_4174539423 */ +/**** ended inlining zstd_v02.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) +/**** start inlining zstd_v03.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V03_H_298734209782 +#define ZSTD_V03_H_298734209782 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + + /** +ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error +*/ +unsigned ZSTDv03_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx; +ZSTDv03_Dctx* ZSTDv03_createDCtx(void); +size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx); + +size_t ZSTDv03_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx); + +size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx); +size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv03_magicNumber 0xFD2FB523 /* v0.3 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V03_H_298734209782 */ +/**** ended inlining zstd_v03.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) +/**** start inlining zstd_v04.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V04_H_91868324769238 +#define ZSTD_V04_H_91868324769238 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error +*/ +unsigned ZSTDv04_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv04_Dctx_s ZSTDv04_Dctx; +ZSTDv04_Dctx* ZSTDv04_createDCtx(void); +size_t ZSTDv04_freeDCtx(ZSTDv04_Dctx* dctx); + +size_t ZSTDv04_decompressDCtx(ZSTDv04_Dctx* dctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + +/* ************************************* +* Direct Streaming +***************************************/ +size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx); + +size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx); +size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + + +/* ************************************* +* Buffered Streaming +***************************************/ +typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx; +ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void); +size_t ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx); + +size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx); +size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize); + +size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr); + +/** ************************************************ +* Streaming decompression +* +* A ZBUFF_DCtx object is required to track streaming operation. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation. +* ZBUFF_DCtx objects can be reused multiple times. +* +* Optionally, a reference to a static dictionary can be set, using ZBUFF_decompressWithDictionary() +* It must be the same content as the one set during compression phase. +* Dictionary content must remain accessible during the decompression process. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *maxDstSizePtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst. 
+* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize +* output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded. +* input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* **************************************************/ +unsigned ZBUFFv04_isError(size_t errorCode); +const char* ZBUFFv04_getErrorName(size_t errorCode); + + +/** The below functions provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are not compulsory, they just tend to offer better latency */ +size_t ZBUFFv04_recommendedDInSize(void); +size_t ZBUFFv04_recommendedDOutSize(void); + + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv04_magicNumber 0xFD2FB524 /* v0.4 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V04_H_91868324769238 */ +/**** ended inlining zstd_v04.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) +/**** start inlining zstd_v05.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTDv05_H +#define ZSTDv05_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Dependencies +***************************************/ +#include /* size_t */ +/**** skipping file: ../common/mem.h ****/ + + +/* ************************************* +* Simple functions +***************************************/ +/*! ZSTDv05_decompress() : + `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. + `dstCapacity` must be large enough, equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */ +size_t ZSTDv05_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + + /** + ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. 
+ */ +void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +/* Error Management */ +unsigned ZSTDv05_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +const char* ZSTDv05_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx; +ZSTDv05_DCtx* ZSTDv05_createDCtx(void); +size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv05_decompressDCtx() : +* Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */ +size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Simple Dictionary API +*************************/ +/*! ZSTDv05_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. 
+* Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */ +size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + +/*-************************ +* Advanced Streaming API +***************************/ +typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy; +typedef struct { + U64 srcSize; + U32 windowLog; /* the only useful information to retrieve */ + U32 contentLog; U32 hashLog; U32 searchLog; U32 searchLength; U32 targetLength; ZSTDv05_strategy strategy; +} ZSTDv05_parameters; +size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize); + +size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize); +void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx); +size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx); +size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* ZBUFF API +*************************/ +typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx; +ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void); +size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* dctx); + +size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx); +size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize); + +size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression +* +* A ZBUFFv05_DCtx object is required to track streaming operations. +* Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources. 
+* Use ZBUFFv05_decompressInit() to start a new decompression operation, +* or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv05_DCtx objects can be reused multiple times. +* +* Use ZBUFFv05_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFFv05_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize() +* output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv05_recommendedDInSize==128Kb+3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +unsigned ZBUFFv05_isError(size_t errorCode); +const char* ZBUFFv05_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. 
+* These sizes are just hints, and tend to offer better latency */ +size_t ZBUFFv05_recommendedDInSize(void); +size_t ZBUFFv05_recommendedDOutSize(void); + + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv05_MAGICNUMBER 0xFD2FB525 /* v0.5 */ + + + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv0505_H */ +/**** ended inlining zstd_v05.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) +/**** start inlining zstd_v06.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTDv06_H +#define ZSTDv06_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/*====== Dependency ======*/ +#include /* size_t */ + + +/*====== Export for Windows ======*/ +/*! +* ZSTDv06_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTDv06_DLL_EXPORT) && (ZSTDv06_DLL_EXPORT==1) +# define ZSTDLIBv06_API __declspec(dllexport) +#else +# define ZSTDLIBv06_API +#endif + + +/* ************************************* +* Simple functions +***************************************/ +/*! ZSTDv06_decompress() : + `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. + `dstCapacity` must be large enough, equal or larger than originalSize. 
+ @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv06_isError()) */ +ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/** +ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. +*/ +void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +ZSTDLIBv06_API size_t ZSTDv06_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */ + +/* Error Management */ +ZSTDLIBv06_API unsigned ZSTDv06_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv06_API const char* ZSTDv06_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv06_DCtx_s ZSTDv06_DCtx; +ZSTDLIBv06_API ZSTDv06_DCtx* ZSTDv06_createDCtx(void); +ZSTDLIBv06_API size_t ZSTDv06_freeDCtx(ZSTDv06_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv06_decompressDCtx() : +* Same as ZSTDv06_decompress(), but requires an already allocated ZSTDv06_DCtx (see ZSTDv06_createDCtx()) */ +ZSTDLIBv06_API size_t 
ZSTDv06_decompressDCtx(ZSTDv06_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Dictionary API +*************************/ +/*! ZSTDv06_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. +* Note : dict can be NULL, in which case, it's equivalent to ZSTDv06_decompressDCtx() */ +ZSTDLIBv06_API size_t ZSTDv06_decompress_usingDict(ZSTDv06_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************ +* Advanced Streaming API +***************************/ +struct ZSTDv06_frameParams_s { unsigned long long frameContentSize; unsigned windowLog; }; +typedef struct ZSTDv06_frameParams_s ZSTDv06_frameParams; + +ZSTDLIBv06_API size_t ZSTDv06_getFrameParams(ZSTDv06_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +ZSTDLIBv06_API size_t ZSTDv06_decompressBegin_usingDict(ZSTDv06_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIBv06_API void ZSTDv06_copyDCtx(ZSTDv06_DCtx* dctx, const ZSTDv06_DCtx* preparedDCtx); + +ZSTDLIBv06_API size_t ZSTDv06_nextSrcSizeToDecompress(ZSTDv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZSTDv06_decompressContinue(ZSTDv06_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + + +/* ************************************* +* ZBUFF API +***************************************/ + +typedef struct ZBUFFv06_DCtx_s ZBUFFv06_DCtx; +ZSTDLIBv06_API ZBUFFv06_DCtx* ZBUFFv06_createDCtx(void); +ZSTDLIBv06_API size_t ZBUFFv06_freeDCtx(ZBUFFv06_DCtx* dctx); + +ZSTDLIBv06_API size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZBUFFv06_decompressInitDictionary(ZBUFFv06_DCtx* dctx, const void* dict, size_t dictSize); + +ZSTDLIBv06_API size_t 
ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFFv06_DCtx object is required to track streaming operations. +* Use ZBUFFv06_createDCtx() and ZBUFFv06_freeDCtx() to create/release resources. +* Use ZBUFFv06_decompressInit() to start a new decompression operation, +* or ZBUFFv06_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv06_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv06_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv06_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv06_recommendedDInSize() and ZBUFFv06_recommendedDOutSize() +* output : ZBUFFv06_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv06_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv06_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . 
+* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIBv06_API unsigned ZBUFFv06_isError(size_t errorCode); +ZSTDLIBv06_API const char* ZBUFFv06_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, they tend to offer better latency */ +ZSTDLIBv06_API size_t ZBUFFv06_recommendedDInSize(void); +ZSTDLIBv06_API size_t ZBUFFv06_recommendedDOutSize(void); + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv06_MAGICNUMBER 0xFD2FB526 /* v0.6 */ + + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv06_BUFFERED_H */ +/**** ended inlining zstd_v06.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) +/**** start inlining zstd_v07.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTDv07_H_235446 +#define ZSTDv07_H_235446 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*====== Dependency ======*/ +#include /* size_t */ + + +/*====== Export for Windows ======*/ +/*! +* ZSTDv07_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTDv07_DLL_EXPORT) && (ZSTDv07_DLL_EXPORT==1) +# define ZSTDLIBv07_API __declspec(dllexport) +#else +# define ZSTDLIBv07_API +#endif + + +/* ************************************* +* Simple API +***************************************/ +/*! 
ZSTDv07_getDecompressedSize() : +* @return : decompressed size if known, 0 otherwise. + note 1 : if `0`, follow up with ZSTDv07_getFrameParams() to know precise failure cause. + note 2 : decompressed size could be wrong or intentionally modified ! + always ensure results fit within application's authorized limits */ +unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTDv07_decompress() : + `compressedSize` : must be _exact_ size of compressed input, otherwise decompression will fail. + `dstCapacity` must be equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv07_isError()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/** +ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. 
+*/ +void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/*====== Helper functions ======*/ +ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv07_API const char* ZSTDv07_getErrorName(size_t code); /*!< provides readable string from an error code */ + + +/*-************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv07_DCtx_s ZSTDv07_DCtx; +ZSTDLIBv07_API ZSTDv07_DCtx* ZSTDv07_createDCtx(void); +ZSTDLIBv07_API size_t ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv07_decompressDCtx() : +* Same as ZSTDv07_decompress(), requires an allocated ZSTDv07_DCtx (see ZSTDv07_createDCtx()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-************************ +* Simple dictionary API +***************************/ +/*! ZSTDv07_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression. +* Note : This function load the dictionary, resulting in a significant startup time */ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************** +* Advanced Dictionary API +****************************/ +/*! ZSTDv07_createDDict() : +* Create a digested dictionary, ready to start decompression operation without startup delay. +* `dict` can be released after creation */ +typedef struct ZSTDv07_DDict_s ZSTDv07_DDict; +ZSTDLIBv07_API ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize); +ZSTDLIBv07_API size_t ZSTDv07_freeDDict(ZSTDv07_DDict* ddict); + +/*! 
ZSTDv07_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Faster startup than ZSTDv07_decompress_usingDict(), recommended when same dictionary is used multiple times. */ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTDv07_DDict* ddict); + +typedef struct { + unsigned long long frameContentSize; + unsigned windowSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTDv07_frameParams; + +ZSTDLIBv07_API size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ + + + + +/* ************************************* +* Streaming functions +***************************************/ +typedef struct ZBUFFv07_DCtx_s ZBUFFv07_DCtx; +ZSTDLIBv07_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void); +ZSTDLIBv07_API size_t ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* dctx); + +ZSTDLIBv07_API size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* dctx); +ZSTDLIBv07_API size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* dctx, const void* dict, size_t dictSize); + +ZSTDLIBv07_API size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFFv07_DCtx object is required to track streaming operations. +* Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources. +* Use ZBUFFv07_decompressInit() to start a new decompression operation, +* or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv07_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv07_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. 
+* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv07_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize() +* output : ZBUFFv07_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv07_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIBv07_API unsigned ZBUFFv07_isError(size_t errorCode); +ZSTDLIBv07_API const char* ZBUFFv07_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. 
+* These sizes are just hints, they tend to offer better latency */ +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDInSize(void); +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDOutSize(void); + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv07_MAGICNUMBER 0xFD2FB527 /* v0.7 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv07_H_235446 */ +/**** ended inlining zstd_v07.h ****/ +#endif + +/** ZSTD_isLegacy() : + @return : > 0 if supported by legacy decoder. 0 otherwise. + return value is the version. +*/ +MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize) +{ + U32 magicNumberLE; + if (srcSize<4) return 0; + magicNumberLE = MEM_readLE32(src); + switch(magicNumberLE) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case ZSTDv01_magicNumberLE:return 1; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case ZSTDv02_magicNumber : return 2; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case ZSTDv03_magicNumber : return 3; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case ZSTDv04_magicNumber : return 4; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case ZSTDv05_MAGICNUMBER : return 5; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case ZSTDv06_MAGICNUMBER : return 6; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case ZSTDv07_MAGICNUMBER : return 7; +#endif + default : return 0; + } +} + + +MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize) +{ + U32 const version = ZSTD_isLegacy(src, srcSize); + if (version < 5) return 0; /* no decompressed size in frame header, or not a legacy format */ +#if (ZSTD_LEGACY_SUPPORT <= 5) + if (version==5) { + ZSTDv05_parameters fParams; + size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.srcSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + if (version==6) { + ZSTDv06_frameParams fParams; + size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) 
return 0; + return fParams.frameContentSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + if (version==7) { + ZSTDv07_frameParams fParams; + size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif + return 0; /* should not be possible */ +} + + +MEM_STATIC size_t ZSTD_decompressLegacy( + void* dst, size_t dstCapacity, + const void* src, size_t compressedSize, + const void* dict,size_t dictSize) +{ + U32 const version = ZSTD_isLegacy(src, compressedSize); + (void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */ + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { size_t result; + ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv05_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { size_t result; + ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv06_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv06_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { size_t result; + ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv07_decompress_usingDict(zd, dst, 
dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv07_freeDCtx(zd); + return result; + } +#endif + default : + return ERROR(prefix_unknown); + } +} + +MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + U32 const version = ZSTD_isLegacy(src, srcSize); + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + ZSTDv01_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + ZSTDv02_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + ZSTDv03_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + ZSTDv04_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + ZSTDv05_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + ZSTDv06_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + ZSTDv07_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif + default : + frameSizeInfo.compressedSize = ERROR(prefix_unknown); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + break; + } + if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) { + frameSizeInfo.compressedSize = ERROR(srcSize_wrong); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + } + return frameSizeInfo; +} + 
+MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize); + return frameSizeInfo.compressedSize; +} + +MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version) +{ + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext); +#endif + } +} + + +MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion, + const void* dict, size_t dictSize) +{ + DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion); + if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion); + switch(newVersion) + { + default : + case 1 : + case 2 : + case 3 : + (void)dict; (void)dictSize; + return 0; +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv04_decompressInit(dctx); + ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? 
ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv06_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif + } +} + + + +MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version, + ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version); + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; (void)output; (void)input; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv04_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - 
output->pos; + size_t const hintSize = ZBUFFv05_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv06_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv07_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif + } +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LEGACY_H */ +/**** ended inlining ../legacy/zstd_legacy.h ****/ +#endif + + + +/*-******************************************************* +* Types +*********************************************************/ +struct ZSTD_DDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictSize; + ZSTD_entropyDTables_t entropy; + U32 dictID; + U32 entropyPresent; + ZSTD_customMem cMem; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictContent; +} + +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictSize; +} + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + 
DEBUGLOG(4, "ZSTD_copyDDictParameters"); + assert(dctx != NULL); + assert(ddict != NULL); + dctx->dictID = ddict->dictID; + dctx->prefixStart = ddict->dictContent; + dctx->virtualStart = ddict->dictContent; + dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + if (ddict->entropyPresent) { + dctx->litEntropy = 1; + dctx->fseEntropy = 1; + dctx->LLTptr = ddict->entropy.LLTable; + dctx->MLTptr = ddict->entropy.MLTable; + dctx->OFTptr = ddict->entropy.OFTable; + dctx->HUFptr = ddict->entropy.hufTable; + dctx->entropy.rep[0] = ddict->entropy.rep[0]; + dctx->entropy.rep[1] = ddict->entropy.rep[1]; + dctx->entropy.rep[2] = ddict->entropy.rep[2]; + } else { + dctx->litEntropy = 0; + dctx->fseEntropy = 0; + } +} + + +static size_t +ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, + ZSTD_dictContentType_e dictContentType) +{ + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (dictContentType == ZSTD_dct_rawContent) return 0; + + if (ddict->dictSize < 8) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + { U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + } + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( + &ddict->entropy, ddict->dictContent, ddict->dictSize)), + dictionary_corrupted, ""); + ddict->entropyPresent = 1; + return 0; +} + + +static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, + const void* dict, size_t 
dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + if (!dict) dictSize = 0; + } else { + void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + ZSTD_memcpy(internalBuffer, dict, dictSize); + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + + /* parse dictionary content */ + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); + + return 0; +} + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); + if (ddict == NULL) return NULL; + ddict->cMem = customMem; + { size_t const initResult = ZSTD_initDDict_internal(ddict, + dict, dictSize, + dictLoadMethod, dictContentType); + if (ZSTD_isError(initResult)) { + ZSTD_freeDDict(ddict); + return NULL; + } } + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); +} + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. 
+ * Dictionary content is simply referenced, it will be accessed during decompression. + * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); +} + + +const ZSTD_DDict* ZSTD_initStaticDDict( + void* sBuffer, size_t sBufferSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + size_t const neededSpace = sizeof(ZSTD_DDict) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; + assert(sBuffer != NULL); + assert(dict != NULL); + if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ + if (sBufferSize < neededSpace) return NULL; + if (dictLoadMethod == ZSTD_dlm_byCopy) { + ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ + dict = ddict+1; + } + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, + dict, dictSize, + ZSTD_dlm_byRef, dictContentType) )) + return NULL; + return ddict; +} + + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->cMem; + ZSTD_customFree(ddict->dictBuffer, cMem); + ZSTD_customFree(ddict, cMem); + return 0; + } +} + +/*! ZSTD_estimateDDictSize() : + * Estimate amount of memory that will be needed to create a dictionary for decompression. + * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ +size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) +{ + return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); +} + +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*ddict) + (ddict->dictBuffer ? 
ddict->dictSize : 0) ; +} + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; + return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); +} +/**** ended inlining decompress/zstd_ddict.c ****/ +/**** start inlining decompress/zstd_decompress.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * HEAPMODE : + * Select how default decompression function ZSTD_decompress() allocates its context, + * on stack (0), or into heap (1, default; requires malloc()). + * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif + +/*! +* LEGACY_SUPPORT : +* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 0 +#endif + +/*! + * MAXWINDOWSIZE_DEFAULT : + * maximum window size accepted by DStream __by default__. + * Frames requiring more memory will be rejected. + * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). + */ +#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT +# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1) +#endif + +/*! 
+ * NO_FORWARD_PROGRESS_MAX : + * maximum allowed nb of calls to ZSTD_decompressStream() + * without any forward progress + * (defined as: no byte read from input, and no byte flushed to output) + * before triggering an error. + */ +#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX +# define ZSTD_NO_FORWARD_PROGRESS_MAX 16 +#endif + + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/zstd_trace.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/xxhash.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ +/**** skipping file: zstd_ddict.h ****/ +/**** start inlining zstd_decompress_block.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +#ifndef ZSTD_DEC_BLOCK_H +#define ZSTD_DEC_BLOCK_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ + + +/* === Prototypes === */ + +/* note: prototypes already published within `zstd.h` : + * ZSTD_decompressBlock() + */ + +/* note: prototypes already published within `zstd_internal.h` : + * ZSTD_getcBlockSize() + * ZSTD_decodeSeqHeaders() + */ + + +/* ZSTD_decompressBlock_internal() : + * decompress block, starting at `src`, + * into destination buffer `dst`. + * @return : decompressed block size, + * or an error code (which can be tested using ZSTD_isError()) + */ +size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame); + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * this function must be called with valid parameters only + * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) + * in which case it cannot fail. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is + * defined in zstd_decompress_internal.h. + * Internal use only. 
+ */ +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize, + int bmi2); + + +#endif /* ZSTD_DEC_BLOCK_H */ +/**** ended inlining zstd_decompress_block.h ****/ + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +/**** skipping file: ../legacy/zstd_legacy.h ****/ +#endif + + + +/************************************* + * Multiple DDicts Hashset internals * + *************************************/ + +#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 +#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. + * Currently, that means a 0.75 load factor. + * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded + * the load factor of the ddict hash set. + */ + +#define DDICT_HASHSET_TABLE_BASE_SIZE 64 +#define DDICT_HASHSET_RESIZE_FACTOR 2 + +/* Hash function to determine starting position of dict insertion within the table + * Returns an index between [0, hashSet->ddictPtrTableSize] + */ +static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) { + const U64 hash = XXH64(&dictID, sizeof(U32), 0); + /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */ + return hash & (hashSet->ddictPtrTableSize - 1); +} + +/* Adds DDict to a hashset without resizing it. + * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set. + * Returns 0 if successful, or a zstd error code if something went wrong. 
+ */ +static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) { + const U32 dictID = ZSTD_getDictID_fromDDict(ddict); + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!"); + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + while (hashSet->ddictPtrTable[idx] != NULL) { + /* Replace existing ddict if inserting ddict with same dictID */ + if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) { + DEBUGLOG(4, "DictID already exists, replacing rather than adding"); + hashSet->ddictPtrTable[idx] = ddict; + return 0; + } + idx &= idxRangeMask; + idx++; + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + hashSet->ddictPtrTable[idx] = ddict; + hashSet->ddictPtrCount++; + return 0; +} + +/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and + * rehashes all values, allocates new table, frees old table. + * Returns 0 on success, otherwise a zstd error code. + */ +static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR; + const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem); + const ZSTD_DDict** oldTable = hashSet->ddictPtrTable; + size_t oldTableSize = hashSet->ddictPtrTableSize; + size_t i; + + DEBUGLOG(4, "Expanding DDict hash table! 
Old size: %zu new size: %zu", oldTableSize, newTableSize); + RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!"); + hashSet->ddictPtrTable = newTable; + hashSet->ddictPtrTableSize = newTableSize; + hashSet->ddictPtrCount = 0; + for (i = 0; i < oldTableSize; ++i) { + if (oldTable[i] != NULL) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), ""); + } + } + ZSTD_customFree((void*)oldTable, customMem); + DEBUGLOG(4, "Finished re-hash"); + return 0; +} + +/* Fetches a DDict with the given dictID + * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL. + */ +static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) { + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + for (;;) { + size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]); + if (currDictID == dictID || currDictID == 0) { + /* currDictID == 0 implies a NULL ddict entry */ + break; + } else { + idx &= idxRangeMask; /* Goes to start of table when we reach the end */ + idx++; + } + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + return hashSet->ddictPtrTable[idx]; +} + +/* Allocates space for and returns a ddict hash set + * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with. + * Returns NULL if allocation failed. 
+ */ +static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) { + ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem); + DEBUGLOG(4, "Allocating new hash set"); + ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem); + ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE; + ret->ddictPtrCount = 0; + if (!ret || !ret->ddictPtrTable) { + return NULL; + } + return ret; +} + +/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself. + * Note: The ZSTD_DDict* within the table are NOT freed. + */ +static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + DEBUGLOG(4, "Freeing ddict hash set"); + if (hashSet && hashSet->ddictPtrTable) { + ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem); + } + if (hashSet) { + ZSTD_customFree(hashSet, customMem); + } +} + +/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set. + * Returns 0 on success, or a ZSTD error. 
+ */ +static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) { + DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize); + if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), ""); + } + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), ""); + return 0; +} + +/*-************************************************************* +* Context management +***************************************************************/ +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support sizeof NULL */ + return sizeof(*dctx) + + ZSTD_sizeof_DDict(dctx->ddictLocal) + + dctx->inBuffSize + dctx->outBuffSize; +} + +size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } + + +static size_t ZSTD_startingInputLength(ZSTD_format_e format) +{ + size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); + return startingInputLength; +} + +static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) +{ + assert(dctx->streamStage == zdss_init); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->outBufferMode = ZSTD_bm_buffered; + dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; +} + +static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) +{ + dctx->staticSize = 0; + dctx->ddict = NULL; + dctx->ddictLocal = NULL; + dctx->dictEnd = NULL; + dctx->ddictIsCold = 0; + dctx->dictUses = ZSTD_dont_use; + dctx->inBuff = NULL; + dctx->inBuffSize = 0; + dctx->outBuffSize = 0; + 
dctx->streamStage = zdss_init; + dctx->legacyContext = NULL; + dctx->previousLegacyVersion = 0; + dctx->noForwardProgress = 0; + dctx->oversizedDuration = 0; + dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + dctx->ddictSet = NULL; + ZSTD_DCtx_resetParameters(dctx); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif +} + +ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) +{ + ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ + + ZSTD_initDCtx_internal(dctx); + dctx->staticSize = workspaceSize; + dctx->inBuff = (char*)(dctx+1); + return dctx; +} + +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem); + if (!dctx) return NULL; + dctx->customMem = customMem; + ZSTD_initDCtx_internal(dctx); + return dctx; + } +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + DEBUGLOG(3, "ZSTD_createDCtx"); + return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_clearDict(ZSTD_DCtx* dctx) +{ + ZSTD_freeDDict(dctx->ddictLocal); + dctx->ddictLocal = NULL; + dctx->ddict = NULL; + dctx->dictUses = ZSTD_dont_use; +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); + { ZSTD_customMem const cMem = dctx->customMem; + ZSTD_clearDict(dctx); + ZSTD_customFree(dctx->inBuff, cMem); + dctx->inBuff = NULL; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (dctx->legacyContext) + ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); +#endif + if (dctx->ddictSet) { + ZSTD_freeDDictHashSet(dctx->ddictSet, cMem); + dctx->ddictSet = NULL; + } + 
ZSTD_customFree(dctx, cMem); + return 0; + } +} + +/* no longer useful */ +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) +{ + size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); + ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +} + +/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on + * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then + * accordingly sets the ddict to be used to decompress the frame. + * + * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is. + * + * ZSTD_d_refMultipleDDicts must be enabled for this function to be called. + */ +static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) { + assert(dctx->refMultipleDDicts && dctx->ddictSet); + DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame"); + if (dctx->ddict) { + const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); + if (frameDDict) { + DEBUGLOG(4, "DDict found!"); + ZSTD_clearDict(dctx); + dctx->dictID = dctx->fParams.dictID; + dctx->ddict = frameDDict; + dctx->dictUses = ZSTD_use_indefinitely; + } + } +} + + +/*-************************************************************* + * Frame header decoding + ***************************************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. 
*/ +unsigned ZSTD_isFrame(const void* buffer, size_t size) +{ + if (size < ZSTD_FRAMEIDSIZE) return 0; + { U32 const magic = MEM_readLE32(buffer); + if (magic == ZSTD_MAGICNUMBER) return 1; + if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1; + } +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(buffer, size)) return 1; +#endif + return 0; +} + +/** ZSTD_frameHeaderSize_internal() : + * srcSize must be large enough to reach header size fields. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. + * @return : size of the Frame Header + * or an error code, which can be tested with ZSTD_isError() */ +static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) +{ + size_t const minInputSize = ZSTD_startingInputLength(format); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); + + { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; + U32 const dictID= fhd & 3; + U32 const singleSegment = (fhd >> 5) & 1; + U32 const fcsId = fhd >> 6; + return minInputSize + !singleSegment + + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + + (singleSegment && !fcsId); + } +} + +/** ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_frameHeaderSize_prefix. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameHeader_advanced() : + * decode Frame Header, or require larger `srcSize`. 
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) +{ + const BYTE* ip = (const BYTE*)src; + size_t const minInputSize = ZSTD_startingInputLength(format); + + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ + if (srcSize < minInputSize) return minInputSize; + RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); + + if ( (format != ZSTD_f_zstd1_magicless) + && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + /* skippable frame */ + if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) + return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); + zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); + zfhPtr->frameType = ZSTD_skippableFrame; + return 0; + } + RETURN_ERROR(prefix_unknown, ""); + } + + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); + if (srcSize < fhsize) return fhsize; + zfhPtr->headerSize = (U32)fhsize; + } + + { BYTE const fhdByte = ip[minInputSize-1]; + size_t pos = minInputSize; + U32 const dictIDSizeCode = fhdByte&3; + U32 const checksumFlag = (fhdByte>>2)&1; + U32 const singleSegment = (fhdByte>>5)&1; + U32 const fcsID = fhdByte>>6; + U64 windowSize = 0; + U32 dictID = 0; + U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; + RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, + "reserved bits, must be 
zero"); + + if (!singleSegment) { + BYTE const wlByte = ip[pos++]; + U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); + windowSize = (1ULL << windowLog); + windowSize += (windowSize >> 3) * (wlByte&7); + } + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : dictID = ip[pos]; pos++; break; + case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; + case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; + } + switch(fcsID) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) frameContentSize = ip[pos]; break; + case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; + case 2 : frameContentSize = MEM_readLE32(ip+pos); break; + case 3 : frameContentSize = MEM_readLE64(ip+pos); break; + } + if (singleSegment) windowSize = frameContentSize; + + zfhPtr->frameType = ZSTD_frame; + zfhPtr->frameContentSize = frameContentSize; + zfhPtr->windowSize = windowSize; + zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + zfhPtr->dictID = dictID; + zfhPtr->checksumFlag = checksumFlag; + } + return 0; +} + +/** ZSTD_getFrameHeader() : + * decode Frame Header, or require larger `srcSize`. + * note : this function does not consume input, it only reads it. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) +{ + return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameContentSize() : + * compatible with legacy mode + * @return : decompressed size of the single frame pointed to be `src` if known, otherwise + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. 
invalid magic number, srcSize too small) */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize); + return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret; + } +#endif + { ZSTD_frameHeader zfh; + if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) + return ZSTD_CONTENTSIZE_ERROR; + if (zfh.frameType == ZSTD_skippableFrame) { + return 0; + } else { + return zfh.frameContentSize; + } } +} + +static size_t readSkippableFrameSize(void const* src, size_t srcSize) +{ + size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; + U32 sizeU32; + + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); + + sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); + RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, + frameParameter_unsupported, ""); + { + size_t const skippableSize = skippableHeaderSize + sizeU32; + RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); + return skippableSize; + } +} + +/** ZSTD_findDecompressedSize() : + * compatible with legacy mode + * `srcSize` must be the exact length of some number of ZSTD compressed and/or + * skippable frames + * @return : decompressed size of the frames contained */ +unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long totalDstSize = 0; + + while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { + U32 const magicNumber = MEM_readLE32(src); + + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + if (ZSTD_isError(skippableSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } + + { unsigned long long const ret = 
ZSTD_getFrameContentSize(src, srcSize); + if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; + + /* check for overflow */ + if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; + totalDstSize += ret; + } + { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); + if (ZSTD_isError(frameSrcSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + + src = (const BYTE *)src + frameSrcSize; + srcSize -= frameSrcSize; + } + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + if (srcSize) return ZSTD_CONTENTSIZE_ERROR; + + return totalDstSize; +} + +/** ZSTD_getDecompressedSize() : + * compatible with legacy mode + * @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the following : + - frame content is empty + - decompressed size field is not present in frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); + return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; +} + + +/** ZSTD_decodeFrameHeader() : + * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * If multiple DDict references are enabled, also will choose the correct DDict to use. 
+ * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) +{ + size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); + if (ZSTD_isError(result)) return result; /* invalid header */ + RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); + + /* Reference DDict requested by frame if dctx references multiple ddicts */ + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) { + ZSTD_DCtx_selectFrameDDict(dctx); + } + +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* Skip the dictID check in fuzzing mode, because it makes the search + * harder. + */ + RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), + dictionary_wrong, ""); +#endif + dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; + if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0); + dctx->processedCSize += headerSize; + return 0; +} + +static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) +{ + ZSTD_frameSizeInfo frameSizeInfo; + frameSizeInfo.compressedSize = ret; + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + return frameSizeInfo; +} + +static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) + return ZSTD_findFrameSizeInfoLegacy(src, srcSize); +#endif + + if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) + && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); + assert(ZSTD_isError(frameSizeInfo.compressedSize) || + frameSizeInfo.compressedSize <= srcSize); + return frameSizeInfo; + } else 
{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + size_t nbBlocks = 0; + ZSTD_frameHeader zfh; + + /* Extract Frame Header */ + { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(ret)) + return ZSTD_errorFrameSizeInfo(ret); + if (ret > 0) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + } + + ip += zfh.headerSize; + remainingSize -= zfh.headerSize; + + /* Iterate over each block */ + while (1) { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) + return ZSTD_errorFrameSizeInfo(cBlockSize); + + if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + + ip += ZSTD_blockHeaderSize + cBlockSize; + remainingSize -= ZSTD_blockHeaderSize + cBlockSize; + nbBlocks++; + + if (blockProperties.lastBlock) break; + } + + /* Final frame content checksum */ + if (zfh.checksumFlag) { + if (remainingSize < 4) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + ip += 4; + } + + frameSizeInfo.compressedSize = (size_t)(ip - ipstart); + frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) + ? 
zfh.frameContentSize + : nbBlocks * zfh.blockSizeMax; + return frameSizeInfo; + } +} + +/** ZSTD_findFrameCompressedSize() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame + * `srcSize` must be at least as large as the frame contained + * @return : the compressed size of the frame starting at `src` */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + return frameSizeInfo.compressedSize; +} + +/** ZSTD_decompressBound() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame or a skippeable frame + * `srcSize` must be at least as large as the frame contained + * @return : the maximum decompressed size of the compressed source + */ +unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) +{ + unsigned long long bound = 0; + /* Iterate over each frame */ + while (srcSize > 0) { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + size_t const compressedSize = frameSizeInfo.compressedSize; + unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; + if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) + return ZSTD_CONTENTSIZE_ERROR; + assert(srcSize >= compressedSize); + src = (const BYTE*)src + compressedSize; + srcSize -= compressedSize; + bound += decompressedBound; + } + return bound; +} + + +/*-************************************************************* + * Frame decoding + ***************************************************************/ + +/** ZSTD_insertBlock() : + * insert `src` block into `dctx` history. Useful to track uncompressed blocks. 
*/ +size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +{ + DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); + ZSTD_checkContinuity(dctx, blockStart, blockSize); + dctx->previousDstEnd = (const char*)blockStart + blockSize; + return blockSize; +} + + +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_copyRawBlock"); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (srcSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memcpy(dst, src, srcSize); + return srcSize; +} + +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, + BYTE b, + size_t regenSize) +{ + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (regenSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memset(dst, b, regenSize); + return regenSize; +} + +static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) +{ +#if ZSTD_TRACE + if (dctx->traceCtx) { + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + if (dctx->ddict) { + trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict); + trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict); + trace.dictionaryIsCold = dctx->ddictIsCold; + } + trace.uncompressedSize = (size_t)uncompressedSize; + trace.compressedSize = (size_t)compressedSize; + trace.dctx = dctx; + ZSTD_trace_decompress_end(dctx->traceCtx, &trace); + } +#else + (void)dctx; + (void)uncompressedSize; + (void)compressedSize; + (void)streaming; +#endif +} + + +/*! ZSTD_decompressFrame() : + * @dctx must be properly initialized + * will update *srcPtr and *srcSizePtr, + * to make *srcPtr progress by one frame. 
*/ +static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void** srcPtr, size_t *srcSizePtr) +{ + const BYTE* const istart = (const BYTE*)(*srcPtr); + const BYTE* ip = istart; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; + BYTE* op = ostart; + size_t remainingSrcSize = *srcSizePtr; + + DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); + + /* check */ + RETURN_ERROR_IF( + remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( + ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); + ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; + } + + /* Loop on each block */ + while (1) { + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSrcSize -= ZSTD_blockHeaderSize; + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); + + switch(blockProperties.blockType) + { + case bt_compressed: + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1); + break; + case bt_raw : + decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); + break; + case bt_rle : + decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize); + break; + case bt_reserved : + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + + if (ZSTD_isError(decodedSize)) return decodedSize; + if 
(dctx->validateChecksum) + XXH64_update(&dctx->xxhState, op, decodedSize); + if (decodedSize != 0) + op += decodedSize; + assert(ip != NULL); + ip += cBlockSize; + remainingSrcSize -= cBlockSize; + if (blockProperties.lastBlock) break; + } + + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { + RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, + corruption_detected, ""); + } + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); + if (!dctx->forceIgnoreChecksum) { + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + } + ip += 4; + remainingSrcSize -= 4; + } + ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); + /* Allow caller to get size read */ + *srcPtr = ip; + *srcSizePtr = remainingSrcSize; + return (size_t)(op-ostart); +} + +static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + const ZSTD_DDict* ddict) +{ + void* const dststart = dst; + int moreThan1Frame = 0; + + DEBUGLOG(5, "ZSTD_decompressMultiFrame"); + assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ + + if (ddict) { + dict = ZSTD_DDict_dictContent(ddict); + dictSize = ZSTD_DDict_dictSize(ddict); + } + + while (srcSize >= ZSTD_startingInputLength(dctx->format)) { + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + size_t decodedSize; + size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); + if (ZSTD_isError(frameSize)) return frameSize; + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, + "legacy support is not compatible with static dctx"); + + decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); 
+ if (ZSTD_isError(decodedSize)) return decodedSize; + + assert(decodedSize <= dstCapacity); + dst = (BYTE*)dst + decodedSize; + dstCapacity -= decodedSize; + + src = (const BYTE*)src + frameSize; + srcSize -= frameSize; + + continue; + } +#endif + + { U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (unsigned)magicNumber, ZSTD_MAGICNUMBER); + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } } + + if (ddict) { + /* we were called from ZSTD_decompress_usingDDict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); + } else { + /* this will initialize correctly with no dict if dict == NULL, so + * use this in all cases but ddict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); + } + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, + &src, &srcSize); + RETURN_ERROR_IF( + (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1), + srcSize_wrong, + "At least one frame successfully completed, " + "but following bytes are garbage: " + "it's more likely to be a srcSize error, " + "specifying more input bytes than size of frame(s). " + "Note: one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic bytes. 
" + "But this is _much_ less likely than a srcSize field error."); + if (ZSTD_isError(res)) return res; + assert(res <= dstCapacity); + if (res != 0) + dst = (BYTE*)dst + res; + dstCapacity -= res; + } + moreThan1Frame = 1; + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); + + return (size_t)((BYTE*)dst - (BYTE*)dststart); +} + +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) +{ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); +} + + +static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) +{ + switch (dctx->dictUses) { + default: + assert(0 /* Impossible */); + /* fall-through */ + case ZSTD_dont_use: + ZSTD_clearDict(dctx); + return NULL; + case ZSTD_use_indefinitely: + return dctx->ddict; + case ZSTD_use_once: + dctx->dictUses = ZSTD_dont_use; + return dctx->ddict; + } +} + +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); +} + + +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) + size_t regenSize; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; +#else /* stack mode */ + ZSTD_DCtx dctx; + ZSTD_initDCtx_internal(&dctx); + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif +} + + +/*-************************************** +* Advanced Streaming Decompression API +* Bufferless and synchronous +****************************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { 
return dctx->expected; } + +/** + * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, + * we allow taking a partial block as the input. Currently only raw uncompressed blocks can + * be streamed. + * + * For blocks that can be streamed, this allows us to reduce the latency until we produce + * output, and avoid copying the input. + * + * @param inputSize - The total amount of input that the caller currently has. + */ +static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) { + if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock)) + return dctx->expected; + if (dctx->bType != bt_raw) + return dctx->expected; + return MIN(MAX(inputSize, 1), dctx->expected); +} + +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { + switch(dctx->stage) + { + default: /* should not happen */ + assert(0); + case ZSTDds_getFrameHeaderSize: + case ZSTDds_decodeFrameHeader: + return ZSTDnit_frameHeader; + case ZSTDds_decodeBlockHeader: + return ZSTDnit_blockHeader; + case ZSTDds_decompressBlock: + return ZSTDnit_block; + case ZSTDds_decompressLastBlock: + return ZSTDnit_lastBlock; + case ZSTDds_checkChecksum: + return ZSTDnit_checksum; + case ZSTDds_decodeSkippableHeader: + case ZSTDds_skipFrame: + return ZSTDnit_skippableFrame; + } +} + +static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } + +/** ZSTD_decompressContinue() : + * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) + * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); + /* Sanity check */ + RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, 
srcSize), srcSize_wrong, "not allowed"); + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + dctx->processedCSize += srcSize; + + switch (dctx->stage) + { + case ZSTDds_getFrameHeaderSize : + assert(src != NULL); + if (dctx->format == ZSTD_f_zstd1) { /* allows header */ + assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } } + dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); + if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = dctx->headerSize - srcSize; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; + + case ZSTDds_decodeFrameHeader: + assert(src != NULL); + ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); + dctx->expected = ZSTD_blockHeaderSize; + dctx->stage = ZSTDds_decodeBlockHeader; + return 0; + + case ZSTDds_decodeBlockHeader: + { blockProperties_t bp; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); + dctx->expected = cBlockSize; + dctx->bType = bp.blockType; + dctx->rleSize = bp.origSize; + if (cBlockSize) { + dctx->stage = bp.lastBlock ? 
ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) { + if (dctx->fParams.checksumFlag) { + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; + } + return 0; + } + + case ZSTDds_decompressLastBlock: + case ZSTDds_decompressBlock: + DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); + { size_t rSize; + switch(dctx->bType) + { + case bt_compressed: + DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_raw : + assert(srcSize <= dctx->expected); + rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); + assert(rSize == srcSize); + dctx->expected -= rSize; + break; + case bt_rle : + rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_reserved : /* should never happen */ + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + FORWARD_IF_ERROR(rSize, ""); + RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); + DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); + dctx->decodedSize += rSize; + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); + dctx->previousDstEnd = (char*)dst + rSize; + + /* Stay on the same stage until we are finished streaming the block. 
*/ + if (dctx->expected > 0) { + return rSize; + } + + if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); + RETURN_ERROR_IF( + dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && dctx->decodedSize != dctx->fParams.frameContentSize, + corruption_detected, ""); + if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; /* ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + } + return rSize; + } + + case ZSTDds_checkChecksum: + assert(srcSize == 4); /* guaranteed by dctx->expected */ + { + if (dctx->validateChecksum) { + U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + } + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + + case ZSTDds_decodeSkippableHeader: + assert(src != NULL); + assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); + ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ + dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->stage = ZSTDds_skipFrame; + return 0; + + case ZSTDds_skipFrame: + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, 
"impossible to reach"); /* some compiler require default to do something */ + } +} + + +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dict; + dctx->previousDstEnd = (const char*)dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + return 0; +} + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of entropy tables read */ +size_t +ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + + RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); + assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ + dictPtr += 8; /* skip header = magic + dictID */ + + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); + ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); + { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ + size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); +#ifdef HUF_FORCE_DECOMPRESS_X1 + /* in minimal huffman, we always use X1 variants */ + size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, + dictPtr, dictEnd - dictPtr, + workspace, 
workspaceSize); +#else + size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, + dictPtr, (size_t)(dictEnd - dictPtr), + workspace, workspaceSize); +#endif + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); + dictPtr += hSize; + } + + { short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff, offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->OFTable, + offcodeNCount, offcodeMaxValue, + OF_base, OF_bits, + offcodeLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */0); + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->MLTable, + matchlengthNCount, matchlengthMaxValue, + ML_base, ML_bits, + matchlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); + 
RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->LLTable, + litlengthNCount, litlengthMaxValue, + LL_base, LL_bits, + litlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + { int i; + size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); + for (i=0; i<3; i++) { + U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; + RETURN_ERROR_IF(rep==0 || rep > dictContentSize, + dictionary_corrupted, ""); + entropy->rep[i] = rep; + } } + + return (size_t)(dictPtr - (const BYTE*)dict); +} + +static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); + { U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_MAGIC_DICTIONARY) { + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } } + dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); + dict = (const char*)dict + eSize; + dictSize -= eSize; + } + dctx->litEntropy = dctx->fseEntropy = 1; + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); +} + +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + assert(dctx != NULL); +#if ZSTD_TRACE + dctx->traceCtx = ZSTD_trace_decompress_begin(dctx); +#endif + dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->processedCSize = 0; + dctx->decodedSize = 0; + dctx->previousDstEnd = NULL; + dctx->prefixStart = NULL; + dctx->virtualStart = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little 
and big endian */ + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + dctx->bType = bt_reserved; + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; + return 0; +} + +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (dict && dictSize) + RETURN_ERROR_IF( + ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), + dictionary_corrupted, ""); + return 0; +} + + +/* ====== ZSTD_DDict ====== */ + +size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); + assert(dctx != NULL); + if (ddict) { + const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict); + size_t const dictSize = ZSTD_DDict_dictSize(ddict); + const void* const dictEnd = dictStart + dictSize; + dctx->ddictIsCold = (dctx->dictEnd != dictEnd); + DEBUGLOG(4, "DDict is %s", + dctx->ddictIsCold ? "~cold~" : "hot!"); + } + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (ddict) { /* NULL ddict is equivalent to no dictionary */ + ZSTD_copyDDictParameters(dctx, ddict); + } + return 0; +} + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) +{ + if (dictSize < 8) return 0; + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; + return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); +} + +/*! 
ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary (most common case). + * - The frame was built with dictID intentionally removed. + * Needed dictionary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, frame header could not be decoded. + * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use + * ZSTD_getFrameHeader(), which will provide a more precise error code. */ +unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) +{ + ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; + size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); + if (ZSTD_isError(hError)) return 0; + return zfp.dictID; +} + + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Use dictionary without significant overhead. 
*/ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict) +{ + /* pass content and size in case legacy frames are encountered */ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, + NULL, 0, + ddict); +} + + +/*===================================== +* Streaming decompression +*====================================*/ + +ZSTD_DStream* ZSTD_createDStream(void) +{ + DEBUGLOG(3, "ZSTD_createDStream"); + return ZSTD_createDStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticDCtx(workspace, workspaceSize); +} + +ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDCtx_advanced(customMem); +} + +size_t ZSTD_freeDStream(ZSTD_DStream* zds) +{ + return ZSTD_freeDCtx(zds); +} + + +/* *** Initialization *** */ + +size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; } +size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (dict && dictSize != 0) { + dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); + dctx->ddict = dctx->ddictLocal; + dctx->dictUses = ZSTD_use_indefinitely; + } + return 0; +} + +size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + 
return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); + dctx->dictUses = ZSTD_use_once; + return 0; +} + +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + + +/* ZSTD_initDStream_usingDict() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) +{ + DEBUGLOG(4, "ZSTD_initDStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); + return ZSTD_startingInputLength(zds->format); +} + +/* note : this variant can't fail */ +size_t ZSTD_initDStream(ZSTD_DStream* zds) +{ + DEBUGLOG(4, "ZSTD_initDStream"); + return ZSTD_initDStream_usingDDict(zds, NULL); +} + +/* ZSTD_initDStream_usingDDict() : + * ddict will just be referenced, and must outlive decompression session + * this function cannot fail */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) +{ + FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); + return ZSTD_startingInputLength(dctx->format); +} + +/* ZSTD_resetDStream() : + * return : expected size, aka ZSTD_startingInputLength(). 
+ * this function cannot fail */ +size_t ZSTD_resetDStream(ZSTD_DStream* dctx) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); + return ZSTD_startingInputLength(dctx->format); +} + + +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (ddict) { + dctx->ddict = ddict; + dctx->dictUses = ZSTD_use_indefinitely; + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) { + if (dctx->ddictSet == NULL) { + dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); + if (!dctx->ddictSet) { + RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); + } + } + assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */ + FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), ""); + } + } + return 0; +} + +/* ZSTD_DCtx_setMaxWindowSize() : + * note : no direct equivalence in ZSTD_DCtx_setParameter, + * since this version sets windowSize, and the other sets windowLog */ +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); + size_t const min = (size_t)1 << bounds.lowerBound; + size_t const max = (size_t)1 << bounds.upperBound; + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); + dctx->maxWindowSize = maxWindowSize; + return 0; +} + +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) +{ + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format); +} + +ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + switch(dParam) { + case ZSTD_d_windowLogMax: + bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + 
return bounds; + case ZSTD_d_format: + bounds.lowerBound = (int)ZSTD_f_zstd1; + bounds.upperBound = (int)ZSTD_f_zstd1_magicless; + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + return bounds; + case ZSTD_d_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + case ZSTD_d_forceIgnoreChecksum: + bounds.lowerBound = (int)ZSTD_d_validateChecksum; + bounds.upperBound = (int)ZSTD_d_ignoreChecksum; + return bounds; + case ZSTD_d_refMultipleDDicts: + bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; + bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; + return bounds; + default:; + } + bounds.error = ERROR(parameter_unsupported); + return bounds; +} + +/* ZSTD_dParam_withinBounds: + * @return 1 if value is within dParam bounds, + * 0 otherwise */ +static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +#define CHECK_DBOUNDS(p,v) { \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ +} + +size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value) +{ + switch (param) { + case ZSTD_d_windowLogMax: + *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize); + return 0; + case ZSTD_d_format: + *value = (int)dctx->format; + return 0; + case ZSTD_d_stableOutBuffer: + *value = (int)dctx->outBufferMode; + return 0; + case ZSTD_d_forceIgnoreChecksum: + *value = (int)dctx->forceIgnoreChecksum; + return 0; + case ZSTD_d_refMultipleDDicts: + *value = (int)dctx->refMultipleDDicts; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + switch(dParam) { + 
case ZSTD_d_windowLogMax: + if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; + CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); + dctx->maxWindowSize = ((size_t)1) << value; + return 0; + case ZSTD_d_format: + CHECK_DBOUNDS(ZSTD_d_format, value); + dctx->format = (ZSTD_format_e)value; + return 0; + case ZSTD_d_stableOutBuffer: + CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); + dctx->outBufferMode = (ZSTD_bufferMode_e)value; + return 0; + case ZSTD_d_forceIgnoreChecksum: + CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); + dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; + return 0; + case ZSTD_d_refMultipleDDicts: + CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value); + if (dctx->staticSize != 0) { + RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!"); + } + dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + dctx->streamStage = zdss_init; + dctx->noForwardProgress = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + ZSTD_DCtx_resetParameters(dctx); + } + return 0; +} + + +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) +{ + return ZSTD_sizeof_DCtx(dctx); +} + +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +{ + size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); + unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); + size_t const minRBSize = (size_t) neededSize; + RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, + 
frameParameter_windowTooLarge, ""); + return minRBSize; +} + +size_t ZSTD_estimateDStreamSize(size_t windowSize) +{ + size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + size_t const inBuffSize = blockSize; /* no block can be larger */ + size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; +} + +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) +{ + U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ + ZSTD_frameHeader zfh; + size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(err)) return err; + RETURN_ERROR_IF(err>0, srcSize_wrong, ""); + RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, + frameParameter_windowTooLarge, ""); + return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); +} + + +/* ***** Decompression ***** */ + +static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; +} + +static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) + zds->oversizedDuration++; + else + zds->oversizedDuration = 0; +} + +static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) +{ + return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ +static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) +{ + ZSTD_outBuffer const expect = zds->expectedOutBuffer; + /* No requirement when ZSTD_obm_stable is not enabled. 
*/ + if (zds->outBufferMode != ZSTD_bm_stable) + return 0; + /* Any buffer is allowed in zdss_init, this must be the same for every other call until + * the context is reset. + */ + if (zds->streamStage == zdss_init) + return 0; + /* The buffer must match our expectation exactly. */ + if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) + return 0; + RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!"); +} + +/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() + * and updates the stage and the output buffer state. This call is extracted so it can be + * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. + * NOTE: You must break after calling this function since the streamStage is modified. + */ +static size_t ZSTD_decompressContinueStream( + ZSTD_DStream* zds, char** op, char* oend, + void const* src, size_t srcSize) { + int const isSkipFrame = ZSTD_isSkipFrame(zds); + if (zds->outBufferMode == ZSTD_bm_buffered) { + size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart; + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + if (!decodedSize && !isSkipFrame) { + zds->streamStage = zdss_read; + } else { + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + } + } else { + /* Write directly into the output buffer */ + size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op); + size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + *op += decodedSize; + /* Flushing is not needed. 
*/ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_bm_stable); + } + return 0; +} + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + const char* const src = (const char*)input->src; + const char* const istart = input->pos != 0 ? src + input->pos : src; + const char* const iend = input->size != 0 ? src + input->size : src; + const char* ip = istart; + char* const dst = (char*)output->dst; + char* const ostart = output->pos != 0 ? dst + output->pos : dst; + char* const oend = output->size != 0 ? dst + output->size : dst; + char* op = ostart; + U32 someMoreWork = 1; + + DEBUGLOG(5, "ZSTD_decompressStream"); + RETURN_ERROR_IF( + input->pos > input->size, + srcSize_wrong, + "forbidden. in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + RETURN_ERROR_IF( + output->pos > output->size, + dstSize_tooSmall, + "forbidden. out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); + DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); + + while (someMoreWork) { + switch(zds->streamStage) + { + case zdss_init : + DEBUGLOG(5, "stage zdss_init => transparent reset "); + zds->streamStage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; + zds->legacyVersion = 0; + zds->hostageByte = 0; + zds->expectedOutBuffer = *output; + /* fall-through */ + + case zdss_loadHeader : + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + if (zds->legacyVersion) { + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; + return hint; + } } +#endif + { size_t const hSize = 
ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + if (zds->refMultipleDDicts && zds->ddictSet) { + ZSTD_DCtx_selectFrameDDict(zds); + } + DEBUGLOG(5, "header size : %u", (U32)hSize); + if (ZSTD_isError(hSize)) { +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); + if (legacyVersion) { + ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); + const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; + size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0; + DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, + zds->previousLegacyVersion, legacyVersion, + dict, dictSize), ""); + zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ + return hint; + } } +#endif + return hSize; /* error */ + } + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + size_t const remainingInput = (size_t)(iend-ip); + assert(iend >= ip); + if (toLoad > remainingInput) { /* not enough input to load full header */ + if (remainingInput > 0) { + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; + } + input->pos = input->size; + return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + assert(ip != NULL); + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + break; + } } + + /* check for single-pass mode 
opportunity */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame + && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { + size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); + if (cSize <= (size_t)(iend-istart)) { + /* shortcut : using single-pass mode */ + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); + if (ZSTD_isError(decompressedSize)) return decompressedSize; + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") + ip = istart + cSize; + op += decompressedSize; + zds->expected = 0; + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } } + + /* Check output buffer is large enough for ZSTD_odm_stable. */ + if (zds->outBufferMode == ZSTD_bm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { + RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + + /* Consume header (see ZSTDds_decodeFrameHeader) */ + DEBUGLOG(4, "Consume header"); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); + + if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); + zds->stage = ZSTDds_skipFrame; + } else { + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); + zds->expected = ZSTD_blockHeaderSize; + zds->stage = ZSTDds_decodeBlockHeader; + } + + /* control buffer memory usage */ + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32)(zds->fParams.windowSize >>10), + (U32)(zds->maxWindowSize >> 10) ); + zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + 
RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, + frameParameter_windowTooLarge, ""); + + /* Adapt buffer sizes to frame header instructions */ + { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered + ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } else { + ZSTD_customFree(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } } + zds->streamStage = zdss_read; + /* fall-through */ + + case zdss_read: + DEBUGLOG(5, "stage zdss_read"); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)); + DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); + if (neededInSize==0) { /* end of frame */ + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= 
neededInSize) { /* decode directly from src */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); + ip += neededInSize; + /* Function modifies the stage so we must break */ + break; + } } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->streamStage = zdss_load; + /* fall-through */ + + case zdss_load: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t loadedSize; + /* At this point we shouldn't be decompressing a block that we can stream. */ + assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); + if (isSkipFrame) { + loadedSize = MIN(toLoad, (size_t)(iend-ip)); + } else { + RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, + corruption_detected, + "should never happen"); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); + } + ip += loadedSize; + zds->inPos += loadedSize; + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } + case zdss_flush: + { size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); + op += flushedSize; + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zds->streamStage = zdss_read; + if ( (zds->outBuffSize < zds->fParams.frameContentSize) + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { + DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", + (int)(zds->outBuffSize - zds->outStart), + (U32)zds->fParams.blockSizeMax); + 
zds->outStart = zds->outEnd = 0; + } + break; + } } + /* cannot complete flush */ + someMoreWork = 0; + break; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ + } } + + /* result */ + input->pos = (size_t)(ip - (const char*)(input->src)); + output->pos = (size_t)(op - (char*)(output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. */ + zds->expectedOutBuffer = *output; + + if ((ip==istart) && (op==ostart)) { /* no forward progress */ + zds->noForwardProgress ++; + if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { + RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); + RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); + assert(0); + } + } else { + zds->noForwardProgress = 0; + } + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); + if (!nextSrcSizeHint) { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) { /* output fully flushed */ + if (zds->hostageByte) { + if (input->pos >= input->size) { + /* can't release hostage (not present) */ + zds->streamStage = zdss_read; + return 1; + } + input->pos++; /* release hostage */ + } /* zds->hostageByte */ + return 0; + } /* zds->outEnd == zds->outStart */ + if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte=1; + } + return 1; + } /* nextSrcSizeHint==0 */ + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ + assert(zds->inPos <= nextSrcSizeHint); + nextSrcSizeHint -= zds->inPos; /* part already loaded*/ + return nextSrcSizeHint; + } +} + +size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos) +{ + ZSTD_outBuffer output = { dst, 
dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} +/**** ended inlining decompress/zstd_decompress.c ****/ +/**** start inlining decompress/zstd_decompress_block.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_decompress_block : + * this module takes care of decompressing _compressed_ block */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ +/**** skipping file: zstd_ddict.h ****/ +/**** skipping file: zstd_decompress_block.h ****/ + +/*_******************************************************* +* Macros +**********************************************************/ + +/* These two optional macros force the use one way or another of the two + * ZSTD_decompressSequences implementations. You can't force in both directions + * at the same time. 
+ */ +#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" +#endif + + +/*_******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } + + +/*-************************************************************* + * Block decoding + ***************************************************************/ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr) +{ + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); + + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); + return cSize; + } +} + + +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize); +/*! 
ZSTD_decodeLiteralsBlock() : + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + + switch(litEncType) + { + case set_repeat: + DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); + /* fall-through */ + + case set_compressed: + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + size_t hufSuccess; + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); + + /* prefetch huffman table if cold */ + if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { + PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); + } + + if (litEncType==set_repeat) { + if (singleStream) { + hufSuccess = HUF_decompress1X_usingDTable_bmi2( + 
dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } else { + hufSuccess = HUF_decompress4X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } + } else { + if (singleStream) { +#if defined(HUF_FORCE_DECOMPRESS_X2) + hufSuccess = HUF_decompress1X_DCtx_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace)); +#else + hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); +#endif + } else { + hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); + } + } + + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); + + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + dctx->litEntropy = 1; + if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return litCSize + lhSize; + } + + case set_basic: + { size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + break; + } + + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); + ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return 
lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litSize = litSize; + return lhSize+litSize; + } + + case set_rle: + { U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize+1; + } + default: + RETURN_ERROR(corruption_detected, "impossible"); + } + } +} + +/* Default FSE distribution tables. 
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification : + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions + * They were generated programmatically with following method : + * - start from default distributions, present in /lib/common/zstd_internal.h + * - generate tables normally, using ZSTD_buildFSETable() + * - printout the content of tables + * - pretify output, report below, test with fuzzer to ensure it's correct */ + +/* Default FSE distribution table for Literal Lengths */ +static const ZSTD_seqSymbol LL_defaultDTable[(1<tableLog = 0; + DTableH->fastMode = 0; + + cell->nbBits = 0; + cell->nextState = 0; + assert(nbAddBits < 255); + cell->nbAdditionalBits = (BYTE)nbAddBits; + cell->baseValue = baseValue; +} + + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * cannot fail if input is valid => + * all inputs are presumed validated at this stage */ +FORCE_INLINE_TEMPLATE +void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize) +{ + ZSTD_seqSymbol* const tableDecode = dt+1; + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + + U16* symbolNext = (U16*)wksp; + BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1); + U32 highThreshold = tableSize - 1; + + + /* Sanity Checks */ + assert(maxSymbolValue <= MaxSeq); + assert(tableLog <= MaxFSELog); + assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void)wkspSize; + /* Init, lay down lowprob symbols */ + { ZSTD_seqSymbol_header DTableH; + DTableH.tableLog = tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + assert(normalizedCounter[s]>=0); + symbolNext[s] = (U16)normalizedCounter[s]; + } } } + 
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + assert(tableSize <= 512); + /* Specialized symbol spreading for the case when there are + * no low probability (-1 count) symbols. When compressing + * small blocks we avoid low probability symbols to hit this + * case, since header decoding speed matters more. + */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. + */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { + U32 u; + for (u=0; u max, corruption_detected, ""); + { U32 const symbol = *(const BYTE*)src; + U32 const baseline = baseValue[symbol]; + U32 const nbBits = nbAdditionalBits[symbol]; + ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); + } + *DTablePtr = DTableSpace; + return 1; + case set_basic : + *DTablePtr = defaultTable; + return 0; + case set_repeat: + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); + /* prefetch FSE table if used */ + if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { + const void* const pStart = *DTablePtr; + size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); + PREFETCH_AREA(pStart, pSize); + } + return 0; + case set_compressed : + { unsigned tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + RETURN_ERROR_IF(FSE_isError(headerSize), 
corruption_detected, ""); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2); + *DTablePtr = DTableSpace; + return headerSize; + } + default : + assert(0); + RETURN_ERROR(GENERIC, "impossible"); + } +} + +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + int nbSeq; + DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); + + /* check */ + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); + + /* SeqHead */ + nbSeq = *ip++; + if (!nbSeq) { + *nbSeqPtr=0; + RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); + return 1; + } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ; + ip+=2; + } else { + RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + } + *nbSeqPtr = nbSeq; + + /* FSE table descriptors */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ + { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); + symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); + symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); + ip++; + + /* Build DTables */ + { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, + LLtype, MaxLL, LLFSELog, + ip, iend-ip, + LL_base, LL_bits, + LL_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += llhSize; + } + + { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, + OFtype, MaxOff, OffFSELog, + ip, 
iend-ip, + OF_base, OF_bits, + OF_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += ofhSize; + } + + { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, + MLtype, MaxML, MLFSELog, + ip, iend-ip, + ML_base, ML_bits, + ML_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += mlhSize; + } + } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; + const BYTE* match; +} seq_t; + +typedef struct { + size_t state; + const ZSTD_seqSymbol* table; +} ZSTD_fseState; + +typedef struct { + BIT_DStream_t DStream; + ZSTD_fseState stateLL; + ZSTD_fseState stateOffb; + ZSTD_fseState stateML; + size_t prevOffset[ZSTD_REP_NUM]; + const BYTE* prefixStart; + const BYTE* dictEnd; + size_t pos; +} seqState_t; + +/*! ZSTD_overlapCopy8() : + * Copies 8 bytes from ip to op and updates op and ip where ip <= op. + * If the offset is < 8 then the offset is spread to at least 8 bytes. + * + * Precondition: *ip <= *op + * Postcondition: *op - *op >= 8 + */ +HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { + assert(*ip <= *op); + if (offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[offset]; + (*op)[0] = (*ip)[0]; + (*op)[1] = (*ip)[1]; + (*op)[2] = (*ip)[2]; + (*op)[3] = (*ip)[3]; + *ip += dec32table[offset]; + ZSTD_copy4(*op+4, *ip); + *ip -= sub2; + } else { + ZSTD_copy8(*op, *ip); + } + *ip += 8; + *op += 8; + assert(*op - *ip >= 8); +} + +/*! 
ZSTD_safecopy() : + * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer + * and write up to 16 bytes past oend_w (op >= oend_w is allowed). + * This function is only called in the uncommon case where the sequence is near the end of the block. It + * should be fast for a single long sequence, but can be slow for several short sequences. + * + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. + * The src buffer must be before the dst buffer. + */ +static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || + (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); + + if (length < 8) { + /* Handle short lengths. */ + while (op < oend) *op++ = *ip++; + return; + } + if (ovtype == ZSTD_overlap_src_before_dst) { + /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ + assert(length >= 8); + ZSTD_overlapCopy8(&op, &ip, diff); + assert(op - ip >= 8); + assert(op <= oend); + } + + if (oend <= oend_w) { + /* No risk of overwrite. */ + ZSTD_wildcopy(op, ip, length, ovtype); + return; + } + if (op <= oend_w) { + /* Wildcopy until we get close to the end. */ + assert(oend > oend_w); + ZSTD_wildcopy(op, ip, oend_w - op, ovtype); + ip += oend_w - op; + op = oend_w; + } + /* Handle the leftovers. */ + while (op < oend) *op++ = *ip++; +} + +/* ZSTD_execSequenceEnd(): + * This version handles cases that are near the end of the output buffer. It requires + * more careful checks to make sure there is no overflow. By separating out these hard + * and unlikely cases, we can speed up the common cases. 
+ * + * NOTE: This function needs to be fast for a single long sequence, but doesn't need + * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). + */ +FORCE_NOINLINE +size_t ZSTD_execSequenceEnd(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); + + /* copy literals */ + ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); + op = oLitEnd; + *litPtr = iLitEnd; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); + match = dictEnd - (prefixStart-match); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); + return sequenceLength; +} + +HINT_INLINE +size_t ZSTD_execSequence(BYTE* op, 
+ BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) + return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); + assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. 
+ */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (UNLIKELY(sequence.litLength > 16)) { + ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); + } + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* Copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. + */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); + + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); + + /* If the match length is > 8 bytes, then continue with the wildcopy. 
*/ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) +{ + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", + (U32)DStatePtr->state, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) +{ + ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) +{ + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offsets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? 
ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) + +typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; + +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) +{ + seq_t seq; + ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; + ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; + ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; + U32 const llBase = llDInfo.baseValue; + U32 const mlBase = mlDInfo.baseValue; + U32 const ofBase = ofDInfo.baseValue; + BYTE const llBits = llDInfo.nbAdditionalBits; + BYTE const mlBits = mlDInfo.nbAdditionalBits; + BYTE const ofBits = ofDInfo.nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; + + /* sequence */ + { size_t offset; + if (ofBits > 1) { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } else { + U32 const ll0 = (llBase == 0); + if (LIKELY((ofBits == 0))) { + if 
(LIKELY(!ll0)) + offset = seqState->prevOffset[0]; + else { + offset = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } + seq.offset = offset; + } + + seq.matchLength = mlBase; + if (mlBits > 0) + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase; + if (llBits > 0) + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + if (prefetch == ZSTD_p_prefetch) { + size_t const pos = seqState->pos + seq.litLength; + const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; + seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. 
+ * No consequence though : no memory access will occur, offset is only used for prefetching */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update + * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). + * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). + * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the + * better option, so it is the default for other compilers. But, if you + * measure that it is worse, please put up a pull request. + */ + { +#if defined(__GNUC__) && !defined(__clang__) + const int kUseUpdateFseState = 1; +#else + const int kUseUpdateFseState = 0; +#endif + if (kUseUpdateFseState) { + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + } else { + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ + } + } + + return seq; +} + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. 
*/ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. */ + return 1; +} + +MEM_STATIC void ZSTD_assertValidSequence( + ZSTD_DCtx const* dctx, + BYTE const* op, BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, BYTE const* virtualStart) +{ +#if DEBUGLEVEL >= 1 + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. 
*/ + assert(seq.offset <= windowSize); + } +#else + (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; +#endif +} +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +FORCE_INLINE_TEMPLATE size_t +DONT_VECTORIZE +ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences_body"); + (void)frame; + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + size_t error = 0; + dctx->fseEntropy = 1; + { U32 i; for (i=0; ientropy.rep[i]; } + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); + + ZSTD_STATIC_ASSERT( + BIT_DStream_unfinished < BIT_DStream_completed && + BIT_DStream_endOfBuffer < BIT_DStream_completed && + BIT_DStream_completed < BIT_DStream_overflow); + +#if defined(__GNUC__) && defined(__x86_64__) + /* Align the decompression loop to 32 + 16 bytes. + * + * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression + * speed swings based on the alignment of the decompression loop. This + * performance swing is caused by parts of the decompression loop falling + * out of the DSB. 
The entire decompression loop should fit in the DSB, + * when it can't we get much worse performance. You can measure if you've + * hit the good case or the bad case with this perf command for some + * compressed file test.zst: + * + * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ + * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst + * + * If you see most cycles served out of the MITE you've hit the bad case. + * If you see most cycles served out of the DSB you've hit the good case. + * If it is pretty even then you may be in an okay case. + * + * I've been able to reproduce this issue on the following CPUs: + * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 + * Use Instruments->Counters to get DSB/MITE cycles. + * I never got performance swings, but I was able to + * go from the good case of mostly DSB to half of the + * cycles served from MITE. + * - Coffeelake: Intel i9-9900k + * + * I haven't been able to reproduce the instability or DSB misses on any + * of the following CPUS: + * - Haswell + * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH + * - Skylake + * + * If you are seeing performance stability this script can help test. + * It tests on 4 commits in zstd where I saw performance change. 
+ * + * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 + */ + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 4"); +#endif + for ( ; ; ) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + BIT_reloadDStream(&(seqState.DStream)); + op += oneSeqSize; + /* gcc and clang both don't like early returns in this loop. + * Instead break and check for an error at the end of the loop. + */ + if (UNLIKELY(ZSTD_isError(oneSeqSize))) { + error = oneSeqSize; + break; + } + if (UNLIKELY(!--nbSeq)) break; + } + + /* check if reached exact end */ + DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); + if (ZSTD_isError(error)) return error; + RETURN_ERROR_IF(nbSeq, corruption_detected, ""); + RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); + /* save reps for next block */ + { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, 
frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequencesLong_body( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + (void)frame; + + /* Regen sequences */ + if (nbSeq) { +#define STORED_SEQS 4 +#define STORED_SEQS_MASK (STORED_SEQS-1) +#define ADVANCED_SEQS 4 + seq_t sequences[STORED_SEQS]; + int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); + seqState_t seqState; + int seqNb; + dctx->fseEntropy = 1; + { int i; for (i=0; ientropy.rep[i]; } + seqState.prefixStart = prefixStart; + seqState.pos = (size_t)(op-prefixStart); + seqState.dictEnd = dictEnd; + assert(dst != NULL); + assert(iend >= ip); + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + /* prepare in advance */ + for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, 
lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if DYNAMIC_BMI2 + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static TARGET_ATTRIBUTE("bmi2") size_t +DONT_VECTORIZE +ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + +#endif /* DYNAMIC_BMI2 */ + +typedef size_t (*ZSTD_decompressSequences_t)( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame); + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static size_t +ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + DEBUGLOG(5, "ZSTD_decompressSequences"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return 
ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + } +#endif + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +/* ZSTD_decompressSequencesLong() : + * decompression function triggered when a minimum share of offsets is considered "long", + * aka out of cache. + * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". + * This function will try to mitigate main memory latency through the use of prefetching */ +static size_t +ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + DEBUGLOG(5, "ZSTD_decompressSequencesLong"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + } +#endif + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +/* ZSTD_getLongOffsetsShare() : + * condition : offTable must be valid + * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) + * compared to maximum possible of (1< 22) total += 1; + } + + assert(tableLog <= OffFSELog); + total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ + + return total; +} +#endif + +size_t +ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame) +{ /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*)src; + /* 
isLongOffset must be true if there are long offsets. + * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. + * We don't expect that to be the case in 64-bit mode. + * In block mode, window size is not known, so we have to be conservative. + * (note: but it could be evaluated from current-lowLimit) + */ + ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); + DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); + + RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); + + /* Decode literals section */ + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; + } + + /* Build Decoding Tables */ + { + /* These macros control at build-time which decompressor implementation + * we use. If neither is defined, we do some inspection and dispatch at + * runtime. + */ +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + int usePrefetchDecoder = dctx->ddictIsCold; +#endif + int nbSeq; + size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + srcSize -= seqHSize; + + RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if ( !usePrefetchDecoder + && (!frame || (dctx->fParams.windowSize > (1<<24))) + && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ + U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); + U32 const minShare = MEM_64bits() ? 
7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ + usePrefetchDecoder = (shareLongOffsets >= minShare); + } +#endif + + dctx->ddictIsCold = 0; + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if (usePrefetchDecoder) +#endif +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + /* else */ + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + } +} + + +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) +{ + if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dst; + dctx->previousDstEnd = dst; + } +} + + +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t dSize; + ZSTD_checkContinuity(dctx, dst, dstCapacity); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); + dctx->previousDstEnd = (char*)dst + dSize; + return dSize; +} +/**** ended inlining decompress/zstd_decompress_block.c ****/ + +/**** start inlining dictBuilder/cover.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* ***************************************************************************** + * Constructs a dictionary using a heuristic based on the following paper: + * + * Liao, Petri, Moffat, Wirth + * Effective Construction of Relative Lempel-Ziv Dictionaries + * Published in WWW 2016. + * + * Adapted from code originally written by @ot (Giuseppe Ottaviano). + ******************************************************************************/ + +/*-************************************* +* Dependencies +***************************************/ +#include /* fprintf */ +#include /* malloc, free, qsort */ +#include /* memset */ +#include /* clock */ + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/pool.h ****/ +/**** skipping file: ../common/threading.h ****/ +/**** start inlining cover.h ****/ +/* + * Copyright (c) 2017-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include /* fprintf */ +#include /* malloc, free, qsort */ +#include /* memset */ +#include /* clock */ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/pool.h ****/ +/**** skipping file: ../common/threading.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +#ifndef ZDICT_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY +#endif +/**** start inlining zdict.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef DICTBUILDER_H_001 +#define DICTBUILDER_H_001 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*====== Dependencies ======*/ +#include /* size_t */ + + +/* ===== ZDICTLIB_API : control library symbols visibility ===== */ +#ifndef ZDICTLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZDICTLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZDICTLIB_API ZDICTLIB_VISIBILITY +#endif + + +/*! ZDICT_trainFromBuffer(): + * Train a dictionary from an array of samples. + * Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4, + * f=20, and accel=1. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Note: Dictionary training will fail if there are not enough samples to construct a + * dictionary, or if most of the samples are too small (< 8 bytes being the lower limit). + * If dictionary training fails, you should use zstd without a dictionary, as the dictionary + * would've been ineffective anyways. If you believe your samples would benefit from a dictionary + * please open an issue with details, and we can look into it. 
+ * Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples); + +typedef struct { + int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */ + unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ + unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value) */ +} ZDICT_params_t; + +/*! ZDICT_finalizeDictionary(): + * Given a custom content as a basis for dictionary, and a set of samples, + * finalize dictionary by adding headers and statistics according to the zstd + * dictionary format. + * + * Samples must be stored concatenated in a flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each + * sample in order. The samples are used to construct the statistics, so they + * should be representative of what you will compress with this dictionary. + * + * The compression level can be set in `parameters`. You should pass the + * compression level you expect to use in production. The statistics for each + * compression level differ, so tuning the dictionary for the compression level + * can help quite a bit. + * + * You can set an explicit dictionary ID in `parameters`, or allow us to pick + * a random dictionary ID for you, but we can't guarantee no collisions. 
+ * + * The dstDictBuffer and the dictContent may overlap, and the content will be + * appended to the end of the header. If the header + the content doesn't fit in + * maxDictSize the beginning of the content is truncated to make room, since it + * is presumed that the most profitable content is at the end of the dictionary, + * since that is the cheapest to reference. + * + * `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes. + * `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN). + * + * @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`), + * or an error code, which can be tested by ZDICT_isError(). + * Note: ZDICT_finalizeDictionary() will push notifications into stderr if + * instructed to, using notificationLevel>0. + * NOTE: This function currently may fail in several edge cases including: + * * Not enough samples + * * Samples are uncompressible + * * Samples are all exactly the same + */ +ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize, + const void* dictContent, size_t dictContentSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t parameters); + + +/*====== Helper functions ======*/ +ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ +ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */ +ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); +ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); + + + +#ifdef ZDICT_STATIC_LINKING_ONLY + +/* ==================================================================================== + * The definitions in this section are considered experimental. + * They should never be used with a dynamic library, as they may change in the future. + * They are provided for advanced usages. 
+ * Use them only in association with static linking. + * ==================================================================================== */ + +#define ZDICT_CONTENTSIZE_MIN 128 +#define ZDICT_DICTSIZE_MIN 256 + +/*! ZDICT_cover_params_t: + * k and d are the only required parameters. + * For others, value 0 means default. + */ +typedef struct { + unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ + unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ + unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ + unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ + double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */ + unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ + unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. 
*/ + ZDICT_params_t zParams; +} ZDICT_cover_params_t; + +typedef struct { + unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ + unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ + unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 1 means default(20)*/ + unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ + unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ + double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */ + unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */ + unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ + unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */ + + ZDICT_params_t zParams; +} ZDICT_fastCover_params_t; + +/*! ZDICT_trainFromBuffer_cover(): + * Train a dictionary from an array of samples using the COVER algorithm. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. 
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters); + +/*! ZDICT_optimizeTrainFromBuffer_cover(): + * The same requirements as above hold for all the parameters except `parameters`. + * This function tries many parameter combinations and picks the best parameters. + * `*parameters` is filled with the best parameters found, + * dictionary constructed with those parameters is stored in `dictBuffer`. + * + * All of the parameters d, k, steps are optional. + * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. + * if steps is zero it defaults to its default value. + * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. + * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. + * See ZDICT_trainFromBuffer() for details on failure modes. 
+ * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. + */ +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t* parameters); + +/*! ZDICT_trainFromBuffer_fastCover(): + * Train a dictionary from an array of samples using a modified version of COVER algorithm. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * d and k are required. + * All other parameters are optional, will use default values if not provided + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer, + size_t dictBufferCapacity, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t parameters); + +/*! ZDICT_optimizeTrainFromBuffer_fastCover(): + * The same requirements as above hold for all the parameters except `parameters`. 
+ * This function tries many parameter combinations (specifically, k and d combinations) + * and picks the best parameters. `*parameters` is filled with the best parameters found, + * dictionary constructed with those parameters is stored in `dictBuffer`. + * All of the parameters d, k, steps, f, and accel are optional. + * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. + * if steps is zero it defaults to its default value. + * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. + * If f is zero, default value of 20 is used. + * If accel is zero, default value of 1 is used. + * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread. + */ +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, + size_t dictBufferCapacity, const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t* parameters); + +typedef struct { + unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ + ZDICT_params_t zParams; +} ZDICT_legacy_params_t; + +/*! ZDICT_trainFromBuffer_legacy(): + * Train a dictionary from an array of samples. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * `parameters` is optional and can be provided with values set to 0 to mean "default". 
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t parameters); + + +/* Deprecation warnings */ +/* It is generally possible to disable deprecation warnings from compiler, + for example with -Wno-deprecated-declarations for gcc + or _CRT_SECURE_NO_WARNINGS in Visual. 
+ Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */ +#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS +# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */ +#else +# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API +# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message))) +# elif (ZDICT_GCC_VERSION >= 301) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler") +# define ZDICT_DEPRECATED(message) ZDICTLIB_API +# endif +#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */ + +ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead") +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + + +#endif /* ZDICT_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif + +#endif /* DICTBUILDER_H_001 */ +/**** ended inlining zdict.h ****/ + +/** + * COVER_best_t is used for two purposes: + * 1. Synchronizing threads. + * 2. Saving the best parameters and dictionary. + * + * All of the methods except COVER_best_init() are thread safe if zstd is + * compiled with multithreaded support. + */ +typedef struct COVER_best_s { + ZSTD_pthread_mutex_t mutex; + ZSTD_pthread_cond_t cond; + size_t liveJobs; + void *dict; + size_t dictSize; + ZDICT_cover_params_t parameters; + size_t compressedSize; +} COVER_best_t; + +/** + * A segment is a range in the source as well as the score of the segment. 
+ */ +typedef struct { + U32 begin; + U32 end; + U32 score; +} COVER_segment_t; + +/** + *Number of epochs and size of each epoch. + */ +typedef struct { + U32 num; + U32 size; +} COVER_epoch_info_t; + +/** + * Struct used for the dictionary selection function. + */ +typedef struct COVER_dictSelection { + BYTE* dictContent; + size_t dictSize; + size_t totalCompressedSize; +} COVER_dictSelection_t; + +/** + * Computes the number of epochs and the size of each epoch. + * We will make sure that each epoch gets at least 10 * k bytes. + * + * The COVER algorithms divide the data up into epochs of equal size and + * select one segment from each epoch. + * + * @param maxDictSize The maximum allowed dictionary size. + * @param nbDmers The number of dmers we are training on. + * @param k The parameter k (segment size). + * @param passes The target number of passes over the dmer corpus. + * More passes means a better dictionary. + */ +COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers, + U32 k, U32 passes); + +/** + * Warns the user when their corpus is too small. + */ +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel); + +/** + * Checks total compressed size of a dictionary + */ +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, + const size_t *samplesSizes, const BYTE *samples, + size_t *offsets, + size_t nbTrainSamples, size_t nbSamples, + BYTE *const dict, size_t dictBufferCapacity); + +/** + * Returns the sum of the sample sizes. + */ +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ; + +/** + * Initialize the `COVER_best_t`. + */ +void COVER_best_init(COVER_best_t *best); + +/** + * Wait until liveJobs == 0. + */ +void COVER_best_wait(COVER_best_t *best); + +/** + * Call COVER_best_wait() and then destroy the COVER_best_t. + */ +void COVER_best_destroy(COVER_best_t *best); + +/** + * Called when a thread is about to be launched. + * Increments liveJobs. 
+ */ +void COVER_best_start(COVER_best_t *best); + +/** + * Called when a thread finishes executing, both on error or success. + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. + * If this dictionary is the best so far save it and its parameters. + */ +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection); +/** + * Error function for COVER_selectDict function. Checks if the return + * value is an error. + */ +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection); + + /** + * Error function for COVER_selectDict function. Returns a struct where + * return.totalCompressedSize is a ZSTD error. + */ +COVER_dictSelection_t COVER_dictSelectionError(size_t error); + +/** + * Always call after selectDict is called to free up used memory from + * newly created dictionary. + */ +void COVER_dictSelectionFree(COVER_dictSelection_t selection); + +/** + * Called to finalize the dictionary and select one based on whether or not + * the shrink-dict flag was enabled. If enabled the dictionary used is the + * smallest dictionary within a specified regression of the compressed size + * from the largest dictionary. + */ + COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize); +/**** ended inlining cover.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +#ifndef ZDICT_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY +#endif +/**** skipping file: zdict.h ****/ + +/*-************************************* +* Constants +***************************************/ +#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? 
((unsigned)-1) : ((unsigned)1 GB)) +#define COVER_DEFAULT_SPLITPOINT 1.0 + +/*-************************************* +* Console display +***************************************/ +#ifndef LOCALDISPLAYLEVEL +static int g_displayLevel = 2; +#endif +#undef DISPLAY +#define DISPLAY(...) \ + { \ + fprintf(stderr, __VA_ARGS__); \ + fflush(stderr); \ + } +#undef LOCALDISPLAYLEVEL +#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + DISPLAY(__VA_ARGS__); \ + } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ +#undef DISPLAYLEVEL +#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) + +#ifndef LOCALDISPLAYUPDATE +static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; +#endif +#undef LOCALDISPLAYUPDATE +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \ + g_time = clock(); \ + DISPLAY(__VA_ARGS__); \ + } \ + } +#undef DISPLAYUPDATE +#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) + +/*-************************************* +* Hash table +*************************************** +* A small specialized hash map for storing activeDmers. +* The map does not resize, so if it becomes full it will loop forever. +* Thus, the map must be large enough to store every value. +* The map implements linear probing and keeps its load less than 0.5. +*/ + +#define MAP_EMPTY_VALUE ((U32)-1) +typedef struct COVER_map_pair_t_s { + U32 key; + U32 value; +} COVER_map_pair_t; + +typedef struct COVER_map_s { + COVER_map_pair_t *data; + U32 sizeLog; + U32 size; + U32 sizeMask; +} COVER_map_t; + +/** + * Clear the map. + */ +static void COVER_map_clear(COVER_map_t *map) { + memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t)); +} + +/** + * Initializes a map of the given size. + * Returns 1 on success and 0 on failure. 
+ * The map must be destroyed with COVER_map_destroy(). + * The map is only guaranteed to be large enough to hold size elements. + */ +static int COVER_map_init(COVER_map_t *map, U32 size) { + map->sizeLog = ZSTD_highbit32(size) + 2; + map->size = (U32)1 << map->sizeLog; + map->sizeMask = map->size - 1; + map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t)); + if (!map->data) { + map->sizeLog = 0; + map->size = 0; + return 0; + } + COVER_map_clear(map); + return 1; +} + +/** + * Internal hash function + */ +static const U32 COVER_prime4bytes = 2654435761U; +static U32 COVER_map_hash(COVER_map_t *map, U32 key) { + return (key * COVER_prime4bytes) >> (32 - map->sizeLog); +} + +/** + * Helper function that returns the index that a key should be placed into. + */ +static U32 COVER_map_index(COVER_map_t *map, U32 key) { + const U32 hash = COVER_map_hash(map, key); + U32 i; + for (i = hash;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *pos = &map->data[i]; + if (pos->value == MAP_EMPTY_VALUE) { + return i; + } + if (pos->key == key) { + return i; + } + } +} + +/** + * Returns the pointer to the value for key. + * If key is not in the map, it is inserted and the value is set to 0. + * The map must not be full. + */ +static U32 *COVER_map_at(COVER_map_t *map, U32 key) { + COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)]; + if (pos->value == MAP_EMPTY_VALUE) { + pos->key = key; + pos->value = 0; + } + return &pos->value; +} + +/** + * Deletes key from the map if present. 
+ */ +static void COVER_map_remove(COVER_map_t *map, U32 key) { + U32 i = COVER_map_index(map, key); + COVER_map_pair_t *del = &map->data[i]; + U32 shift = 1; + if (del->value == MAP_EMPTY_VALUE) { + return; + } + for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *const pos = &map->data[i]; + /* If the position is empty we are done */ + if (pos->value == MAP_EMPTY_VALUE) { + del->value = MAP_EMPTY_VALUE; + return; + } + /* If pos can be moved to del do so */ + if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) { + del->key = pos->key; + del->value = pos->value; + del = pos; + shift = 1; + } else { + ++shift; + } + } +} + +/** + * Destroys a map that is inited with COVER_map_init(). + */ +static void COVER_map_destroy(COVER_map_t *map) { + if (map->data) { + free(map->data); + } + map->data = NULL; + map->size = 0; +} + +/*-************************************* +* Context +***************************************/ + +typedef struct { + const BYTE *samples; + size_t *offsets; + const size_t *samplesSizes; + size_t nbSamples; + size_t nbTrainSamples; + size_t nbTestSamples; + U32 *suffix; + size_t suffixSize; + U32 *freqs; + U32 *dmerAt; + unsigned d; +} COVER_ctx_t; + +/* We need a global context for qsort... */ +static COVER_ctx_t *g_coverCtx = NULL; + +/*-************************************* +* Helper functions +***************************************/ + +/** + * Returns the sum of the sample sizes. + */ +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) { + size_t sum = 0; + unsigned i; + for (i = 0; i < nbSamples; ++i) { + sum += samplesSizes[i]; + } + return sum; +} + +/** + * Returns -1 if the dmer at lp is less than the dmer at rp. + * Return 0 if the dmers at lp and rp are equal. + * Returns 1 if the dmer at lp is greater than the dmer at rp. 
+ */ +static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U32 const lhs = *(U32 const *)lp; + U32 const rhs = *(U32 const *)rp; + return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d); +} +/** + * Faster version for d <= 8. + */ +static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1); + U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask; + U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask; + if (lhs < rhs) { + return -1; + } + return (lhs > rhs); +} + +/** + * Same as COVER_cmp() except ties are broken by pointer value + * NOTE: g_coverCtx must be set to call this function. A global is required because + * qsort doesn't take an opaque pointer. + */ +static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) { + int result = COVER_cmp(g_coverCtx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} +/** + * Faster version for d <= 8. + */ +static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) { + int result = COVER_cmp8(g_coverCtx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} + +/** + * Returns the first pointer in [first, last) whose element does not compare + * less than value. If no such element exists it returns last. + */ +static const size_t *COVER_lower_bound(const size_t *first, const size_t *last, + size_t value) { + size_t count = last - first; + while (count != 0) { + size_t step = count / 2; + const size_t *ptr = first; + ptr += step; + if (*ptr < value) { + first = ++ptr; + count -= step + 1; + } else { + count = step; + } + } + return first; +} + +/** + * Generic groupBy function. + * Groups an array sorted by cmp into groups with equivalent values. + * Calls grp for each group. 
+ */ +static void +COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx, + int (*cmp)(COVER_ctx_t *, const void *, const void *), + void (*grp)(COVER_ctx_t *, const void *, const void *)) { + const BYTE *ptr = (const BYTE *)data; + size_t num = 0; + while (num < count) { + const BYTE *grpEnd = ptr + size; + ++num; + while (num < count && cmp(ctx, ptr, grpEnd) == 0) { + grpEnd += size; + ++num; + } + grp(ctx, ptr, grpEnd); + ptr = grpEnd; + } +} + +/*-************************************* +* Cover functions +***************************************/ + +/** + * Called on each group of positions with the same dmer. + * Counts the frequency of each dmer and saves it in the suffix array. + * Fills `ctx->dmerAt`. + */ +static void COVER_group(COVER_ctx_t *ctx, const void *group, + const void *groupEnd) { + /* The group consists of all the positions with the same first d bytes. */ + const U32 *grpPtr = (const U32 *)group; + const U32 *grpEnd = (const U32 *)groupEnd; + /* The dmerId is how we will reference this dmer. + * This allows us to map the whole dmer space to a much smaller space, the + * size of the suffix array. + */ + const U32 dmerId = (U32)(grpPtr - ctx->suffix); + /* Count the number of samples this dmer shows up in */ + U32 freq = 0; + /* Details */ + const size_t *curOffsetPtr = ctx->offsets; + const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples; + /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a + * different sample than the last. + */ + size_t curSampleEnd = ctx->offsets[0]; + for (; grpPtr != grpEnd; ++grpPtr) { + /* Save the dmerId for this position so we can get back to it. */ + ctx->dmerAt[*grpPtr] = dmerId; + /* Dictionaries only help for the first reference to the dmer. + * After that zstd can reference the match from the previous reference. + * So only count each dmer once for each sample it is in. 
+ */ + if (*grpPtr < curSampleEnd) { + continue; + } + freq += 1; + /* Binary search to find the end of the sample *grpPtr is in. + * In the common case that grpPtr + 1 == grpEnd we can skip the binary + * search because the loop is over. + */ + if (grpPtr + 1 != grpEnd) { + const size_t *sampleEndPtr = + COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr); + curSampleEnd = *sampleEndPtr; + curOffsetPtr = sampleEndPtr + 1; + } + } + /* At this point we are never going to look at this segment of the suffix + * array again. We take advantage of this fact to save memory. + * We store the frequency of the dmer in the first position of the group, + * which is dmerId. + */ + ctx->suffix[dmerId] = freq; +} + + +/** + * Selects the best segment in an epoch. + * Segments of are scored according to the function: + * + * Let F(d) be the frequency of dmer d. + * Let S_i be the dmer at position i of segment S which has length k. + * + * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) + * + * Once the dmer d is in the dictionary we set F(d) = 0. + */ +static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, + COVER_map_t *activeDmers, U32 begin, + U32 end, + ZDICT_cover_params_t parameters) { + /* Constants */ + const U32 k = parameters.k; + const U32 d = parameters.d; + const U32 dmersInK = k - d + 1; + /* Try each segment (activeSegment) and save the best (bestSegment) */ + COVER_segment_t bestSegment = {0, 0, 0}; + COVER_segment_t activeSegment; + /* Reset the activeDmers in the segment */ + COVER_map_clear(activeDmers); + /* The activeSegment starts at the beginning of the epoch. */ + activeSegment.begin = begin; + activeSegment.end = begin; + activeSegment.score = 0; + /* Slide the activeSegment through the whole epoch. + * Save the best segment in bestSegment. 
+ */ + while (activeSegment.end < end) { + /* The dmerId for the dmer at the next position */ + U32 newDmer = ctx->dmerAt[activeSegment.end]; + /* The entry in activeDmers for this dmerId */ + U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer); + /* If the dmer isn't already present in the segment add its score. */ + if (*newDmerOcc == 0) { + /* The paper suggest using the L-0.5 norm, but experiments show that it + * doesn't help. + */ + activeSegment.score += freqs[newDmer]; + } + /* Add the dmer to the segment */ + activeSegment.end += 1; + *newDmerOcc += 1; + + /* If the window is now too large, drop the first position */ + if (activeSegment.end - activeSegment.begin == dmersInK + 1) { + U32 delDmer = ctx->dmerAt[activeSegment.begin]; + U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer); + activeSegment.begin += 1; + *delDmerOcc -= 1; + /* If this is the last occurrence of the dmer, subtract its score */ + if (*delDmerOcc == 0) { + COVER_map_remove(activeDmers, delDmer); + activeSegment.score -= freqs[delDmer]; + } + } + + /* If this segment is the best so far save it */ + if (activeSegment.score > bestSegment.score) { + bestSegment = activeSegment; + } + } + { + /* Trim off the zero frequency head and tail from the segment. */ + U32 newBegin = bestSegment.end; + U32 newEnd = bestSegment.begin; + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + U32 freq = freqs[ctx->dmerAt[pos]]; + if (freq != 0) { + newBegin = MIN(newBegin, pos); + newEnd = pos + 1; + } + } + bestSegment.begin = newBegin; + bestSegment.end = newEnd; + } + { + /* Zero out the frequency of each dmer covered by the chosen segment. */ + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + freqs[ctx->dmerAt[pos]] = 0; + } + } + return bestSegment; +} + +/** + * Check the validity of the parameters. + * Returns non-zero if the parameters are valid and 0 otherwise. 
+ */ +static int COVER_checkParameters(ZDICT_cover_params_t parameters, + size_t maxDictSize) { + /* k and d are required parameters */ + if (parameters.d == 0 || parameters.k == 0) { + return 0; + } + /* k <= maxDictSize */ + if (parameters.k > maxDictSize) { + return 0; + } + /* d <= k */ + if (parameters.d > parameters.k) { + return 0; + } + /* 0 < splitPoint <= 1 */ + if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){ + return 0; + } + return 1; +} + +/** + * Clean up a context initialized with `COVER_ctx_init()`. + */ +static void COVER_ctx_destroy(COVER_ctx_t *ctx) { + if (!ctx) { + return; + } + if (ctx->suffix) { + free(ctx->suffix); + ctx->suffix = NULL; + } + if (ctx->freqs) { + free(ctx->freqs); + ctx->freqs = NULL; + } + if (ctx->dmerAt) { + free(ctx->dmerAt); + ctx->dmerAt = NULL; + } + if (ctx->offsets) { + free(ctx->offsets); + ctx->offsets = NULL; + } +} + +/** + * Prepare a context for dictionary building. + * The context is only dependent on the parameter `d` and can used multiple + * times. + * Returns 0 on success or error code on error. + * The context must be destroyed with `COVER_ctx_destroy()`. + */ +static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + unsigned d, double splitPoint) { + const BYTE *const samples = (const BYTE *)samplesBuffer; + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); + /* Split samples into testing and training sets */ + const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; + const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; + const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; + const size_t testSamplesSize = splitPoint < 1.0 ? 
COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; + /* Checks */ + if (totalSamplesSize < MAX(d, sizeof(U64)) || + totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", + (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); + return ERROR(srcSize_wrong); + } + /* Check if there are at least 5 training samples */ + if (nbTrainSamples < 5) { + DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); + return ERROR(srcSize_wrong); + } + /* Check if there's testing sample */ + if (nbTestSamples < 1) { + DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); + return ERROR(srcSize_wrong); + } + /* Zero the context */ + memset(ctx, 0, sizeof(*ctx)); + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, + (unsigned)trainingSamplesSize); + DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, + (unsigned)testSamplesSize); + ctx->samples = samples; + ctx->samplesSizes = samplesSizes; + ctx->nbSamples = nbSamples; + ctx->nbTrainSamples = nbTrainSamples; + ctx->nbTestSamples = nbTestSamples; + /* Partial suffix array */ + ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; + ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* Maps index to the dmerID */ + ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* The offsets of each file */ + ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); + if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { + DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); + COVER_ctx_destroy(ctx); + return ERROR(memory_allocation); + } + ctx->freqs = NULL; + ctx->d = d; + + /* Fill offsets from the samplesSizes */ + { + U32 i; + ctx->offsets[0] = 0; + for (i = 1; i <= nbSamples; ++i) { + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; + } + } + 
DISPLAYLEVEL(2, "Constructing partial suffix array\n"); + { + /* suffix is a partial suffix array. + * It only sorts suffixes by their first parameters.d bytes. + * The sort is stable, so each dmer group is sorted by position in input. + */ + U32 i; + for (i = 0; i < ctx->suffixSize; ++i) { + ctx->suffix[i] = i; + } + /* qsort doesn't take an opaque pointer, so pass as a global. + * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is. + */ + g_coverCtx = ctx; +#if defined(__OpenBSD__) + mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#else + qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#endif + } + DISPLAYLEVEL(2, "Computing frequencies\n"); + /* For each dmer group (group of positions with the same first d bytes): + * 1. For each position we set dmerAt[position] = dmerID. The dmerID is + * (groupBeginPtr - suffix). This allows us to go from position to + * dmerID so we can look up values in freq. + * 2. We calculate how many samples the dmer occurs in and save it in + * freqs[dmerId]. + */ + COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, + (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); + ctx->freqs = ctx->suffix; + ctx->suffix = NULL; + return 0; +} + +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) +{ + const double ratio = (double)nbDmers / maxDictSize; + if (ratio >= 10) { + return; + } + LOCALDISPLAYLEVEL(displayLevel, 1, + "WARNING: The maximum dictionary size %u is too large " + "compared to the source size %u! " + "size(source)/size(dictionary) = %f, but it should be >= " + "10! This may lead to a subpar dictionary! We recommend " + "training on sources at least 10x, and preferably 100x " + "the size of the dictionary! 
\n", (U32)maxDictSize, + (U32)nbDmers, ratio); +} + +COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, + U32 nbDmers, U32 k, U32 passes) +{ + const U32 minEpochSize = k * 10; + COVER_epoch_info_t epochs; + epochs.num = MAX(1, maxDictSize / k / passes); + epochs.size = nbDmers / epochs.num; + if (epochs.size >= minEpochSize) { + assert(epochs.size * epochs.num <= nbDmers); + return epochs; + } + epochs.size = MIN(minEpochSize, nbDmers); + epochs.num = nbDmers / epochs.size; + assert(epochs.size * epochs.num <= nbDmers); + return epochs; +} + +/** + * Given the prepared context build the dictionary. + */ +static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, + COVER_map_t *activeDmers, void *dictBuffer, + size_t dictBufferCapacity, + ZDICT_cover_params_t parameters) { + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + /* Divide the data into epochs. We will select one segment from each epoch. */ + const COVER_epoch_info_t epochs = COVER_computeEpochs( + (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4); + const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3)); + size_t zeroScoreRun = 0; + size_t epoch; + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", + (U32)epochs.num, (U32)epochs.size); + /* Loop through the epochs until there are no more segments or the dictionary + * is full. + */ + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { + const U32 epochBegin = (U32)(epoch * epochs.size); + const U32 epochEnd = epochBegin + epochs.size; + size_t segmentSize; + /* Select a segment */ + COVER_segment_t segment = COVER_selectSegment( + ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); + /* If the segment covers no dmers, then we are out of content. + * There may be new content in other epochs, for continue for some time. 
+ */ + if (segment.score == 0) { + if (++zeroScoreRun >= maxZeroScoreRun) { + break; + } + continue; + } + zeroScoreRun = 0; + /* Trim the segment if necessary and if it is too small then we are done */ + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); + if (segmentSize < parameters.d) { + break; + } + /* We fill the dictionary from the back to allow the best segments to be + * referenced with the smallest offsets. + */ + tail -= segmentSize; + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); + DISPLAYUPDATE( + 2, "\r%u%% ", + (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); + } + DISPLAYLEVEL(2, "\r%79s\r", ""); + return tail; +} + +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters) +{ + BYTE* const dict = (BYTE*)dictBuffer; + COVER_ctx_t ctx; + COVER_map_t activeDmers; + parameters.splitPoint = 1.0; + /* Initialize global data */ + g_displayLevel = parameters.zParams.notificationLevel; + /* Checks */ + if (!COVER_checkParameters(parameters, dictBufferCapacity)) { + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + /* Initialize context and activeDmers */ + { + size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + parameters.d, parameters.splitPoint); + if (ZSTD_isError(initVal)) { + return initVal; + } + } + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel); + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { + DISPLAYLEVEL(1, "Failed to 
allocate dmer map: out of memory\n"); + COVER_ctx_destroy(&ctx); + return ERROR(memory_allocation); + } + + DISPLAYLEVEL(2, "Building dictionary\n"); + { + const size_t tail = + COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer, + dictBufferCapacity, parameters); + const size_t dictionarySize = ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + samplesBuffer, samplesSizes, nbSamples, parameters.zParams); + if (!ZSTD_isError(dictionarySize)) { + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", + (unsigned)dictionarySize); + } + COVER_ctx_destroy(&ctx); + COVER_map_destroy(&activeDmers); + return dictionarySize; + } +} + + + +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, + const size_t *samplesSizes, const BYTE *samples, + size_t *offsets, + size_t nbTrainSamples, size_t nbSamples, + BYTE *const dict, size_t dictBufferCapacity) { + size_t totalCompressedSize = ERROR(GENERIC); + /* Pointers */ + ZSTD_CCtx *cctx; + ZSTD_CDict *cdict; + void *dst; + /* Local variables */ + size_t dstCapacity; + size_t i; + /* Allocate dst with enough space to compress the maximum sized sample */ + { + size_t maxSampleSize = 0; + i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0; + for (; i < nbSamples; ++i) { + maxSampleSize = MAX(samplesSizes[i], maxSampleSize); + } + dstCapacity = ZSTD_compressBound(maxSampleSize); + dst = malloc(dstCapacity); + } + /* Create the cctx and cdict */ + cctx = ZSTD_createCCtx(); + cdict = ZSTD_createCDict(dict, dictBufferCapacity, + parameters.zParams.compressionLevel); + if (!dst || !cctx || !cdict) { + goto _compressCleanup; + } + /* Compress each sample and sum their sizes (or error) */ + totalCompressedSize = dictBufferCapacity; + i = parameters.splitPoint < 1.0 ? 
nbTrainSamples : 0; + for (; i < nbSamples; ++i) { + const size_t size = ZSTD_compress_usingCDict( + cctx, dst, dstCapacity, samples + offsets[i], + samplesSizes[i], cdict); + if (ZSTD_isError(size)) { + totalCompressedSize = size; + goto _compressCleanup; + } + totalCompressedSize += size; + } +_compressCleanup: + ZSTD_freeCCtx(cctx); + ZSTD_freeCDict(cdict); + if (dst) { + free(dst); + } + return totalCompressedSize; +} + + +/** + * Initialize the `COVER_best_t`. + */ +void COVER_best_init(COVER_best_t *best) { + if (best==NULL) return; /* compatible with init on NULL */ + (void)ZSTD_pthread_mutex_init(&best->mutex, NULL); + (void)ZSTD_pthread_cond_init(&best->cond, NULL); + best->liveJobs = 0; + best->dict = NULL; + best->dictSize = 0; + best->compressedSize = (size_t)-1; + memset(&best->parameters, 0, sizeof(best->parameters)); +} + +/** + * Wait until liveJobs == 0. + */ +void COVER_best_wait(COVER_best_t *best) { + if (!best) { + return; + } + ZSTD_pthread_mutex_lock(&best->mutex); + while (best->liveJobs != 0) { + ZSTD_pthread_cond_wait(&best->cond, &best->mutex); + } + ZSTD_pthread_mutex_unlock(&best->mutex); +} + +/** + * Call COVER_best_wait() and then destroy the COVER_best_t. + */ +void COVER_best_destroy(COVER_best_t *best) { + if (!best) { + return; + } + COVER_best_wait(best); + if (best->dict) { + free(best->dict); + } + ZSTD_pthread_mutex_destroy(&best->mutex); + ZSTD_pthread_cond_destroy(&best->cond); +} + +/** + * Called when a thread is about to be launched. + * Increments liveJobs. + */ +void COVER_best_start(COVER_best_t *best) { + if (!best) { + return; + } + ZSTD_pthread_mutex_lock(&best->mutex); + ++best->liveJobs; + ZSTD_pthread_mutex_unlock(&best->mutex); +} + +/** + * Called when a thread finishes executing, both on error or success. + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. + * If this dictionary is the best so far save it and its parameters. 
+ */ +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection) { + void* dict = selection.dictContent; + size_t compressedSize = selection.totalCompressedSize; + size_t dictSize = selection.dictSize; + if (!best) { + return; + } + { + size_t liveJobs; + ZSTD_pthread_mutex_lock(&best->mutex); + --best->liveJobs; + liveJobs = best->liveJobs; + /* If the new dictionary is better */ + if (compressedSize < best->compressedSize) { + /* Allocate space if necessary */ + if (!best->dict || best->dictSize < dictSize) { + if (best->dict) { + free(best->dict); + } + best->dict = malloc(dictSize); + if (!best->dict) { + best->compressedSize = ERROR(GENERIC); + best->dictSize = 0; + ZSTD_pthread_cond_signal(&best->cond); + ZSTD_pthread_mutex_unlock(&best->mutex); + return; + } + } + /* Save the dictionary, parameters, and size */ + if (dict) { + memcpy(best->dict, dict, dictSize); + best->dictSize = dictSize; + best->parameters = parameters; + best->compressedSize = compressedSize; + } + } + if (liveJobs == 0) { + ZSTD_pthread_cond_broadcast(&best->cond); + } + ZSTD_pthread_mutex_unlock(&best->mutex); + } +} + +COVER_dictSelection_t COVER_dictSelectionError(size_t error) { + COVER_dictSelection_t selection = { NULL, 0, error }; + return selection; +} + +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) { + return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent); +} + +void COVER_dictSelectionFree(COVER_dictSelection_t selection){ + free(selection.dictContent); +} + +COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) { + + size_t largestDict = 0; + size_t largestCompressed = 0; + BYTE* customDictContentEnd = 
customDictContent + dictContentSize; + + BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity); + BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity); + double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00; + + if (!largestDictbuffer || !candidateDictBuffer) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + /* Initial dictionary size and compressed size */ + memcpy(largestDictbuffer, customDictContent, dictContentSize); + dictContentSize = ZDICT_finalizeDictionary( + largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize, + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + largestDictbuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if (params.shrinkDict == 0) { + COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; + free(candidateDictBuffer); + return selection; + } + + largestDict = dictContentSize; + largestCompressed = totalCompressedSize; + dictContentSize = ZDICT_DICTSIZE_MIN; + + /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */ + while (dictContentSize < largestDict) { + memcpy(candidateDictBuffer, largestDictbuffer, largestDict); + dictContentSize = ZDICT_finalizeDictionary( + candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize, + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + 
free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + candidateDictBuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if (totalCompressedSize <= largestCompressed * regressionTolerance) { + COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize }; + free(largestDictbuffer); + return selection; + } + dictContentSize *= 2; + } + dictContentSize = largestDict; + totalCompressedSize = largestCompressed; + { + COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; + free(candidateDictBuffer); + return selection; + } +} + +/** + * Parameters for COVER_tryParameters(). + */ +typedef struct COVER_tryParameters_data_s { + const COVER_ctx_t *ctx; + COVER_best_t *best; + size_t dictBufferCapacity; + ZDICT_cover_params_t parameters; +} COVER_tryParameters_data_t; + +/** + * Tries a set of parameters and updates the COVER_best_t with the results. + * This function is thread safe if zstd is compiled with multithreaded support. + * It takes its parameters as an *OWNING* opaque pointer to support threading. 
+ */ +static void COVER_tryParameters(void *opaque) +{ + /* Save parameters as local variables */ + COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque; + const COVER_ctx_t *const ctx = data->ctx; + const ZDICT_cover_params_t parameters = data->parameters; + size_t dictBufferCapacity = data->dictBufferCapacity; + size_t totalCompressedSize = ERROR(GENERIC); + /* Allocate space for hash table, dict, and freqs */ + COVER_map_t activeDmers; + BYTE* const dict = (BYTE*)malloc(dictBufferCapacity); + COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); + U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32)); + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); + goto _cleanup; + } + if (!dict || !freqs) { + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); + goto _cleanup; + } + /* Copy the frequencies because we need to modify them */ + memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32)); + /* Build the dictionary */ + { + const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, + dictBufferCapacity, parameters); + selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); + goto _cleanup; + } + } +_cleanup: + free(dict); + COVER_best_finish(data->best, parameters, selection); + free(data); + COVER_map_destroy(&activeDmers); + COVER_dictSelectionFree(selection); + free(freqs); +} + +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t* parameters) +{ + /* constants */ 
+ const unsigned nbThreads = parameters->nbThreads; + const double splitPoint = + parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint; + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; + const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps; + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); + const unsigned kIterations = + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); + const unsigned shrinkDict = 0; + /* Local variables */ + const int displayLevel = parameters->zParams.notificationLevel; + unsigned iteration = 1; + unsigned d; + unsigned k; + COVER_best_t best; + POOL_ctx *pool = NULL; + int warned = 0; + + /* Checks */ + if (splitPoint <= 0 || splitPoint > 1) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); + return ERROR(parameter_outOfBound); + } + if (kMinK < kMaxD || kMaxK < kMinK) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + if (nbThreads > 1) { + pool = POOL_create(nbThreads, 1); + if (!pool) { + return ERROR(memory_allocation); + } + } + /* Initialization */ + COVER_best_init(&best); + /* Turn down global display level to clean up display at level 2 and below */ + g_displayLevel = displayLevel == 0 ? 
0 : displayLevel - 1; + /* Loop through d first because each new value needs a new context */ + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", + kIterations); + for (d = kMinD; d <= kMaxD; d += 2) { + /* Initialize the context for this value of d */ + COVER_ctx_t ctx; + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); + { + const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } + } + if (!warned) { + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel); + warned = 1; + } + /* Loop through k reusing the same context */ + for (k = kMinK; k <= kMaxK; k += kStepSize) { + /* Prepare the arguments */ + COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc( + sizeof(COVER_tryParameters_data_t)); + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); + if (!data) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); + COVER_best_destroy(&best); + COVER_ctx_destroy(&ctx); + POOL_free(pool); + return ERROR(memory_allocation); + } + data->ctx = &ctx; + data->best = &best; + data->dictBufferCapacity = dictBufferCapacity; + data->parameters = *parameters; + data->parameters.k = k; + data->parameters.d = d; + data->parameters.splitPoint = splitPoint; + data->parameters.steps = kSteps; + data->parameters.shrinkDict = shrinkDict; + data->parameters.zParams.notificationLevel = g_displayLevel; + /* Check the parameters */ + if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); + free(data); + continue; + } + /* Call the function and pass ownership of data to it */ + COVER_best_start(&best); + if (pool) { + POOL_add(pool, &COVER_tryParameters, data); + } else { + COVER_tryParameters(data); + } + /* Print status */ + 
LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", + (unsigned)((iteration * 100) / kIterations)); + ++iteration; + } + COVER_best_wait(&best); + COVER_ctx_destroy(&ctx); + } + LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); + /* Fill the output buffer and parameters with output of the best parameters */ + { + const size_t dictSize = best.dictSize; + if (ZSTD_isError(best.compressedSize)) { + const size_t compressedSize = best.compressedSize; + COVER_best_destroy(&best); + POOL_free(pool); + return compressedSize; + } + *parameters = best.parameters; + memcpy(dictBuffer, best.dict, dictSize); + COVER_best_destroy(&best); + POOL_free(pool); + return dictSize; + } +} +/**** ended inlining dictBuilder/cover.c ****/ +/**** start inlining dictBuilder/divsufsort.c ****/ +/* + * divsufsort.c for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/*- Compiler specifics -*/ +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wshorten-64-to-32" +#endif + +#if defined(_MSC_VER) +# pragma warning(disable : 4244) +# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */ +#endif + + +/*- Dependencies -*/ +#include +#include +#include + +/**** start inlining divsufsort.h ****/ +/* + * divsufsort.h for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DIVSUFSORT_H +#define _DIVSUFSORT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/*- Prototypes -*/ + +/** + * Constructs the suffix array of a given string. + * @param T [0..n-1] The input string. + * @param SA [0..n-1] The output array of suffixes. + * @param n The length of the given string. + * @param openMP enables OpenMP optimization. 
+ * @return 0 if no error occurred, -1 or -2 otherwise. + */ +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP); + +/** + * Constructs the burrows-wheeler transformed string of a given string. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string. (can be T) + * @param A [0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @param num_indexes The length of secondary indexes array. (can be NULL) + * @param indexes The secondary indexes array. (can be NULL) + * @param openMP enables OpenMP optimization. + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT_H */ +/**** ended inlining divsufsort.h ****/ + +/*- Constants -*/ +#if defined(INLINE) +# undef INLINE +#endif +#if !defined(INLINE) +# define INLINE __inline +#endif +#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) +# undef ALPHABET_SIZE +#endif +#if !defined(ALPHABET_SIZE) +# define ALPHABET_SIZE (256) +#endif +#define BUCKET_A_SIZE (ALPHABET_SIZE) +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) +#if defined(SS_INSERTIONSORT_THRESHOLD) +# if SS_INSERTIONSORT_THRESHOLD < 1 +# undef SS_INSERTIONSORT_THRESHOLD +# define SS_INSERTIONSORT_THRESHOLD (1) +# endif +#else +# define SS_INSERTIONSORT_THRESHOLD (8) +#endif +#if defined(SS_BLOCKSIZE) +# if SS_BLOCKSIZE < 0 +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (0) +# elif 32768 <= SS_BLOCKSIZE +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (32767) +# endif +#else +# define SS_BLOCKSIZE (1024) +#endif +/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ +#if SS_BLOCKSIZE == 0 +# define SS_MISORT_STACKSIZE (96) +#elif SS_BLOCKSIZE <= 4096 +# define SS_MISORT_STACKSIZE (16) +#else +# define SS_MISORT_STACKSIZE (24) +#endif +#define 
SS_SMERGE_STACKSIZE (32) +#define TR_INSERTIONSORT_THRESHOLD (8) +#define TR_STACKSIZE (64) + + +/*- Macros -*/ +#ifndef SWAP +# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) +#endif /* SWAP */ +#ifndef MIN +# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) +#endif /* MIN */ +#ifndef MAX +# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b)) +#endif /* MAX */ +#define STACK_PUSH(_a, _b, _c, _d)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize++].d = (_d);\ + } while(0) +#define STACK_PUSH5(_a, _b, _c, _d, _e)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ + } while(0) +#define STACK_POP(_a, _b, _c, _d)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ + } while(0) +#define STACK_POP5(_a, _b, _c, _d, _e)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ + } while(0) +#define BUCKET_A(_c0) bucket_A[(_c0)] +#if ALPHABET_SIZE == 256 +#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) +#else +#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) +#endif + + +/*- Private Functions -*/ + +static const int lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +int +ss_ilg(int n) { +#if SS_BLOCKSIZE == 0 + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#elif SS_BLOCKSIZE < 256 + return lg_table[n]; +#else + return (n & 0xff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; +#endif +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + +#if SS_BLOCKSIZE != 0 + +static const int sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, +239, 240, 240, 241, 241, 242, 242, 243, 243, 
244, 244, 245, 245, 246, 246, 247, +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 +}; + +static INLINE +int +ss_isqrt(int x) { + int y, e; + + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) ? + ((x & 0xff000000) ? + 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) ? + 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if(e >= 16) { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if(e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } else if(e >= 8) { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } else { + return sqq_table[x] >> 4; + } + + return (x < (y * y)) ? y - 1 : y; +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Compares two suffixes. */ +static INLINE +int +ss_compare(const unsigned char *T, + const int *p1, const int *p2, + int depth) { + const unsigned char *U1, *U2, *U1n, *U2n; + + for(U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) { + } + + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? 
-1 : 0); +} + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) + +/* Insertionsort for small size groups */ +static +void +ss_insertionsort(const unsigned char *T, const int *PA, + int *first, int *last, int depth) { + int *i, *j; + int t; + int r; + + for(i = last - 2; first <= i; --i) { + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); + if(last <= j) { break; } + } + if(r == 0) { *j = ~*j; } + *(j - 1) = t; + } +} + +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +void +ss_fixdown(const unsigned char *Td, const int *PA, + int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = Td[PA[SA[k = j++]]]; + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. 
*/ +static INLINE +int * +ss_median3(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3) { + int *t; + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if(Td[PA[*v2]] > Td[PA[*v3]]) { + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +ss_median5(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return ss_median3(Td, PA, first, middle, last - 1); + } else { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +/* Binary partition for substrings. 
*/ +static INLINE +int * +ss_partition(const int *PA, + int *first, int *last, int depth) { + int *a, *b; + int t; + for(a = first - 1, b = last;;) { + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if(b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; + } + if(first < a) { *first = ~*first; } + return a; +} + +/* Multikey introsort for medium size groups. */ +static +void +ss_mintrosort(const unsigned char *T, const int *PA, + int *first, int *last, + int depth) { +#define STACK_SIZE SS_MISORT_STACKSIZE + struct { int *a, *b, c; int d; } stack[STACK_SIZE]; + const unsigned char *Td; + int *a, *b, *c, *d, *e, *f; + int s, t; + int ssize; + int limit; + int v, x = 0; + + for(ssize = 0, limit = ss_ilg(last - first);;) { + + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { +#if 1 < SS_INSERTIONSORT_THRESHOLD + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } +#endif + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if(limit < 0) { + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { + if((x = Td[PA[*a]]) != v) { + if(1 < (a - first)) { break; } + v = x; + first = a; + } + } + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, a, depth); + } + if((a - first) <= (last - a)) { + if(1 < (a - first)) { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } else { + first = a, limit = -1; + } + } else { + if(1 < (last - a)) { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } else { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + SWAP(*first, *a); + + /* partition */ + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < 
last) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); + + if((a - first) <= (last - c)) { + if((last - c) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } else if((a - first) <= (c - b)) { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } else { + if((a - first) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } else if((last - c) <= (c - b)) { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } else { + limit += 1; + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } +#undef STACK_SIZE +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + + 
+/*---------------------------------------------------------------------------*/ + +#if SS_BLOCKSIZE != 0 + +static INLINE +void +ss_blockswap(int *a, int *b, int n) { + int t; + for(; 0 < n; --n, ++a, ++b) { + t = *a, *a = *b, *b = t; + } +} + +static INLINE +void +ss_rotate(int *first, int *middle, int *last) { + int *a, *b, t; + int l, r; + l = middle - first, r = last - middle; + for(; (0 < l) && (0 < r);) { + if(l == r) { ss_blockswap(first, middle, l); break; } + if(l < r) { + a = last - 1, b = middle - 1; + t = *a; + do { + *a-- = *b, *b-- = *a; + if(b < first) { + *a = t; + last = a; + if((r -= l + 1) <= l) { break; } + a -= 1, b = middle - 1; + t = *a; + } + } while(1); + } else { + a = first, b = middle; + t = *a; + do { + *a++ = *b, *b++ = *a; + if(last <= b) { + *a = t; + first = a + 1; + if((l -= r + 1) <= r) { break; } + a += 1, b = middle; + t = *a; + } + } while(1); + } + } +} + + +/*---------------------------------------------------------------------------*/ + +static +void +ss_inplacemerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int depth) { + const int *p; + int *a, *b; + int len, half; + int q, r; + int x; + + for(;;) { + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } + else { x = 0; p = PA + *(last - 1); } + for(a = first, len = middle - first, half = len >> 1, r = -1; + 0 < len; + len = half, half >>= 1) { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); + if(q < 0) { + a = b + 1; + half -= (len & 1) ^ 1; + } else { + r = q; + } + } + if(a < middle) { + if(r == 0) { *a = ~*a; } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if(first == middle) { break; } + } + --last; + if(x != 0) { while(*--last < 0) { } } + if(middle == last) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Merge-forward with internal buffer. 
*/ +static +void +ss_mergeforward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + int *a, *b, *c, *bufend; + int t; + int r; + + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, middle - first); + + for(t = *(a = first), b = buf, c = middle;;) { + r = ss_compare(T, PA + *b, PA + *c, depth); + if(r < 0) { + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + } else if(r > 0) { + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } else { + *c = ~*c; + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } + } +} + +/* Merge-backward with internal buffer. */ +static +void +ss_mergebackward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + const int *p1, *p2; + int *a, *b, *c, *bufend; + int t; + int r; + int x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, last - middle); + + x = 0; + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } + else { p1 = PA + *bufend; } + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } + else { p2 = PA + *(middle - 1); } + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { + r = ss_compare(T, p1, p2, depth); + if(0 < r) { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = *b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + } else if(r < 0) { + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + 
else { p2 = PA + *c; } + } else { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = ~*b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } + } +} + +/* D&C based merge. */ +static +void +ss_swapmerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int bufsize, int depth) { +#define STACK_SIZE SS_SMERGE_STACKSIZE +#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) +#define MERGE_CHECK(a, b, c)\ + do {\ + if(((c) & 1) ||\ + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ + *(a) = ~*(a);\ + }\ + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ + *(b) = ~*(b);\ + }\ + } while(0) + struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; + int *l, *r, *lm, *rm; + int m, len, half; + int ssize; + int check, next; + + for(check = 0, ssize = 0;;) { + if((last - middle) <= bufsize) { + if((first < middle) && (middle < last)) { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + if((middle - first) <= bufsize) { + if(first < middle) { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; + 0 < len; + len = half, half >>= 1) { + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { + m += half + 1; + half -= (len & 1) ^ 1; + } + } + + if(0 < m) { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, 
middle, m); + l = r = middle, next = 0; + if(rm < last) { + if(*rm < 0) { + *rm = ~*rm; + if(first < lm) { for(; *--l < 0;) { } next |= 4; } + next |= 1; + } else if(first < lm) { + for(; *r < 0; ++r) { } + next |= 2; + } + } + + if((l - first) <= (last - r)) { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } else { + if((next & 2) && (r == middle)) { next ^= 6; } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } else { + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + } + } +#undef STACK_SIZE +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Substring sort */ +static +void +sssort(const unsigned char *T, const int *PA, + int *first, int *last, + int *buf, int bufsize, + int depth, int n, int lastsuffix) { + int *a; +#if SS_BLOCKSIZE != 0 + int *b, *middle, *curbuf; + int j, k, curbufsize, limit; +#endif + int i; + + if(lastsuffix != 0) { ++first; } + +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) { + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } else { + middle = last, limit = 0; + } + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +#endif + curbufsize = last - (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= 
k, k <<= 1, j >>= 1) { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +#endif + for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { + if(i & 1) { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if(limit != 0) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +#endif + ss_inplacemerge(T, PA, first, middle, last, depth); + } +#endif + + if(lastsuffix != 0) { + /* Insert last type B* suffix. */ + int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for(a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +int +tr_ilg(int n) { + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +} + + +/*---------------------------------------------------------------------------*/ + +/* Simple insertionsort for small size groups. 
*/ +static +void +tr_insertionsort(const int *ISAd, int *first, int *last) { + int *a, *b; + int t, r; + + for(a = first + 1; a < last; ++a) { + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); + if(b < first) { break; } + } + if(r == 0) { *b = ~*b; } + *(b + 1) = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_fixdown(const int *ISAd, int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = ISAd[SA[k = j++]]; + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +tr_heapsort(const int *ISAd, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + tr_fixdown(ISAd, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { + int *t; + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } + if(ISAd[*v2] > ISAd[*v3]) { + if(ISAd[*v1] > ISAd[*v3]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. 
*/ +static INLINE +int * +tr_median5(const int *ISAd, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } + if(ISAd[*v3] > ISAd[*v4]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +tr_pivot(const int *ISAd, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return tr_median3(ISAd, first, middle, last - 1); + } else { + t >>= 2; + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(ISAd, middle - t, middle, middle + t); + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(ISAd, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +typedef struct _trbudget_t trbudget_t; +struct _trbudget_t { + int chance; + int remain; + int incval; + int count; +}; + +static INLINE +void +trbudget_init(trbudget_t *budget, int chance, int incval) { + budget->chance = chance; + budget->remain = budget->incval = incval; +} + +static INLINE +int +trbudget_check(trbudget_t *budget, int size) { + if(size <= budget->remain) { budget->remain -= size; return 1; } + if(budget->chance == 0) { budget->count += size; return 0; } + budget->remain += budget->incval - size; + budget->chance -= 1; + return 1; +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_partition(const int *ISAd, + int *first, int *middle, int *last, + int **pa, int **pb, int v) { + int *a, *b, *c, *d, *e, *f; + int t, s; + int x = 0; + + for(b = middle - 1; 
(++b < last) && ((x = ISAd[*b]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} + +static +void +tr_copy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + /* sort suffixes of middle partition + by using sorted order of suffixes of left and right partition. 
*/ + int *c, *d, *e; + int s, v; + + v = b - SA - 1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + ISA[s] = d - SA; + } + } + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + ISA[s] = d - SA; + } + } +} + +static +void +tr_partialcopy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + int *c, *d, *e; + int s, v; + int rank, lastrank, newrank = -1; + + v = b - SA - 1; + lastrank = -1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } + + lastrank = -1; + for(e = d; first <= e; --e) { + rank = ISA[*e]; + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } + if(newrank != rank) { ISA[*e] = newrank; } + } + + lastrank = -1; + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } +} + +static +void +tr_introsort(int *ISA, const int *ISAd, + int *SA, int *first, int *last, + trbudget_t *budget) { +#define STACK_SIZE TR_STACKSIZE + struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; + int *a, *b, *c; + int t; + int v, x = 0; + int incr = ISAd - ISA; + int limit, next; + int ssize, trlink = -1; + + for(ssize = 0, limit = tr_ilg(last - first);;) { + + if(limit < 0) { + if(limit == -1) { + /* tandem repeat partition */ + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); + + /* update ranks */ + if(a < last) { + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + } + if(b < last) { + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } + } + + /* push */ + if(1 < (b - a)) { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, 
first, last, -2, trlink); + trlink = ssize - 2; + } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); + last = a, limit = tr_ilg(a - first); + } else if(1 < (last - b)) { + first = b, limit = tr_ilg(last - b); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); + first = b, limit = tr_ilg(last - b); + } else if(1 < (a - first)) { + last = a, limit = tr_ilg(a - first); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else if(limit == -2) { + /* tandem repeat copy */ + a = stack[--ssize].b, b = stack[ssize].c; + if(stack[ssize].d == 0) { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } else { + /* sorted partition */ + if(0 <= *first) { + a = first; + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); + first = a; + } + if(first < last) { + a = first; do { *a = ~*a; } while(*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? 
tr_ilg(a - first + 1) : -1; + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } + + /* push */ + if(trbudget_check(budget, a - first)) { + if((a - first) <= (last - a)) { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } else { + if(1 < (last - a)) { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } else { + ISAd += incr, last = a, limit = next; + } + } + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + if(1 < (last - a)) { + first = a, limit = -3; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + + if(limit-- == 0) { + tr_heapsort(ISAd, first, last - first); + for(a = last - 1; first < a; a = b) { + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } + } + limit = -3; + continue; + } + + /* choose pivot */ + a = tr_pivot(ISAd, first, last); + SWAP(*first, *a); + v = ISAd[*first]; + + /* partition */ + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if((last - first) != (b - a)) { + next = (ISA[*a] != v) ? 
tr_ilg(b - a) : -1; + + /* update ranks */ + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } + + /* push */ + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { + if((a - first) <= (last - b)) { + if((last - b) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((a - first) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + if((a - first) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((last - b) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } else { + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } 
else if(1 < (last - b)) { + first = b; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + last = a; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } + } else { + if(trbudget_check(budget, last - first)) { + limit = tr_ilg(last - first), ISAd += incr; + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +#undef STACK_SIZE +} + + + +/*---------------------------------------------------------------------------*/ + +/* Tandem repeat sort */ +static +void +trsort(int *ISA, int *SA, int n, int depth) { + int *ISAd; + int *first, *last; + trbudget_t budget; + int t, skip, unsorted; + + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { + first = SA; + skip = 0; + unsorted = 0; + do { + if((t = *first) < 0) { first -= t; skip += t; } + else { + if(skip != 0) { *(first + skip) = skip; skip = 0; } + last = SA + ISA[t] + 1; + if(1 < (last - first)) { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if(budget.count != 0) { unsorted += budget.count; } + else { skip = first - last; } + } else if((last - first) == 1) { + skip = -1; + } + first = last; + } + } while(first < (SA + n)); + if(skip != 0) { *(first + skip) = skip; } + if(unsorted == 0) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Sorts suffixes of type B*. */ +static +int +sort_typeBstar(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int openMP) { + int *PAb, *ISAb, *buf; +#ifdef LIBBSC_OPENMP + int *curbuf; + int l; +#endif + int i, j, k, t, m, bufsize; + int c0, c1; +#ifdef LIBBSC_OPENMP + int d0, d1; +#endif + (void)openMP; + + /* Initialize bucket arrays. 
*/ + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. */ + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + /* type A suffix. */ + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); + if(0 <= i) { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; +/* +note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. +*/ + + /* Calculate the index of start/end point of each bucket. */ + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } + + if(0 < m) { + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for(i = m - 2; 0 <= i; --i) { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. 
*/ +#ifdef LIBBSC_OPENMP + if (openMP) + { + buf = SA + m; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1) + { + bufsize = (n - (2 * m)) / omp_get_num_threads(); + curbuf = buf + omp_get_thread_num() * bufsize; + k = 0; + for(;;) { + #pragma omp critical(sssort_lock) + { + if(0 < (l = j)) { + d0 = c0, d1 = c1; + do { + k = BUCKET_BSTAR(d0, d1); + if(--d1 <= d0) { + d1 = ALPHABET_SIZE - 1; + if(--d0 < 0) { break; } + } + } while(((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if(l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); + } + } + } + else + { + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } + } +#else + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } +#endif + + /* Compute ranks of type B* substrings. */ + for(i = m - 1; 0 <= i; --i) { + if(0 <= SA[i]) { + j = i; + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if(i <= 0) { break; } + } + j = i; + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); + ISAb[SA[i]] = j; + } + + /* Construct the inverse suffix array of type B* suffixes using trsort. */ + trsort(ISAb, SA, m, 1); + + /* Set the sorted order of type B* suffixes. 
*/ + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if(0 <= i) { + t = i; + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } + + /* Calculate the index of start/end point of each bucket. */ + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { + i = BUCKET_A(c0 + 1) - 1; + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ + + /* Move all type B* suffixes to the correct position. */ + for(i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ + } + } + + return m; +} + +/* Constructs the suffix array by using the sorted order of type B* suffixes. */ +static +void +construct_SA(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + + /* Construct the suffix array by using + the sorted order of type B suffixes. 
*/ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else { + assert(s < 0); + *i = ~s; + } + } +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. 
*/ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT_indexes(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m, + unsigned char * num_indexes, int * indexes) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + int mod = n / 8; + { + mod |= mod >> 1; mod |= mod >> 2; + mod |= mod >> 4; mod |= mod >> 8; + mod |= mod >> 16; mod >>= 1; + + *num_indexes = (unsigned char)((n - 1) / (mod + 1)); + } + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA; + + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. 
*/ + k = SA + BUCKET_A(c2 = T[n - 1]); + if (T[n - 2] < c2) { + if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[n - 2]); + } + else { + *k++ = n - 1; + } + + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA; + + c0 = T[--s]; + *i = c0; + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + if((0 < s) && (T[s - 1] < c0)) { + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[s - 1]); + } else + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP) { + int *bucket_A, *bucket_B; + int m; + int err = 0; + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Suffixsort. */ + if((bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } else { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; +} + +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) { + int *B; + int *bucket_A, *bucket_B; + int m, pidx, i; + + /* Check arguments. 
*/ + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + + if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Burrows-Wheeler Transform. */ + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP); + + if (num_indexes == NULL || indexes == NULL) { + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + } else { + pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes); + } + + /* Copy to output string. */ + U[0] = T[n - 1]; + for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; } + for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; } + pidx += 1; + } else { + pidx = -2; + } + + free(bucket_B); + free(bucket_A); + if(A == NULL) { free(B); } + + return pidx; +} +/**** ended inlining dictBuilder/divsufsort.c ****/ +/**** start inlining dictBuilder/fastcover.c ****/ +/* + * Copyright (c) 2018-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/*-************************************* +* Dependencies +***************************************/ +#include /* fprintf */ +#include /* malloc, free, qsort */ +#include /* memset */ +#include /* clock */ + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/pool.h ****/ +/**** skipping file: ../common/threading.h ****/ +/**** skipping file: cover.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: ../compress/zstd_compress_internal.h ****/ +#ifndef ZDICT_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY +#endif +/**** skipping file: zdict.h ****/ + + +/*-************************************* +* Constants +***************************************/ +#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) +#define FASTCOVER_MAX_F 31 +#define FASTCOVER_MAX_ACCEL 10 +#define FASTCOVER_DEFAULT_SPLITPOINT 0.75 +#define DEFAULT_F 20 +#define DEFAULT_ACCEL 1 + + +/*-************************************* +* Console display +***************************************/ +#ifndef LOCALDISPLAYLEVEL +static int g_displayLevel = 2; +#endif +#undef DISPLAY +#define DISPLAY(...) \ + { \ + fprintf(stderr, __VA_ARGS__); \ + fflush(stderr); \ + } +#undef LOCALDISPLAYLEVEL +#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + DISPLAY(__VA_ARGS__); \ + } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ +#undef DISPLAYLEVEL +#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) + +#ifndef LOCALDISPLAYUPDATE +static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; +#endif +#undef LOCALDISPLAYUPDATE +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \ + g_time = clock(); \ + DISPLAY(__VA_ARGS__); \ + } \ + } +#undef DISPLAYUPDATE +#define DISPLAYUPDATE(l, ...) 
LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) + + +/*-************************************* +* Hash Functions +***************************************/ +/** + * Hash the d-byte value pointed to by p and mod 2^f into the frequency vector + */ +static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) { + if (d == 6) { + return ZSTD_hash6Ptr(p, f); + } + return ZSTD_hash8Ptr(p, f); +} + + +/*-************************************* +* Acceleration +***************************************/ +typedef struct { + unsigned finalize; /* Percentage of training samples used for ZDICT_finalizeDictionary */ + unsigned skip; /* Number of dmer skipped between each dmer counted in computeFrequency */ +} FASTCOVER_accel_t; + + +static const FASTCOVER_accel_t FASTCOVER_defaultAccelParameters[FASTCOVER_MAX_ACCEL+1] = { + { 100, 0 }, /* accel = 0, should not happen because accel = 0 defaults to accel = 1 */ + { 100, 0 }, /* accel = 1 */ + { 50, 1 }, /* accel = 2 */ + { 34, 2 }, /* accel = 3 */ + { 25, 3 }, /* accel = 4 */ + { 20, 4 }, /* accel = 5 */ + { 17, 5 }, /* accel = 6 */ + { 14, 6 }, /* accel = 7 */ + { 13, 7 }, /* accel = 8 */ + { 11, 8 }, /* accel = 9 */ + { 10, 9 }, /* accel = 10 */ +}; + + +/*-************************************* +* Context +***************************************/ +typedef struct { + const BYTE *samples; + size_t *offsets; + const size_t *samplesSizes; + size_t nbSamples; + size_t nbTrainSamples; + size_t nbTestSamples; + size_t nbDmers; + U32 *freqs; + unsigned d; + unsigned f; + FASTCOVER_accel_t accelParams; +} FASTCOVER_ctx_t; + + +/*-************************************* +* Helper functions +***************************************/ +/** + * Selects the best segment in an epoch. + * Segments of are scored according to the function: + * + * Let F(d) be the frequency of all dmers with hash value d. + * Let S_i be hash value of the dmer at position i of segment S which has length k. + * + * Score(S) = F(S_1) + F(S_2) + ... 
+ F(S_{k-d+1}) + * + * Once the dmer with hash value d is in the dictionary we set F(d) = 0. + */ +static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, + U32 *freqs, U32 begin, U32 end, + ZDICT_cover_params_t parameters, + U16* segmentFreqs) { + /* Constants */ + const U32 k = parameters.k; + const U32 d = parameters.d; + const U32 f = ctx->f; + const U32 dmersInK = k - d + 1; + + /* Try each segment (activeSegment) and save the best (bestSegment) */ + COVER_segment_t bestSegment = {0, 0, 0}; + COVER_segment_t activeSegment; + + /* Reset the activeDmers in the segment */ + /* The activeSegment starts at the beginning of the epoch. */ + activeSegment.begin = begin; + activeSegment.end = begin; + activeSegment.score = 0; + + /* Slide the activeSegment through the whole epoch. + * Save the best segment in bestSegment. + */ + while (activeSegment.end < end) { + /* Get hash value of current dmer */ + const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d); + + /* Add frequency of this index to score if this is the first occurrence of index in active segment */ + if (segmentFreqs[idx] == 0) { + activeSegment.score += freqs[idx]; + } + /* Increment end of segment and segmentFreqs*/ + activeSegment.end += 1; + segmentFreqs[idx] += 1; + /* If the window is now too large, drop the first position */ + if (activeSegment.end - activeSegment.begin == dmersInK + 1) { + /* Get hash value of the dmer to be eliminated from active segment */ + const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d); + segmentFreqs[delIndex] -= 1; + /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */ + if (segmentFreqs[delIndex] == 0) { + activeSegment.score -= freqs[delIndex]; + } + /* Increment start of segment */ + activeSegment.begin += 1; + } + + /* If this segment is the best so far save it */ + if (activeSegment.score > bestSegment.score) { + 
bestSegment = activeSegment; + } + } + + /* Zero out rest of segmentFreqs array */ + while (activeSegment.begin < end) { + const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d); + segmentFreqs[delIndex] -= 1; + activeSegment.begin += 1; + } + + { + /* Zero the frequency of hash value of each dmer covered by the chosen segment. */ + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, f, d); + freqs[i] = 0; + } + } + + return bestSegment; +} + + +static int FASTCOVER_checkParameters(ZDICT_cover_params_t parameters, + size_t maxDictSize, unsigned f, + unsigned accel) { + /* k, d, and f are required parameters */ + if (parameters.d == 0 || parameters.k == 0) { + return 0; + } + /* d has to be 6 or 8 */ + if (parameters.d != 6 && parameters.d != 8) { + return 0; + } + /* k <= maxDictSize */ + if (parameters.k > maxDictSize) { + return 0; + } + /* d <= k */ + if (parameters.d > parameters.k) { + return 0; + } + /* 0 < f <= FASTCOVER_MAX_F*/ + if (f > FASTCOVER_MAX_F || f == 0) { + return 0; + } + /* 0 < splitPoint <= 1 */ + if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) { + return 0; + } + /* 0 < accel <= 10 */ + if (accel > 10 || accel == 0) { + return 0; + } + return 1; +} + + +/** + * Clean up a context initialized with `FASTCOVER_ctx_init()`. 
+ */ +static void +FASTCOVER_ctx_destroy(FASTCOVER_ctx_t* ctx) +{ + if (!ctx) return; + + free(ctx->freqs); + ctx->freqs = NULL; + + free(ctx->offsets); + ctx->offsets = NULL; +} + + +/** + * Calculate for frequency of hash value of each dmer in ctx->samples + */ +static void +FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx) +{ + const unsigned f = ctx->f; + const unsigned d = ctx->d; + const unsigned skip = ctx->accelParams.skip; + const unsigned readLength = MAX(d, 8); + size_t i; + assert(ctx->nbTrainSamples >= 5); + assert(ctx->nbTrainSamples <= ctx->nbSamples); + for (i = 0; i < ctx->nbTrainSamples; i++) { + size_t start = ctx->offsets[i]; /* start of current dmer */ + size_t const currSampleEnd = ctx->offsets[i+1]; + while (start + readLength <= currSampleEnd) { + const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, d); + freqs[dmerIndex]++; + start = start + skip + 1; + } + } +} + + +/** + * Prepare a context for dictionary building. + * The context is only dependent on the parameter `d` and can used multiple + * times. + * Returns 0 on success or error code on error. + * The context must be destroyed with `FASTCOVER_ctx_destroy()`. + */ +static size_t +FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + unsigned d, double splitPoint, unsigned f, + FASTCOVER_accel_t accelParams) +{ + const BYTE* const samples = (const BYTE*)samplesBuffer; + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); + /* Split samples into testing and training sets */ + const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; + const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; + const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; + const size_t testSamplesSize = splitPoint < 1.0 ? 
COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; + + /* Checks */ + if (totalSamplesSize < MAX(d, sizeof(U64)) || + totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", + (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); + return ERROR(srcSize_wrong); + } + + /* Check if there are at least 5 training samples */ + if (nbTrainSamples < 5) { + DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples); + return ERROR(srcSize_wrong); + } + + /* Check if there's testing sample */ + if (nbTestSamples < 1) { + DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples); + return ERROR(srcSize_wrong); + } + + /* Zero the context */ + memset(ctx, 0, sizeof(*ctx)); + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, + (unsigned)trainingSamplesSize); + DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, + (unsigned)testSamplesSize); + + ctx->samples = samples; + ctx->samplesSizes = samplesSizes; + ctx->nbSamples = nbSamples; + ctx->nbTrainSamples = nbTrainSamples; + ctx->nbTestSamples = nbTestSamples; + ctx->nbDmers = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; + ctx->d = d; + ctx->f = f; + ctx->accelParams = accelParams; + + /* The offsets of each file */ + ctx->offsets = (size_t*)calloc((nbSamples + 1), sizeof(size_t)); + if (ctx->offsets == NULL) { + DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n"); + FASTCOVER_ctx_destroy(ctx); + return ERROR(memory_allocation); + } + + /* Fill offsets from the samplesSizes */ + { U32 i; + ctx->offsets[0] = 0; + assert(nbSamples >= 5); + for (i = 1; i <= nbSamples; ++i) { + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; + } + } + + /* Initialize frequency array of size 2^f */ + ctx->freqs = (U32*)calloc(((U64)1 << f), sizeof(U32)); + if (ctx->freqs == NULL) { + 
DISPLAYLEVEL(1, "Failed to allocate frequency table \n"); + FASTCOVER_ctx_destroy(ctx); + return ERROR(memory_allocation); + } + + DISPLAYLEVEL(2, "Computing frequencies\n"); + FASTCOVER_computeFrequency(ctx->freqs, ctx); + + return 0; +} + + +/** + * Given the prepared context build the dictionary. + */ +static size_t +FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx, + U32* freqs, + void* dictBuffer, size_t dictBufferCapacity, + ZDICT_cover_params_t parameters, + U16* segmentFreqs) +{ + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + /* Divide the data into epochs. We will select one segment from each epoch. */ + const COVER_epoch_info_t epochs = COVER_computeEpochs( + (U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1); + const size_t maxZeroScoreRun = 10; + size_t zeroScoreRun = 0; + size_t epoch; + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", + (U32)epochs.num, (U32)epochs.size); + /* Loop through the epochs until there are no more segments or the dictionary + * is full. + */ + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { + const U32 epochBegin = (U32)(epoch * epochs.size); + const U32 epochEnd = epochBegin + epochs.size; + size_t segmentSize; + /* Select a segment */ + COVER_segment_t segment = FASTCOVER_selectSegment( + ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs); + + /* If the segment covers no dmers, then we are out of content. + * There may be new content in other epochs, for continue for some time. + */ + if (segment.score == 0) { + if (++zeroScoreRun >= maxZeroScoreRun) { + break; + } + continue; + } + zeroScoreRun = 0; + + /* Trim the segment if necessary and if it is too small then we are done */ + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); + if (segmentSize < parameters.d) { + break; + } + + /* We fill the dictionary from the back to allow the best segments to be + * referenced with the smallest offsets. 
+ */ + tail -= segmentSize; + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); + DISPLAYUPDATE( + 2, "\r%u%% ", + (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); + } + DISPLAYLEVEL(2, "\r%79s\r", ""); + return tail; +} + +/** + * Parameters for FASTCOVER_tryParameters(). + */ +typedef struct FASTCOVER_tryParameters_data_s { + const FASTCOVER_ctx_t* ctx; + COVER_best_t* best; + size_t dictBufferCapacity; + ZDICT_cover_params_t parameters; +} FASTCOVER_tryParameters_data_t; + + +/** + * Tries a set of parameters and updates the COVER_best_t with the results. + * This function is thread safe if zstd is compiled with multithreaded support. + * It takes its parameters as an *OWNING* opaque pointer to support threading. + */ +static void FASTCOVER_tryParameters(void* opaque) +{ + /* Save parameters as local variables */ + FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque; + const FASTCOVER_ctx_t *const ctx = data->ctx; + const ZDICT_cover_params_t parameters = data->parameters; + size_t dictBufferCapacity = data->dictBufferCapacity; + size_t totalCompressedSize = ERROR(GENERIC); + /* Initialize array to keep track of frequency of dmer within activeSegment */ + U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16)); + /* Allocate space for hash table, dict, and freqs */ + BYTE *const dict = (BYTE*)malloc(dictBufferCapacity); + COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); + U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); + if (!segmentFreqs || !dict || !freqs) { + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); + goto _cleanup; + } + /* Copy the frequencies because we need to modify them */ + memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32)); + /* Build the dictionary */ + { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity, + parameters, segmentFreqs); + + const unsigned 
nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100); + selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); + goto _cleanup; + } + } +_cleanup: + free(dict); + COVER_best_finish(data->best, parameters, selection); + free(data); + free(segmentFreqs); + COVER_dictSelectionFree(selection); + free(freqs); +} + + +static void +FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams, + ZDICT_cover_params_t* coverParams) +{ + coverParams->k = fastCoverParams.k; + coverParams->d = fastCoverParams.d; + coverParams->steps = fastCoverParams.steps; + coverParams->nbThreads = fastCoverParams.nbThreads; + coverParams->splitPoint = fastCoverParams.splitPoint; + coverParams->zParams = fastCoverParams.zParams; + coverParams->shrinkDict = fastCoverParams.shrinkDict; +} + + +static void +FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams, + ZDICT_fastCover_params_t* fastCoverParams, + unsigned f, unsigned accel) +{ + fastCoverParams->k = coverParams.k; + fastCoverParams->d = coverParams.d; + fastCoverParams->steps = coverParams.steps; + fastCoverParams->nbThreads = coverParams.nbThreads; + fastCoverParams->splitPoint = coverParams.splitPoint; + fastCoverParams->f = f; + fastCoverParams->accel = accel; + fastCoverParams->zParams = coverParams.zParams; + fastCoverParams->shrinkDict = coverParams.shrinkDict; +} + + +ZDICTLIB_API size_t +ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t parameters) +{ + BYTE* const dict = (BYTE*)dictBuffer; + FASTCOVER_ctx_t ctx; + ZDICT_cover_params_t coverParams; + FASTCOVER_accel_t 
accelParams; + /* Initialize global data */ + g_displayLevel = parameters.zParams.notificationLevel; + /* Assign splitPoint and f if not provided */ + parameters.splitPoint = 1.0; + parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f; + parameters.accel = parameters.accel == 0 ? DEFAULT_ACCEL : parameters.accel; + /* Convert to cover parameter */ + memset(&coverParams, 0 , sizeof(coverParams)); + FASTCOVER_convertToCoverParams(parameters, &coverParams); + /* Checks */ + if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, + parameters.accel)) { + DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + /* Assign corresponding FASTCOVER_accel_t to accelParams*/ + accelParams = FASTCOVER_defaultAccelParameters[parameters.accel]; + /* Initialize context */ + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + coverParams.d, parameters.splitPoint, parameters.f, + accelParams); + if (ZSTD_isError(initVal)) { + DISPLAYLEVEL(1, "Failed to initialize context\n"); + return initVal; + } + } + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel); + /* Build the dictionary */ + DISPLAYLEVEL(2, "Building dictionary\n"); + { + /* Initialize array to keep track of frequency of dmer within activeSegment */ + U16* segmentFreqs = (U16 *)calloc(((U64)1 << parameters.f), sizeof(U16)); + const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer, + dictBufferCapacity, coverParams, segmentFreqs); + const unsigned nbFinalizeSamples = (unsigned)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100); + const size_t dictionarySize = 
ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams); + if (!ZSTD_isError(dictionarySize)) { + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", + (unsigned)dictionarySize); + } + FASTCOVER_ctx_destroy(&ctx); + free(segmentFreqs); + return dictionarySize; + } +} + + +ZDICTLIB_API size_t +ZDICT_optimizeTrainFromBuffer_fastCover( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t* parameters) +{ + ZDICT_cover_params_t coverParams; + FASTCOVER_accel_t accelParams; + /* constants */ + const unsigned nbThreads = parameters->nbThreads; + const double splitPoint = + parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint; + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; + const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps; + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); + const unsigned kIterations = + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); + const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f; + const unsigned accel = parameters->accel == 0 ? 
DEFAULT_ACCEL : parameters->accel; + const unsigned shrinkDict = 0; + /* Local variables */ + const int displayLevel = parameters->zParams.notificationLevel; + unsigned iteration = 1; + unsigned d; + unsigned k; + COVER_best_t best; + POOL_ctx *pool = NULL; + int warned = 0; + /* Checks */ + if (splitPoint <= 0 || splitPoint > 1) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); + return ERROR(parameter_outOfBound); + } + if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n"); + return ERROR(parameter_outOfBound); + } + if (kMinK < kMaxD || kMaxK < kMinK) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + if (nbThreads > 1) { + pool = POOL_create(nbThreads, 1); + if (!pool) { + return ERROR(memory_allocation); + } + } + /* Initialization */ + COVER_best_init(&best); + memset(&coverParams, 0 , sizeof(coverParams)); + FASTCOVER_convertToCoverParams(*parameters, &coverParams); + accelParams = FASTCOVER_defaultAccelParameters[accel]; + /* Turn down global display level to clean up display at level 2 and below */ + g_displayLevel = displayLevel == 0 ? 
0 : displayLevel - 1; + /* Loop through d first because each new value needs a new context */ + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", + kIterations); + for (d = kMinD; d <= kMaxD; d += 2) { + /* Initialize the context for this value of d */ + FASTCOVER_ctx_t ctx; + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } + } + if (!warned) { + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel); + warned = 1; + } + /* Loop through k reusing the same context */ + for (k = kMinK; k <= kMaxK; k += kStepSize) { + /* Prepare the arguments */ + FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc( + sizeof(FASTCOVER_tryParameters_data_t)); + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); + if (!data) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); + COVER_best_destroy(&best); + FASTCOVER_ctx_destroy(&ctx); + POOL_free(pool); + return ERROR(memory_allocation); + } + data->ctx = &ctx; + data->best = &best; + data->dictBufferCapacity = dictBufferCapacity; + data->parameters = coverParams; + data->parameters.k = k; + data->parameters.d = d; + data->parameters.splitPoint = splitPoint; + data->parameters.steps = kSteps; + data->parameters.shrinkDict = shrinkDict; + data->parameters.zParams.notificationLevel = g_displayLevel; + /* Check the parameters */ + if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity, + data->ctx->f, accel)) { + DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); + free(data); + continue; + } + /* Call the function and pass ownership of data to it */ + COVER_best_start(&best); + if (pool) { + POOL_add(pool, &FASTCOVER_tryParameters, 
data); + } else { + FASTCOVER_tryParameters(data); + } + /* Print status */ + LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", + (unsigned)((iteration * 100) / kIterations)); + ++iteration; + } + COVER_best_wait(&best); + FASTCOVER_ctx_destroy(&ctx); + } + LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); + /* Fill the output buffer and parameters with output of the best parameters */ + { + const size_t dictSize = best.dictSize; + if (ZSTD_isError(best.compressedSize)) { + const size_t compressedSize = best.compressedSize; + COVER_best_destroy(&best); + POOL_free(pool); + return compressedSize; + } + FASTCOVER_convertToFastCoverParams(best.parameters, parameters, f, accel); + memcpy(dictBuffer, best.dict, dictSize); + COVER_best_destroy(&best); + POOL_free(pool); + return dictSize; + } + +} +/**** ended inlining dictBuilder/fastcover.c ****/ +/**** start inlining dictBuilder/zdict.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/*-************************************** +* Tuning parameters +****************************************/ +#define MINRATIO 4 /* minimum nb of apparition to be selected in dictionary */ +#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20) +#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO) + + +/*-************************************** +* Compiler Options +****************************************/ +/* Unix Large Files support (>4GB) */ +#define _FILE_OFFSET_BITS 64 +#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */ +# ifndef _LARGEFILE_SOURCE +# define _LARGEFILE_SOURCE +# endif +#elif ! 
defined(__LP64__) /* No point defining Large file for 64 bit */ +# ifndef _LARGEFILE64_SOURCE +# define _LARGEFILE64_SOURCE +# endif +#endif + + +/*-************************************* +* Dependencies +***************************************/ +#include <stdlib.h> /* malloc, free */ +#include <string.h> /* memset */ +#include <stdio.h> /* fprintf, fopen, ftello64 */ +#include <time.h> /* clock */ + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: ../common/xxhash.h ****/ +/**** skipping file: divsufsort.h ****/ +#ifndef ZDICT_STATIC_LINKING_ONLY +# define ZDICT_STATIC_LINKING_ONLY +#endif +/**** skipping file: zdict.h ****/ +/**** skipping file: ../compress/zstd_compress_internal.h ****/ + + +/*-************************************* +* Constants +***************************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define DICTLISTSIZE_DEFAULT 10000 + +#define NOISELENGTH 32 + +static const U32 g_selectivity_default = 9; + + +/*-************************************* +* Console display +***************************************/ +#undef DISPLAY +#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } +#undef DISPLAYLEVEL +#define DISPLAYLEVEL(l, ...) 
if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ + +static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; } + +static void ZDICT_printHex(const void* ptr, size_t length) +{ + const BYTE* const b = (const BYTE*)ptr; + size_t u; + for (u=0; u<length; u++) { + BYTE c = b[u]; + if (c<32 || c>126) c = '.'; /* non-printable char */ + DISPLAY("%c", c); + } +} + + +/*-******************************************************** +* Helper functions +**********************************************************/ +unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); } + +const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } + +unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize) +{ + if (dictSize < 8) return 0; + if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0; + return MEM_readLE32((const char*)dictBuffer + 4); +} + +size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) +{ + size_t headerSize; + if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted); + + { ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t)); + U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE); + if (!bs || !wksp) { + headerSize = ERROR(memory_allocation); + } else { + ZSTD_reset_compressedBlockState(bs); + headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize); + } + + free(bs); + free(wksp); + } + + return headerSize; +} + +/*-******************************************************** +* Dictionary training functions +**********************************************************/ +static unsigned ZDICT_NbCommonBytes (size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return 
(__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +/*! ZDICT_count() : + Count the nb of common bytes between 2 pointers. + Note : this function presumes end of buffer followed by noisy guard band. 
+*/ +static size_t ZDICT_count(const void* pIn, const void* pMatch) +{ + const char* const pStart = (const char*)pIn; + for (;;) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { + pIn = (const char*)pIn+sizeof(size_t); + pMatch = (const char*)pMatch+sizeof(size_t); + continue; + } + pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff); + return (size_t)((const char*)pIn - pStart); + } +} + + +typedef struct { + U32 pos; + U32 length; + U32 savings; +} dictItem; + +static void ZDICT_initDictItem(dictItem* d) +{ + d->pos = 1; + d->length = 0; + d->savings = (U32)(-1); +} + + +#define LLIMIT 64 /* heuristic determined experimentally */ +#define MINMATCHLENGTH 7 /* heuristic determined experimentally */ +static dictItem ZDICT_analyzePos( + BYTE* doneMarks, + const int* suffix, U32 start, + const void* buffer, U32 minRatio, U32 notificationLevel) +{ + U32 lengthList[LLIMIT] = {0}; + U32 cumulLength[LLIMIT] = {0}; + U32 savings[LLIMIT] = {0}; + const BYTE* b = (const BYTE*)buffer; + size_t maxLength = LLIMIT; + size_t pos = suffix[start]; + U32 end = start; + dictItem solution; + + /* init */ + memset(&solution, 0, sizeof(solution)); + doneMarks[pos] = 1; + + /* trivial repetition cases */ + if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2)) + ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3)) + ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) { + /* skip and mark segment */ + U16 const pattern16 = MEM_read16(b+pos+4); + U32 u, patternEnd = 6; + while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ; + if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++; + for (u=1; u<patternEnd; u++) + doneMarks[pos+u] = 1; + return solution; + } + + /* look forward */ + { size_t length; + do { + end++; + length = ZDICT_count(b + pos, b + suffix[end]); + } while (length >= MINMATCHLENGTH); + } + + /* look backward */ + { size_t length; + do { + length = ZDICT_count(b + pos, b + *(suffix+start-1)); + if (length >=MINMATCHLENGTH) start--; + } while(length >= MINMATCHLENGTH); + } + + /* exit if not found a minimum nb of repetitions */ + if (end-start < minRatio) { + U32 idx; + for(idx=start; idx<end; idx++) + doneMarks[suffix[idx]] = 1; + return solution; + } + + { int i; + U32 mml; + U32 refinedStart = start; + U32 refinedEnd = end; + + DISPLAYLEVEL(4, "\n"); + DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (unsigned)(end-start), 
MINMATCHLENGTH, (unsigned)pos); + DISPLAYLEVEL(4, "\n"); + + for (mml = MINMATCHLENGTH ; ; mml++) { + BYTE currentChar = 0; + U32 currentCount = 0; + U32 currentID = refinedStart; + U32 id; + U32 selectedCount = 0; + U32 selectedID = currentID; + for (id =refinedStart; id < refinedEnd; id++) { + if (b[suffix[id] + mml] != currentChar) { + if (currentCount > selectedCount) { + selectedCount = currentCount; + selectedID = currentID; + } + currentID = id; + currentChar = b[ suffix[id] + mml]; + currentCount = 0; + } + currentCount ++; + } + if (currentCount > selectedCount) { /* for last */ + selectedCount = currentCount; + selectedID = currentID; + } + + if (selectedCount < minRatio) + break; + refinedStart = selectedID; + refinedEnd = refinedStart + selectedCount; + } + + /* evaluate gain based on new dict */ + start = refinedStart; + pos = suffix[refinedStart]; + end = start; + memset(lengthList, 0, sizeof(lengthList)); + + /* look forward */ + { size_t length; + do { + end++; + length = ZDICT_count(b + pos, b + suffix[end]); + if (length >= LLIMIT) length = LLIMIT-1; + lengthList[length]++; + } while (length >=MINMATCHLENGTH); + } + + /* look backward */ + { size_t length = MINMATCHLENGTH; + while ((length >= MINMATCHLENGTH) & (start > 0)) { + length = ZDICT_count(b + pos, b + suffix[start - 1]); + if (length >= LLIMIT) length = LLIMIT - 1; + lengthList[length]++; + if (length >= MINMATCHLENGTH) start--; + } + } + + /* largest useful length */ + memset(cumulLength, 0, sizeof(cumulLength)); + cumulLength[maxLength-1] = lengthList[maxLength-1]; + for (i=(int)(maxLength-2); i>=0; i--) + cumulLength[i] = cumulLength[i+1] + lengthList[i]; + + for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break; + maxLength = i; + + /* reduce maxLength in case of final into repetitive data */ + { U32 l = (U32)maxLength; + BYTE const c = b[pos + maxLength-1]; + while (b[pos+l-2]==c) l--; + maxLength = l; + } + if (maxLength < MINMATCHLENGTH) return solution; /* 
skip : no long-enough solution */ + + /* calculate savings */ + savings[5] = 0; + for (i=MINMATCHLENGTH; i<=(int)maxLength; i++) + savings[i] = savings[i-1] + (lengthList[i] * (i-3)); + + DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n", + (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength); + + solution.pos = (U32)pos; + solution.length = (U32)maxLength; + solution.savings = savings[maxLength]; + + /* mark positions done */ + { U32 id; + for (id=start; id solution.length) length = solution.length; + } + pEnd = (U32)(testedPos + length); + for (p=testedPos; ppos; + const U32 eltEnd = elt.pos + elt.length; + const char* const buf = (const char*) buffer; + + /* tail overlap */ + U32 u; for (u=1; u elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */ + /* append */ + U32 const addedLength = table[u].pos - elt.pos; + table[u].length += addedLength; + table[u].pos = elt.pos; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + table[u].savings += elt.length / 8; /* rough approx bonus */ + elt = table[u]; + /* sort : improve rank */ + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } } + + /* front overlap */ + for (u=1; u= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */ + /* append */ + int const addedLength = (int)eltEnd - (table[u].pos + table[u].length); + table[u].savings += elt.length / 8; /* rough approx bonus */ + if (addedLength > 0) { /* otherwise, elt fully included into existing */ + table[u].length += addedLength; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + } + /* sort : improve rank */ + elt = table[u]; + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } + + if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) { + 
if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) { + size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 ); + table[u].pos = elt.pos; + table[u].savings += (U32)(elt.savings * addedLength / elt.length); + table[u].length = MIN(elt.length, table[u].length + 1); + return u; + } + } + } + + return 0; +} + + +static void ZDICT_removeDictItem(dictItem* table, U32 id) +{ + /* convention : table[0].pos stores nb of elts */ + U32 const max = table[0].pos; + U32 u; + if (!id) return; /* protection, should never happen */ + for (u=id; upos--; +} + + +static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer) +{ + /* merge if possible */ + U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer); + if (mergeId) { + U32 newMerge = 1; + while (newMerge) { + newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer); + if (newMerge) ZDICT_removeDictItem(table, mergeId); + mergeId = newMerge; + } + return; + } + + /* insert */ + { U32 current; + U32 nextElt = table->pos; + if (nextElt >= maxSize) nextElt = maxSize-1; + current = nextElt-1; + while (table[current].savings < elt.savings) { + table[current+1] = table[current]; + current--; + } + table[current+1] = elt; + table->pos = nextElt+1; + } +} + + +static U32 ZDICT_dictSize(const dictItem* dictList) +{ + U32 u, dictSize = 0; + for (u=1; u=l) { \ + if (ZDICT_clockSpan(displayClock) > refreshRate) \ + { displayClock = clock(); DISPLAY(__VA_ARGS__); \ + if (notificationLevel>=4) fflush(stderr); } } + + /* init */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) { + result = ERROR(memory_allocation); + goto _cleanup; + } + if (minRatio < MINRATIO) minRatio = MINRATIO; + memset(doneMarks, 0, bufferSize+16); + + /* limit sample set size (divsufsort limitation)*/ + if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", 
(unsigned)(ZDICT_MAX_SAMPLES_SIZE>>20)); + while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles]; + + /* sort */ + DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (unsigned)(bufferSize>>20)); + { int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0); + if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; } + } + suffix[bufferSize] = (int)bufferSize; /* leads into noise */ + suffix0[0] = (int)bufferSize; /* leads into noise */ + /* build reverse suffix sort */ + { size_t pos; + for (pos=0; pos < bufferSize; pos++) + reverseSuffix[suffix[pos]] = (U32)pos; + /* note filePos tracks borders between samples. + It's not used at this stage, but planned to become useful in a later update */ + filePos[0] = 0; + for (pos=1; pos> 21); + } +} + + +typedef struct +{ + ZSTD_CDict* dict; /* dictionary */ + ZSTD_CCtx* zc; /* working context */ + void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */ +} EStats_ress_t; + +#define MAXREPOFFSET 1024 + +static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params, + unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets, + const void* src, size_t srcSize, + U32 notificationLevel) +{ + size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog); + size_t cSize; + + if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */ + { size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict); + if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; } + + } + cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize); + if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; } + + if (cSize) { /* if == 0; block is not compressible */ + const 
seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc); + + /* literals stats */ + { const BYTE* bytePtr; + for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++) + countLit[*bytePtr]++; + } + + /* seqStats */ + { U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + ZSTD_seqToCodes(seqStorePtr); + + { const BYTE* codePtr = seqStorePtr->ofCode; + U32 u; + for (u=0; umlCode; + U32 u; + for (u=0; ullCode; + U32 u; + for (u=0; u= 2) { /* rep offsets */ + const seqDef* const seq = seqStorePtr->sequencesStart; + U32 offset1 = seq[0].offset - 3; + U32 offset2 = seq[1].offset - 3; + if (offset1 >= MAXREPOFFSET) offset1 = 0; + if (offset2 >= MAXREPOFFSET) offset2 = 0; + repOffsets[offset1] += 3; + repOffsets[offset2] += 1; + } } } +} + +static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles) +{ + size_t total=0; + unsigned u; + for (u=0; u0; u--) { + offsetCount_t tmp; + if (table[u-1].count >= table[u].count) break; + tmp = table[u-1]; + table[u-1] = table[u]; + table[u] = tmp; + } +} + +/* ZDICT_flatLit() : + * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals. + * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode. 
+ */ +static void ZDICT_flatLit(unsigned* countLit) +{ + int u; + for (u=1; u<256; u++) countLit[u] = 2; + countLit[0] = 4; + countLit[253] = 1; + countLit[254] = 1; +} + +#define OFFCODE_MAX 30 /* only applicable to first block */ +static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, + int compressionLevel, + const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles, + const void* dictBuffer, size_t dictBufferSize, + unsigned notificationLevel) +{ + unsigned countLit[256]; + HUF_CREATE_STATIC_CTABLE(hufTable, 255); + unsigned offcodeCount[OFFCODE_MAX+1]; + short offcodeNCount[OFFCODE_MAX+1]; + U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB)); + unsigned matchLengthCount[MaxML+1]; + short matchLengthNCount[MaxML+1]; + unsigned litLengthCount[MaxLL+1]; + short litLengthNCount[MaxLL+1]; + U32 repOffset[MAXREPOFFSET]; + offsetCount_t bestRepOffset[ZSTD_REP_NUM+1]; + EStats_ress_t esr = { NULL, NULL, NULL }; + ZSTD_parameters params; + U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total; + size_t pos = 0, errorCode; + size_t eSize = 0; + size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles); + size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles); + BYTE* dstPtr = (BYTE*)dstBuffer; + + /* init */ + DEBUGLOG(4, "ZDICT_analyzeEntropy"); + if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */ + for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */ + for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1; + for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1; + for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1; + memset(repOffset, 0, sizeof(repOffset)); + repOffset[1] = repOffset[4] = repOffset[8] = 1; + memset(bestRepOffset, 0, sizeof(bestRepOffset)); + if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT; + params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize); + + 
esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem); + esr.zc = ZSTD_createCCtx(); + esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX); + if (!esr.dict || !esr.zc || !esr.workPlace) { + eSize = ERROR(memory_allocation); + DISPLAYLEVEL(1, "Not enough memory \n"); + goto _cleanup; + } + + /* collect stats on all samples */ + for (u=0; u dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize; + { size_t const dictSize = hSize + dictContentSize; + char* dictEnd = (char*)dictBuffer + dictSize; + memmove(dictEnd - dictContentSize, customDictContent, dictContentSize); + memcpy(dictBuffer, header, hSize); + return dictSize; + } +} + + +static size_t ZDICT_addEntropyTablesFromBuffer_advanced( + void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t params) +{ + int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel; + U32 const notificationLevel = params.notificationLevel; + size_t hSize = 8; + + /* calculate entropy tables */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + DISPLAYLEVEL(2, "statistics ... \n"); + { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize, + compressionLevel, + samplesBuffer, samplesSizes, nbSamples, + (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, + notificationLevel); + if (ZDICT_isError(eSize)) return eSize; + hSize += eSize; + } + + /* add dictionary header (after entropy tables) */ + MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY); + { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0); + U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; + U32 const dictID = params.dictID ? 
params.dictID : compliantID; + MEM_writeLE32((char*)dictBuffer+4, dictID); + } + + if (hSize + dictContentSize < dictBufferCapacity) + memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize); + return MIN(dictBufferCapacity, hSize+dictContentSize); +} + +/*! ZDICT_trainFromBuffer_unsafe_legacy() : +* Warning : `samplesBuffer` must be followed by noisy guard band !!! +* @return : size of dictionary, or an error code which can be tested with ZDICT_isError() +*/ +static size_t ZDICT_trainFromBuffer_unsafe_legacy( + void* dictBuffer, size_t maxDictSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t params) +{ + U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16)); + dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList)); + unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel; + unsigned const minRep = (selectivity > 30) ? 
MINRATIO : nbSamples >> selectivity; + size_t const targetDictSize = maxDictSize; + size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + size_t dictSize = 0; + U32 const notificationLevel = params.zParams.notificationLevel; + + /* checks */ + if (!dictList) return ERROR(memory_allocation); + if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */ + if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */ + + /* init */ + ZDICT_initDictItem(dictList); + + /* build dictionary */ + ZDICT_trainBuffer_legacy(dictList, dictListSize, + samplesBuffer, samplesBuffSize, + samplesSizes, nbSamples, + minRep, notificationLevel); + + /* display best matches */ + if (params.zParams.notificationLevel>= 3) { + unsigned const nb = MIN(25, dictList[0].pos); + unsigned const dictContentSize = ZDICT_dictSize(dictList); + unsigned u; + DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize); + DISPLAYLEVEL(3, "list %u best segments \n", nb-1); + for (u=1; u samplesBuffSize) || ((pos + length) > samplesBuffSize)) { + free(dictList); + return ERROR(GENERIC); /* should never happen */ + } + DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |", + u, length, pos, (unsigned)dictList[u].savings); + ZDICT_printHex((const char*)samplesBuffer+pos, printedLength); + DISPLAYLEVEL(3, "| \n"); + } } + + + /* create dictionary */ + { unsigned dictContentSize = ZDICT_dictSize(dictList); + if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */ + if (dictContentSize < targetDictSize/4) { + DISPLAYLEVEL(2, "! 
warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (unsigned)maxDictSize); + if (samplesBuffSize < 10 * targetDictSize) + DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (unsigned)(samplesBuffSize>>20)); + if (minRep > MINRATIO) { + DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1); + DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n"); + } + } + + if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) { + unsigned proposedSelectivity = selectivity-1; + while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; } + DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize); + DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity); + DISPLAYLEVEL(2, "! 
always test dictionary efficiency on real samples \n"); + } + + /* limit dictionary size */ + { U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */ + U32 currentSize = 0; + U32 n; for (n=1; n targetDictSize) { currentSize -= dictList[n].length; break; } + } + dictList->pos = n; + dictContentSize = currentSize; + } + + /* build dict content */ + { U32 u; + BYTE* ptr = (BYTE*)dictBuffer + maxDictSize; + for (u=1; upos; u++) { + U32 l = dictList[u].length; + ptr -= l; + if (ptr<(BYTE*)dictBuffer) { free(dictList); return ERROR(GENERIC); } /* should not happen */ + memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l); + } } + + dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize, + samplesBuffer, samplesSizes, nbSamples, + params.zParams); + } + + /* clean up */ + free(dictList); + return dictSize; +} + + +/* ZDICT_trainFromBuffer_legacy() : + * issue : samplesBuffer need to be followed by a noisy guard band. + * work around : duplicate the buffer, and add the noise */ +size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t params) +{ + size_t result; + void* newBuff; + size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0; /* not enough content => no dictionary */ + + newBuff = malloc(sBuffSize + NOISELENGTH); + if (!newBuff) return ERROR(memory_allocation); + + memcpy(newBuff, samplesBuffer, sBuffSize); + ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */ + + result = + ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff, + samplesSizes, nbSamples, params); + free(newBuff); + return result; +} + + +size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* 
samplesSizes, unsigned nbSamples) +{ + ZDICT_fastCover_params_t params; + DEBUGLOG(3, "ZDICT_trainFromBuffer"); + memset(¶ms, 0, sizeof(params)); + params.d = 8; + params.steps = 4; + /* Use default level since no compression level information is available */ + params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; +#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1) + params.zParams.notificationLevel = DEBUGLEVEL; +#endif + return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity, + samplesBuffer, samplesSizes, nbSamples, + ¶ms); +} + +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) +{ + ZDICT_params_t params; + memset(¶ms, 0, sizeof(params)); + return ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, dictBufferCapacity, + samplesBuffer, samplesSizes, nbSamples, + params); +} +/**** ended inlining dictBuilder/zdict.c ****/ diff --git a/thirdparty/basisu/zstd/zstd.h b/thirdparty/basisu/zstd/zstd.h new file mode 100644 index 000000000..222339d71 --- /dev/null +++ b/thirdparty/basisu/zstd/zstd.h @@ -0,0 +1,2450 @@ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 + +/* ====== Dependency ======*/ +#include /* INT_MAX */ +#include /* size_t */ + + +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDLIB_API ZSTDLIB_VISIBILITY +#endif + + +/******************************************************************************* + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). + + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + + The compression ratio achievable on small data can be highly improved using + a dictionary. 
Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ + +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 4 +#define ZSTD_VERSION_RELEASE 9 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. 
*/ +ZSTDLIB_API const char* ZSTD_versionString(void); + +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + +/* ************************************* + * Constants + ***************************************/ + +/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. 
+ * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. + * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). 
+ * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_findFrameCompressedSize() : + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + + +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ + + +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. 
+ * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. + */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*************************************** +* Advanced compression API +***************************************/ + +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. 
+ * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supercedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ + + +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. 
+ * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. + * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. 
+ * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. + * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. 
+ * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ + + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ + + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. 
+ */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. + * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. 
+ * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009 +} ZSTD_cParameter; + +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; + +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); + +/*! 
ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. + * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. 
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
+
+typedef enum {
+ ZSTD_reset_session_only = 1,
+ ZSTD_reset_parameters = 2,
+ ZSTD_reset_session_and_parameters = 3
+} ZSTD_ResetDirective;
+
+/*! ZSTD_CCtx_reset() :
+ * There are 2 different things that can be reset, independently or jointly :
+ * - The session : will stop compressing current frame, and make CCtx ready to start a new one.
+ * Useful after an error, or to interrupt any ongoing compression.
+ * Any internal data not yet flushed is cancelled.
+ * Compression parameters and dictionary remain unchanged.
+ * They will be used to compress next frame.
+ * Resetting session never fails.
+ * - The parameters : changes all parameters back to "default".
+ * This removes any reference to any dictionary too.
+ * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
+ * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
+ * - Both : similar to resetting the session, followed by resetting parameters.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
+
+/*! ZSTD_compress2() :
+ * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
+ * ZSTD_compress2() always starts a new frame.
+ * Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
+ * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ * - The function is always blocking, returns when compression is completed.
+ * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
+ * @return : compressed size written into `dst` (<= `dstCapacity`),
+ * or an error code if it fails (which can be tested using ZSTD_isError()).
+ */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*************************************** +* Advanced decompression API +***************************************/ + +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. + */ + +typedef enum { + + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. 
+ * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + +/*! ZSTD_DCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + + +/**************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + + + +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. 
+* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. +* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). 
+* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+*
+* *******************************************************************/
+
+typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
+ /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
+/*===== ZSTD_CStream management functions =====*/
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
+ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
+
+/*===== Streaming compression functions =====*/
+typedef enum {
+ ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
+ ZSTD_e_flush=1, /* flush any data provided so far,
+ * it creates (at least) one new block, that can be decoded immediately on reception;
+ * frame will continue: any future data can still reference previously compressed data, improving compression.
+ * note : multithreaded compression will block to flush as much output as possible. */
+ ZSTD_e_end=2 /* flush any remaining data _and_ close current frame.
+ * note that frame is only closed after compressed data is fully flushed (return value == 0).
+ * After that point, any additional data starts a new frame.
+ * note : each frame is independent (does not reference any content from previous frame).
+ * note : multithreaded compression will block to flush as much output as possible. */
+} ZSTD_EndDirective;
+
+/*! ZSTD_compressStream2() :
+ * Behaves about the same as ZSTD_compressStream, with additional control on end directive.
+ * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
+ * - output->pos must be <= dstCapacity, input->pos must be <= srcSize
+ * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
+ * - endOp must be a valid directive + * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. + * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. + * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. + * - @return provides a minimum amount of data remaining to be flushed from internal buffers + * or an error code, which can be tested using ZSTD_isError(). + * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. + * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. + * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), + * only ZSTD_e_end or ZSTD_e_flush operations are allowed. + * Before starting a new compression job, or changing compression parameters, + * it is required to fully flush internal buffers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. 
+ * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ + + +/* ***************************************************************************** + * This following is a legacy streaming API. + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Advanced parameters and dictionary compression can only be used through the + * new API. + ******************************************************************************/ + +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error). 
+ */
+ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
+ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
+ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+
+
+/*-***************************************************************************
+* Streaming decompression - HowTo
+*
+* A ZSTD_DStream object is required to track streaming operations.
+* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
+* ZSTD_DStream objects can be re-used multiple times.
+*
+* Use ZSTD_initDStream() to start a new decompression operation.
+* @return : recommended first input size
+* Alternatively, use advanced API to set specific properties.
+*
+* Use ZSTD_decompressStream() repetitively to consume your input.
+* The function will update both `pos` fields.
+* If `input.pos < input.size`, some input has not been consumed.
+* It's up to the caller to present again remaining data.
+* The function tries to flush all data decoded immediately, respecting output buffer size.
+* If `output.pos < output.size`, decoder has flushed everything it could.
+* But if `output.pos == output.size`, there might be some data left within internal buffers.
+* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
+* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
+* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ + +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); + +/*===== Streaming decompression functions =====*/ + +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + + +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see dictBuilder/zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. 
+ * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. 
*/ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); + +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/******************************** + * Dictionary helper functions + *******************************/ + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! 
ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could be for one of the following reasons : + * - The frame does not require a dictionary to be decoded (most common case). + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); + + +/******************************************************************************* + * Advanced dictionary and prefix API + * + * This API allows dictionaries to be used with ZSTD_compress2(), + * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and + * only reset when the context is reset with ZSTD_reset_parameters or + * ZSTD_reset_session_and_parameters. Prefixes are single-use. + ******************************************************************************/ + + +/*! ZSTD_CCtx_loadDictionary() : + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary. + * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_refCDict() : + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. + * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. 
*/ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); + +/*! ZSTD_DCtx_loadDictionary() : + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). 
+ * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_refDDict() : + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. + * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +/*! ZSTD_DCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) to decompress next frame. 
+ * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); + +/* === Memory management === */ + +/*! ZSTD_sizeof_*() : + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + +#endif /* ZSTD_H_235446 */ + + +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. 
+ * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. + * ***************************************************************************************/ + +#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY + +/**************************************************************************************** + * experimental API (static linking only) + **************************************************************************************** + * The following symbols and constants + * are not planned to join "stable API" status in the near future. + * They can still change in future versions. + * Some of them are planned to remain in the static_only section indefinitely. + * Some of them might be removed in the future (especially when redundant with existing stable functions) + * ***************************************************************************************/ + +#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ +#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ +#define ZSTD_SKIPPABLEHEADERSIZE 8 + +/* compression parameter bounds */ +#define ZSTD_WINDOWLOG_MAX_32 30 +#define ZSTD_WINDOWLOG_MAX_64 31 +#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX_32 29 +#define ZSTD_CHAINLOG_MAX_64 30 +#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? 
ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ +#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ +#define ZSTD_STRATEGY_MIN ZSTD_fast +#define ZSTD_STRATEGY_MAX ZSTD_btultra2 + + +#define ZSTD_OVERLAPLOG_MIN 0 +#define ZSTD_OVERLAPLOG_MAX 9 + +#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame + * requiring larger than (1< 0: + * If litLength != 0: + * rep == 1 --> offset == repeat_offset_1 + * rep == 2 --> offset == repeat_offset_2 + * rep == 3 --> offset == repeat_offset_3 + * If litLength == 0: + * rep == 1 --> offset == repeat_offset_2 + * rep == 2 --> offset == repeat_offset_3 + * rep == 3 --> offset == repeat_offset_1 - 1 + * + * Note: This field is optional. ZSTD_generateSequences() will calculate the value of + * 'rep', but repeat offsets do not necessarily need to be calculated from an external + * sequence provider's perspective. For example, ZSTD_compressSequences() does not + * use this 'rep' field at all (as of now). 
+ */ +} ZSTD_Sequence; + +typedef struct { + unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /**< dispatch table : larger == faster, more memory */ + unsigned searchLog; /**< nb of searches : larger == more compression, slower */ + unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ + unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ +} ZSTD_compressionParameters; + +typedef struct { + int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ + int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ + int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ +} ZSTD_frameParameters; + +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; + +typedef enum { + ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ + ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ +} ZSTD_dictContentType_e; + +typedef enum { + ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ + ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. 
*/ +} ZSTD_dictLoadMethod_e; + +typedef enum { + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. + * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. 
+ * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; + +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ +} ZSTD_literalCompressionMode_e; + + +/*************************************** +* Frame size functions +***************************************/ + +/*! ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. 
there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. 
+ * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. + * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: + * upper-bound = # blocks * min(128 KB, Window_Size) + */ +ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); + +/*! ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); + +typedef enum { + ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ + ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ +} ZSTD_sequenceFormat_e; + +/*! ZSTD_generateSequences() : + * Generate sequences using ZSTD_compress2, given a source buffer. + * + * Each block will end with a dummy sequence + * with offset == 0, matchLength == 0, and litLength == length of last literals. + * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. + * + * zc can be used to insert custom compression params. + * This function invokes ZSTD_compress2 + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters + * @return : number of sequences generated + */ + +ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); + +/*! ZSTD_mergeBlockDelimiters() : + * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals + * by merging them into the literals of the next sequence.
+ * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters + * @return : number of sequences left after merging + */ +ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); + +/*! ZSTD_compressSequences() : + * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. + * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * The entire source is compressed into a single frame. + * + * The compression behavior changes based on cctx params. In particular: + * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on + * the block size derived from the cctx, and sequences may be split. This is the default setting. + * + * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, there are other important cctx params. + * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. 
It has a minimum value of ZSTD_MINMATCH_MIN. + * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression. + * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset + * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md + * + * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. + * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, + * and cannot emit an RLE block that disagrees with the repcode history + * @return : final compressed size or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); + + +/*! ZSTD_writeSkippableFrame() : + * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number, + * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + * + * @return : number of bytes written or a ZSTD error.
+ */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + +/*************************************** +* Memory management +***************************************/ + +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. 
+ * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. + * Note : CStream size estimation is only correct for single-threaded compression. + * ZSTD_DStream memory budget depends on window Size. + * This information can be passed manually, using ZSTD_estimateDStreamSize, + * or deduced from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Note : if streaming is initialized with function ZSTD_init?Stream_usingDict(), + * an internal ?Dict will be created, whose additional size is not estimated here. + * In this case, get total size by adding ZSTD_estimate?DictSize
+ */ +ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); + +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). + * Note 2 : there is no corresponding "free" function. + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. 
+ */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ + +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ + +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); + +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); + + +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. 
+ */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static +#ifdef __GNUC__ +__attribute__((__unused__)) +#endif +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ + +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); + +/* ! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + + +/* + * This API is temporary and is expected to change or disappear in the future! 
+ */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + + +/*************************************** +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. 
+ * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ +ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now REDUNDANT. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning in some future version */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); + + +/*! 
ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ + + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. 
+ * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 + +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 + +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 + +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 + +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_t enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. + * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior! 
+ * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. 
+ * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 + +/* ZSTD_c_stableInBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the compressor, and + * compression will fail if it ever changes. This means the only flush + * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end + * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos) + * MUST not be modified during compression or you will get data corruption. + * + * When this flag is enabled zstd won't allocate an input window buffer, + * because the user guarantees it can reference the ZSTD_inBuffer until + * the frame is complete. But, it will still allocate an output buffer + * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also + * avoid the memcpy() from the input buffer to the input window buffer. + * + * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used. + * That means this flag cannot be used with ZSTD_compressStream(). 
+ * + * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, compression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST + * not be modified during compression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_inBuffer to find + * matches. Normally zstd maintains its own window buffer for this purpose, + * but passing this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9 + +/* ZSTD_c_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_outBuffer will not be resized between + * calls. Specifically: (out.size - out.pos) will never grow. This gives the + * compressor the freedom to say: If the compressed data doesn't fit in the + * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to + * always compress directly into the output buffer, instead of compressing + * into an internal buffer and copying to the output buffer. + * + * When this flag is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer. It will still allocate the + * input window buffer (see ZSTD_c_stableInBuffer). + * + * Zstd will check that (out.size - out.pos) never grows and return an error + * if it does. While not strictly necessary, this should prevent surprises. + */ +#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10 + +/* ZSTD_c_blockDelimiters + * Default is 0 == ZSTD_sf_noBlockDelimiters. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * + * Designates whether or not the given array of ZSTD_Sequence contains block delimiters + * and last literals, which are defined as sequences with offset == 0 and matchLength == 0. 
+ * See the definition of ZSTD_Sequence for more specifics. + */ +#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11 + +/* ZSTD_c_validateSequences + * Default is 0 == disabled. Set to 1 to enable sequence validation. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * Designates whether or not we validate sequences provided to ZSTD_compressSequences() + * during function execution. + * + * Without validation, providing a sequence that does not conform to the zstd spec will cause + * undefined behavior, and may produce a corrupted block. + * + * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and + * return an error. + * + */ +#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 + +/*! ZSTD_CCtx_getParameter() : + * Get the requested compression parameter value, selected by enum ZSTD_cParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); + + +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression. 
+ */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); + +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + +/*! ZSTD_CCtxParams_setParameter() : + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. 
+ * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); + + +/*************************************** +* Advanced decompression functions +***************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. 
+ * This saves memory if `dict` remains around. + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). 
+ */ +ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); + +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats + */ +#define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. + * + * When this flag is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer. 
+ */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 + +/* ZSTD_d_forceIgnoreChecksum + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * Tells the decompressor to skip checksum validation during decompression, regardless + * of whether checksumming was specified during compression. This offers some + * slight performance benefits, and may be useful for debugging. + * Param has values of type ZSTD_forceIgnoreChecksum_e + */ +#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 + +/* ZSTD_d_refMultipleDDicts + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * If enabled and dctx is allocated on the heap, then additional memory will be allocated + * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict() + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate dictID is selected + * from the set of DDicts based on the dictID in the frame. + * + * Usage is simply calling ZSTD_refDDict() on multiple dict buffers. + * + * Param has values of type ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. + * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. + */ +#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 + + +/*! ZSTD_DCtx_setFormat() : + * Instruct the decoder context about what kind of data to decode next. + * This instruction is mandatory to decode data without a fully-formed header, + * such as ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). 
*/ +ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); + +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); + + +/******************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ + +/*===== Advanced Streaming compression functions =====*/ + +/*! ZSTD_initCStream_srcSize() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); + +/*! 
ZSTD_initCStream_usingDict() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates of an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); + +/*! ZSTD_initCStream_advanced() : + * This function is deprecated, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); + +/*! 
ZSTD_initCStream_usingCDict() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. + * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); + +/*! ZSTD_resetCStream() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. 
+ * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. 
+ * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); + + +/*===== Advanced Streaming decompression functions =====*/ + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*! 
+ * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, + or ZSTD_compressBegin_advanced(), for finer parameter control. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). 
+ ZSTD_compressContinue() doesn't guarantee recover after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). + It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. +*/ + +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. 
If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/** + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. + As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. 
+ + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. + For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! 
ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + + + +/* ============================ */ +/** Block level API */ +/* ============================ */ + +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). 
+ But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. 
+*/ + +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif diff --git a/thirdparty/basisu/zstd/zstddeclib.c b/thirdparty/basisu/zstd/zstddeclib.c new file mode 100644 index 000000000..e06ad172d --- /dev/null +++ b/thirdparty/basisu/zstd/zstddeclib.c @@ -0,0 +1,15033 @@ +/** + * \file zstddeclib.c + * Single-file Zstandard decompressor. + * + * Generate using: + * \code + * combine.sh -r ../../lib -o zstddeclib.c zstddeclib-in.c + * \endcode + */ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ +/* + * Settings to bake for the standalone decompressor. + * + * Note: It's important that none of these affects 'zstd.h' (only the + * implementation files we're amalgamating). + * + * Note: MEM_MODULE stops xxhash redefining BYTE, U16, etc., which are also + * defined in mem.h (breaking C99 compatibility). + * + * Note: the undefs for xxHash allow Zstd's implementation to coinside with with + * standalone xxHash usage (with global defines). 
+ */ +#define DEBUGLEVEL 0 +#define MEM_MODULE +#undef XXH_NAMESPACE +#define XXH_NAMESPACE ZSTD_ +#undef XXH_PRIVATE_API +#define XXH_PRIVATE_API +#undef XXH_INLINE_ALL +#define XXH_INLINE_ALL +#define ZSTD_LEGACY_SUPPORT 0 +#define ZSTD_STRIP_ERROR_STRINGS +#define ZSTD_TRACE 0 + +/* Include zstd_deps.h first with all the options we need enabled. */ +#define ZSTD_DEPS_NEED_MALLOC +/**** start inlining common/zstd_deps.h ****/ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This file provides common libc dependencies that zstd requires. + * The purpose is to allow replacing this file with a custom implementation + * to compile zstd without libc support. + */ + +/* Need: + * NULL + * INT_MAX + * UINT_MAX + * ZSTD_memcpy() + * ZSTD_memset() + * ZSTD_memmove() + */ +#ifndef ZSTD_DEPS_COMMON +#define ZSTD_DEPS_COMMON + +#include +#include +#include + +#if defined(__GNUC__) && __GNUC__ >= 4 +# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l)) +#else +# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) memset((p),(v),(l)) +#endif + +#endif /* ZSTD_DEPS_COMMON */ + +/* Need: + * ZSTD_malloc() + * ZSTD_free() + * ZSTD_calloc() + */ +#ifdef ZSTD_DEPS_NEED_MALLOC +#ifndef ZSTD_DEPS_MALLOC +#define ZSTD_DEPS_MALLOC + +#include + +#define ZSTD_malloc(s) malloc(s) +#define ZSTD_calloc(n,s) calloc((n), (s)) +#define ZSTD_free(p) free((p)) + +#endif /* ZSTD_DEPS_MALLOC */ +#endif /* ZSTD_DEPS_NEED_MALLOC */ + +/* + * Provides 64-bit math support. 
+ * Need: + * U64 ZSTD_div64(U64 dividend, U32 divisor) + */ +#ifdef ZSTD_DEPS_NEED_MATH64 +#ifndef ZSTD_DEPS_MATH64 +#define ZSTD_DEPS_MATH64 + +#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor)) + +#endif /* ZSTD_DEPS_MATH64 */ +#endif /* ZSTD_DEPS_NEED_MATH64 */ + +/* Need: + * assert() + */ +#ifdef ZSTD_DEPS_NEED_ASSERT +#ifndef ZSTD_DEPS_ASSERT +#define ZSTD_DEPS_ASSERT + +#include + +#endif /* ZSTD_DEPS_ASSERT */ +#endif /* ZSTD_DEPS_NEED_ASSERT */ + +/* Need: + * ZSTD_DEBUG_PRINT() + */ +#ifdef ZSTD_DEPS_NEED_IO +#ifndef ZSTD_DEPS_IO +#define ZSTD_DEPS_IO + +#include +#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) + +#endif /* ZSTD_DEPS_IO */ +#endif /* ZSTD_DEPS_NEED_IO */ + +/* Only requested when is known to be present. + * Need: + * intptr_t + */ +#ifdef ZSTD_DEPS_NEED_STDINT +#ifndef ZSTD_DEPS_STDINT +#define ZSTD_DEPS_STDINT + +#include + +#endif /* ZSTD_DEPS_STDINT */ +#endif /* ZSTD_DEPS_NEED_STDINT */ +/**** ended inlining common/zstd_deps.h ****/ + +/**** start inlining common/debug.c ****/ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + + +/* + * This module only hosts one global variable + * which can be used to dynamically influence the verbosity of traces, + * such as DEBUGLOG and RAWLOG + */ + +/**** start inlining debug.h ****/ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * The purpose of this header is to enable debug functions. + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, + * and DEBUG_STATIC_ASSERT() for compile-time. + * + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. + * + * Level 1 enables assert() only. + * Starting level 2, traces can be generated and pushed to stderr. + * The higher the level, the more verbose the traces. + * + * It's possible to dynamically adjust level using variable g_debug_level, + * which is only declared if DEBUGLEVEL>=2, + * and is a global variable, not multi-thread protected (use with care) + */ + +#ifndef DEBUG_H_12987983217 +#define DEBUG_H_12987983217 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* static assert is triggered at compile time, leaving no runtime artefact. + * static assert only works with compile-time constants. + * Also, this variant can only be used inside a function. */ +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 
1 : -1]) + + +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif + + +/* recommended values for DEBUGLEVEL : + * 0 : release mode, no debug, all run-time checks disabled + * 1 : enables assert() only, no display + * 2 : reserved, for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (verbose) + * 7+: events at every position (*very* verbose) + * + * It's generally inconvenient to output traces > 5. + * In which case, it's possible to selectively trigger high verbosity levels + * by modifying g_debug_level. + */ + +#if (DEBUGLEVEL>=1) +# define ZSTD_DEPS_NEED_ASSERT +/**** skipping file: zstd_deps.h ****/ +#else +# ifndef assert /* assert may be already defined, due to prior #include */ +# define assert(condition) ((void)0) /* disable assert (default) */ +# endif +#endif + +#if (DEBUGLEVEL>=2) +# define ZSTD_DEPS_NEED_IO +/**** skipping file: zstd_deps.h ****/ +extern int g_debuglevel; /* the variable is only declared, + it actually lives in debug.c, + and is shared by the whole process. + It's not thread-safe. + It's useful when enabling very verbose levels + on selective conditions (such as position in src) */ + +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ + ZSTD_DEBUG_PRINT(" \n"); \ + } } +#else +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) 
{} /* disabled */ +#endif + + +#if defined (__cplusplus) +} +#endif + +#endif /* DEBUG_H_12987983217 */ +/**** ended inlining debug.h ****/ + +int g_debuglevel = DEBUGLEVEL; +/**** ended inlining common/debug.c ****/ +/**** start inlining common/entropy_common.c ****/ +/* ****************************************************************** + * Common functions of New Generation Entropy library + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************* +* Dependencies +***************************************/ +/**** start inlining mem.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-**************************************** +* Dependencies +******************************************/ +#include /* size_t, ptrdiff_t */ +/**** start inlining compiler.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPILER_H +#define ZSTD_COMPILER_H + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +/* force inlining */ + +#if !defined(ZSTD_NO_INLINE) +#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD +#endif + +#if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif + +#else + +#define INLINE_KEYWORD +#define FORCE_INLINE_ATTR + +#endif + +/** + On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC). + This explictly marks such functions as __cdecl so that the code will still compile + if a CC other than __cdecl has been made the default. +*/ +#if defined(_MSC_VER) +# define WIN_CDECL __cdecl +#else +# define WIN_CDECL +#endif + +/** + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant + * parameters. They must be inlined for the compiler to eliminate the constant + * branches. + */ +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +/** + * HINT_INLINE is used to help the compiler generate better code. It is *not* + * used for "templates", so it can be tweaked based on the compilers + * performance. + * + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the + * always_inline attribute. 
+ * + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline + * attribute. + */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 +# define HINT_INLINE static INLINE_KEYWORD +#else +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR +#endif + +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#if defined(__GNUC__) +# define UNUSED_ATTR __attribute__((unused)) +#else +# define UNUSED_ATTR +#endif + +/* force no inlining */ +#ifdef _MSC_VER +# define FORCE_NOINLINE static __declspec(noinline) +#else +# if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_NOINLINE static __attribute__((__noinline__)) +# else +# define FORCE_NOINLINE static +# endif +#endif + + +/* target attribute */ +#ifndef __has_attribute + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ +#endif +#if defined(__GNUC__) || defined(__ICCARM__) +# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) +#else +# define TARGET_ATTRIBUTE(target) +#endif + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. 
+ */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X86)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/* prefetch + * can be disabled, by declaring NO_PREFETCH build macro */ +#if defined(NO_PREFETCH) +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +#else +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +# elif defined(__aarch64__) +# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) +# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) +# else +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* NO_PREFETCH */ + +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) { \ + const char* const _ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH_L2(_ptr + _pos); \ + } \ +} + +/* vectorization + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) +# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || 
(__GNUC__ >= 5) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# else +# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") +# endif +#else +# define DONT_VECTORIZE +#endif + +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. + */ +#if defined(__GNUC__) +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + +/* disable warnings */ +#ifdef _MSC_VER /* Visual Studio */ +# include /* For Visual 2005 */ +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#endif + +/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ +#ifndef STATIC_BMI2 +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) +# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 +# define STATIC_BMI2 1 +# endif +# endif +#endif + +#ifndef STATIC_BMI2 + #define STATIC_BMI2 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/* compat. 
with non-clang compilers */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +/* detects whether we are being compiled under msan */ +#ifndef ZSTD_MEMORY_SANITIZER +# if __has_feature(memory_sanitizer) +# define ZSTD_MEMORY_SANITIZER 1 +# else +# define ZSTD_MEMORY_SANITIZER 0 +# endif +#endif + +#if ZSTD_MEMORY_SANITIZER +/* Not all platforms that support msan provide sanitizers/msan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ +#include /* size_t */ +#define ZSTD_DEPS_NEED_STDINT +/**** skipping file: zstd_deps.h ****/ + +/* Make memory region fully initialized (without changing its contents). */ +void __msan_unpoison(const volatile void *a, size_t size); + +/* Make memory region fully uninitialized (without changing its contents). + This is a legacy interface that does not update origin information. Use + __msan_allocated_memory() instead. */ +void __msan_poison(const volatile void *a, size_t size); + +/* Returns the offset of the first (at least partially) poisoned byte in the + memory range, or -1 if the whole range is good. */ +intptr_t __msan_test_shadow(const volatile void *x, size_t size); +#endif + +/* detects whether we are being compiled under asan */ +#ifndef ZSTD_ADDRESS_SANITIZER +# if __has_feature(address_sanitizer) +# define ZSTD_ADDRESS_SANITIZER 1 +# elif defined(__SANITIZE_ADDRESS__) +# define ZSTD_ADDRESS_SANITIZER 1 +# else +# define ZSTD_ADDRESS_SANITIZER 0 +# endif +#endif + +#if ZSTD_ADDRESS_SANITIZER +/* Not all platforms that support asan provide sanitizers/asan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ +#include /* size_t */ + +/** + * Marks a memory region ([addr, addr+size)) as unaddressable. + * + * This memory must be previously allocated by your program. Instrumented + * code is forbidden from accessing addresses in this region until it is + * unpoisoned. 
This function is not guaranteed to poison the entire region - + * it could poison only a subregion of [addr, addr+size) due to ASan + * alignment restrictions. + * + * \note This function is not thread-safe because no two threads can poison or + * unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. */ +void __asan_poison_memory_region(void const volatile *addr, size_t size); + +/** + * Marks a memory region ([addr, addr+size)) as addressable. + * + * This memory must be previously allocated by your program. Accessing + * addresses in this region is allowed until this region is poisoned again. + * This function could unpoison a super-region of [addr, addr+size) due + * to ASan alignment restrictions. + * + * \note This function is not thread-safe because no two threads can + * poison or unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. 
*/ +void __asan_unpoison_memory_region(void const volatile *addr, size_t size); +#endif + +#endif /* ZSTD_COMPILER_H */ +/**** ended inlining compiler.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: zstd_deps.h ****/ + + +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(_MSC_VER) /* Visual Studio */ +# include /* _byteswap_ulong */ +# include /* _byteswap_* */ +#endif +#if defined(__GNUC__) +# define MEM_STATIC static __inline __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + +/*-************************************************************** +* Basic Types +*****************************************************************/ +#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# if defined(_AIX) +# include +# else +# include /* intptr_t */ +# endif + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else +# include +#if CHAR_BIT != 8 +# error "this implementation requires char to be exactly 8-bit type" +#endif + typedef unsigned char BYTE; +#if USHRT_MAX != 65535 +# error "this implementation requires short to be exactly 16-bit type" +#endif + typedef unsigned short U16; + typedef signed short S16; +#if UINT_MAX != 4294967295 +# error "this implementation requires int to be exactly 32-bit type" +#endif + typedef unsigned int U32; + typedef signed int S32; +/* note : there are no limits defined for long long type in C90. 
+ * limits exist in C99, however, in such case, is preferred */ + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + +/*-************************************************************** +* Memory I/O API +*****************************************************************/ +/*=== Static platform detection ===*/ +MEM_STATIC unsigned MEM_32bits(void); +MEM_STATIC unsigned MEM_64bits(void); +MEM_STATIC unsigned MEM_isLittleEndian(void); + +/*=== Native unaligned read/write ===*/ +MEM_STATIC U16 MEM_read16(const void* memPtr); +MEM_STATIC U32 MEM_read32(const void* memPtr); +MEM_STATIC U64 MEM_read64(const void* memPtr); +MEM_STATIC size_t MEM_readST(const void* memPtr); + +MEM_STATIC void MEM_write16(void* memPtr, U16 value); +MEM_STATIC void MEM_write32(void* memPtr, U32 value); +MEM_STATIC void MEM_write64(void* memPtr, U64 value); + +/*=== Little endian unaligned read/write ===*/ +MEM_STATIC U16 MEM_readLE16(const void* memPtr); +MEM_STATIC U32 MEM_readLE24(const void* memPtr); +MEM_STATIC U32 MEM_readLE32(const void* memPtr); +MEM_STATIC U64 MEM_readLE64(const void* memPtr); +MEM_STATIC size_t MEM_readLEST(const void* memPtr); + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val); +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val); +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val); + +/*=== Big endian unaligned read/write ===*/ +MEM_STATIC U32 MEM_readBE32(const void* memPtr); +MEM_STATIC U64 MEM_readBE64(const void* memPtr); +MEM_STATIC size_t MEM_readBEST(const void* memPtr); + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val); + +/*=== Byteswap ===*/ +MEM_STATIC U32 MEM_swap32(U32 in); +MEM_STATIC U64 MEM_swap64(U64 in); +MEM_STATIC size_t MEM_swapST(size_t in); + + 
+/*-************************************************************** +* Memory I/O Implementation +*****************************************************************/ +/* MEM_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets depending on alignment. + * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard, by lying on structure alignment. +Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } +MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + __pragma( pack(push, 1) ) + typedef struct { U16 v; } unalign16; + typedef struct { 
U32 v; } unalign32; + typedef struct { U64 v; } unalign64; + typedef struct { size_t v; } unalignArch; + __pragma( pack(pop) ) +#else + typedef struct { U16 v; } __attribute__((packed)) unalign16; + typedef struct { U32 v; } __attribute__((packed)) unalign32; + typedef struct { U64 v; } __attribute__((packed)) unalign64; + typedef struct { size_t v; } __attribute__((packed)) unalignArch; +#endif + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } + +#else + +/* default method, safe and standard. 
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC size_t MEM_readST(const void* memPtr) +{ + size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* MEM_FORCE_MEMORY_ACCESS */ + +MEM_STATIC U32 MEM_swap32(U32 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_ulong(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap32)) + return __builtin_bswap32(in); +#else + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +#endif +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap64)) + return __builtin_bswap64(in); +#else + return ((in << 56) & 0xff00000000000000ULL) | + ((in << 40) & 0x00ff000000000000ULL) | + ((in << 24) & 0x0000ff0000000000ULL) | + ((in << 8) & 0x000000ff00000000ULL) | + ((in >> 8) & 0x00000000ff000000ULL) | + ((in >> 24) & 0x0000000000ff0000ULL) | + ((in >> 40) & 0x000000000000ff00ULL) | + ((in >> 56) & 0x00000000000000ffULL); +#endif +} + +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) 
+ return (size_t)MEM_swap32((U32)in); + else + return (size_t)MEM_swap64((U64)in); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) { + MEM_write16(memPtr, val); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE*)memPtr)[2] = (BYTE)(val>>16); +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + return MEM_swap32(MEM_read32(memPtr)); +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, val32); + else + MEM_write32(memPtr, MEM_swap32(val32)); +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + return MEM_swap64(MEM_read64(memPtr)); +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, val64); + else + MEM_write64(memPtr, MEM_swap64(val64)); +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + +/*=== Big endian r/w ===*/ + +MEM_STATIC U32 MEM_readBE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap32(MEM_read32(memPtr)); + else + return MEM_read32(memPtr); +} + +MEM_STATIC void 
MEM_writeBE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, MEM_swap32(val32)); + else + MEM_write32(memPtr, val32); +} + +MEM_STATIC U64 MEM_readBE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap64(MEM_read64(memPtr)); + else + return MEM_read64(memPtr); +} + +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, MEM_swap64(val64)); + else + MEM_write64(memPtr, val64); +} + +MEM_STATIC size_t MEM_readBEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readBE32(memPtr); + else + return (size_t)MEM_readBE64(memPtr); +} + +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeBE32(memPtr, (U32)val); + else + MEM_writeBE64(memPtr, (U64)val); +} + +/* code only tested on 32 and 64 bits systems */ +MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } + + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ +/**** ended inlining mem.h ****/ +/**** start inlining error_private.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* Note : this module is expected to remain private, do not expose it */ + +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************************** +* Dependencies +******************************************/ +/**** skipping file: zstd_deps.h ****/ +/**** start inlining zstd_errors.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*===== dependency =====*/ +#include /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDERRORLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDERRORLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +#endif + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. 
+ **********************************************/ +typedef enum { + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + ZSTD_error_dstBuffer_null = 74, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ +} ZSTD_ErrorCode; + +/*! 
ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_ERRORS_H_398273423 */ +/**** ended inlining zstd_errors.h ****/ + + +/* **************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Customization (error_public.h) +******************************************/ +typedef ZSTD_ErrorCode ERR_enum; +#define PREFIX(name) ZSTD_error_##name + + +/*-**************************************** +* Error codes handling +******************************************/ +#undef ERROR /* already defined on Visual Studio */ +#define ERROR(name) ZSTD_ERROR(name) +#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } + +/* check and forward error code */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + + +/*-**************************************** +* Error Strings +******************************************/ + +const 
char* ERR_getErrorString(ERR_enum code); /* error_private.c */ + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + return ERR_getErrorString(ERR_getErrorCode(code)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ +/**** ended inlining error_private.h ****/ +#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ +/**** start inlining fse.h ****/ +/* ****************************************************************** + * FSE : Finite State Entropy codec + * Public Prototypes declaration + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef FSE_H +#define FSE_H + + +/*-***************************************** +* Dependencies +******************************************/ +/**** skipping file: zstd_deps.h ****/ + + +/*-***************************************** +* FSE_PUBLIC_API : control library symbols visibility +******************************************/ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define FSE_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define FSE_PUBLIC_API +#endif + +/*------ Version ------*/ +#define FSE_VERSION_MAJOR 0 +#define FSE_VERSION_MINOR 9 +#define FSE_VERSION_RELEASE 0 + +#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE +#define FSE_QUOTE(str) #str +#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) +#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) + +#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) +FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ + + +/*-**************************************** +* FSE simple functions +******************************************/ +/*! FSE_compress() : + Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. + 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). + @return : size of compressed data (<= dstCapacity). 
+ Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. + if FSE_isError(return), compression failed (more details using FSE_getErrorName()) +*/ +FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/*! FSE_decompress(): + Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', + into already allocated destination buffer 'dst', of size 'dstCapacity'. + @return : size of regenerated data (<= maxDstSize), + or an error code, which can be tested using FSE_isError() . + + ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! + Why ? : making this distinction requires a header. + Header management is intentionally delegated to the user layer, which can better manage special cases. +*/ +FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); + + +/*-***************************************** +* Tool functions +******************************************/ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE advanced functions +******************************************/ +/*! FSE_compress2() : + Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' + Both parameters can be defined as '0' to mean : use default value + @return : size of compressed data + Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. 
+ if FSE_isError(return), it's an error code. +*/ +FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] (see hist.h) +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + useLowProbCount is a boolean parameter which trades off compressed size for + faster header decoding. When it is set to 1, the compressed data will be slightly + smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be + faster. 
If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 + is a good default, since header deserialization makes a big speed difference. + Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount); + +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, unsigned tableLog); + +/*! Constructor and Destructor of FSE_CTable. + Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ +FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. 
+ @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). + +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). 
+ +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_headerBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). + +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). + +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). +Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ + + +/* *** DECOMPRESSION *** */ + +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). + maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize); + +/*! FSE_readNCount_bmi2(): + * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. + */ +FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize, int bmi2); + +/*! 
Constructor and Destructor of FSE_DTable. + Note that its size depends on 'tableLog' */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); +FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); + +/*! FSE_buildDTable(): + Builds 'dt', which must be already allocated, using FSE_createDTable(). + return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_decompress_usingDTable(): + Decompress compressed source `cSrc` of size `cSrcSize` using `dt` + into `dst` which must be already allocated. + @return : size of regenerated data (necessarily <= `dstCapacity`), + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); + +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) + +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. +The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). 
+ +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). +The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) +*/ + +#endif /* FSE_H */ + +#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY) +#define FSE_H_FSE_STATIC_LINKING_ONLY + +/* *** Dependency *** */ +/**** start inlining bitstream.h ****/ +/* ****************************************************************** + * bitstream + * Part of FSE library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif +/* +* This API consists of small unitary functions, which must be inlined for best performance. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. 
+*/ + +/*-**************************************** +* Dependencies +******************************************/ +/**** skipping file: mem.h ****/ +/**** skipping file: compiler.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: error_private.h ****/ + + +/*========================================= +* Target specific +=========================================*/ +#ifndef ZSTD_NO_INTRINSICS +# if defined(__BMI__) && defined(__GNUC__) +# include <immintrin.h> /* support for bextr (experimental) */ +# elif defined(__ICCARM__) +# include <intrinsics.h> +# endif +#endif + +#define STREAM_ACCUMULATOR_MIN_32 25 +#define STREAM_ACCUMULATOR_MIN_64 57 +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) + + +/*-****************************************** +* bitStream encoding API (write forward) +********************************************/ +/* bitStream can mix input from multiple sources. + * A critical property of these streams is that they encode and decode in **reverse** direction. + * So the first bit sequence you add will be the last to be read, like a LIFO stack. + */ +typedef struct { + size_t bitContainer; + unsigned bitPos; + char* startPtr; + char* ptr; + char* endPtr; +} BIT_CStream_t; + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); + +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. +* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. +* +* bits are first added to a local register. +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. 
+* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. +* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ + + +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef struct { + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; + const char* limitPtr; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. 
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). +*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/*-************************************************************** +* Internal functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val) ^ 31; +# else + unsigned long r = 0; + return _BitScanReverse(&r, val) ? 
(unsigned)r : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; +# endif + } +} + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) + +/*-************************************************************** +* bitStream encoding +****************************************************************/ +/*! BIT_initCStream() : + * `dstCapacity` must be > sizeof(size_t) + * @return : 0 if success, + * otherwise an error code (can be tested using ERR_isError()) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, + void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); + return 0; +} + +/*! BIT_addBits() : + * can add up to 31 bits into `bitC`. + * Note : does not check for register overflow ! 
*/ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); + assert(nbBits < BIT_MASK_SIZE); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, + * meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + assert((value>>nbBits) == 0); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_flushBits() : + * assumption : bitContainer has not overflowed + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. + * overflow will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! 
BIT_closeCStream() : + * @return : size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : + * Initialize a BIT_DStream_t. + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. + * `srcSize` must be the *exact* size of the bitStream, in bytes. + * @return : size of stream (== srcSize), or an errorCode if a problem is detected + */ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + bitD->start = (const char*)srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + } else { + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + /* fall-through */ + + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + /* fall-through */ + + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + /* fall-through */ + + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + /* fall-through */ + + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + /* fall-through */ + + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + /* fall-through */ + + default: break; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ + } + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +{ + return bitContainer >> start; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +{ + U32 const regMask = sizeof(bitContainer)*8 - 1; + /* if start > regMask, bitstream is corrupted, and result is undefined */ + assert(nbBits < BIT_MASK_SIZE); + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ +#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 + return _bzhi_u64(bitContainer, nbBits); +#else + assert(nbBits < BIT_MASK_SIZE); + return bitContainer & BIT_mask[nbBits]; +#endif +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +{ + /* arbitrate between double-shift and shift+mask */ +#if 1 + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, + * bitstream is likely corrupted, and result is undefined */ + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); +#else + /* this code path is slower on my os-x laptop */ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); +#endif +} + +/*! 
BIT_lookBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); +} + +MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : + * unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +{ + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; +} + +/*! BIT_reloadDStream() : + * Refill `bitD` from buffer previously set in BIT_initDStream() . + * This function is safe, it guarantees it will not read beyond src buffer. 
+ * @return : status of `BIT_DStream_t` internal register. + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->limitPtr) { + return BIT_reloadDStreamFast(bitD); + } + if (bitD->ptr == bitD->start) { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + /* start < ptr < limitPtr */ + { U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ + return result; + } +} + +/*! BIT_endOfDStream() : + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). 
+ */ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ +/**** ended inlining bitstream.h ****/ + + +/* ***************************************** +* Static allocation +*******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog))) + +/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */ +#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable)) +#define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable)) + + +/* ***************************************** + * FSE advanced API + ***************************************** */ + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); +/**< same as FSE_optimalTableLog(), which used `minus==2` */ + +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). + * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. 
+ */ +#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) +size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); +/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ + +size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); +/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. + */ +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2))) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) +size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) +#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) +FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ + +size_t FSE_buildDTable_raw (FSE_DTable* dt, 
unsigned nbBits); +/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ + +size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); +/**< build a fake FSE_DTable, designed to always generate the same symbolValue */ + +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)) +#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); +/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ + +typedef enum { + FSE_repeat_none, /**< Cannot use the previous table */ + FSE_repeat_check, /**< Can use the previous table but it must be checked */ + FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } FSE_repeat; + +/* ***************************************** +* FSE symbol compression API +*******************************************/ +/*! + This API consists of small unitary functions, which highly benefit from being inlined. + Hence their body are included in next section. 
+*/ +typedef struct { + ptrdiff_t value; + const void* stateTable; + const void* symbolTT; + unsigned stateLog; +} FSE_CState_t; + +static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct); + +static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol); + +static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); + +/**< +These functions are inner components of FSE_compress_usingCTable(). +They allow the creation of custom streams, mixing multiple tables and bit sources. + +A key property to keep in mind is that encoding and decoding are done **in reverse direction**. +So the first symbol you will encode is the last you will decode, like a LIFO stack. + +You will need a few variables to track your CStream. They are : + +FSE_CTable ct; // Provided by FSE_buildCTable() +BIT_CStream_t bitStream; // bitStream tracking structure +FSE_CState_t state; // State tracking structure (can have several) + + +The first thing to do is to init bitStream and state. + size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize); + FSE_initCState(&state, ct); + +Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError(); +You can then encode your input data, byte after byte. +FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time. +Remember decoding will be done in reverse direction. + FSE_encodeByte(&bitStream, &state, symbol); + +At any time, you can also add any bit sequence. +Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders + BIT_addBits(&bitStream, bitField, nbBits); + +The above methods don't commit data to memory, they just store it into local register, for speed. +Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +Writing data to memory is a manual operation, performed by the flushBits function. 
+ BIT_flushBits(&bitStream); + +Your last FSE encoding operation shall be to flush your last state value(s). + FSE_flushState(&bitStream, &state); + +Finally, you must close the bitStream. +The function returns the size of CStream in bytes. +If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible) +If there is an error, it returns an errorCode (which can be tested using FSE_isError()). + size_t size = BIT_closeCStream(&bitStream); +*/ + + +/* ***************************************** +* FSE symbol decompression API +*******************************************/ +typedef struct { + size_t state; + const void* table; /* precise table may vary, depending on U16 */ +} FSE_DState_t; + + +static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt); + +static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); + +/**< +Let's now decompose FSE_decompress_usingDTable() into its unitary components. +You will decode FSE-encoded symbols from the bitStream, +and also any other bitFields you put in, **in reverse order**. + +You will need a few variables to track your bitStream. They are : + +BIT_DStream_t DStream; // Stream context +FSE_DState_t DState; // State context. Multiple ones are possible +FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable() + +The first thing to do is to init the bitStream. + errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize); + +You should then retrieve your initial state(s) +(in reverse flushing order if you have several ones) : + errorCode = FSE_initDState(&DState, &DStream, DTablePtr); + +You can then decode your data, symbol after symbol. +For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'. +Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out). 
+ unsigned char symbol = FSE_decodeSymbol(&DState, &DStream); + +You can retrieve any bitfield you eventually stored into the bitStream (in reverse order) +Note : maximum allowed nbBits is 25, for 32-bits compatibility + size_t bitField = BIT_readBits(&DStream, nbBits); + +All above operations only read from local register (which size depends on size_t). +Refueling the register from memory is manually performed by the reload method. + endSignal = FSE_reloadDStream(&DStream); + +BIT_reloadDStream() result tells if there is still some more data to read from DStream. +BIT_DStream_unfinished : there is still some data left into the DStream. +BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled. +BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed. +BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted. + +When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop, +to properly detect the exact end of stream. +After each decoded symbol, check if DStream is fully consumed using this simple test : + BIT_reloadDStream(&DStream) >= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. 
+ FSE_endOfDState(&DState); +*/ + + +/* ***************************************** +* FSE unsafe API +*******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ + + +/* ***************************************** +* Implementation of inlined functions +*******************************************/ +typedef struct { + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) +{ + const void* ptr = ct; + const U16* u16ptr = (const U16*) ptr; + const U32 tableLog = MEM_read16(ptr); + statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; + statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1); + statePtr->stateLog = tableLog; +} + + +/*! FSE_initCState2() : +* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) +* uses the smallest state value possible, saving the cost of this symbol */ +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + } +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) +{ + FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 const nbBitsOut = (U32)((statePtr->value 
+ symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + + +/* FSE_getMaxNbBits() : + * Approximate maximum cost of a symbol, in bits. + * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; +} + +/* FSE_bitCost() : + * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(tableLog < 16); + assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ + { U32 const tableSize = 1 << tableLog; + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + 
assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + } +} + + +/* ====== Decompression ====== */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! 
FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + + +#ifndef FSE_COMMONDEFS_ONLY + +/* ************************************************************** +* Tuning parameters +****************************************************************/ +/*!MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#ifndef FSE_MAX_MEMORY_USAGE +# define FSE_MAX_MEMORY_USAGE 14 +#endif +#ifndef FSE_DEFAULT_MEMORY_USAGE +# define FSE_DEFAULT_MEMORY_USAGE 13 +#endif +#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE) +# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE" +#endif + +/*!FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. 
+* Required for proper stack allocation */ +#ifndef FSE_MAX_SYMBOL_VALUE +# define FSE_MAX_SYMBOL_VALUE 255 +#endif + +/* ************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION +#define FSE_DECODE_TYPE FSE_decode_t + + +#endif /* !FSE_COMMONDEFS_ONLY */ + + +/* *************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX +# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + +#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3) + + +#endif /* FSE_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif +/**** ended inlining fse.h ****/ +#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ +/**** start inlining huf.h ****/ +/* ****************************************************************** + * huff0 huffman codec, + * part of Finite State Entropy library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef HUF_H_298734234 +#define HUF_H_298734234 + +/* *** Dependencies *** */ +/**** skipping file: zstd_deps.h ****/ + + +/* *** library symbols visibility *** */ +/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, + * HUF symbols remain "private" (internal symbols for library only). + * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define HUF_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ +#else +# define HUF_PUBLIC_API +#endif + + +/* ========================== */ +/* *** simple functions *** */ +/* ========================== */ + +/** HUF_compress() : + * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. + * 'dst' buffer must be already allocated. + * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). + * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + * @return : size of compressed data (<= `dstCapacity`). + * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) + */ +HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/** HUF_decompress() : + * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', + * into already allocated buffer 'dst', of minimum size 'dstSize'. 
+ * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. + * Note : in contrast with FSE, HUF_decompress can regenerate + * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + * because it knows size to regenerate (originalSize). + * @return : size of regenerated data (== originalSize), + * or an error code, which can be tested using HUF_isError() + */ +HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize); + + +/* *** Tool functions *** */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ + +/* Error Management */ +HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ + + +/* *** Advanced function *** */ + +/** HUF_compress2() : + * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. + * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . + * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ +HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog); + +/** HUF_compress4X_wksp() : + * Same as HUF_compress2(), but uses externally allocated `workSpace`. 
+ * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ +#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) +#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) +HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize); + +#endif /* HUF_H_298734234 */ + +/* ****************************************************************** + * WARNING !! + * The following section contains advanced and experimental definitions + * which shall never be used in the context of a dynamic library, + * because they are not guaranteed to remain stable in the future. + * Only consider them in association with static linking. + * *****************************************************************/ +#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY) +#define HUF_H_HUF_STATIC_LINKING_ONLY + +/* *** Dependencies *** */ +/**** skipping file: mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ + + +/* *** Constants *** */ +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ +#define HUF_SYMBOLVALUE_MAX 255 + +#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" 
+#endif + + +/* **************************************** +* Static allocation +******************************************/ +/* HUF buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of HUF's Compression Table */ +/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt */ +typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */ +#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */ + +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) +#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } + + +/* **************************************** +* Advanced decompression functions +******************************************/ +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +#endif + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, 
void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif + + +/* **************************************** + * HUF detailed API + * ****************************************/ + +/*! HUF_compress() does the following: + * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") + * 2. (optional) refine tableLog using HUF_optimalTableLog() + * 3. build Huffman table from count using HUF_buildCTable() + * 4. save Huffman table to memory buffer using HUF_writeCTable() + * 5. encode the data stream using HUF_compress4X_usingCTable() + * + * The following API allows targeting specific sub-functions for advanced tasks. + * For example, it's possible to compress several blocks using the same 'CTable', + * or to save and regenerate 'CTable' using external methods. 
+ */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); + +typedef enum { + HUF_repeat_none, /**< Cannot use the previous table */ + HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } HUF_repeat; +/** HUF_compress4X_repeat() : + * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress4X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. 
+ * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. + */ +#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) +#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_buildCTable_wksp (HUF_CElt* tree, + const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize); + +/*! HUF_readStats() : + * Read compact Huffman tree, saved by HUF_writeCTable(). + * `huffWeight` is destination buffer. + * @return : size read from `src` , or an error Code . + * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize); + +/*! HUF_readStats_wksp() : + * Same as HUF_readStats() but takes an external workspace which must be + * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
+ */ +#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) +#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + int bmi2); + +/** HUF_readCTable() : + * Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); + +/** HUF_getNbBits() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : is not inlined, as HUF_CElt definition is private + * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); + +/* + * HUF_decompress() does the following: + * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics + * 2. build Huffman table from save, using HUF_readDTableX?() + * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() + */ + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); + +/** + * The minimum workspace size for the `workSpace` used in + * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). + * + * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when + * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. + * Buffer overflow errors may potentially occur if code modifications result in + * a required workspace size greater than that specified in the following + * macro. 
+ */ +#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) +#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif + + +/* ====================== */ +/* single stream variants */ +/* ====================== */ + +size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +/** HUF_compress1X_repeat() : + * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. 
+ * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress1X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +#endif + +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_usingDTable(void* dst, size_t 
maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif + +/* BMI2 variants. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. + */ +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif + +#endif /* HUF_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif +/**** ended inlining huf.h ****/ + + +/*=== Version ===*/ +unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } + + +/*=== Error Management ===*/ +unsigned FSE_isError(size_t code) { return ERR_isError(code); } +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } + +unsigned HUF_isError(size_t code) { return ERR_isError(code); } +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } + + +/*-************************************************************** +* FSE NCount encoding-decoding +****************************************************************/ +static U32 FSE_ctz(U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + 
return _BitScanForward(&r, val) ? (unsigned)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_ctz(val); +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return __CTZ(val); +# else /* Software version */ + U32 count = 0; + while ((val & 1) == 0) { + val >>= 1; + ++count; + } + return count; +# endif + } +} + +FORCE_INLINE_TEMPLATE +size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + unsigned const maxSV1 = *maxSVPtr + 1; + int previous0 = 0; + + if (hbSize < 8) { + /* This function only works when hbSize >= 8 */ + char buffer[8] = {0}; + ZSTD_memcpy(buffer, headerBuffer, hbSize); + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, + buffer, sizeof(buffer)); + if (FSE_isError(countSize)) return countSize; + if (countSize > hbSize) return ERROR(corruption_detected); + return countSize; + } } + assert(hbSize >= 8); + + /* init */ + ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ + bitStream = MEM_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<> 1; + while (repeats >= 12) { + charnum += 3 * 12; + if (LIKELY(ip <= iend-7)) { + ip += 3; + } else { + bitCount -= (int)(8 * (iend - 7 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; + } + charnum += 3 * repeats; + bitStream >>= 2 * repeats; + bitCount += 2 * 
repeats; + + /* Add the final repeat which isn't 0b11. */ + assert((bitStream & 3) < 3); + charnum += bitStream & 3; + bitCount += 2; + + /* This is an error, but break and return an error + * at the end, because returning out of a loop makes + * it harder for the compiler to optimize. + */ + if (charnum >= maxSV1) break; + + /* We don't need to set the normalized count to 0 + * because we already memset the whole buffer to 0. + */ + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert((bitCount >> 3) <= 3); /* For first condition to work */ + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } + { + int const max = (2*threshold-1) - remaining; + int count; + + if ((bitStream & (threshold-1)) < (U32)max) { + count = bitStream & (threshold-1); + bitCount += nbBits-1; + } else { + count = bitStream & (2*threshold-1); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + /* When it matters (small blocks), this is a + * predictable branch, because we don't use -1. + */ + if (count >= 0) { + remaining -= count; + } else { + assert(count == -1); + remaining += count; + } + normalizedCounter[charnum++] = (short)count; + previous0 = !count; + + assert(threshold > 1); + if (remaining < threshold) { + /* This branch can be folded into the + * threshold update condition because we + * know that threshold > 1. 
+ */ + if (remaining <= 1) break; + nbBits = BIT_highbit32(remaining) + 1; + threshold = 1 << (nbBits - 1); + } + if (charnum >= maxSV1) break; + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } } + if (remaining != 1) return ERROR(corruption_detected); + /* Only possible when there are too many zeros. */ + if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall); + if (bitCount > 32) return ERROR(corruption_detected); + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + return ip-istart; +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_readNCount_body_default( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} +#endif + +size_t FSE_readNCount_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); + } +#endif + (void)bmi2; + return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +size_t FSE_readNCount( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); +} + + 
+/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . +*/ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); +} + +FORCE_INLINE_TEMPLATE size_t +HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ + U32 weightTotal; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; + /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... 
*/ + + if (iSize >= 128) { /* special header */ + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + { U32 n; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } } } + else { /* header compressed with FSE (normal case) */ + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2); + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + weightTotal = 0; + { U32 n; for (n=0; n= HUF_TABLELOG_MAX) return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } } + if (weightTotal == 0) return ERROR(corruption_detected); + + /* get last non-null symbol weight (implied, total must be 2^n) */ + { U32 const tableLog = BIT_highbit32(weightTotal) + 1; + if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); + *tableLogPtr = tableLog; + /* determine last weight */ + { U32 const total = 1 << tableLog; + U32 const rest = total - weightTotal; + U32 const verif = 1 << BIT_highbit32(rest); + U32 const lastWeight = BIT_highbit32(rest) + 1; + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + return iSize+1; +} + +/* Avoids the FORCE_INLINE of the _body() function. 
*/ +static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1); +} +#endif + +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); + } +#endif + (void)bmi2; + return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); +} +/**** ended inlining common/entropy_common.c ****/ +/**** start inlining common/error_private.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* The purpose of this file is to have a single list of error strings embedded in binary */ + +/**** skipping file: error_private.h ****/ + +const char* ERR_getErrorString(ERR_enum code) +{ +#ifdef ZSTD_STRIP_ERROR_STRINGS + (void)code; + return "Error strings stripped"; +#else + static const char* const notErrorCode = "Unspecified error code"; + switch( code ) + { + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(version_unsupported): return "Version not supported"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; + case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case 
PREFIX(srcSize_wrong): return "Src size is incorrect"; + case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; + /* following error codes are not stable and may be removed or changed in a future version */ + case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; + case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; + case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; + case PREFIX(maxCode): + default: return notErrorCode; + } +#endif +} +/**** ended inlining common/error_private.c ****/ +/**** start inlining common/fse_decompress.c ****/ +/* ****************************************************************** + * FSE : Finite State Entropy decoder + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + + +/* ************************************************************** +* Includes +****************************************************************/ +/**** skipping file: debug.h ****/ +/**** skipping file: bitstream.h ****/ +/**** skipping file: compiler.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ +/**** skipping file: error_private.h ****/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError +#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ +FSE_DTable* FSE_createDTable (unsigned tableLog) +{ + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); +} + +void FSE_freeDTable (FSE_DTable* dt) +{ + ZSTD_free(dt); +} + +static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) +{ + void* const tdPtr = dt+1; /* because *dt is unsigned, 
32-bits aligned on 32-bits */ + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); + U16* symbolNext = (U16*)workSpace; + BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + { FSE_DTableHeader DTableH; + DTableH.tableLog = (U16)tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; utableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + void* dPtr = dt + 1; + FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSV1 = tableMask+1; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) { + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state1); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state2); + break; + } + + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state2); + if 
(BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state1); + break; + } } + + return op-ostart; +} + + +size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); +} + +FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( + void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize, + unsigned maxLog, void* workSpace, size_t wkspSize, + int bmi2) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + short counting[FSE_MAX_SYMBOL_VALUE+1]; + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + FSE_DTable* const dtable = (FSE_DTable*)workSpace; + + /* normal FSE decoding mode */ + size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); + if (FSE_isError(NCountLength)) return NCountLength; + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + assert(NCountLength <= cSrcSize); + ip += NCountLength; + cSrcSize -= NCountLength; + + if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); + workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog); + wkspSize -= FSE_DTABLE_SIZE(tableLog); + + CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) ); + + { + const void* ptr = 
dtable; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0); + } +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); +} +#endif + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); + } +#endif + (void)bmi2; + return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); +} + + +typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { + U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)]; + return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp)); +} + +size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) +{ + /* Static analyzer seems unable to 
understand this table will be properly initialized later */ + U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp)); +} +#endif + + +#endif /* FSE_COMMONDEFS_ONLY */ +/**** ended inlining common/fse_decompress.c ****/ +/**** start inlining common/zstd_common.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +/*-************************************* +* Dependencies +***************************************/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +/**** skipping file: error_private.h ****/ +/**** start inlining zstd_internal.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE + +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. 
+ * It also contains a few functions useful to at least 2 of them + * and which benefit from being inlined */ + +/*-************************************* +* Dependencies +***************************************/ +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) +#include +#endif +/**** skipping file: compiler.h ****/ +/**** skipping file: mem.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: error_private.h ****/ +#define ZSTD_STATIC_LINKING_ONLY +/**** start inlining ../zstd.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 + +/* ====== Dependency ======*/ +#include /* INT_MAX */ +#include /* size_t */ + + +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDLIB_API ZSTDLIB_VISIBILITY +#endif + + +/******************************************************************************* + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level 
and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). + + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ + +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 4 +#define ZSTD_VERSION_RELEASE 9 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). 
*/ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. */ +ZSTDLIB_API const char* ZSTD_versionString(void); + +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + +/* ************************************* + * Constants + ***************************************/ + +/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). 
*/ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. 
+ * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_findFrameCompressedSize() : + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + + +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? 
(((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ + + +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. 
+ */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*************************************** +* Advanced compression API +***************************************/ + +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. + * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supercedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ + + +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. 
+ Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. + * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. + * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. 
+ * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. + * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. 
+ * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. + * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. 
+ * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ + + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ + + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. + */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. 
+ * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. + * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. + * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. 
+ * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009 +} ZSTD_cParameter; + +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; + +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); + +/*! ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). 
+ * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. + * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. + */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); + +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; + +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. 
+ * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); + +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*************************************** +* Advanced decompression API +***************************************/ + +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). 
+ * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. + */ + +typedef enum { + + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + +/*! ZSTD_DCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). 
+ * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + + +/**************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + + + +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. 
+* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. +* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). +* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. 
+* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+* +* *******************************************************************/ + +typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ +/*===== ZSTD_CStream management functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); + +/*===== Streaming compression functions =====*/ +typedef enum { + ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ + ZSTD_e_flush=1, /* flush any data provided so far, + * it creates (at least) one new block, that can be decoded immediately on reception; + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ + ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. + * note that frame is only closed after compressed data is fully flushed (return value == 0). + * After that point, any additional data starts a new frame. + * note : each frame is independent (does not reference any content from previous frame). + : note : multithreaded compression will block to flush as much output as possible. */ +} ZSTD_EndDirective; + +/*! ZSTD_compressStream2() : + * Behaves about the same as ZSTD_compressStream, with additional control on end directive. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) + * - output->pos must be <= dstCapacity, input->pos must be <= srcSize + * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. 
+ * - endOp must be a valid directive + * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. + * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. + * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. + * - @return provides a minimum amount of data remaining to be flushed from internal buffers + * or an error code, which can be tested using ZSTD_isError(). + * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. + * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. + * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), + * only ZSTD_e_end or ZSTD_e_flush operations are allowed. + * Before starting a new compression job, or changing compression parameters, + * it is required to fully flush internal buffers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. 
+ * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ + + +/* ***************************************************************************** + * This following is a legacy streaming API. + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Advanced parameters and dictionary compression can only be used through the + * new API. + ******************************************************************************/ + +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error). 
+ */
+ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
+ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
+ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+
+
+/*-***************************************************************************
+* Streaming decompression - HowTo
+*
+* A ZSTD_DStream object is required to track streaming operations.
+* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
+* ZSTD_DStream objects can be re-used multiple times.
+*
+* Use ZSTD_initDStream() to start a new decompression operation.
+* @return : recommended first input size
+* Alternatively, use advanced API to set specific properties.
+*
+* Use ZSTD_decompressStream() repetitively to consume your input.
+* The function will update both `pos` fields.
+* If `input.pos < input.size`, some input has not been consumed.
+* It's up to the caller to present again remaining data.
+* The function tries to flush all data decoded immediately, respecting output buffer size.
+* If `output.pos < output.size`, decoder has flushed everything it could.
+* But if `output.pos == output.size`, there might be some data left within internal buffers.
+* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
+* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
+* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ + +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); + +/*===== Streaming decompression functions =====*/ + +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + + +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see dictBuilder/zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. 
+ * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. 
*/ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); + +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/******************************** + * Dictionary helper functions + *******************************/ + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! 
ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompress the frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could be for one of the following reasons :
+ *  - The frame does not require a dictionary to be decoded (most common case).
+ *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+
+
+/*******************************************************************************
+ * Advanced dictionary and prefix API
+ *
+ * This API allows dictionaries to be used with ZSTD_compress2(),
+ * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
+ * only reset when the context is reset with ZSTD_reset_parameters or
+ * ZSTD_reset_session_and_parameters. Prefixes are single-use.
+ ******************************************************************************/
+
+
+/*! ZSTD_CCtx_loadDictionary() :
+ *  Create an internal CDict from `dict` buffer.
+ *  Decompression will have to use same dictionary.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_refCDict() : + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. + * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. 
*/ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); + +/*! ZSTD_DCtx_loadDictionary() : + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). 
+ * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_refDDict() : + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. + * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +/*! ZSTD_DCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) to decompress next frame. 
+ * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); + +/* === Memory management === */ + +/*! ZSTD_sizeof_*() : + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + +#endif /* ZSTD_H_235446 */ + + +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. 
+ * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. + * ***************************************************************************************/ + +#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY + +/**************************************************************************************** + * experimental API (static linking only) + **************************************************************************************** + * The following symbols and constants + * are not planned to join "stable API" status in the near future. + * They can still change in future versions. + * Some of them are planned to remain in the static_only section indefinitely. + * Some of them might be removed in the future (especially when redundant with existing stable functions) + * ***************************************************************************************/ + +#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ +#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ +#define ZSTD_SKIPPABLEHEADERSIZE 8 + +/* compression parameter bounds */ +#define ZSTD_WINDOWLOG_MAX_32 30 +#define ZSTD_WINDOWLOG_MAX_64 31 +#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX_32 29 +#define ZSTD_CHAINLOG_MAX_64 30 +#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? 
ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ +#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ +#define ZSTD_STRATEGY_MIN ZSTD_fast +#define ZSTD_STRATEGY_MAX ZSTD_btultra2 + + +#define ZSTD_OVERLAPLOG_MIN 0 +#define ZSTD_OVERLAPLOG_MAX 9 + +#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame + * requiring larger than (1< 0: + * If litLength != 0: + * rep == 1 --> offset == repeat_offset_1 + * rep == 2 --> offset == repeat_offset_2 + * rep == 3 --> offset == repeat_offset_3 + * If litLength == 0: + * rep == 1 --> offset == repeat_offset_2 + * rep == 2 --> offset == repeat_offset_3 + * rep == 3 --> offset == repeat_offset_1 - 1 + * + * Note: This field is optional. ZSTD_generateSequences() will calculate the value of + * 'rep', but repeat offsets do not necessarily need to be calculated from an external + * sequence provider's perspective. For example, ZSTD_compressSequences() does not + * use this 'rep' field at all (as of now). 
+ */ +} ZSTD_Sequence; + +typedef struct { + unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /**< dispatch table : larger == faster, more memory */ + unsigned searchLog; /**< nb of searches : larger == more compression, slower */ + unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ + unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ +} ZSTD_compressionParameters; + +typedef struct { + int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ + int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ + int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ +} ZSTD_frameParameters; + +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; + +typedef enum { + ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ + ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ +} ZSTD_dictContentType_e; + +typedef enum { + ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ + ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. 
*/ +} ZSTD_dictLoadMethod_e; + +typedef enum { + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. + * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. 
+ * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; + +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ +} ZSTD_literalCompressionMode_e; + + +/*************************************** +* Frame size functions +***************************************/ + +/*! ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. 
there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. 
+ *  note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
+ *           in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
+ *  note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
+ *              upper-bound = # blocks * min(128 KB, Window_Size)
+ */
+ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
+
+/*! ZSTD_frameHeaderSize() :
+ *  srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * @return : size of the Frame Header,
+ *           or an error code (if srcSize is too small) */
+ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+
+typedef enum {
+  ZSTD_sf_noBlockDelimiters = 0,         /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
+  ZSTD_sf_explicitBlockDelimiters = 1    /* Representation of ZSTD_Sequence contains explicit block delimiters */
+} ZSTD_sequenceFormat_e;
+
+/*! ZSTD_generateSequences() :
+ * Generate sequences using ZSTD_compress2, given a source buffer.
+ *
+ * Each block will end with a dummy sequence
+ * with offset == 0, matchLength == 0, and litLength == length of last literals.
+ * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
+ * simply acts as a block delimiter.
+ *
+ * zc can be used to insert custom compression params.
+ * This function invokes ZSTD_compress2
+ *
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
+ * @return : number of sequences generated
+ */
+
+ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+                                          size_t outSeqsSize, const void* src, size_t srcSize);
+
+/*! ZSTD_mergeBlockDelimiters() :
+ * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
+ * by merging them into the literals of the next sequence.
+ * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters + * @return : number of sequences left after merging + */ +ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); + +/*! ZSTD_compressSequences() : + * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. + * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * The entire source is compressed into a single frame. + * + * The compression behavior changes based on cctx params. In particular: + * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on + * the block size derived from the cctx, and sequences may be split. This is the default setting. + * + * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, there are other important cctx params. + * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. 
It has a minimum value of ZSTD_MINMATCH_MIN.
+ * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
+ * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
+ * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
+ *
+ * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
+ * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
+ * and cannot emit an RLE block that disagrees with the repcode history
+ * @return : final compressed size or a ZSTD error.
+ */
+ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
+ const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+ const void* src, size_t srcSize);
+
+
+/*! ZSTD_writeSkippableFrame() :
+ * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer.
+ *
+ * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number,
+ * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
+ * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so
+ * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
+ *
+ * Returns an error if destination buffer is not large enough, if the source size is not representable
+ * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid).
+ *
+ * @return : number of bytes written or a ZSTD error. 
+ */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + +/*************************************** +* Memory management +***************************************/ + +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. 
+ * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. + * Note : CStream size estimation is only correct for single-threaded compression. + * ZSTD_DStream memory budget depends on window Size. + * This information can be passed manually, using ZSTD_estimateDStreamSize, + * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), + * an internal ?Dict will be created, which additional size is not estimated here. + * In this case, get total size by adding ZSTD_estimate?DictSize */ +ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); + +/*! ZSTD_estimate?DictSize() : + * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). + * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). + * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. 
+ */ +ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); + +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). + * Note 2 : there is no corresponding "free" function. + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. 
+ */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ + +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ + +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); + +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); + + +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. 
+ */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static +#ifdef __GNUC__ +__attribute__((__unused__)) +#endif +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ + +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); + +/* ! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + + +/* + * This API is temporary and is expected to change or disappear in the future! 
+ */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + + +/*************************************** +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. 
+ * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ +ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now REDUNDANT. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning in some future version */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); + + +/*! 
ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ + + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. 
+ * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 + +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 + +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 + +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 + +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_t enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. + * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior! 
+ * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. 
+ * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 + +/* ZSTD_c_stableInBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the compressor, and + * compression will fail if it ever changes. This means the only flush + * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end + * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos) + * MUST not be modified during compression or you will get data corruption. + * + * When this flag is enabled zstd won't allocate an input window buffer, + * because the user guarantees it can reference the ZSTD_inBuffer until + * the frame is complete. But, it will still allocate an output buffer + * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also + * avoid the memcpy() from the input buffer to the input window buffer. + * + * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used. + * That means this flag cannot be used with ZSTD_compressStream(). 
+ *
+ * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, compression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
+ * not be modified during compression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_inBuffer to find
+ * matches. Normally zstd maintains its own window buffer for this purpose,
+ * but passing this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
+
+/* ZSTD_c_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_outBuffer will not be resized between
+ * calls. Specifically: (out.size - out.pos) will never grow. This gives the
+ * compressor the freedom to say: If the compressed data doesn't fit in the
+ * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
+ * always decompress directly into the output buffer, instead of decompressing
+ * into an internal buffer and copying to the output buffer.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer. It will still allocate the
+ * input window buffer (see ZSTD_c_stableInBuffer).
+ *
+ * Zstd will check that (out.size - out.pos) never grows and return an error
+ * if it does. While not strictly necessary, this should prevent surprises.
+ */
+#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
+
+/* ZSTD_c_blockDelimiters
+ * Default is 0 == ZSTD_sf_noBlockDelimiters.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ *
+ * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
+ * and last literals, which are defined as sequences with offset == 0 and matchLength == 0. 
+ * See the definition of ZSTD_Sequence for more specifics.
+ */
+#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
+
+/* ZSTD_c_validateSequences
+ * Default is 0 == disabled. Set to 1 to enable sequence validation.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
+ * during function execution.
+ *
+ * Without validation, providing a sequence that does not conform to the zstd spec will cause
+ * undefined behavior, and may produce a corrupted block.
+ *
+ * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements) then the function will bail out and
+ * return an error.
+ *
+ */
+#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
+
+/*! ZSTD_CCtx_getParameter() :
+ * Get the requested compression parameter value, selected by enum ZSTD_cParameter,
+ * and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
+
+
+/*! ZSTD_CCtx_params :
+ * Quick howto :
+ * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
+ * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
+ * an existing ZSTD_CCtx_params structure.
+ * This is similar to
+ * ZSTD_CCtx_setParameter().
+ * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
+ * an existing CCtx.
+ * These parameters will be applied to
+ * all subsequent frames.
+ * - ZSTD_compressStream2() : Do compression using the CCtx.
+ * - ZSTD_freeCCtxParams() : Free the memory.
+ *
+ * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
+ * for static allocation of CCtx for single-threaded compression. 
+ */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); + +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + +/*! ZSTD_CCtxParams_setParameter() : + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. 
+ * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); + + +/*************************************** +* Advanced decompression functions +***************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. 
+ * This saves memory if `dict` remains around.
+ * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_advanced() :
+ * Same as ZSTD_DCtx_loadDictionary(),
+ * but gives direct control over
+ * how to load the dictionary (by copy ? by reference ?)
+ * and how to interpret it (automatic ? force raw mode ? full mode only ?). */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_refPrefix_advanced() :
+ * Same as ZSTD_DCtx_refPrefix(), but gives finer control over
+ * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_setMaxWindowSize() :
+ * Refuses allocating internal buffers for frames requiring a window size larger than provided limit.
+ * This protects a decoder context from reserving too much memory for itself (potential attack scenario).
+ * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
+ * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
+
+/*! ZSTD_DCtx_getParameter() :
+ * Get the requested decompression parameter value, selected by enum ZSTD_dParameter,
+ * and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()). 
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
+
+/* ZSTD_d_format
+ * experimental parameter,
+ * allowing selection between ZSTD_format_e input compression formats
+ */
+#define ZSTD_d_format ZSTD_d_experimentalParam1
+/* ZSTD_d_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the decompressor, and
+ * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
+ * MUST be large enough to fit the entire decompressed frame. This will be
+ * checked when the frame content size is known. The data in the ZSTD_outBuffer
+ * in the range [dst, dst + pos) MUST not be modified during decompression
+ * or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer, but it will still allocate
+ * an input buffer large enough to fit any compressed block. This will also
+ * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
+ * If you need to avoid the input buffer allocation use the buffer-less
+ * streaming API.
+ *
+ * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, decompression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
+ * not be modified during decompression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
+ * matches. Normally zstd maintains its own buffer for this purpose, but passing
+ * this flag tells zstd to use the user provided buffer. 
+ */
+#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
+
+/* ZSTD_d_forceIgnoreChecksum
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable
+ *
+ * Tells the decompressor to skip checksum validation during decompression, regardless
+ * of whether checksumming was specified during compression. This offers some
+ * slight performance benefits, and may be useful for debugging.
+ * Param has values of type ZSTD_forceIgnoreChecksum_e
+ */
+#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
+
+/* ZSTD_d_refMultipleDDicts
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable
+ *
+ * If enabled and dctx is allocated on the heap, then additional memory will be allocated
+ * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict()
+ * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead
+ * store all references. At decompression time, the appropriate dictID is selected
+ * from the set of DDicts based on the dictID in the frame.
+ *
+ * Usage is simply calling ZSTD_refDDict() on multiple dict buffers.
+ *
+ * Param has values of type ZSTD_refMultipleDDicts_e
+ *
+ * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory
+ * allocation for the hash table. ZSTD_freeDCtx() also frees this memory.
+ * Memory is allocated as per ZSTD_DCtx::customMem.
+ *
+ * Although this function allocates memory for the table, the user is still responsible for
+ * memory management of the underlying ZSTD_DDict* themselves.
+ */
+#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
+
+
+/*! ZSTD_DCtx_setFormat() :
+ * Instruct the decoder context about what kind of data to decode next.
+ * This instruction is mandatory to decode data without a fully-formed header,
+ * such as ZSTD_f_zstd1_magicless for example.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
*/ +ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); + +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); + + +/******************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ + +/*===== Advanced Streaming compression functions =====*/ + +/*! ZSTD_initCStream_srcSize() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); + +/*! 
ZSTD_initCStream_usingDict() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates of an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); + +/*! ZSTD_initCStream_advanced() : + * This function is deprecated, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); + +/*! 
ZSTD_initCStream_usingCDict() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. + * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); + +/*! ZSTD_resetCStream() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. 
+ * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. 
+ * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); + + +/*===== Advanced Streaming decompression functions =====*/ + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*! 
+ * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, + or ZSTD_compressBegin_advanced(), for finer parameter control. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). 
+ ZSTD_compressContinue() doesn't guarantee recovery after a failed compression.
+ - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
+ It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consist of multiple contiguous blocks)
+ - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps.
+ In which case, it will "discard" the relevant memory section from its history.
+
+ Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
+ It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+ Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
+
+ `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
+*/
+
+/*===== Buffer-less streaming compression functions =====*/
+ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct.
If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/** + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. + As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. 
+ + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. + For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! 
ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + + + +/* ============================ */ +/** Block level API */ +/* ============================ */ + +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). 
+ But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. 
+*/ + +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif +/**** ended inlining ../zstd.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: huf.h ****/ +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#endif +/**** start inlining xxhash.h ****/ +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (c) 2012-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. 
+ +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bits version, named XXH64, is available since r35. +It offers much better speed, but for 64-bits applications only. +Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + + +/* **************************** +* Definitions +******************************/ +/**** skipping file: zstd_deps.h ****/ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + +/* **************************** +* API modifier +******************************/ +/** XXH_PRIVATE_API +* This is useful if you want to include xxhash functions in `static` mode +* in order to inline them, and remove their symbol from the public list. +* Methodology : +* #define XXH_PRIVATE_API +* #include "xxhash.h" +* `xxhash.c` is automatically included. +* It's not useful to compile and link it as a separate module anymore. 
+*/ +#ifdef XXH_PRIVATE_API +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif /* XXH_PRIVATE_API */ + +/*!XXH_NAMESPACE, aka Namespace Emulation : + +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, + +you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library +with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). + +Note that no change is required within the calling program as long as it includes `xxhash.h` : +regular symbol name will be automatically translated by this header. 
+*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** +* Simple Hash Functions +******************************/ +typedef unsigned int XXH32_hash_t; +typedef unsigned long long XXH64_hash_t; + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); 
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*! +XXH32() : + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ + + +/* **************************** +* Streaming Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ + +/*! State allocation, compatible with dynamic libraries */ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + + +/* hash streaming */ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/* +These functions generate the xxHash of an input provided in multiple segments. +Note that, for small input, they are slower than single-call functions, due to state management. 
+For small input, prefer `XXH32()` and `XXH64()` . + +XXH state must first be allocated, using XXH*_createState() . + +Start a new hash by initializing state with a seed, using XXH*_reset(). + +Then, feed the hash state by calling XXH*_update() as many times as necessary. +Obviously, input must be allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, a hash value can be produced anytime, by using XXH*_digest(). +This function returns the nn-bits hash as an int or long long. + +It's still possible to continue inserting input into the hash state after a digest, +and generate some new hashes later on, by calling again XXH*_digest(). + +When done, free XXH state space if it was allocated dynamically. +*/ + + +/* ************************** +* Utils +****************************/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ +# define restrict /* disable restrict */ +#endif + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); + + +/* ************************** +* Canonical representation +****************************/ +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. +* The canonical representation uses human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. 
+*/ +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +#endif /* XXHASH_H_5627135585666179 */ + + + +/* ================================================================================================ + This section contains definitions which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + They shall only be used with static linking. + Never use these definitions in association with dynamic linking ! +=================================================================================================== */ +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) +#define XXH_STATIC_H_3543687687345 + +/* These definitions are only meant to allow allocation of XXH state + statically, on stack, or in a struct for example. + Do not use members directly. 
*/ + + struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned memsize; + unsigned reserved; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH32_state_t */ + + struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned memsize; + unsigned reserved[2]; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH64_state_t */ + + +# ifdef XXH_PRIVATE_API +/**** start inlining xxhash.c ****/ +/* + * xxHash - Fast Hash algorithm + * Copyright (c) 2012-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash homepage: http://www.xxhash.com + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). 
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ + defined(__ICCARM__) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. 
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for ZSTD_malloc(), ZSTD_free() */ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); } +static void XXH_free (void* p) { ZSTD_free(p); } +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); } + +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +#endif +/**** skipping file: xxhash.h ****/ + + +/* ************************************* +* Compiler Specific Options +***************************************/ +/**** skipping file: compiler.h ****/ + + +/* ************************************* +* Basic Types +***************************************/ +/**** skipping file: mem.h ****/ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. 
Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +#if defined(__ICCARM__) +# include +# define XXH_rotl32(x,r) __ROR(x,(32 - r)) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + 
((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? 
*(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************* +* Constants +***************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; + +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ************************** +* Utils +****************************/ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + + +/* *************************** +* Simple Hash Functions +*****************************/ + +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + +FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, 
XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p+4<=bEnd) { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_CREATESTATE_STATIC(state); + XXH32_reset(state, seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return 
XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, 
good for code maintenance, but unfortunately slow for small inputs */ + XXH64_CREATESTATE_STATIC(state); + XXH64_reset(state, seed); + XXH64_update(state, input, len); + return XXH64_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +/* ************************************************** +* Advanced Hash Functions +****************************************************/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + + +/*** Hash feed ***/ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + 
return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); 
p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem32; + const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; + U32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } + + h32 += state->total_len_32; + + while (p+4<=bEnd) { + h32 += XXH_readLE32(p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} + + + +/* **** XXH64 **** */ + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) 
return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + if (input != NULL) { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + } + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem64; + const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; + U64 h64; + + if 
(state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 + PRIME64_5; + } + + h64 += (U64) state->total_len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + +/* ************************** +* Canonical representation +****************************/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. 
+*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} +/**** ended inlining xxhash.c ****/ +# endif + +#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ + + +#if defined (__cplusplus) +} +#endif +/**** ended inlining xxhash.h ****/ + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ---- static assert (debug) --- */ +#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError + + +/*-************************************* +* shared macros +***************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) + +/** + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR +void _force_has_format_string(const char *format, ...) 
{ + (void)format; +} + +/** + * Ignore: this is an internal helper. + * + * We want to force this function invocation to be syntactically correct, but + * we don't want to force runtime evaluation of its arguments. + */ +#define _FORCE_HAS_FORMAT_STRING(...) \ + if (0) { \ + _force_has_format_string(__VA_ARGS__); \ + } + +/** + * Return the specified error if the condition evaluates to true. + * + * In debug modes, prints additional information. + * In order to do that (particularly, printing the conditional that failed), + * this can't just wrap RETURN_ERROR(). + */ +#define RETURN_ERROR_IF(cond, err, ...) \ + if (cond) { \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } + +/** + * Unconditionally return the specified error. + * + * In debug modes, prints additional information. + */ +#define RETURN_ERROR(err, ...) \ + do { \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } while(0); + +/** + * If the provided expression evaluates to an error code, returns that error code. + * + * In debug modes, prints additional information. + */ +#define FORWARD_IF_ERROR(err, ...) 
\ + do { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while(0); + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1<<12) + +#define ZSTD_REP_NUM 3 /* number of repcodes */ +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; + +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ + +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; + +#define ZSTD_FRAMECHECKSUMSIZE 4 + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 + +#define Litbits 8 +#define MaxLit ((1<= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); + + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { + /* Handle short offset copies. 
*/ + do { + COPY8(op, ip) + } while (op < oend); + } else { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first COPY16() call because the copy length is + * almost certain to be short, so the branches have different + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. + */ +#ifdef __aarch64__ + do { + COPY16(op, ip); + } + while (op < oend); +#else + ZSTD_copy16(op, ip); + if (16 >= length) return; + op += 16; + ip += 16; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); +#endif + } +} + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) { + ZSTD_memcpy(dst, src, length); + } + return length; +} + +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 + +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 + +/* Controls whether the input/output buffer is buffered or stable. 
*/ +typedef enum { + ZSTD_bm_buffered = 0, /* Buffer the input/output */ + ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ +} ZSTD_bufferMode_e; + + +/*-******************************************* +* Private declarations +*********************************************/ +typedef struct seqDef_s { + U32 offset; /* Offset code of the sequence */ + U16 litLength; + U16 matchLength; +} seqDef; + +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; /* ptr to end of sequences */ + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; + + /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. + */ + U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */ + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ +} seqStore_t; + +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; + +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->matchLength + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthID == 1) { + seqLen.litLength += 0xFFFF; + } + if (seqStore->longLengthID == 2) { + seqLen.matchLength += 0xFFFF; + } + } + return seqLen; +} + +/** + * Contains the compressed frame size and an upper-bound for the decompressed frame size. 
+ * Note: before using `compressedSize`, check for errors using ZSTD_isError(). + * similarly, before using `decompressedBound`, check for errors using: + * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` + */ +typedef struct { + size_t compressedSize; + unsigned long long decompressedBound; +} ZSTD_frameSizeInfo; /* decompress & legacy */ + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ + +/* custom memory allocation functions */ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); + + +MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val)^31; +# else + unsigned long r=0; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; +# endif + } +} + + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! 
*/ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ + + +typedef struct { + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; /* declared here for decompress and fullbench */ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr); + +/*! ZSTD_decodeSeqHeaders() : + * decode sequence header from src */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CCOMMON_H_MODULE */ +/**** ended inlining zstd_internal.h ****/ + + +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } + +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +#undef ZSTD_isError /* defined within zstd_internal.h */ +/*! ZSTD_isError() : + * tells if a return value is an error code + * symbol is required for external callers */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + +/*! ZSTD_getErrorName() : + * provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + +/*! ZSTD_getError() : + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } + +/*! 
ZSTD_getErrorString() : + * provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } + + + +/*=************************************************************** +* Custom allocator +****************************************************************/ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return ZSTD_malloc(size); +} + +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + ZSTD_memset(ptr, 0, size); + return ptr; + } + return ZSTD_calloc(1, size); +} + +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + ZSTD_free(ptr); + } +} +/**** ended inlining common/zstd_common.c ****/ + +/**** start inlining decompress/huf_decompress.c ****/ +/* ****************************************************************** + * huff0 huffman decoder, + * part of Finite State Entropy library + * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* ************************************************************** +* Dependencies +****************************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/error_private.h ****/ + +/* ************************************************************** +* Macros +****************************************************************/ + +/* These two optional macros force the use one way or another of the two + * Huffman decompression implementations. You can't force in both directions + * at the same time. + */ +#if defined(HUF_FORCE_DECOMPRESS_X1) && \ + defined(HUF_FORCE_DECOMPRESS_X2) +#error "Cannot force the use of the X1 and X2 decoders at the same time!" 
+#endif + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError + + +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) + + +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +#if DYNAMIC_BMI2 + +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#else + +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#endif + + +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; + +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + ZSTD_memcpy(&dtd, table, sizeof(dtd)); + 
return dtd; +} + + +#ifndef HUF_FORCE_DECOMPRESS_X2 + +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ + +/** + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. + */ +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + D4 = symbol + (nbBits << 8); + } else { + D4 = (symbol << 8) + nbBits; + } + D4 *= 0x0001000100010001ULL; + return D4; +} + +typedef struct { + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; + + +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +{ + return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; + + DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); + if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); + + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... 
*/ + + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); + if (HUF_isError(iSize)) return iSize; + + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + } + + /* Compute symbols and rankStart given rankVal: + * + * rankVal already contains the number of values of each weight. + * + * symbols contains the symbols ordered by weight. First are the rankVal[0] + * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. + * symbols[0] is filled (but unused) to avoid a branch. + * + * rankStart contains the offset where each rank belongs in the DTable. + * rankStart[0] is not filled because there are no entries in the table for + * weight 0. + */ + { + int n; + int nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int)nbSymbols - unroll + 1; + for (n=0; n<(int)tableLog+1; n++) { + U32 const curr = nextRankStart; + nextRankStart += wksp->rankVal[n]; + wksp->rankStart[n] = curr; + } + for (n=0; n < nLimit; n += unroll) { + int u; + for (u=0; u < unroll; ++u) { + size_t const w = wksp->huffWeight[n+u]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); + } + } + for (; n < (int)nbSymbols; ++n) { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; + } + } + + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outter loop. + * We can switch based on the length to a different inner loop which is + * optimized for that particular case. 
+ */ + { + U32 w; + int symbol=wksp->rankVal[0]; + int rankStart=0; + for (w=1; wrankVal[w]; + int const length = (1 << w) >> 1; + int uStart = rankStart; + BYTE const nbBits = (BYTE)(tableLog + 1 - w); + int s; + int u; + switch (length) { + case 1: + for (s=0; ssymbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart] = D; + uStart += 1; + } + break; + case 2: + for (s=0; ssymbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart+0] = D; + dt[uStart+1] = D; + uStart += 2; + } + break; + case 4: + for (s=0; ssymbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + uStart += 4; + } + break; + case 8: + for (s=0; ssymbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + MEM_write64(dt + uStart + 4, D4); + uStart += 8; + } + break; + default: + for (s=0; ssymbols[symbol + s], nbBits); + for (u=0; u < length; u += 16) { + MEM_write64(dt + uStart + u + 0, D4); + MEM_write64(dt + uStart + u + 4, D4); + MEM_write64(dt + uStart + u + 8, D4); + MEM_write64(dt + uStart + u + 12, D4); + } + assert(u == length); + uStart += length; + } + break; + } + symbol += symbolCount; + rankStart += symbolCount * length; + } + } + return iSize; +} + +FORCE_INLINE_TEMPLATE BYTE +HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + BYTE const c = dt[val].byte; + BIT_skipBits(Dstream, dt[val].nbBits); + return c; +} + +#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ + *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) + +#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) + +HINT_INLINE size_t +HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 4 symbols at a time 
*/ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_1(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + } + + /* [0-3] symbols remaining */ + if (MEM_32bits()) + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + /* no more data to retrieve from bitstream, no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + return pEnd-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + dstSize; + const void* dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + BIT_DStream_t bitD; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); + + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + return dstSize; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + /* Check */ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; + const void* const dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); 
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ + for ( ; (endSignal) & (op4 < olimit) ; ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } + + /* 
check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. + * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + + +typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); + +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) +HUF_DGEN(HUF_decompress4X1_usingDTable_internal) + + + +size_t HUF_decompress1X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return 
HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X2 */ + + +#ifndef HUF_FORCE_DECOMPRESS_X1 + +/* *************************/ +/* double-symbols decoding */ +/* *************************/ + +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; + + +/* HUF_fillDTableX2Level2() : + * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ +static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, + const U32* rankValOrigin, const int minWeight, + const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, + 
U32 nbBitsBaseline, U16 baseSeq) +{ + HUF_DEltX2 DElt; + U32 rankVal[HUF_TABLELOG_MAX + 1]; + + /* get pre-calculated rankVal */ + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill skipped values */ + if (minWeight>1) { + U32 i, skipSize = rankVal[minWeight]; + MEM_writeLE16(&(DElt.sequence), baseSeq); + DElt.nbBits = (BYTE)(consumed); + DElt.length = 1; + for (i = 0; i < skipSize; i++) + DTable[i] = DElt; + } + + /* fill DTable */ + { U32 s; for (s=0; s= 1 */ + + rankVal[weight] += length; + } } +} + + +static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, + const sortedSymbol_t* sortedList, const U32 sortedListSize, + const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, + const U32 nbBitsBaseline) +{ + U32 rankVal[HUF_TABLELOG_MAX + 1]; + const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ + const U32 minBits = nbBitsBaseline - maxWeight; + U32 s; + + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill DTable */ + for (s=0; s= minBits) { /* enough room for a second symbol */ + U32 sortedRank; + int minWeight = nbBits + scaleLog; + if (minWeight < 1) minWeight = 1; + sortedRank = rankStart[minWeight]; + HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, + rankValOrigin[nbBits], minWeight, + sortedList+sortedRank, sortedListSize-sortedRank, + nbBitsBaseline, symbol); + } else { + HUF_DEltX2 DElt; + MEM_writeLE16(&(DElt.sequence), symbol); + DElt.nbBits = (BYTE)(nbBits); + DElt.length = 1; + { U32 const end = start + length; + U32 u; + for (u = start; u < end; u++) DTable[u] = DElt; + } } + rankVal[weight] += length; + } +} + +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + U32 tableLog, maxW, sizeOfSort, nbSymbols; + DTableDesc dtd = HUF_getDTableDesc(DTable); + U32 const maxTableLog = dtd.maxTableLog; + size_t iSize; + void* dtPtr = DTable+1; /* force compiler to 
avoid strict-aliasing */ + HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; + U32 *rankStart; + + rankValCol_t* rankVal; + U32* rankStats; + U32* rankStart0; + sortedSymbol_t* sortedSymbol; + BYTE* weightList; + size_t spaceUsed32 = 0; + + rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; + rankStats = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 1; + rankStart0 = (U32 *)workSpace + spaceUsed32; + spaceUsed32 += HUF_TABLELOG_MAX + 2; + sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); + spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; + weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); + spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; + + if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); + + rankStart = rankStart0 + 1; + ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); + + DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ + if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
*/ + + iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* check result */ + if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + + /* find maxWeight */ + for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ + + /* Get start index of each weight */ + { U32 w, nextRankStart = 0; + for (w=1; w> consumed; + } } } } + + HUF_fillDTableX2(dt, maxTableLog, + sortedSymbol, sizeOfSort, + rankStart0, rankVal, maxW, + tableLog+1); + + dtd.tableLog = (BYTE)maxTableLog; + dtd.tableType = 1; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + return iSize; +} + + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + ZSTD_memcpy(op, dt+val, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + ZSTD_memcpy(op, dt+val, 1); + if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); + else { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } } + return 1; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +HINT_INLINE size_t +HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, + const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; + + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, 
dtd.tableLog); + } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); + const void* const dtPtr = DTable+1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = 1; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + for ( ; 
(endSignal) & (op4 < olimit); ) { +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = (U32)LIKELY( + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#endif + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > 
opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) +HUF_DGEN(HUF_decompress4X2_usingDTable_internal) + +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t 
HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X1 */ + + +/* ***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + + +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + 
{{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ +}; +#endif + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + assert(dstSize > 0); + assert(dstSize <= 128*1024); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dstSize; + (void)cSrcSize; + return 0; +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dstSize; + (void)cSrcSize; + return 1; +#else + /* decoder timing evaluation */ + { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; + } +#endif +} + + +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#else + return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#endif + } +} + +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#endif + } +} + + +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} +#endif + +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#endif + } +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX1_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 
+size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX2_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); + +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t 
cSrcSize) +{ +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) + static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#endif + + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); +#else + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); +#endif + } +} + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#else + return algoNb ? 
HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#endif + } +} + +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +#endif +/**** ended inlining decompress/huf_decompress.c ****/ +/**** start inlining decompress/zstd_ddict.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_ddict.c : + * concentrates all logic that needs to know the internals of ZSTD_DDict object */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** start inlining ../common/cpu.h ****/ +/* + * Copyright (c) 2018-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_COMMON_CPU_H +#define ZSTD_COMMON_CPU_H + +/** + * Implementation taken from folly/CpuId.h + * https://github.com/facebook/folly/blob/master/folly/CpuId.h + */ + +/**** skipping file: mem.h ****/ + +#ifdef _MSC_VER +#include +#endif + +typedef struct { + U32 f1c; + U32 f1d; + U32 f7b; + U32 f7c; +} ZSTD_cpuid_t; + +MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { + U32 f1c = 0; + U32 f1d = 0; + U32 f7b = 0; + U32 f7c = 0; +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + int reg[4]; + __cpuid((int*)reg, 0); + { + int const n = reg[0]; + if (n >= 1) { + __cpuid((int*)reg, 1); + f1c = (U32)reg[2]; + f1d = (U32)reg[3]; + } + if (n >= 7) { + __cpuidex((int*)reg, 7, 0); + f7b = (U32)reg[1]; + f7c = (U32)reg[2]; + } + } +#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) + /* The following block like the normal cpuid branch below, but gcc + * reserves ebx for use of its pic register so we must specially + * handle the save and restore to avoid clobbering the register + */ + U32 n; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(n) + : "a"(0) + : "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(f1a), "=c"(f1c), "=d"(f1d) + : "a"(1)); + } + if (n >= 7) { + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "movl %%ebx, %%eax\n\t" + "popl %%ebx" + : "=a"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) + U32 n; + __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); + } + if (n >= 7) { + U32 f7a; + __asm__("cpuid" + : "=a"(f7a), "=b"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#endif + { + ZSTD_cpuid_t cpuid; + cpuid.f1c = f1c; + cpuid.f1d = f1d; + cpuid.f7b = f7b; + cpuid.f7c = f7c; + return cpuid; + } +} + +#define X(name, r, bit) \ + 
MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ + return ((cpuid.r) & (1U << bit)) != 0; \ + } + +/* cpuid(1): Processor Info and Feature Bits. */ +#define C(name, bit) X(name, f1c, bit) + C(sse3, 0) + C(pclmuldq, 1) + C(dtes64, 2) + C(monitor, 3) + C(dscpl, 4) + C(vmx, 5) + C(smx, 6) + C(eist, 7) + C(tm2, 8) + C(ssse3, 9) + C(cnxtid, 10) + C(fma, 12) + C(cx16, 13) + C(xtpr, 14) + C(pdcm, 15) + C(pcid, 17) + C(dca, 18) + C(sse41, 19) + C(sse42, 20) + C(x2apic, 21) + C(movbe, 22) + C(popcnt, 23) + C(tscdeadline, 24) + C(aes, 25) + C(xsave, 26) + C(osxsave, 27) + C(avx, 28) + C(f16c, 29) + C(rdrand, 30) +#undef C +#define D(name, bit) X(name, f1d, bit) + D(fpu, 0) + D(vme, 1) + D(de, 2) + D(pse, 3) + D(tsc, 4) + D(msr, 5) + D(pae, 6) + D(mce, 7) + D(cx8, 8) + D(apic, 9) + D(sep, 11) + D(mtrr, 12) + D(pge, 13) + D(mca, 14) + D(cmov, 15) + D(pat, 16) + D(pse36, 17) + D(psn, 18) + D(clfsh, 19) + D(ds, 21) + D(acpi, 22) + D(mmx, 23) + D(fxsr, 24) + D(sse, 25) + D(sse2, 26) + D(ss, 27) + D(htt, 28) + D(tm, 29) + D(pbe, 31) +#undef D + +/* cpuid(7): Extended Features. 
*/ +#define B(name, bit) X(name, f7b, bit) + B(bmi1, 3) + B(hle, 4) + B(avx2, 5) + B(smep, 7) + B(bmi2, 8) + B(erms, 9) + B(invpcid, 10) + B(rtm, 11) + B(mpx, 14) + B(avx512f, 16) + B(avx512dq, 17) + B(rdseed, 18) + B(adx, 19) + B(smap, 20) + B(avx512ifma, 21) + B(pcommit, 22) + B(clflushopt, 23) + B(clwb, 24) + B(avx512pf, 26) + B(avx512er, 27) + B(avx512cd, 28) + B(sha, 29) + B(avx512bw, 30) + B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) + C(prefetchwt1, 0) + C(avx512vbmi, 1) +#undef C + +#undef X + +#endif /* ZSTD_COMMON_CPU_H */ +/**** ended inlining ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** start inlining zstd_decompress_internal.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* zstd_decompress_internal: + * objects and definitions shared within lib/decompress modules */ + + #ifndef ZSTD_DECOMPRESS_INTERNAL_H + #define ZSTD_DECOMPRESS_INTERNAL_H + + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** start inlining ../common/zstd_trace.h ****/ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_TRACE_H +#define ZSTD_TRACE_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include + +/* weak symbol support */ +#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \ + !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \ + !defined(__CYGWIN__) +# define ZSTD_HAVE_WEAK_SYMBOLS 1 +#else +# define ZSTD_HAVE_WEAK_SYMBOLS 0 +#endif +#if ZSTD_HAVE_WEAK_SYMBOLS +# define ZSTD_WEAK_ATTR __attribute__((__weak__)) +#else +# define ZSTD_WEAK_ATTR +#endif + +/* Only enable tracing when weak symbols are available. */ +#ifndef ZSTD_TRACE +# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS +#endif + +#if ZSTD_TRACE + +struct ZSTD_CCtx_s; +struct ZSTD_DCtx_s; +struct ZSTD_CCtx_params_s; + +typedef struct { + /** + * ZSTD_VERSION_NUMBER + * + * This is guaranteed to be the first member of ZSTD_trace. + * Otherwise, this struct is not stable between versions. If + * the version number does not match your expectation, you + * should not interpret the rest of the struct. + */ + unsigned version; + /** + * Non-zero if streaming (de)compression is used. + */ + unsigned streaming; + /** + * The dictionary ID. + */ + unsigned dictionaryID; + /** + * Is the dictionary cold? + * Only set on decompression. + */ + unsigned dictionaryIsCold; + /** + * The dictionary size or zero if no dictionary. + */ + size_t dictionarySize; + /** + * The uncompressed size of the data. + */ + size_t uncompressedSize; + /** + * The compressed size of the data. + */ + size_t compressedSize; + /** + * The fully resolved CCtx parameters (NULL on decompression). + */ + struct ZSTD_CCtx_params_s const* params; + /** + * The ZSTD_CCtx pointer (NULL on decompression). 
+ */ + struct ZSTD_CCtx_s const* cctx; + /** + * The ZSTD_DCtx pointer (NULL on compression). + */ + struct ZSTD_DCtx_s const* dctx; +} ZSTD_Trace; + +/** + * A tracing context. It must be 0 when tracing is disabled. + * Otherwise, any non-zero value returned by a tracing begin() + * function is presented to any subsequent calls to end(). + * + * Any non-zero value is treated as tracing is enabled and not + * interpreted by the library. + * + * Two possible uses are: + * * A timestamp for when the begin() function was called. + * * A unique key identifying the (de)compression, like the + * address of the [dc]ctx pointer if you need to track + * more information than just a timestamp. + */ +typedef unsigned long long ZSTD_TraceCtx; + +/** + * Trace the beginning of a compression call. + * @param cctx The dctx pointer for the compression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx); + +/** + * Trace the end of a compression call. + * @param ctx The return value of ZSTD_trace_compress_begin(). + * @param trace The zstd tracing info. + */ +void ZSTD_trace_compress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +/** + * Trace the beginning of a decompression call. + * @param dctx The dctx pointer for the decompression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_TraceCtx ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx); + +/** + * Trace the end of a decompression call. + * @param ctx The return value of ZSTD_trace_decompress_begin(). + * @param trace The zstd tracing info. 
+ */ +void ZSTD_trace_decompress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +#endif /* ZSTD_TRACE */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_TRACE_H */ +/**** ended inlining ../common/zstd_trace.h ****/ + + + +/*-******************************************************* + * Constants + *********************************************************/ +static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + +static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; + +static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + +static UNUSED_ATTR const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + +/*-******************************************************* + * Decompression types + *********************************************************/ + typedef struct { + U32 fastMode; + U32 tableLog; + } ZSTD_seqSymbol_header; + + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; + + #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE 
+ sizeof(U32) - 1) / sizeof(U32)) + +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; +} ZSTD_entropyDTables_t; + +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; + +typedef enum { zdss_init=0, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; + +typedef enum { + ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ + ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ + ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ +} ZSTD_dictUses_e; + +/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ +typedef struct { + const ZSTD_DDict** ddictPtrTable; + size_t ddictPtrTableSize; + size_t ddictPtrCount; +} ZSTD_DDictHashSet; + +struct ZSTD_DCtx_s +{ + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t 
expected; + ZSTD_frameHeader fParams; + U64 processedCSize; + U64 decodedSize; + blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + ZSTD_format_e format; + ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ + + /* dictionary */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + ZSTD_dictUses_e dictUses; + ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ + ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. 
Default == 0 (disabled) */ + + /* streaming */ + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; + int noForwardProgress; + ZSTD_bufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; + + /* workspace */ + BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + + size_t oversizedDuration; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif + + /* Tracing */ +#if ZSTD_TRACE + ZSTD_TraceCtx traceCtx; +#endif +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + + +/*-******************************************************* + * Shared internal functions + *********************************************************/ + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ +size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize); + +/*! ZSTD_checkContinuity() : + * check if next `dst` follows previous position, where decompression ended. + * If yes, do nothing (continue on current segment). + * If not, classify previous segment as "external dictionary", and start a new segment. + * This function cannot fail. */ +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); + + +#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ +/**** ended inlining zstd_decompress_internal.h ****/ +/**** start inlining zstd_ddict.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef ZSTD_DDICT_H +#define ZSTD_DDICT_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ + + +/*-******************************************************* + * Interface + *********************************************************/ + +/* note: several prototypes are already published in `zstd.h` : + * ZSTD_createDDict() + * ZSTD_createDDict_byReference() + * ZSTD_createDDict_advanced() + * ZSTD_freeDDict() + * ZSTD_initStaticDDict() + * ZSTD_sizeof_DDict() + * ZSTD_estimateDDictSize() + * ZSTD_getDictID_fromDict() + */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + + + +#endif /* ZSTD_DDICT_H */ +/**** ended inlining zstd_ddict.h ****/ + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +/**** start inlining ../legacy/zstd_legacy.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_LEGACY_H +#define ZSTD_LEGACY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/error_private.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ + +#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0) +# undef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 8 +#endif + +#if (ZSTD_LEGACY_SUPPORT <= 1) +/**** start inlining zstd_v01.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V01_H_28739879432 +#define ZSTD_V01_H_28739879432 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error +*/ +unsigned ZSTDv01_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx; +ZSTDv01_Dctx* ZSTDv01_createDCtx(void); +size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx); +size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv01_magicNumber 0xFD2FB51E /* Big Endian version */ +#define ZSTDv01_magicNumberLE 0x1EB52FFD /* Little Endian version */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V01_H_28739879432 */ +/**** ended inlining zstd_v01.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) +/**** start inlining zstd_v02.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V02_H_4174539423 +#define ZSTD_V02_H_4174539423 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv02_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.2.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error +*/ +unsigned ZSTDv02_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx; +ZSTDv02_Dctx* ZSTDv02_createDCtx(void); +size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx); + +size_t ZSTDv02_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx); + +size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx); +size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv02_magicNumber 0xFD2FB522 /* v0.2 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V02_H_4174539423 */ +/**** ended inlining zstd_v02.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) +/**** start inlining zstd_v03.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V03_H_298734209782 +#define ZSTD_V03_H_298734209782 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + + /** +ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error +*/ +unsigned ZSTDv03_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx; +ZSTDv03_Dctx* ZSTDv03_createDCtx(void); +size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx); + +size_t ZSTDv03_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx); + +size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx); +size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv03_magicNumber 0xFD2FB523 /* v0.3 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V03_H_298734209782 */ +/**** ended inlining zstd_v03.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) +/**** start inlining zstd_v04.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V04_H_91868324769238 +#define ZSTD_V04_H_91868324769238 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error +*/ +unsigned ZSTDv04_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv04_Dctx_s ZSTDv04_Dctx; +ZSTDv04_Dctx* ZSTDv04_createDCtx(void); +size_t ZSTDv04_freeDCtx(ZSTDv04_Dctx* dctx); + +size_t ZSTDv04_decompressDCtx(ZSTDv04_Dctx* dctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + +/* ************************************* +* Direct Streaming +***************************************/ +size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx); + +size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx); +size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + + +/* ************************************* +* Buffered Streaming +***************************************/ +typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx; +ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void); +size_t ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx); + +size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx); +size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize); + +size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr); + +/** ************************************************ +* Streaming decompression +* +* A ZBUFF_DCtx object is required to track streaming operation. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation. +* ZBUFF_DCtx objects can be reused multiple times. +* +* Optionally, a reference to a static dictionary can be set, using ZBUFF_decompressWithDictionary() +* It must be the same content as the one set during compression phase. +* Dictionary content must remain accessible during the decompression process. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *maxDstSizePtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst. 
+* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize +* output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded. +* input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* **************************************************/ +unsigned ZBUFFv04_isError(size_t errorCode); +const char* ZBUFFv04_getErrorName(size_t errorCode); + + +/** The below functions provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are not compulsory, they just tend to offer better latency */ +size_t ZBUFFv04_recommendedDInSize(void); +size_t ZBUFFv04_recommendedDOutSize(void); + + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv04_magicNumber 0xFD2FB524 /* v0.4 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V04_H_91868324769238 */ +/**** ended inlining zstd_v04.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) +/**** start inlining zstd_v05.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTDv05_H +#define ZSTDv05_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Dependencies +***************************************/ +#include /* size_t */ +/**** skipping file: ../common/mem.h ****/ + + +/* ************************************* +* Simple functions +***************************************/ +/*! ZSTDv05_decompress() : + `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. + `dstCapacity` must be large enough, equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */ +size_t ZSTDv05_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + + /** + ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. 
+ */ +void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +/* Error Management */ +unsigned ZSTDv05_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +const char* ZSTDv05_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx; +ZSTDv05_DCtx* ZSTDv05_createDCtx(void); +size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv05_decompressDCtx() : +* Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */ +size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Simple Dictionary API +*************************/ +/*! ZSTDv05_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. 
+* Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */ +size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + +/*-************************ +* Advanced Streaming API +***************************/ +typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy; +typedef struct { + U64 srcSize; + U32 windowLog; /* the only useful information to retrieve */ + U32 contentLog; U32 hashLog; U32 searchLog; U32 searchLength; U32 targetLength; ZSTDv05_strategy strategy; +} ZSTDv05_parameters; +size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize); + +size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize); +void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx); +size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx); +size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* ZBUFF API +*************************/ +typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx; +ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void); +size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* dctx); + +size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx); +size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize); + +size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression +* +* A ZBUFFv05_DCtx object is required to track streaming operations. +* Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources. 
+* Use ZBUFFv05_decompressInit() to start a new decompression operation, +* or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv05_DCtx objects can be reused multiple times. +* +* Use ZBUFFv05_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFFv05_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize() +* output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv05_recommendedDInSize==128Kb+3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +unsigned ZBUFFv05_isError(size_t errorCode); +const char* ZBUFFv05_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. 
+* These sizes are just hints, and tend to offer better latency */ +size_t ZBUFFv05_recommendedDInSize(void); +size_t ZBUFFv05_recommendedDOutSize(void); + + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv05_MAGICNUMBER 0xFD2FB525 /* v0.5 */ + + + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv0505_H */ +/**** ended inlining zstd_v05.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) +/**** start inlining zstd_v06.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTDv06_H +#define ZSTDv06_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/*====== Dependency ======*/ +#include /* size_t */ + + +/*====== Export for Windows ======*/ +/*! +* ZSTDv06_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTDv06_DLL_EXPORT) && (ZSTDv06_DLL_EXPORT==1) +# define ZSTDLIBv06_API __declspec(dllexport) +#else +# define ZSTDLIBv06_API +#endif + + +/* ************************************* +* Simple functions +***************************************/ +/*! ZSTDv06_decompress() : + `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. + `dstCapacity` must be large enough, equal or larger than originalSize. 
+ @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv06_isError()) */ +ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/** +ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. +*/ +void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +ZSTDLIBv06_API size_t ZSTDv06_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */ + +/* Error Management */ +ZSTDLIBv06_API unsigned ZSTDv06_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv06_API const char* ZSTDv06_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv06_DCtx_s ZSTDv06_DCtx; +ZSTDLIBv06_API ZSTDv06_DCtx* ZSTDv06_createDCtx(void); +ZSTDLIBv06_API size_t ZSTDv06_freeDCtx(ZSTDv06_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv06_decompressDCtx() : +* Same as ZSTDv06_decompress(), but requires an already allocated ZSTDv06_DCtx (see ZSTDv06_createDCtx()) */ +ZSTDLIBv06_API size_t 
ZSTDv06_decompressDCtx(ZSTDv06_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Dictionary API +*************************/ +/*! ZSTDv06_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. +* Note : dict can be NULL, in which case, it's equivalent to ZSTDv06_decompressDCtx() */ +ZSTDLIBv06_API size_t ZSTDv06_decompress_usingDict(ZSTDv06_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************ +* Advanced Streaming API +***************************/ +struct ZSTDv06_frameParams_s { unsigned long long frameContentSize; unsigned windowLog; }; +typedef struct ZSTDv06_frameParams_s ZSTDv06_frameParams; + +ZSTDLIBv06_API size_t ZSTDv06_getFrameParams(ZSTDv06_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +ZSTDLIBv06_API size_t ZSTDv06_decompressBegin_usingDict(ZSTDv06_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIBv06_API void ZSTDv06_copyDCtx(ZSTDv06_DCtx* dctx, const ZSTDv06_DCtx* preparedDCtx); + +ZSTDLIBv06_API size_t ZSTDv06_nextSrcSizeToDecompress(ZSTDv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZSTDv06_decompressContinue(ZSTDv06_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + + +/* ************************************* +* ZBUFF API +***************************************/ + +typedef struct ZBUFFv06_DCtx_s ZBUFFv06_DCtx; +ZSTDLIBv06_API ZBUFFv06_DCtx* ZBUFFv06_createDCtx(void); +ZSTDLIBv06_API size_t ZBUFFv06_freeDCtx(ZBUFFv06_DCtx* dctx); + +ZSTDLIBv06_API size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZBUFFv06_decompressInitDictionary(ZBUFFv06_DCtx* dctx, const void* dict, size_t dictSize); + +ZSTDLIBv06_API size_t 
ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFFv06_DCtx object is required to track streaming operations. +* Use ZBUFFv06_createDCtx() and ZBUFFv06_freeDCtx() to create/release resources. +* Use ZBUFFv06_decompressInit() to start a new decompression operation, +* or ZBUFFv06_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv06_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv06_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv06_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv06_recommendedDInSize() and ZBUFFv06_recommendedDOutSize() +* output : ZBUFFv06_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv06_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv06_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . 
+* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIBv06_API unsigned ZBUFFv06_isError(size_t errorCode); +ZSTDLIBv06_API const char* ZBUFFv06_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, they tend to offer better latency */ +ZSTDLIBv06_API size_t ZBUFFv06_recommendedDInSize(void); +ZSTDLIBv06_API size_t ZBUFFv06_recommendedDOutSize(void); + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv06_MAGICNUMBER 0xFD2FB526 /* v0.6 */ + + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv06_BUFFERED_H */ +/**** ended inlining zstd_v06.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) +/**** start inlining zstd_v07.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTDv07_H_235446 +#define ZSTDv07_H_235446 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*====== Dependency ======*/ +#include /* size_t */ + + +/*====== Export for Windows ======*/ +/*! +* ZSTDv07_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTDv07_DLL_EXPORT) && (ZSTDv07_DLL_EXPORT==1) +# define ZSTDLIBv07_API __declspec(dllexport) +#else +# define ZSTDLIBv07_API +#endif + + +/* ************************************* +* Simple API +***************************************/ +/*! 
ZSTDv07_getDecompressedSize() : +* @return : decompressed size if known, 0 otherwise. + note 1 : if `0`, follow up with ZSTDv07_getFrameParams() to know precise failure cause. + note 2 : decompressed size could be wrong or intentionally modified ! + always ensure results fit within application's authorized limits */ +unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTDv07_decompress() : + `compressedSize` : must be _exact_ size of compressed input, otherwise decompression will fail. + `dstCapacity` must be equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv07_isError()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/** +ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. 
+*/ +void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/*====== Helper functions ======*/ +ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv07_API const char* ZSTDv07_getErrorName(size_t code); /*!< provides readable string from an error code */ + + +/*-************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv07_DCtx_s ZSTDv07_DCtx; +ZSTDLIBv07_API ZSTDv07_DCtx* ZSTDv07_createDCtx(void); +ZSTDLIBv07_API size_t ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv07_decompressDCtx() : +* Same as ZSTDv07_decompress(), requires an allocated ZSTDv07_DCtx (see ZSTDv07_createDCtx()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-************************ +* Simple dictionary API +***************************/ +/*! ZSTDv07_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression. +* Note : This function load the dictionary, resulting in a significant startup time */ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************** +* Advanced Dictionary API +****************************/ +/*! ZSTDv07_createDDict() : +* Create a digested dictionary, ready to start decompression operation without startup delay. +* `dict` can be released after creation */ +typedef struct ZSTDv07_DDict_s ZSTDv07_DDict; +ZSTDLIBv07_API ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize); +ZSTDLIBv07_API size_t ZSTDv07_freeDDict(ZSTDv07_DDict* ddict); + +/*! 
ZSTDv07_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Faster startup than ZSTDv07_decompress_usingDict(), recommended when same dictionary is used multiple times. */ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTDv07_DDict* ddict); + +typedef struct { + unsigned long long frameContentSize; + unsigned windowSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTDv07_frameParams; + +ZSTDLIBv07_API size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ + + + + +/* ************************************* +* Streaming functions +***************************************/ +typedef struct ZBUFFv07_DCtx_s ZBUFFv07_DCtx; +ZSTDLIBv07_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void); +ZSTDLIBv07_API size_t ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* dctx); + +ZSTDLIBv07_API size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* dctx); +ZSTDLIBv07_API size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* dctx, const void* dict, size_t dictSize); + +ZSTDLIBv07_API size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFFv07_DCtx object is required to track streaming operations. +* Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources. +* Use ZBUFFv07_decompressInit() to start a new decompression operation, +* or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv07_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv07_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. 
+* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv07_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize() +* output : ZBUFFv07_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv07_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIBv07_API unsigned ZBUFFv07_isError(size_t errorCode); +ZSTDLIBv07_API const char* ZBUFFv07_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. 
+* These sizes are just hints, they tend to offer better latency */ +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDInSize(void); +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDOutSize(void); + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv07_MAGICNUMBER 0xFD2FB527 /* v0.7 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv07_H_235446 */ +/**** ended inlining zstd_v07.h ****/ +#endif + +/** ZSTD_isLegacy() : + @return : > 0 if supported by legacy decoder. 0 otherwise. + return value is the version. +*/ +MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize) +{ + U32 magicNumberLE; + if (srcSize<4) return 0; + magicNumberLE = MEM_readLE32(src); + switch(magicNumberLE) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case ZSTDv01_magicNumberLE:return 1; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case ZSTDv02_magicNumber : return 2; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case ZSTDv03_magicNumber : return 3; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case ZSTDv04_magicNumber : return 4; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case ZSTDv05_MAGICNUMBER : return 5; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case ZSTDv06_MAGICNUMBER : return 6; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case ZSTDv07_MAGICNUMBER : return 7; +#endif + default : return 0; + } +} + + +MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize) +{ + U32 const version = ZSTD_isLegacy(src, srcSize); + if (version < 5) return 0; /* no decompressed size in frame header, or not a legacy format */ +#if (ZSTD_LEGACY_SUPPORT <= 5) + if (version==5) { + ZSTDv05_parameters fParams; + size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.srcSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + if (version==6) { + ZSTDv06_frameParams fParams; + size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) 
return 0; + return fParams.frameContentSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + if (version==7) { + ZSTDv07_frameParams fParams; + size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif + return 0; /* should not be possible */ +} + + +MEM_STATIC size_t ZSTD_decompressLegacy( + void* dst, size_t dstCapacity, + const void* src, size_t compressedSize, + const void* dict,size_t dictSize) +{ + U32 const version = ZSTD_isLegacy(src, compressedSize); + (void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */ + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { size_t result; + ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv05_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { size_t result; + ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv06_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv06_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { size_t result; + ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv07_decompress_usingDict(zd, dst, 
dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv07_freeDCtx(zd); + return result; + } +#endif + default : + return ERROR(prefix_unknown); + } +} + +MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + U32 const version = ZSTD_isLegacy(src, srcSize); + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + ZSTDv01_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + ZSTDv02_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + ZSTDv03_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + ZSTDv04_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + ZSTDv05_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + ZSTDv06_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + ZSTDv07_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif + default : + frameSizeInfo.compressedSize = ERROR(prefix_unknown); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + break; + } + if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) { + frameSizeInfo.compressedSize = ERROR(srcSize_wrong); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + } + return frameSizeInfo; +} + 
+MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize); + return frameSizeInfo.compressedSize; +} + +MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version) +{ + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext); +#endif + } +} + + +MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion, + const void* dict, size_t dictSize) +{ + DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion); + if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion); + switch(newVersion) + { + default : + case 1 : + case 2 : + case 3 : + (void)dict; (void)dictSize; + return 0; +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv04_decompressInit(dctx); + ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? 
ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv06_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif + } +} + + + +MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version, + ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version); + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; (void)output; (void)input; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv04_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - 
output->pos; + size_t const hintSize = ZBUFFv05_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv06_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv07_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif + } +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LEGACY_H */ +/**** ended inlining ../legacy/zstd_legacy.h ****/ +#endif + + + +/*-******************************************************* +* Types +*********************************************************/ +struct ZSTD_DDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictSize; + ZSTD_entropyDTables_t entropy; + U32 dictID; + U32 entropyPresent; + ZSTD_customMem cMem; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictContent; +} + +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictSize; +} + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + 
DEBUGLOG(4, "ZSTD_copyDDictParameters"); + assert(dctx != NULL); + assert(ddict != NULL); + dctx->dictID = ddict->dictID; + dctx->prefixStart = ddict->dictContent; + dctx->virtualStart = ddict->dictContent; + dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + if (ddict->entropyPresent) { + dctx->litEntropy = 1; + dctx->fseEntropy = 1; + dctx->LLTptr = ddict->entropy.LLTable; + dctx->MLTptr = ddict->entropy.MLTable; + dctx->OFTptr = ddict->entropy.OFTable; + dctx->HUFptr = ddict->entropy.hufTable; + dctx->entropy.rep[0] = ddict->entropy.rep[0]; + dctx->entropy.rep[1] = ddict->entropy.rep[1]; + dctx->entropy.rep[2] = ddict->entropy.rep[2]; + } else { + dctx->litEntropy = 0; + dctx->fseEntropy = 0; + } +} + + +static size_t +ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, + ZSTD_dictContentType_e dictContentType) +{ + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (dictContentType == ZSTD_dct_rawContent) return 0; + + if (ddict->dictSize < 8) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + { U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + } + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( + &ddict->entropy, ddict->dictContent, ddict->dictSize)), + dictionary_corrupted, ""); + ddict->entropyPresent = 1; + return 0; +} + + +static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, + const void* dict, size_t 
dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + if (!dict) dictSize = 0; + } else { + void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + ZSTD_memcpy(internalBuffer, dict, dictSize); + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + + /* parse dictionary content */ + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); + + return 0; +} + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); + if (ddict == NULL) return NULL; + ddict->cMem = customMem; + { size_t const initResult = ZSTD_initDDict_internal(ddict, + dict, dictSize, + dictLoadMethod, dictContentType); + if (ZSTD_isError(initResult)) { + ZSTD_freeDDict(ddict); + return NULL; + } } + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); +} + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. 
+ * Dictionary content is simply referenced, it will be accessed during decompression. + * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); +} + + +const ZSTD_DDict* ZSTD_initStaticDDict( + void* sBuffer, size_t sBufferSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + size_t const neededSpace = sizeof(ZSTD_DDict) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; + assert(sBuffer != NULL); + assert(dict != NULL); + if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ + if (sBufferSize < neededSpace) return NULL; + if (dictLoadMethod == ZSTD_dlm_byCopy) { + ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ + dict = ddict+1; + } + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, + dict, dictSize, + ZSTD_dlm_byRef, dictContentType) )) + return NULL; + return ddict; +} + + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->cMem; + ZSTD_customFree(ddict->dictBuffer, cMem); + ZSTD_customFree(ddict, cMem); + return 0; + } +} + +/*! ZSTD_estimateDDictSize() : + * Estimate amount of memory that will be needed to create a dictionary for decompression. + * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ +size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) +{ + return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); +} + +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*ddict) + (ddict->dictBuffer ? 
ddict->dictSize : 0) ; +} + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; + return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); +} +/**** ended inlining decompress/zstd_ddict.c ****/ +/**** start inlining decompress/zstd_decompress.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * HEAPMODE : + * Select how default decompression function ZSTD_decompress() allocates its context, + * on stack (0), or into heap (1, default; requires malloc()). + * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif + +/*! +* LEGACY_SUPPORT : +* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 0 +#endif + +/*! + * MAXWINDOWSIZE_DEFAULT : + * maximum window size accepted by DStream __by default__. + * Frames requiring more memory will be rejected. + * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). + */ +#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT +# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1) +#endif + +/*! 
+ * NO_FORWARD_PROGRESS_MAX : + * maximum allowed nb of calls to ZSTD_decompressStream() + * without any forward progress + * (defined as: no byte read from input, and no byte flushed to output) + * before triggering an error. + */ +#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX +# define ZSTD_NO_FORWARD_PROGRESS_MAX 16 +#endif + + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/zstd_trace.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/xxhash.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ +/**** skipping file: zstd_ddict.h ****/ +/**** start inlining zstd_decompress_block.h ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +#ifndef ZSTD_DEC_BLOCK_H +#define ZSTD_DEC_BLOCK_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ + + +/* === Prototypes === */ + +/* note: prototypes already published within `zstd.h` : + * ZSTD_decompressBlock() + */ + +/* note: prototypes already published within `zstd_internal.h` : + * ZSTD_getcBlockSize() + * ZSTD_decodeSeqHeaders() + */ + + +/* ZSTD_decompressBlock_internal() : + * decompress block, starting at `src`, + * into destination buffer `dst`. + * @return : decompressed block size, + * or an error code (which can be tested using ZSTD_isError()) + */ +size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame); + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * this function must be called with valid parameters only + * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) + * in which case it cannot fail. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is + * defined in zstd_decompress_internal.h. + * Internal use only. 
+ */ +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize, + int bmi2); + + +#endif /* ZSTD_DEC_BLOCK_H */ +/**** ended inlining zstd_decompress_block.h ****/ + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +/**** skipping file: ../legacy/zstd_legacy.h ****/ +#endif + + + +/************************************* + * Multiple DDicts Hashset internals * + *************************************/ + +#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 +#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. + * Currently, that means a 0.75 load factor. + * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded + * the load factor of the ddict hash set. + */ + +#define DDICT_HASHSET_TABLE_BASE_SIZE 64 +#define DDICT_HASHSET_RESIZE_FACTOR 2 + +/* Hash function to determine starting position of dict insertion within the table + * Returns an index between [0, hashSet->ddictPtrTableSize] + */ +static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) { + const U64 hash = XXH64(&dictID, sizeof(U32), 0); + /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */ + return hash & (hashSet->ddictPtrTableSize - 1); +} + +/* Adds DDict to a hashset without resizing it. + * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set. + * Returns 0 if successful, or a zstd error code if something went wrong. 
+ */ +static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) { + const U32 dictID = ZSTD_getDictID_fromDDict(ddict); + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!"); + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + while (hashSet->ddictPtrTable[idx] != NULL) { + /* Replace existing ddict if inserting ddict with same dictID */ + if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) { + DEBUGLOG(4, "DictID already exists, replacing rather than adding"); + hashSet->ddictPtrTable[idx] = ddict; + return 0; + } + idx &= idxRangeMask; + idx++; + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + hashSet->ddictPtrTable[idx] = ddict; + hashSet->ddictPtrCount++; + return 0; +} + +/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and + * rehashes all values, allocates new table, frees old table. + * Returns 0 on success, otherwise a zstd error code. + */ +static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR; + const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem); + const ZSTD_DDict** oldTable = hashSet->ddictPtrTable; + size_t oldTableSize = hashSet->ddictPtrTableSize; + size_t i; + + DEBUGLOG(4, "Expanding DDict hash table! 
Old size: %zu new size: %zu", oldTableSize, newTableSize); + RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!"); + hashSet->ddictPtrTable = newTable; + hashSet->ddictPtrTableSize = newTableSize; + hashSet->ddictPtrCount = 0; + for (i = 0; i < oldTableSize; ++i) { + if (oldTable[i] != NULL) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), ""); + } + } + ZSTD_customFree((void*)oldTable, customMem); + DEBUGLOG(4, "Finished re-hash"); + return 0; +} + +/* Fetches a DDict with the given dictID + * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL. + */ +static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) { + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + for (;;) { + size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]); + if (currDictID == dictID || currDictID == 0) { + /* currDictID == 0 implies a NULL ddict entry */ + break; + } else { + idx &= idxRangeMask; /* Goes to start of table when we reach the end */ + idx++; + } + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + return hashSet->ddictPtrTable[idx]; +} + +/* Allocates space for and returns a ddict hash set + * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with. + * Returns NULL if allocation failed. 
+ */
+static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
+    ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
+    DEBUGLOG(4, "Allocating new hash set");
+    /* Check the set allocation BEFORE touching its members: the previous code wrote
+     * ret->ddictPtrTable/etc. first and only then tested `!ret`, dereferencing NULL
+     * whenever the malloc failed. */
+    if (!ret)
+        return NULL;
+    ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
+    ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
+    ret->ddictPtrCount = 0;
+    if (!ret->ddictPtrTable) {
+        ZSTD_customFree(ret, customMem);   /* don't leak the set when the table allocation fails */
+        return NULL;
+    }
+    return ret;
+}
+
+/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself.
+ * Note: The ZSTD_DDict* within the table are NOT freed.
+ */
+static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    DEBUGLOG(4, "Freeing ddict hash set");
+    if (hashSet && hashSet->ddictPtrTable) {
+        ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem);
+    }
+    if (hashSet) {
+        ZSTD_customFree(hashSet, customMem);
+    }
+}
+
+/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set.
+ * Returns 0 on success, or a ZSTD error.
+ */ +static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) { + DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize); + if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), ""); + } + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), ""); + return 0; +} + +/*-************************************************************* +* Context management +***************************************************************/ +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support sizeof NULL */ + return sizeof(*dctx) + + ZSTD_sizeof_DDict(dctx->ddictLocal) + + dctx->inBuffSize + dctx->outBuffSize; +} + +size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } + + +static size_t ZSTD_startingInputLength(ZSTD_format_e format) +{ + size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); + return startingInputLength; +} + +static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) +{ + assert(dctx->streamStage == zdss_init); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->outBufferMode = ZSTD_bm_buffered; + dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; +} + +static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) +{ + dctx->staticSize = 0; + dctx->ddict = NULL; + dctx->ddictLocal = NULL; + dctx->dictEnd = NULL; + dctx->ddictIsCold = 0; + dctx->dictUses = ZSTD_dont_use; + dctx->inBuff = NULL; + dctx->inBuffSize = 0; + dctx->outBuffSize = 0; + 
dctx->streamStage = zdss_init; + dctx->legacyContext = NULL; + dctx->previousLegacyVersion = 0; + dctx->noForwardProgress = 0; + dctx->oversizedDuration = 0; + dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + dctx->ddictSet = NULL; + ZSTD_DCtx_resetParameters(dctx); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif +} + +ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) +{ + ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ + + ZSTD_initDCtx_internal(dctx); + dctx->staticSize = workspaceSize; + dctx->inBuff = (char*)(dctx+1); + return dctx; +} + +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem); + if (!dctx) return NULL; + dctx->customMem = customMem; + ZSTD_initDCtx_internal(dctx); + return dctx; + } +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + DEBUGLOG(3, "ZSTD_createDCtx"); + return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_clearDict(ZSTD_DCtx* dctx) +{ + ZSTD_freeDDict(dctx->ddictLocal); + dctx->ddictLocal = NULL; + dctx->ddict = NULL; + dctx->dictUses = ZSTD_dont_use; +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); + { ZSTD_customMem const cMem = dctx->customMem; + ZSTD_clearDict(dctx); + ZSTD_customFree(dctx->inBuff, cMem); + dctx->inBuff = NULL; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (dctx->legacyContext) + ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); +#endif + if (dctx->ddictSet) { + ZSTD_freeDDictHashSet(dctx->ddictSet, cMem); + dctx->ddictSet = NULL; + } + 
ZSTD_customFree(dctx, cMem); + return 0; + } +} + +/* no longer useful */ +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) +{ + size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); + ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +} + +/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on + * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then + * accordingly sets the ddict to be used to decompress the frame. + * + * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is. + * + * ZSTD_d_refMultipleDDicts must be enabled for this function to be called. + */ +static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) { + assert(dctx->refMultipleDDicts && dctx->ddictSet); + DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame"); + if (dctx->ddict) { + const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); + if (frameDDict) { + DEBUGLOG(4, "DDict found!"); + ZSTD_clearDict(dctx); + dctx->dictID = dctx->fParams.dictID; + dctx->ddict = frameDDict; + dctx->dictUses = ZSTD_use_indefinitely; + } + } +} + + +/*-************************************************************* + * Frame header decoding + ***************************************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. 
*/ +unsigned ZSTD_isFrame(const void* buffer, size_t size) +{ + if (size < ZSTD_FRAMEIDSIZE) return 0; + { U32 const magic = MEM_readLE32(buffer); + if (magic == ZSTD_MAGICNUMBER) return 1; + if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1; + } +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(buffer, size)) return 1; +#endif + return 0; +} + +/** ZSTD_frameHeaderSize_internal() : + * srcSize must be large enough to reach header size fields. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. + * @return : size of the Frame Header + * or an error code, which can be tested with ZSTD_isError() */ +static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) +{ + size_t const minInputSize = ZSTD_startingInputLength(format); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); + + { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; + U32 const dictID= fhd & 3; + U32 const singleSegment = (fhd >> 5) & 1; + U32 const fcsId = fhd >> 6; + return minInputSize + !singleSegment + + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + + (singleSegment && !fcsId); + } +} + +/** ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_frameHeaderSize_prefix. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameHeader_advanced() : + * decode Frame Header, or require larger `srcSize`. 
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) +{ + const BYTE* ip = (const BYTE*)src; + size_t const minInputSize = ZSTD_startingInputLength(format); + + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ + if (srcSize < minInputSize) return minInputSize; + RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); + + if ( (format != ZSTD_f_zstd1_magicless) + && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + /* skippable frame */ + if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) + return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); + zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); + zfhPtr->frameType = ZSTD_skippableFrame; + return 0; + } + RETURN_ERROR(prefix_unknown, ""); + } + + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); + if (srcSize < fhsize) return fhsize; + zfhPtr->headerSize = (U32)fhsize; + } + + { BYTE const fhdByte = ip[minInputSize-1]; + size_t pos = minInputSize; + U32 const dictIDSizeCode = fhdByte&3; + U32 const checksumFlag = (fhdByte>>2)&1; + U32 const singleSegment = (fhdByte>>5)&1; + U32 const fcsID = fhdByte>>6; + U64 windowSize = 0; + U32 dictID = 0; + U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; + RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, + "reserved bits, must be 
zero"); + + if (!singleSegment) { + BYTE const wlByte = ip[pos++]; + U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); + windowSize = (1ULL << windowLog); + windowSize += (windowSize >> 3) * (wlByte&7); + } + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : dictID = ip[pos]; pos++; break; + case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; + case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; + } + switch(fcsID) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) frameContentSize = ip[pos]; break; + case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; + case 2 : frameContentSize = MEM_readLE32(ip+pos); break; + case 3 : frameContentSize = MEM_readLE64(ip+pos); break; + } + if (singleSegment) windowSize = frameContentSize; + + zfhPtr->frameType = ZSTD_frame; + zfhPtr->frameContentSize = frameContentSize; + zfhPtr->windowSize = windowSize; + zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + zfhPtr->dictID = dictID; + zfhPtr->checksumFlag = checksumFlag; + } + return 0; +} + +/** ZSTD_getFrameHeader() : + * decode Frame Header, or require larger `srcSize`. + * note : this function does not consume input, it only reads it. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) +{ + return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameContentSize() : + * compatible with legacy mode + * @return : decompressed size of the single frame pointed to be `src` if known, otherwise + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. 
invalid magic number, srcSize too small) */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize); + return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret; + } +#endif + { ZSTD_frameHeader zfh; + if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) + return ZSTD_CONTENTSIZE_ERROR; + if (zfh.frameType == ZSTD_skippableFrame) { + return 0; + } else { + return zfh.frameContentSize; + } } +} + +static size_t readSkippableFrameSize(void const* src, size_t srcSize) +{ + size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; + U32 sizeU32; + + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); + + sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); + RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, + frameParameter_unsupported, ""); + { + size_t const skippableSize = skippableHeaderSize + sizeU32; + RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); + return skippableSize; + } +} + +/** ZSTD_findDecompressedSize() : + * compatible with legacy mode + * `srcSize` must be the exact length of some number of ZSTD compressed and/or + * skippable frames + * @return : decompressed size of the frames contained */ +unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long totalDstSize = 0; + + while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { + U32 const magicNumber = MEM_readLE32(src); + + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + if (ZSTD_isError(skippableSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } + + { unsigned long long const ret = 
ZSTD_getFrameContentSize(src, srcSize); + if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; + + /* check for overflow */ + if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; + totalDstSize += ret; + } + { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); + if (ZSTD_isError(frameSrcSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + + src = (const BYTE *)src + frameSrcSize; + srcSize -= frameSrcSize; + } + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + if (srcSize) return ZSTD_CONTENTSIZE_ERROR; + + return totalDstSize; +} + +/** ZSTD_getDecompressedSize() : + * compatible with legacy mode + * @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the following : + - frame content is empty + - decompressed size field is not present in frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); + return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; +} + + +/** ZSTD_decodeFrameHeader() : + * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * If multiple DDict references are enabled, also will choose the correct DDict to use. 
+ * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) +{ + size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); + if (ZSTD_isError(result)) return result; /* invalid header */ + RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); + + /* Reference DDict requested by frame if dctx references multiple ddicts */ + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) { + ZSTD_DCtx_selectFrameDDict(dctx); + } + +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* Skip the dictID check in fuzzing mode, because it makes the search + * harder. + */ + RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), + dictionary_wrong, ""); +#endif + dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; + if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0); + dctx->processedCSize += headerSize; + return 0; +} + +static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) +{ + ZSTD_frameSizeInfo frameSizeInfo; + frameSizeInfo.compressedSize = ret; + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + return frameSizeInfo; +} + +static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) + return ZSTD_findFrameSizeInfoLegacy(src, srcSize); +#endif + + if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) + && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); + assert(ZSTD_isError(frameSizeInfo.compressedSize) || + frameSizeInfo.compressedSize <= srcSize); + return frameSizeInfo; + } else 
{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + size_t nbBlocks = 0; + ZSTD_frameHeader zfh; + + /* Extract Frame Header */ + { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(ret)) + return ZSTD_errorFrameSizeInfo(ret); + if (ret > 0) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + } + + ip += zfh.headerSize; + remainingSize -= zfh.headerSize; + + /* Iterate over each block */ + while (1) { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) + return ZSTD_errorFrameSizeInfo(cBlockSize); + + if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + + ip += ZSTD_blockHeaderSize + cBlockSize; + remainingSize -= ZSTD_blockHeaderSize + cBlockSize; + nbBlocks++; + + if (blockProperties.lastBlock) break; + } + + /* Final frame content checksum */ + if (zfh.checksumFlag) { + if (remainingSize < 4) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + ip += 4; + } + + frameSizeInfo.compressedSize = (size_t)(ip - ipstart); + frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) + ? 
zfh.frameContentSize + : nbBlocks * zfh.blockSizeMax; + return frameSizeInfo; + } +} + +/** ZSTD_findFrameCompressedSize() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame + * `srcSize` must be at least as large as the frame contained + * @return : the compressed size of the frame starting at `src` */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + return frameSizeInfo.compressedSize; +} + +/** ZSTD_decompressBound() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame or a skippeable frame + * `srcSize` must be at least as large as the frame contained + * @return : the maximum decompressed size of the compressed source + */ +unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) +{ + unsigned long long bound = 0; + /* Iterate over each frame */ + while (srcSize > 0) { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + size_t const compressedSize = frameSizeInfo.compressedSize; + unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; + if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) + return ZSTD_CONTENTSIZE_ERROR; + assert(srcSize >= compressedSize); + src = (const BYTE*)src + compressedSize; + srcSize -= compressedSize; + bound += decompressedBound; + } + return bound; +} + + +/*-************************************************************* + * Frame decoding + ***************************************************************/ + +/** ZSTD_insertBlock() : + * insert `src` block into `dctx` history. Useful to track uncompressed blocks. 
*/ +size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +{ + DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); + ZSTD_checkContinuity(dctx, blockStart, blockSize); + dctx->previousDstEnd = (const char*)blockStart + blockSize; + return blockSize; +} + + +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_copyRawBlock"); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (srcSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memcpy(dst, src, srcSize); + return srcSize; +} + +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, + BYTE b, + size_t regenSize) +{ + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (regenSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memset(dst, b, regenSize); + return regenSize; +} + +static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) +{ +#if ZSTD_TRACE + if (dctx->traceCtx) { + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + if (dctx->ddict) { + trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict); + trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict); + trace.dictionaryIsCold = dctx->ddictIsCold; + } + trace.uncompressedSize = (size_t)uncompressedSize; + trace.compressedSize = (size_t)compressedSize; + trace.dctx = dctx; + ZSTD_trace_decompress_end(dctx->traceCtx, &trace); + } +#else + (void)dctx; + (void)uncompressedSize; + (void)compressedSize; + (void)streaming; +#endif +} + + +/*! ZSTD_decompressFrame() : + * @dctx must be properly initialized + * will update *srcPtr and *srcSizePtr, + * to make *srcPtr progress by one frame. 
*/ +static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void** srcPtr, size_t *srcSizePtr) +{ + const BYTE* const istart = (const BYTE*)(*srcPtr); + const BYTE* ip = istart; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; + BYTE* op = ostart; + size_t remainingSrcSize = *srcSizePtr; + + DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); + + /* check */ + RETURN_ERROR_IF( + remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( + ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); + ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; + } + + /* Loop on each block */ + while (1) { + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSrcSize -= ZSTD_blockHeaderSize; + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); + + switch(blockProperties.blockType) + { + case bt_compressed: + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1); + break; + case bt_raw : + decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); + break; + case bt_rle : + decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize); + break; + case bt_reserved : + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + + if (ZSTD_isError(decodedSize)) return decodedSize; + if 
(dctx->validateChecksum) + XXH64_update(&dctx->xxhState, op, decodedSize); + if (decodedSize != 0) + op += decodedSize; + assert(ip != NULL); + ip += cBlockSize; + remainingSrcSize -= cBlockSize; + if (blockProperties.lastBlock) break; + } + + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { + RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, + corruption_detected, ""); + } + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); + if (!dctx->forceIgnoreChecksum) { + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + } + ip += 4; + remainingSrcSize -= 4; + } + ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); + /* Allow caller to get size read */ + *srcPtr = ip; + *srcSizePtr = remainingSrcSize; + return (size_t)(op-ostart); +} + +static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + const ZSTD_DDict* ddict) +{ + void* const dststart = dst; + int moreThan1Frame = 0; + + DEBUGLOG(5, "ZSTD_decompressMultiFrame"); + assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ + + if (ddict) { + dict = ZSTD_DDict_dictContent(ddict); + dictSize = ZSTD_DDict_dictSize(ddict); + } + + while (srcSize >= ZSTD_startingInputLength(dctx->format)) { + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + size_t decodedSize; + size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); + if (ZSTD_isError(frameSize)) return frameSize; + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, + "legacy support is not compatible with static dctx"); + + decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); 
+ if (ZSTD_isError(decodedSize)) return decodedSize; + + assert(decodedSize <= dstCapacity); + dst = (BYTE*)dst + decodedSize; + dstCapacity -= decodedSize; + + src = (const BYTE*)src + frameSize; + srcSize -= frameSize; + + continue; + } +#endif + + { U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (unsigned)magicNumber, ZSTD_MAGICNUMBER); + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } } + + if (ddict) { + /* we were called from ZSTD_decompress_usingDDict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); + } else { + /* this will initialize correctly with no dict if dict == NULL, so + * use this in all cases but ddict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); + } + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, + &src, &srcSize); + RETURN_ERROR_IF( + (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1), + srcSize_wrong, + "At least one frame successfully completed, " + "but following bytes are garbage: " + "it's more likely to be a srcSize error, " + "specifying more input bytes than size of frame(s). " + "Note: one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic bytes. 
" + "But this is _much_ less likely than a srcSize field error."); + if (ZSTD_isError(res)) return res; + assert(res <= dstCapacity); + if (res != 0) + dst = (BYTE*)dst + res; + dstCapacity -= res; + } + moreThan1Frame = 1; + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); + + return (size_t)((BYTE*)dst - (BYTE*)dststart); +} + +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) +{ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); +} + + +static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) +{ + switch (dctx->dictUses) { + default: + assert(0 /* Impossible */); + /* fall-through */ + case ZSTD_dont_use: + ZSTD_clearDict(dctx); + return NULL; + case ZSTD_use_indefinitely: + return dctx->ddict; + case ZSTD_use_once: + dctx->dictUses = ZSTD_dont_use; + return dctx->ddict; + } +} + +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); +} + + +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) + size_t regenSize; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; +#else /* stack mode */ + ZSTD_DCtx dctx; + ZSTD_initDCtx_internal(&dctx); + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif +} + + +/*-************************************** +* Advanced Streaming Decompression API +* Bufferless and synchronous +****************************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { 
return dctx->expected; } + +/** + * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, + * we allow taking a partial block as the input. Currently only raw uncompressed blocks can + * be streamed. + * + * For blocks that can be streamed, this allows us to reduce the latency until we produce + * output, and avoid copying the input. + * + * @param inputSize - The total amount of input that the caller currently has. + */ +static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) { + if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock)) + return dctx->expected; + if (dctx->bType != bt_raw) + return dctx->expected; + return MIN(MAX(inputSize, 1), dctx->expected); +} + +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { + switch(dctx->stage) + { + default: /* should not happen */ + assert(0); + case ZSTDds_getFrameHeaderSize: + case ZSTDds_decodeFrameHeader: + return ZSTDnit_frameHeader; + case ZSTDds_decodeBlockHeader: + return ZSTDnit_blockHeader; + case ZSTDds_decompressBlock: + return ZSTDnit_block; + case ZSTDds_decompressLastBlock: + return ZSTDnit_lastBlock; + case ZSTDds_checkChecksum: + return ZSTDnit_checksum; + case ZSTDds_decodeSkippableHeader: + case ZSTDds_skipFrame: + return ZSTDnit_skippableFrame; + } +} + +static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } + +/** ZSTD_decompressContinue() : + * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) + * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); + /* Sanity check */ + RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, 
srcSize), srcSize_wrong, "not allowed"); + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + dctx->processedCSize += srcSize; + + switch (dctx->stage) + { + case ZSTDds_getFrameHeaderSize : + assert(src != NULL); + if (dctx->format == ZSTD_f_zstd1) { /* allows header */ + assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } } + dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); + if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = dctx->headerSize - srcSize; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; + + case ZSTDds_decodeFrameHeader: + assert(src != NULL); + ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); + dctx->expected = ZSTD_blockHeaderSize; + dctx->stage = ZSTDds_decodeBlockHeader; + return 0; + + case ZSTDds_decodeBlockHeader: + { blockProperties_t bp; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); + dctx->expected = cBlockSize; + dctx->bType = bp.blockType; + dctx->rleSize = bp.origSize; + if (cBlockSize) { + dctx->stage = bp.lastBlock ? 
ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) { + if (dctx->fParams.checksumFlag) { + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; + } + return 0; + } + + case ZSTDds_decompressLastBlock: + case ZSTDds_decompressBlock: + DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); + { size_t rSize; + switch(dctx->bType) + { + case bt_compressed: + DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_raw : + assert(srcSize <= dctx->expected); + rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); + assert(rSize == srcSize); + dctx->expected -= rSize; + break; + case bt_rle : + rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_reserved : /* should never happen */ + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + FORWARD_IF_ERROR(rSize, ""); + RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); + DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); + dctx->decodedSize += rSize; + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); + dctx->previousDstEnd = (char*)dst + rSize; + + /* Stay on the same stage until we are finished streaming the block. 
*/ + if (dctx->expected > 0) { + return rSize; + } + + if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); + RETURN_ERROR_IF( + dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && dctx->decodedSize != dctx->fParams.frameContentSize, + corruption_detected, ""); + if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; /* ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + } + return rSize; + } + + case ZSTDds_checkChecksum: + assert(srcSize == 4); /* guaranteed by dctx->expected */ + { + if (dctx->validateChecksum) { + U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + } + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + + case ZSTDds_decodeSkippableHeader: + assert(src != NULL); + assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); + ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ + dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->stage = ZSTDds_skipFrame; + return 0; + + case ZSTDds_skipFrame: + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, 
"impossible to reach"); /* some compiler require default to do something */ + } +} + + +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dict; + dctx->previousDstEnd = (const char*)dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + return 0; +} + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of entropy tables read */ +size_t +ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + + RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); + assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ + dictPtr += 8; /* skip header = magic + dictID */ + + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); + ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); + { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ + size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); +#ifdef HUF_FORCE_DECOMPRESS_X1 + /* in minimal huffman, we always use X1 variants */ + size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, + dictPtr, dictEnd - dictPtr, + workspace, 
workspaceSize); +#else + size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, + dictPtr, (size_t)(dictEnd - dictPtr), + workspace, workspaceSize); +#endif + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); + dictPtr += hSize; + } + + { short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff, offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->OFTable, + offcodeNCount, offcodeMaxValue, + OF_base, OF_bits, + offcodeLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */0); + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->MLTable, + matchlengthNCount, matchlengthMaxValue, + ML_base, ML_bits, + matchlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); + 
RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->LLTable, + litlengthNCount, litlengthMaxValue, + LL_base, LL_bits, + litlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + { int i; + size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); + for (i=0; i<3; i++) { + U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; + RETURN_ERROR_IF(rep==0 || rep > dictContentSize, + dictionary_corrupted, ""); + entropy->rep[i] = rep; + } } + + return (size_t)(dictPtr - (const BYTE*)dict); +} + +static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); + { U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_MAGIC_DICTIONARY) { + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } } + dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); + dict = (const char*)dict + eSize; + dictSize -= eSize; + } + dctx->litEntropy = dctx->fseEntropy = 1; + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); +} + +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + assert(dctx != NULL); +#if ZSTD_TRACE + dctx->traceCtx = ZSTD_trace_decompress_begin(dctx); +#endif + dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->processedCSize = 0; + dctx->decodedSize = 0; + dctx->previousDstEnd = NULL; + dctx->prefixStart = NULL; + dctx->virtualStart = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little 
and big endian */ + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + dctx->bType = bt_reserved; + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; + return 0; +} + +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (dict && dictSize) + RETURN_ERROR_IF( + ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), + dictionary_corrupted, ""); + return 0; +} + + +/* ====== ZSTD_DDict ====== */ + +size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); + assert(dctx != NULL); + if (ddict) { + const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict); + size_t const dictSize = ZSTD_DDict_dictSize(ddict); + const void* const dictEnd = dictStart + dictSize; + dctx->ddictIsCold = (dctx->dictEnd != dictEnd); + DEBUGLOG(4, "DDict is %s", + dctx->ddictIsCold ? "~cold~" : "hot!"); + } + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (ddict) { /* NULL ddict is equivalent to no dictionary */ + ZSTD_copyDDictParameters(dctx, ddict); + } + return 0; +} + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) +{ + if (dictSize < 8) return 0; + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; + return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); +} + +/*! 
ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary (most common case). + * - The frame was built with dictID intentionally removed. + * Needed dictionary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, frame header could not be decoded. + * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use + * ZSTD_getFrameHeader(), which will provide a more precise error code. */ +unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) +{ + ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; + size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); + if (ZSTD_isError(hError)) return 0; + return zfp.dictID; +} + + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Use dictionary without significant overhead. 
*/ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict) +{ + /* pass content and size in case legacy frames are encountered */ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, + NULL, 0, + ddict); +} + + +/*===================================== +* Streaming decompression +*====================================*/ + +ZSTD_DStream* ZSTD_createDStream(void) +{ + DEBUGLOG(3, "ZSTD_createDStream"); + return ZSTD_createDStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticDCtx(workspace, workspaceSize); +} + +ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDCtx_advanced(customMem); +} + +size_t ZSTD_freeDStream(ZSTD_DStream* zds) +{ + return ZSTD_freeDCtx(zds); +} + + +/* *** Initialization *** */ + +size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; } +size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (dict && dictSize != 0) { + dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); + dctx->ddict = dctx->ddictLocal; + dctx->dictUses = ZSTD_use_indefinitely; + } + return 0; +} + +size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + 
return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); + dctx->dictUses = ZSTD_use_once; + return 0; +} + +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + + +/* ZSTD_initDStream_usingDict() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) +{ + DEBUGLOG(4, "ZSTD_initDStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); + return ZSTD_startingInputLength(zds->format); +} + +/* note : this variant can't fail */ +size_t ZSTD_initDStream(ZSTD_DStream* zds) +{ + DEBUGLOG(4, "ZSTD_initDStream"); + return ZSTD_initDStream_usingDDict(zds, NULL); +} + +/* ZSTD_initDStream_usingDDict() : + * ddict will just be referenced, and must outlive decompression session + * this function cannot fail */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) +{ + FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); + return ZSTD_startingInputLength(dctx->format); +} + +/* ZSTD_resetDStream() : + * return : expected size, aka ZSTD_startingInputLength(). 
+ * this function cannot fail */ +size_t ZSTD_resetDStream(ZSTD_DStream* dctx) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); + return ZSTD_startingInputLength(dctx->format); +} + + +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (ddict) { + dctx->ddict = ddict; + dctx->dictUses = ZSTD_use_indefinitely; + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) { + if (dctx->ddictSet == NULL) { + dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); + if (!dctx->ddictSet) { + RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); + } + } + assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */ + FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), ""); + } + } + return 0; +} + +/* ZSTD_DCtx_setMaxWindowSize() : + * note : no direct equivalence in ZSTD_DCtx_setParameter, + * since this version sets windowSize, and the other sets windowLog */ +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); + size_t const min = (size_t)1 << bounds.lowerBound; + size_t const max = (size_t)1 << bounds.upperBound; + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); + dctx->maxWindowSize = maxWindowSize; + return 0; +} + +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) +{ + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format); +} + +ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + switch(dParam) { + case ZSTD_d_windowLogMax: + bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + 
return bounds; + case ZSTD_d_format: + bounds.lowerBound = (int)ZSTD_f_zstd1; + bounds.upperBound = (int)ZSTD_f_zstd1_magicless; + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + return bounds; + case ZSTD_d_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + case ZSTD_d_forceIgnoreChecksum: + bounds.lowerBound = (int)ZSTD_d_validateChecksum; + bounds.upperBound = (int)ZSTD_d_ignoreChecksum; + return bounds; + case ZSTD_d_refMultipleDDicts: + bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; + bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; + return bounds; + default:; + } + bounds.error = ERROR(parameter_unsupported); + return bounds; +} + +/* ZSTD_dParam_withinBounds: + * @return 1 if value is within dParam bounds, + * 0 otherwise */ +static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +#define CHECK_DBOUNDS(p,v) { \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ +} + +size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value) +{ + switch (param) { + case ZSTD_d_windowLogMax: + *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize); + return 0; + case ZSTD_d_format: + *value = (int)dctx->format; + return 0; + case ZSTD_d_stableOutBuffer: + *value = (int)dctx->outBufferMode; + return 0; + case ZSTD_d_forceIgnoreChecksum: + *value = (int)dctx->forceIgnoreChecksum; + return 0; + case ZSTD_d_refMultipleDDicts: + *value = (int)dctx->refMultipleDDicts; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + switch(dParam) { + 
case ZSTD_d_windowLogMax: + if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; + CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); + dctx->maxWindowSize = ((size_t)1) << value; + return 0; + case ZSTD_d_format: + CHECK_DBOUNDS(ZSTD_d_format, value); + dctx->format = (ZSTD_format_e)value; + return 0; + case ZSTD_d_stableOutBuffer: + CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); + dctx->outBufferMode = (ZSTD_bufferMode_e)value; + return 0; + case ZSTD_d_forceIgnoreChecksum: + CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); + dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; + return 0; + case ZSTD_d_refMultipleDDicts: + CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value); + if (dctx->staticSize != 0) { + RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!"); + } + dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + dctx->streamStage = zdss_init; + dctx->noForwardProgress = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + ZSTD_DCtx_resetParameters(dctx); + } + return 0; +} + + +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) +{ + return ZSTD_sizeof_DCtx(dctx); +} + +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +{ + size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); + unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); + size_t const minRBSize = (size_t) neededSize; + RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, + 
frameParameter_windowTooLarge, ""); + return minRBSize; +} + +size_t ZSTD_estimateDStreamSize(size_t windowSize) +{ + size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + size_t const inBuffSize = blockSize; /* no block can be larger */ + size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; +} + +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) +{ + U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ + ZSTD_frameHeader zfh; + size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(err)) return err; + RETURN_ERROR_IF(err>0, srcSize_wrong, ""); + RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, + frameParameter_windowTooLarge, ""); + return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); +} + + +/* ***** Decompression ***** */ + +static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; +} + +static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) + zds->oversizedDuration++; + else + zds->oversizedDuration = 0; +} + +static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) +{ + return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ +static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) +{ + ZSTD_outBuffer const expect = zds->expectedOutBuffer; + /* No requirement when ZSTD_obm_stable is not enabled. 
*/ + if (zds->outBufferMode != ZSTD_bm_stable) + return 0; + /* Any buffer is allowed in zdss_init, this must be the same for every other call until + * the context is reset. + */ + if (zds->streamStage == zdss_init) + return 0; + /* The buffer must match our expectation exactly. */ + if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) + return 0; + RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!"); +} + +/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() + * and updates the stage and the output buffer state. This call is extracted so it can be + * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. + * NOTE: You must break after calling this function since the streamStage is modified. + */ +static size_t ZSTD_decompressContinueStream( + ZSTD_DStream* zds, char** op, char* oend, + void const* src, size_t srcSize) { + int const isSkipFrame = ZSTD_isSkipFrame(zds); + if (zds->outBufferMode == ZSTD_bm_buffered) { + size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart; + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + if (!decodedSize && !isSkipFrame) { + zds->streamStage = zdss_read; + } else { + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + } + } else { + /* Write directly into the output buffer */ + size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op); + size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + *op += decodedSize; + /* Flushing is not needed. 
*/ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_bm_stable); + } + return 0; +} + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + const char* const src = (const char*)input->src; + const char* const istart = input->pos != 0 ? src + input->pos : src; + const char* const iend = input->size != 0 ? src + input->size : src; + const char* ip = istart; + char* const dst = (char*)output->dst; + char* const ostart = output->pos != 0 ? dst + output->pos : dst; + char* const oend = output->size != 0 ? dst + output->size : dst; + char* op = ostart; + U32 someMoreWork = 1; + + DEBUGLOG(5, "ZSTD_decompressStream"); + RETURN_ERROR_IF( + input->pos > input->size, + srcSize_wrong, + "forbidden. in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + RETURN_ERROR_IF( + output->pos > output->size, + dstSize_tooSmall, + "forbidden. out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); + DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); + + while (someMoreWork) { + switch(zds->streamStage) + { + case zdss_init : + DEBUGLOG(5, "stage zdss_init => transparent reset "); + zds->streamStage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; + zds->legacyVersion = 0; + zds->hostageByte = 0; + zds->expectedOutBuffer = *output; + /* fall-through */ + + case zdss_loadHeader : + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + if (zds->legacyVersion) { + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; + return hint; + } } +#endif + { size_t const hSize = 
ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + if (zds->refMultipleDDicts && zds->ddictSet) { + ZSTD_DCtx_selectFrameDDict(zds); + } + DEBUGLOG(5, "header size : %u", (U32)hSize); + if (ZSTD_isError(hSize)) { +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); + if (legacyVersion) { + ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); + const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; + size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0; + DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, + zds->previousLegacyVersion, legacyVersion, + dict, dictSize), ""); + zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ + return hint; + } } +#endif + return hSize; /* error */ + } + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + size_t const remainingInput = (size_t)(iend-ip); + assert(iend >= ip); + if (toLoad > remainingInput) { /* not enough input to load full header */ + if (remainingInput > 0) { + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; + } + input->pos = input->size; + return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + assert(ip != NULL); + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + break; + } } + + /* check for single-pass mode 
opportunity */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame + && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { + size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); + if (cSize <= (size_t)(iend-istart)) { + /* shortcut : using single-pass mode */ + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); + if (ZSTD_isError(decompressedSize)) return decompressedSize; + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") + ip = istart + cSize; + op += decompressedSize; + zds->expected = 0; + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } } + + /* Check output buffer is large enough for ZSTD_odm_stable. */ + if (zds->outBufferMode == ZSTD_bm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { + RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + + /* Consume header (see ZSTDds_decodeFrameHeader) */ + DEBUGLOG(4, "Consume header"); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); + + if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); + zds->stage = ZSTDds_skipFrame; + } else { + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); + zds->expected = ZSTD_blockHeaderSize; + zds->stage = ZSTDds_decodeBlockHeader; + } + + /* control buffer memory usage */ + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32)(zds->fParams.windowSize >>10), + (U32)(zds->maxWindowSize >> 10) ); + zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + 
RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, + frameParameter_windowTooLarge, ""); + + /* Adapt buffer sizes to frame header instructions */ + { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered + ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } else { + ZSTD_customFree(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } } + zds->streamStage = zdss_read; + /* fall-through */ + + case zdss_read: + DEBUGLOG(5, "stage zdss_read"); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)); + DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); + if (neededInSize==0) { /* end of frame */ + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= 
neededInSize) { /* decode directly from src */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); + ip += neededInSize; + /* Function modifies the stage so we must break */ + break; + } } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->streamStage = zdss_load; + /* fall-through */ + + case zdss_load: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t loadedSize; + /* At this point we shouldn't be decompressing a block that we can stream. */ + assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); + if (isSkipFrame) { + loadedSize = MIN(toLoad, (size_t)(iend-ip)); + } else { + RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, + corruption_detected, + "should never happen"); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); + } + ip += loadedSize; + zds->inPos += loadedSize; + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } + case zdss_flush: + { size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); + op += flushedSize; + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zds->streamStage = zdss_read; + if ( (zds->outBuffSize < zds->fParams.frameContentSize) + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { + DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", + (int)(zds->outBuffSize - zds->outStart), + (U32)zds->fParams.blockSizeMax); + 
zds->outStart = zds->outEnd = 0; + } + break; + } } + /* cannot complete flush */ + someMoreWork = 0; + break; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ + } } + + /* result */ + input->pos = (size_t)(ip - (const char*)(input->src)); + output->pos = (size_t)(op - (char*)(output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. */ + zds->expectedOutBuffer = *output; + + if ((ip==istart) && (op==ostart)) { /* no forward progress */ + zds->noForwardProgress ++; + if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { + RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); + RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); + assert(0); + } + } else { + zds->noForwardProgress = 0; + } + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); + if (!nextSrcSizeHint) { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) { /* output fully flushed */ + if (zds->hostageByte) { + if (input->pos >= input->size) { + /* can't release hostage (not present) */ + zds->streamStage = zdss_read; + return 1; + } + input->pos++; /* release hostage */ + } /* zds->hostageByte */ + return 0; + } /* zds->outEnd == zds->outStart */ + if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte=1; + } + return 1; + } /* nextSrcSizeHint==0 */ + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ + assert(zds->inPos <= nextSrcSizeHint); + nextSrcSizeHint -= zds->inPos; /* part already loaded*/ + return nextSrcSizeHint; + } +} + +size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos) +{ + ZSTD_outBuffer output = { dst, 
dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} +/**** ended inlining decompress/zstd_decompress.c ****/ +/**** start inlining decompress/zstd_decompress_block.c ****/ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_decompress_block : + * this module takes care of decompressing _compressed_ block */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ +/**** skipping file: zstd_ddict.h ****/ +/**** skipping file: zstd_decompress_block.h ****/ + +/*_******************************************************* +* Macros +**********************************************************/ + +/* These two optional macros force the use one way or another of the two + * ZSTD_decompressSequences implementations. You can't force in both directions + * at the same time. 
+ */ +#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" +#endif + + +/*_******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } + + +/*-************************************************************* + * Block decoding + ***************************************************************/ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr) +{ + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); + + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); + return cSize; + } +} + + +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize); +/*! 
ZSTD_decodeLiteralsBlock() : + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + + switch(litEncType) + { + case set_repeat: + DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); + /* fall-through */ + + case set_compressed: + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + size_t hufSuccess; + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); + + /* prefetch huffman table if cold */ + if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { + PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); + } + + if (litEncType==set_repeat) { + if (singleStream) { + hufSuccess = HUF_decompress1X_usingDTable_bmi2( + 
dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } else { + hufSuccess = HUF_decompress4X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } + } else { + if (singleStream) { +#if defined(HUF_FORCE_DECOMPRESS_X2) + hufSuccess = HUF_decompress1X_DCtx_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace)); +#else + hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); +#endif + } else { + hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); + } + } + + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); + + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + dctx->litEntropy = 1; + if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return litCSize + lhSize; + } + + case set_basic: + { size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + break; + } + + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); + ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return 
lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litSize = litSize; + return lhSize+litSize; + } + + case set_rle: + { U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize+1; + } + default: + RETURN_ERROR(corruption_detected, "impossible"); + } + } +} + +/* Default FSE distribution tables. 
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification : + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions + * They were generated programmatically with following method : + * - start from default distributions, present in /lib/common/zstd_internal.h + * - generate tables normally, using ZSTD_buildFSETable() + * - printout the content of tables + * - pretify output, report below, test with fuzzer to ensure it's correct */ + +/* Default FSE distribution table for Literal Lengths */ +static const ZSTD_seqSymbol LL_defaultDTable[(1<tableLog = 0; + DTableH->fastMode = 0; + + cell->nbBits = 0; + cell->nextState = 0; + assert(nbAddBits < 255); + cell->nbAdditionalBits = (BYTE)nbAddBits; + cell->baseValue = baseValue; +} + + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * cannot fail if input is valid => + * all inputs are presumed validated at this stage */ +FORCE_INLINE_TEMPLATE +void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize) +{ + ZSTD_seqSymbol* const tableDecode = dt+1; + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + + U16* symbolNext = (U16*)wksp; + BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1); + U32 highThreshold = tableSize - 1; + + + /* Sanity Checks */ + assert(maxSymbolValue <= MaxSeq); + assert(tableLog <= MaxFSELog); + assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void)wkspSize; + /* Init, lay down lowprob symbols */ + { ZSTD_seqSymbol_header DTableH; + DTableH.tableLog = tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + assert(normalizedCounter[s]>=0); + symbolNext[s] = (U16)normalizedCounter[s]; + } } } + 
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + assert(tableSize <= 512); + /* Specialized symbol spreading for the case when there are + * no low probability (-1 count) symbols. When compressing + * small blocks we avoid low probability symbols to hit this + * case, since header decoding speed matters more. + */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. + */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { + U32 u; + for (u=0; u max, corruption_detected, ""); + { U32 const symbol = *(const BYTE*)src; + U32 const baseline = baseValue[symbol]; + U32 const nbBits = nbAdditionalBits[symbol]; + ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); + } + *DTablePtr = DTableSpace; + return 1; + case set_basic : + *DTablePtr = defaultTable; + return 0; + case set_repeat: + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); + /* prefetch FSE table if used */ + if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { + const void* const pStart = *DTablePtr; + size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); + PREFETCH_AREA(pStart, pSize); + } + return 0; + case set_compressed : + { unsigned tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + RETURN_ERROR_IF(FSE_isError(headerSize), 
corruption_detected, ""); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2); + *DTablePtr = DTableSpace; + return headerSize; + } + default : + assert(0); + RETURN_ERROR(GENERIC, "impossible"); + } +} + +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + int nbSeq; + DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); + + /* check */ + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); + + /* SeqHead */ + nbSeq = *ip++; + if (!nbSeq) { + *nbSeqPtr=0; + RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); + return 1; + } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ; + ip+=2; + } else { + RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + } + *nbSeqPtr = nbSeq; + + /* FSE table descriptors */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ + { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); + symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); + symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); + ip++; + + /* Build DTables */ + { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, + LLtype, MaxLL, LLFSELog, + ip, iend-ip, + LL_base, LL_bits, + LL_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += llhSize; + } + + { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, + OFtype, MaxOff, OffFSELog, + ip, 
iend-ip, + OF_base, OF_bits, + OF_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += ofhSize; + } + + { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, + MLtype, MaxML, MLFSELog, + ip, iend-ip, + ML_base, ML_bits, + ML_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += mlhSize; + } + } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; + const BYTE* match; +} seq_t; + +typedef struct { + size_t state; + const ZSTD_seqSymbol* table; +} ZSTD_fseState; + +typedef struct { + BIT_DStream_t DStream; + ZSTD_fseState stateLL; + ZSTD_fseState stateOffb; + ZSTD_fseState stateML; + size_t prevOffset[ZSTD_REP_NUM]; + const BYTE* prefixStart; + const BYTE* dictEnd; + size_t pos; +} seqState_t; + +/*! ZSTD_overlapCopy8() : + * Copies 8 bytes from ip to op and updates op and ip where ip <= op. + * If the offset is < 8 then the offset is spread to at least 8 bytes. + * + * Precondition: *ip <= *op + * Postcondition: *op - *op >= 8 + */ +HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { + assert(*ip <= *op); + if (offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[offset]; + (*op)[0] = (*ip)[0]; + (*op)[1] = (*ip)[1]; + (*op)[2] = (*ip)[2]; + (*op)[3] = (*ip)[3]; + *ip += dec32table[offset]; + ZSTD_copy4(*op+4, *ip); + *ip -= sub2; + } else { + ZSTD_copy8(*op, *ip); + } + *ip += 8; + *op += 8; + assert(*op - *ip >= 8); +} + +/*! 
ZSTD_safecopy() : + * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer + * and write up to 16 bytes past oend_w (op >= oend_w is allowed). + * This function is only called in the uncommon case where the sequence is near the end of the block. It + * should be fast for a single long sequence, but can be slow for several short sequences. + * + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. + * The src buffer must be before the dst buffer. + */ +static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || + (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); + + if (length < 8) { + /* Handle short lengths. */ + while (op < oend) *op++ = *ip++; + return; + } + if (ovtype == ZSTD_overlap_src_before_dst) { + /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ + assert(length >= 8); + ZSTD_overlapCopy8(&op, &ip, diff); + assert(op - ip >= 8); + assert(op <= oend); + } + + if (oend <= oend_w) { + /* No risk of overwrite. */ + ZSTD_wildcopy(op, ip, length, ovtype); + return; + } + if (op <= oend_w) { + /* Wildcopy until we get close to the end. */ + assert(oend > oend_w); + ZSTD_wildcopy(op, ip, oend_w - op, ovtype); + ip += oend_w - op; + op = oend_w; + } + /* Handle the leftovers. */ + while (op < oend) *op++ = *ip++; +} + +/* ZSTD_execSequenceEnd(): + * This version handles cases that are near the end of the output buffer. It requires + * more careful checks to make sure there is no overflow. By separating out these hard + * and unlikely cases, we can speed up the common cases. 
+ * + * NOTE: This function needs to be fast for a single long sequence, but doesn't need + * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). + */ +FORCE_NOINLINE +size_t ZSTD_execSequenceEnd(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); + + /* copy literals */ + ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); + op = oLitEnd; + *litPtr = iLitEnd; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); + match = dictEnd - (prefixStart-match); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); + return sequenceLength; +} + +HINT_INLINE +size_t ZSTD_execSequence(BYTE* op, 
+ BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) + return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); + assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. 
+ */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (UNLIKELY(sequence.litLength > 16)) { + ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); + } + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* Copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. + */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); + + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); + + /* If the match length is > 8 bytes, then continue with the wildcopy. 
*/ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) +{ + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", + (U32)DStatePtr->state, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) +{ + ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) +{ + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offsets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? 
ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) + +typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; + +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) +{ + seq_t seq; + ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; + ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; + ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; + U32 const llBase = llDInfo.baseValue; + U32 const mlBase = mlDInfo.baseValue; + U32 const ofBase = ofDInfo.baseValue; + BYTE const llBits = llDInfo.nbAdditionalBits; + BYTE const mlBits = mlDInfo.nbAdditionalBits; + BYTE const ofBits = ofDInfo.nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; + + /* sequence */ + { size_t offset; + if (ofBits > 1) { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } else { + U32 const ll0 = (llBase == 0); + if (LIKELY((ofBits == 0))) { + if 
(LIKELY(!ll0)) + offset = seqState->prevOffset[0]; + else { + offset = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } + seq.offset = offset; + } + + seq.matchLength = mlBase; + if (mlBits > 0) + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase; + if (llBits > 0) + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + if (prefetch == ZSTD_p_prefetch) { + size_t const pos = seqState->pos + seq.litLength; + const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; + seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. 
+ * No consequence though : no memory access will occur, offset is only used for prefetching */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update + * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). + * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). + * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the + * better option, so it is the default for other compilers. But, if you + * measure that it is worse, please put up a pull request. + */ + { +#if defined(__GNUC__) && !defined(__clang__) + const int kUseUpdateFseState = 1; +#else + const int kUseUpdateFseState = 0; +#endif + if (kUseUpdateFseState) { + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + } else { + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ + } + } + + return seq; +} + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. 
*/ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. */ + return 1; +} + +MEM_STATIC void ZSTD_assertValidSequence( + ZSTD_DCtx const* dctx, + BYTE const* op, BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, BYTE const* virtualStart) +{ +#if DEBUGLEVEL >= 1 + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. 
*/ + assert(seq.offset <= windowSize); + } +#else + (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; +#endif +} +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +FORCE_INLINE_TEMPLATE size_t +DONT_VECTORIZE +ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences_body"); + (void)frame; + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + size_t error = 0; + dctx->fseEntropy = 1; + { U32 i; for (i=0; ientropy.rep[i]; } + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); + + ZSTD_STATIC_ASSERT( + BIT_DStream_unfinished < BIT_DStream_completed && + BIT_DStream_endOfBuffer < BIT_DStream_completed && + BIT_DStream_completed < BIT_DStream_overflow); + +#if defined(__GNUC__) && defined(__x86_64__) + /* Align the decompression loop to 32 + 16 bytes. + * + * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression + * speed swings based on the alignment of the decompression loop. This + * performance swing is caused by parts of the decompression loop falling + * out of the DSB. 
The entire decompression loop should fit in the DSB, + * when it can't we get much worse performance. You can measure if you've + * hit the good case or the bad case with this perf command for some + * compressed file test.zst: + * + * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ + * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst + * + * If you see most cycles served out of the MITE you've hit the bad case. + * If you see most cycles served out of the DSB you've hit the good case. + * If it is pretty even then you may be in an okay case. + * + * I've been able to reproduce this issue on the following CPUs: + * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 + * Use Instruments->Counters to get DSB/MITE cycles. + * I never got performance swings, but I was able to + * go from the good case of mostly DSB to half of the + * cycles served from MITE. + * - Coffeelake: Intel i9-9900k + * + * I haven't been able to reproduce the instability or DSB misses on any + * of the following CPUS: + * - Haswell + * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH + * - Skylake + * + * If you are seeing performance stability this script can help test. + * It tests on 4 commits in zstd where I saw performance change. 
+ * + * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 + */ + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 4"); +#endif + for ( ; ; ) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + BIT_reloadDStream(&(seqState.DStream)); + op += oneSeqSize; + /* gcc and clang both don't like early returns in this loop. + * Instead break and check for an error at the end of the loop. + */ + if (UNLIKELY(ZSTD_isError(oneSeqSize))) { + error = oneSeqSize; + break; + } + if (UNLIKELY(!--nbSeq)) break; + } + + /* check if reached exact end */ + DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); + if (ZSTD_isError(error)) return error; + RETURN_ERROR_IF(nbSeq, corruption_detected, ""); + RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); + /* save reps for next block */ + { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, 
frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequencesLong_body( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + (void)frame; + + /* Regen sequences */ + if (nbSeq) { +#define STORED_SEQS 4 +#define STORED_SEQS_MASK (STORED_SEQS-1) +#define ADVANCED_SEQS 4 + seq_t sequences[STORED_SEQS]; + int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); + seqState_t seqState; + int seqNb; + dctx->fseEntropy = 1; + { int i; for (i=0; ientropy.rep[i]; } + seqState.prefixStart = prefixStart; + seqState.pos = (size_t)(op-prefixStart); + seqState.dictEnd = dictEnd; + assert(dst != NULL); + assert(iend >= ip); + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + /* prepare in advance */ + for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, 
lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if DYNAMIC_BMI2 + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static TARGET_ATTRIBUTE("bmi2") size_t +DONT_VECTORIZE +ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + +#endif /* DYNAMIC_BMI2 */ + +typedef size_t (*ZSTD_decompressSequences_t)( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame); + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static size_t +ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + DEBUGLOG(5, "ZSTD_decompressSequences"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return 
ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + } +#endif + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +/* ZSTD_decompressSequencesLong() : + * decompression function triggered when a minimum share of offsets is considered "long", + * aka out of cache. + * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". + * This function will try to mitigate main memory latency through the use of prefetching */ +static size_t +ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + DEBUGLOG(5, "ZSTD_decompressSequencesLong"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + } +#endif + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +/* ZSTD_getLongOffsetsShare() : + * condition : offTable must be valid + * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) + * compared to maximum possible of (1< 22) total += 1; + } + + assert(tableLog <= OffFSELog); + total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ + + return total; +} +#endif + +size_t +ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame) +{ /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*)src; + /* 
isLongOffset must be true if there are long offsets. + * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. + * We don't expect that to be the case in 64-bit mode. + * In block mode, window size is not known, so we have to be conservative. + * (note: but it could be evaluated from current-lowLimit) + */ + ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); + DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); + + RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); + + /* Decode literals section */ + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; + } + + /* Build Decoding Tables */ + { + /* These macros control at build-time which decompressor implementation + * we use. If neither is defined, we do some inspection and dispatch at + * runtime. + */ +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + int usePrefetchDecoder = dctx->ddictIsCold; +#endif + int nbSeq; + size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + srcSize -= seqHSize; + + RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if ( !usePrefetchDecoder + && (!frame || (dctx->fParams.windowSize > (1<<24))) + && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ + U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); + U32 const minShare = MEM_64bits() ? 
7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ + usePrefetchDecoder = (shareLongOffsets >= minShare); + } +#endif + + dctx->ddictIsCold = 0; + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if (usePrefetchDecoder) +#endif +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + /* else */ + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + } +} + + +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) +{ + if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dst; + dctx->previousDstEnd = dst; + } +} + + +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t dSize; + ZSTD_checkContinuity(dctx, dst, dstCapacity); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); + dctx->previousDstEnd = (char*)dst + dSize; + return dSize; +} +/**** ended inlining decompress/zstd_decompress_block.c ****/ diff --git a/thirdparty/stb/stb_image.h b/thirdparty/stb/stb_image.h index 5e807a0a6..9eedabedc 100644 --- a/thirdparty/stb/stb_image.h +++ b/thirdparty/stb/stb_image.h @@ -1,4 +1,4 @@ -/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb +/* stb_image - v2.30 - public domain image loader - http://nothings.org/stb no warranty implied; use at your own risk Do this: @@ -48,6 +48,8 @@ LICENSE RECENT REVISION HISTORY: + 2.30 (2024-05-31) avoid erroneous gcc warning + 2.29 (2023-05-xx) optimizations 2.28 (2023-01-29) many error fixes, security errors, just 
tons of stuff 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes 2.26 (2020-07-13) many minor fixes @@ -1072,8 +1074,8 @@ static int stbi__addints_valid(int a, int b) return a <= INT_MAX - b; } -// returns 1 if the product of two signed shorts is valid, 0 on overflow. -static int stbi__mul2shorts_valid(short a, short b) +// returns 1 if the product of two ints fits in a signed short, 0 on overflow. +static int stbi__mul2shorts_valid(int a, int b) { if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid @@ -3384,13 +3386,13 @@ static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) return 1; } -static int stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) +static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) { // some JPEGs have junk at end, skip over it but if we find what looks // like a valid marker, resume there while (!stbi__at_eof(j->s)) { - int x = stbi__get8(j->s); - while (x == 255) { // might be a marker + stbi_uc x = stbi__get8(j->s); + while (x == 0xff) { // might be a marker if (stbi__at_eof(j->s)) return STBI__MARKER_none; x = stbi__get8(j->s); if (x != 0x00 && x != 0xff) { @@ -4176,6 +4178,7 @@ typedef struct { stbi_uc *zbuffer, *zbuffer_end; int num_bits; + int hit_zeof_once; stbi__uint32 code_buffer; char *zout; @@ -4242,9 +4245,20 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) int b,s; if (a->num_bits < 16) { if (stbi__zeof(a)) { - return -1; /* report error for unexpected end of data. */ + if (!a->hit_zeof_once) { + // This is the first time we hit eof, insert 16 extra padding btis + // to allow us to keep going; if we actually consume any of them + // though, that is invalid data. This is caught later. 
+ a->hit_zeof_once = 1; + a->num_bits += 16; // add 16 implicit zero bits + } else { + // We already inserted our extra 16 padding bits and are again + // out, this stream is actually prematurely terminated. + return -1; + } + } else { + stbi__fill_bits(a); } - stbi__fill_bits(a); } b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; if (b) { @@ -4309,6 +4323,13 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) int len,dist; if (z == 256) { a->zout = zout; + if (a->hit_zeof_once && a->num_bits < 16) { + // The first time we hit zeof, we inserted 16 extra zero bits into our bit + // buffer so the decoder can just do its speculative decoding. But if we + // actually consumed any of those bits (which is the case when num_bits < 16), + // the stream actually read past the end so it is malformed. + return stbi__err("unexpected end","Corrupt PNG"); + } return 1; } if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data @@ -4320,7 +4341,7 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) dist = stbi__zdist_base[z]; if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); - if (zout + len > a->zout_end) { + if (len > a->zout_end - zout) { if (!stbi__zexpand(a, zout, len)) return 0; zout = a->zout; } @@ -4464,6 +4485,7 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) if (!stbi__parse_zlib_header(a)) return 0; a->num_bits = 0; a->code_buffer = 0; + a->hit_zeof_once = 0; do { final = stbi__zreceive(a,1); type = stbi__zreceive(a,2); @@ -4619,9 +4641,8 @@ enum { STBI__F_up=2, STBI__F_avg=3, STBI__F_paeth=4, - // synthetic filters used for first scanline to avoid needing a dummy row of 0s - STBI__F_avg_first, - STBI__F_paeth_first + // synthetic filter used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first }; static stbi_uc first_row_filter[5] = @@ -4630,29 
+4651,56 @@ static stbi_uc first_row_filter[5] = STBI__F_sub, STBI__F_none, STBI__F_avg_first, - STBI__F_paeth_first + STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub }; static int stbi__paeth(int a, int b, int c) { - int p = a + b - c; - int pa = abs(p-a); - int pb = abs(p-b); - int pc = abs(p-c); - if (pa <= pb && pa <= pc) return a; - if (pb <= pc) return b; - return c; + // This formulation looks very different from the reference in the PNG spec, but is + // actually equivalent and has favorable data dependencies and admits straightforward + // generation of branch-free code, which helps performance significantly. + int thresh = c*3 - (a + b); + int lo = a < b ? a : b; + int hi = a < b ? b : a; + int t0 = (hi <= thresh) ? lo : c; + int t1 = (thresh <= lo) ? hi : t0; + return t1; } static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; +// adds an extra all-255 alpha channel +// dest == src is legal +// img_n must be 1 or 3 +static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n) +{ + int i; + // must process data backwards since we allow dest==src + if (img_n == 1) { + for (i=x-1; i >= 0; --i) { + dest[i*2+1] = 255; + dest[i*2+0] = src[i]; + } + } else { + STBI_ASSERT(img_n == 3); + for (i=x-1; i >= 0; --i) { + dest[i*4+3] = 255; + dest[i*4+2] = src[i*3+2]; + dest[i*4+1] = src[i*3+1]; + dest[i*4+0] = src[i*3+0]; + } + } +} + // create the png data from post-deflated data static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) { - int bytes = (depth == 16? 2 : 1); + int bytes = (depth == 16 ? 
2 : 1); stbi__context *s = a->s; stbi__uint32 i,j,stride = x*out_n*bytes; stbi__uint32 img_len, img_width_bytes; + stbi_uc *filter_buf; + int all_ok = 1; int k; int img_n = s->img_n; // copy it into a local for later @@ -4664,8 +4712,11 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into if (!a->out) return stbi__err("outofmem", "Out of memory"); + // note: error exits here don't need to clean up a->out individually, + // stbi__do_png always does on error. if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); img_width_bytes = (((img_n * x * depth) + 7) >> 3); + if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG"); img_len = (img_width_bytes + 1) * y; // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, @@ -4673,189 +4724,137 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r // so just check for raw_len < img_len always. if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + // Allocate two scan lines worth of filter workspace buffer. 
+ filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0); + if (!filter_buf) return stbi__err("outofmem", "Out of memory"); + + // Filtering for low-bit-depth images + if (depth < 8) { + filter_bytes = 1; + width = img_width_bytes; + } + for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *prior; + // cur/prior filter buffers alternate + stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes; + stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes; + stbi_uc *dest = a->out + stride*j; + int nk = width * filter_bytes; int filter = *raw++; - if (filter > 4) - return stbi__err("invalid filter","Corrupt PNG"); - - if (depth < 8) { - if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG"); - cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place - filter_bytes = 1; - width = img_width_bytes; + // check filter type + if (filter > 4) { + all_ok = stbi__err("invalid filter","Corrupt PNG"); + break; } - prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above // if first row, use special filter that doesn't sample previous row if (j == 0) filter = first_row_filter[filter]; - // handle first byte explicitly - for (k=0; k < filter_bytes; ++k) { - switch (filter) { - case STBI__F_none : cur[k] = raw[k]; break; - case STBI__F_sub : cur[k] = raw[k]; break; - case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; - case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; - case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; - case STBI__F_avg_first : cur[k] = raw[k]; break; - case STBI__F_paeth_first: cur[k] = raw[k]; break; - } - } - - if (depth == 8) { - if (img_n != out_n) - cur[img_n] = 255; // first pixel - raw += img_n; - cur += out_n; - prior += out_n; - } else if (depth == 16) { - if (img_n != out_n) { - cur[filter_bytes] = 255; // first pixel top byte - cur[filter_bytes+1] = 
255; // first pixel bottom byte - } - raw += filter_bytes; - cur += output_bytes; - prior += output_bytes; - } else { - raw += 1; - cur += 1; - prior += 1; + // perform actual filtering + switch (filter) { + case STBI__F_none: + memcpy(cur, raw, nk); + break; + case STBI__F_sub: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); + break; + case STBI__F_up: + for (k = 0; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + prior[k]); + break; + case STBI__F_avg: + for (k = 0; k < filter_bytes; ++k) + cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); + break; + case STBI__F_paeth: + for (k = 0; k < filter_bytes; ++k) + cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0) + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes])); + break; + case STBI__F_avg_first: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); + break; } - // this is a little gross, so that we don't switch per-pixel or per-component - if (depth < 8 || img_n == out_n) { - int nk = (width - 1)*filter_bytes; - #define STBI__CASE(f) \ - case f: \ - for (k=0; k < nk; ++k) - switch (filter) { - // "none" filter turns into a memcpy here; make that explicit. 
- case STBI__F_none: memcpy(cur, raw, nk); break; - STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; - STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; - STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; - STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; - STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; - STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; - } - #undef STBI__CASE - raw += nk; - } else { - STBI_ASSERT(img_n+1 == out_n); - #define STBI__CASE(f) \ - case f: \ - for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ - for (k=0; k < filter_bytes; ++k) - switch (filter) { - STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; - STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; - STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; - STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; - STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; - STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; - STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; - } - #undef STBI__CASE - - // the loop above sets the high byte of the pixels' alpha, but for - // 16 bit png files we also need the low byte set. we'll do that here. 
- if (depth == 16) { - cur = a->out + stride*j; // start at the beginning of the row again - for (i=0; i < x; ++i,cur+=output_bytes) { - cur[filter_bytes+1] = 255; - } - } - } - } + raw += nk; - // we make a separate pass to expand bits to pixels; for performance, - // this could run two scanlines behind the above code, so it won't - // intefere with filtering but will still be in the cache. - if (depth < 8) { - for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; - // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit - // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + // expand decoded bits in cur to dest, also adding an extra alpha channel if desired + if (depth < 8) { stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + stbi_uc *in = cur; + stbi_uc *out = dest; + stbi_uc inb = 0; + stbi__uint32 nsmp = x*img_n; - // note that the final byte might overshoot and write more data than desired. - // we can allocate enough data that this never writes out of memory, but it - // could also overwrite the next scanline. can it overwrite non-empty data - // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
- // so we need to explicitly clamp the final ones - + // expand bits to bytes first if (depth == 4) { - for (k=x*img_n; k >= 2; k-=2, ++in) { - *cur++ = scale * ((*in >> 4) ); - *cur++ = scale * ((*in ) & 0x0f); + for (i=0; i < nsmp; ++i) { + if ((i & 1) == 0) inb = *in++; + *out++ = scale * (inb >> 4); + inb <<= 4; } - if (k > 0) *cur++ = scale * ((*in >> 4) ); } else if (depth == 2) { - for (k=x*img_n; k >= 4; k-=4, ++in) { - *cur++ = scale * ((*in >> 6) ); - *cur++ = scale * ((*in >> 4) & 0x03); - *cur++ = scale * ((*in >> 2) & 0x03); - *cur++ = scale * ((*in ) & 0x03); + for (i=0; i < nsmp; ++i) { + if ((i & 3) == 0) inb = *in++; + *out++ = scale * (inb >> 6); + inb <<= 2; } - if (k > 0) *cur++ = scale * ((*in >> 6) ); - if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); - if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); - } else if (depth == 1) { - for (k=x*img_n; k >= 8; k-=8, ++in) { - *cur++ = scale * ((*in >> 7) ); - *cur++ = scale * ((*in >> 6) & 0x01); - *cur++ = scale * ((*in >> 5) & 0x01); - *cur++ = scale * ((*in >> 4) & 0x01); - *cur++ = scale * ((*in >> 3) & 0x01); - *cur++ = scale * ((*in >> 2) & 0x01); - *cur++ = scale * ((*in >> 1) & 0x01); - *cur++ = scale * ((*in ) & 0x01); + } else { + STBI_ASSERT(depth == 1); + for (i=0; i < nsmp; ++i) { + if ((i & 7) == 0) inb = *in++; + *out++ = scale * (inb >> 7); + inb <<= 1; } - if (k > 0) *cur++ = scale * ((*in >> 7) ); - if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); - if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); - if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); - if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); - if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); - if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); } - if (img_n != out_n) { - int q; - // insert alpha = 255 - cur = a->out + stride*j; + + // insert alpha=255 values if desired + if (img_n != out_n) + stbi__create_png_alpha_expand8(dest, dest, x, img_n); + } else if (depth == 8) { + if (img_n == out_n) + memcpy(dest, cur, x*img_n); + 
else + stbi__create_png_alpha_expand8(dest, cur, x, img_n); + } else if (depth == 16) { + // convert the image data from big-endian to platform-native + stbi__uint16 *dest16 = (stbi__uint16*)dest; + stbi__uint32 nsmp = x*img_n; + + if (img_n == out_n) { + for (i = 0; i < nsmp; ++i, ++dest16, cur += 2) + *dest16 = (cur[0] << 8) | cur[1]; + } else { + STBI_ASSERT(img_n+1 == out_n); if (img_n == 1) { - for (q=x-1; q >= 0; --q) { - cur[q*2+1] = 255; - cur[q*2+0] = cur[q]; + for (i = 0; i < x; ++i, dest16 += 2, cur += 2) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = 0xffff; } } else { STBI_ASSERT(img_n == 3); - for (q=x-1; q >= 0; --q) { - cur[q*4+3] = 255; - cur[q*4+2] = cur[q*3+2]; - cur[q*4+1] = cur[q*3+1]; - cur[q*4+0] = cur[q*3+0]; + for (i = 0; i < x; ++i, dest16 += 4, cur += 6) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = (cur[2] << 8) | cur[3]; + dest16[2] = (cur[4] << 8) | cur[5]; + dest16[3] = 0xffff; } } } } - } else if (depth == 16) { - // force the image data from big-endian to platform-native. - // this is done in a separate pass due to the decoding relying - // on the data being untouched, but could probably be done - // per-line during decode if care is taken. - stbi_uc *cur = a->out; - stbi__uint16 *cur16 = (stbi__uint16*)cur; - - for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { - *cur16 = (cur[0] << 8) | cur[1]; - } } + STBI_FREE(filter_buf); + if (!all_ok) return 0; + return 1; } @@ -5161,9 +5160,11 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now. 
if (scan == STBI__SCAN_header) { ++s->img_n; return 1; } if (z->depth == 16) { - for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning + tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is } else { - for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + for (k = 0; k < s->img_n && k < 3; ++k) + tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger } } break; diff --git a/thirdparty/stb/stb_image_resize2.h b/thirdparty/stb/stb_image_resize2.h new file mode 100644 index 000000000..2a59c9598 --- /dev/null +++ b/thirdparty/stb/stb_image_resize2.h @@ -0,0 +1,10630 @@ +/* stb_image_resize2 - v2.15 - public domain image resizing + + by Jeff Roberts (v2) and Jorge L Rodriguez + http://github.com/nothings/stb + + Can be threaded with the extended API. SSE2, AVX, Neon and WASM SIMD support. Only + scaling and translation is supported, no rotations or shears. + + COMPILING & LINKING + In one C/C++ file that #includes this file, do this: + #define STB_IMAGE_RESIZE_IMPLEMENTATION + before the #include. That will create the implementation in that file. + + EASY API CALLS: + Easy API downsamples w/Mitchell filter, upsamples w/cubic interpolation, clamps to edge. 
+ + stbir_resize_uint8_srgb( input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + pixel_layout_enum ) + + stbir_resize_uint8_linear( input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + pixel_layout_enum ) + + stbir_resize_float_linear( input_pixels, input_w, input_h, input_stride_in_bytes, + output_pixels, output_w, output_h, output_stride_in_bytes, + pixel_layout_enum ) + + If you pass NULL or zero for the output_pixels, we will allocate the output buffer + for you and return it from the function (free with free() or STBIR_FREE). + As a special case, XX_stride_in_bytes of 0 means packed continuously in memory. + + API LEVELS + There are three levels of API - easy-to-use, medium-complexity and extended-complexity. + + See the "header file" section of the source for API documentation. + + ADDITIONAL DOCUMENTATION + + MEMORY ALLOCATION + By default, we use malloc and free for memory allocation. To override the + memory allocation, before the implementation #include, add a: + + #define STBIR_MALLOC(size,user_data) ... + #define STBIR_FREE(ptr,user_data) ... + + Each resize makes exactly one call to malloc/free (unless you use the + extended API where you can do one allocation for many resizes). Under + address sanitizer, we do separate allocations to find overread/writes. + + PERFORMANCE + This library was written with an emphasis on performance. When testing + stb_image_resize with RGBA, the fastest mode is STBIR_4CHANNEL with + STBIR_TYPE_UINT8 pixels and CLAMPed edges (which is what many other resize + libs do by default). Also, make sure SIMD is turned on of course (default + for 64-bit targets). Avoid WRAP edge mode if you want the fastest speed. + + This library also comes with profiling built-in. 
If you define STBIR_PROFILE, + you can use the advanced API and get low-level profiling information by + calling stbir_resize_extended_profile_info() or stbir_resize_split_profile_info() + after a resize. + + SIMD + Most of the routines have optimized SSE2, AVX, NEON and WASM versions. + + On Microsoft compilers, we automatically turn on SIMD for 64-bit x64 and + ARM; for 32-bit x86 and ARM, you select SIMD mode by defining STBIR_SSE2 or + STBIR_NEON. For AVX and AVX2, we auto-select it by detecting the /arch:AVX + or /arch:AVX2 switches. You can also always manually turn SSE2, AVX or AVX2 + support on by defining STBIR_SSE2, STBIR_AVX or STBIR_AVX2. + + On Linux, SSE2 and Neon is on by default for 64-bit x64 or ARM64. For 32-bit, + we select x86 SIMD mode by whether you have -msse2, -mavx or -mavx2 enabled + on the command line. For 32-bit ARM, you must pass -mfpu=neon-vfpv4 for both + clang and GCC, but GCC also requires an additional -mfp16-format=ieee to + automatically enable NEON. + + On x86 platforms, you can also define STBIR_FP16C to turn on FP16C instructions + for converting back and forth to half-floats. This is autoselected when we + are using AVX2. Clang and GCC also require the -mf16c switch. ARM always uses + the built-in half float hardware NEON instructions. + + You can also tell us to use multiply-add instructions with STBIR_USE_FMA. + Because x86 doesn't always have fma, we turn it off by default to maintain + determinism across all platforms. If you don't care about non-FMA determinism + and are willing to restrict yourself to more recent x86 CPUs (around the AVX + timeframe), then fma will give you around a 15% speedup. + + You can force off SIMD in all cases by defining STBIR_NO_SIMD. You can turn + off AVX or AVX2 specifically with STBIR_NO_AVX or STBIR_NO_AVX2. AVX is 10% + to 40% faster, and AVX2 is generally another 12%. 
+ + ALPHA CHANNEL + Most of the resizing functions provide the ability to control how the alpha + channel of an image is processed. + + When alpha represents transparency, it is important that when combining + colors with filtering, the pixels should not be treated equally; they + should use a weighted average based on their alpha values. For example, + if a pixel is 1% opaque bright green and another pixel is 99% opaque + black and you average them, the average will be 50% opaque, but the + unweighted average and will be a middling green color, while the weighted + average will be nearly black. This means the unweighted version introduced + green energy that didn't exist in the source image. + + (If you want to know why this makes sense, you can work out the math for + the following: consider what happens if you alpha composite a source image + over a fixed color and then average the output, vs. if you average the + source image pixels and then composite that over the same fixed color. + Only the weighted average produces the same result as the ground truth + composite-then-average result.) + + Therefore, it is in general best to "alpha weight" the pixels when applying + filters to them. This essentially means multiplying the colors by the alpha + values before combining them, and then dividing by the alpha value at the + end. + + The computer graphics industry introduced a technique called "premultiplied + alpha" or "associated alpha" in which image colors are stored in image files + already multiplied by their alpha. This saves some math when compositing, + and also avoids the need to divide by the alpha at the end (which is quite + inefficient). However, while premultiplied alpha is common in the movie CGI + industry, it is not commonplace in other industries like videogames, and most + consumer file formats are generally expected to contain not-premultiplied + colors. 
For example, Photoshop saves PNG files "unpremultiplied", and web + browsers like Chrome and Firefox expect PNG images to be unpremultiplied. + + Note that there are three possibilities that might describe your image + and resize expectation: + + 1. images are not premultiplied, alpha weighting is desired + 2. images are not premultiplied, alpha weighting is not desired + 3. images are premultiplied + + Both case #2 and case #3 require the exact same math: no alpha weighting + should be applied or removed. Only case 1 requires extra math operations; + the other two cases can be handled identically. + + stb_image_resize expects case #1 by default, applying alpha weighting to + images, expecting the input images to be unpremultiplied. This is what the + COLOR+ALPHA buffer types tell the resizer to do. + + When you use the pixel layouts STBIR_RGBA, STBIR_BGRA, STBIR_ARGB, + STBIR_ABGR, STBIR_RX, or STBIR_XR you are telling us that the pixels are + non-premultiplied. In these cases, the resizer will alpha weight the colors + (effectively creating the premultiplied image), do the filtering, and then + convert back to non-premult on exit. + + When you use the pixel layouts STBIR_RGBA_PM, STBIR_RGBA_PM, STBIR_RGBA_PM, + STBIR_RGBA_PM, STBIR_RX_PM or STBIR_XR_PM, you are telling that the pixels + ARE premultiplied. In this case, the resizer doesn't have to do the + premultipling - it can filter directly on the input. This about twice as + fast as the non-premultiplied case, so it's the right option if your data is + already setup correctly. + + When you use the pixel layout STBIR_4CHANNEL or STBIR_2CHANNEL, you are + telling us that there is no channel that represents transparency; it may be + RGB and some unrelated fourth channel that has been stored in the alpha + channel, but it is actually not alpha. No special processing will be + performed. 
+ + The difference between the generic 4 or 2 channel layouts, and the + specialized _PM versions is with the _PM versions you are telling us that + the data *is* alpha, just don't premultiply it. That's important when + using SRGB pixel formats, we need to know where the alpha is, because + it is converted linearly (rather than with the SRGB converters). + + Because alpha weighting produces the same effect as premultiplying, you + even have the option with non-premultiplied inputs to let the resizer + produce a premultiplied output. Because the intially computed alpha-weighted + output image is effectively premultiplied, this is actually more performant + than the normal path which un-premultiplies the output image as a final step. + + Finally, when converting both in and out of non-premulitplied space (for + example, when using STBIR_RGBA), we go to somewhat heroic measures to + ensure that areas with zero alpha value pixels get something reasonable + in the RGB values. If you don't care about the RGB values of zero alpha + pixels, you can call the stbir_set_non_pm_alpha_speed_over_quality() + function - this runs a premultiplied resize about 25% faster. That said, + when you really care about speed, using premultiplied pixels for both in + and out (STBIR_RGBA_PM, etc) much faster than both of these premultiplied + options. + + PIXEL LAYOUT CONVERSION + The resizer can convert from some pixel layouts to others. When using the + stbir_set_pixel_layouts(), you can, for example, specify STBIR_RGBA + on input, and STBIR_ARGB on output, and it will re-organize the channels + during the resize. Currently, you can only convert between two pixel + layouts with the same number of channels. + + DETERMINISM + We commit to being deterministic (from x64 to ARM to scalar to SIMD, etc). + This requires compiling with fast-math off (using at least /fp:precise). + Also, you must turn off fp-contracting (which turns mult+adds into fmas)! 
+ We attempt to do this with pragmas, but with Clang, you usually want to add + -ffp-contract=off to the command line as well. + + For 32-bit x86, you must use SSE and SSE2 codegen for determinism. That is, + if the scalar x87 unit gets used at all, we immediately lose determinism. + On Microsoft Visual Studio 2008 and earlier, from what we can tell there is + no way to be deterministic in 32-bit x86 (some x87 always leaks in, even + with fp:strict). On 32-bit x86 GCC, determinism requires both -msse2 and + -fpmath=sse. + + Note that we will not be deterministic with float data containing NaNs - + the NaNs will propagate differently on different SIMD and platforms. + + If you turn on STBIR_USE_FMA, then we will be deterministic with other + fma targets, but we will differ from non-fma targets (this is unavoidable, + because a fma isn't simply an add with a mult - it also introduces a + rounding difference compared to non-fma instruction sequences. + + FLOAT PIXEL FORMAT RANGE + Any range of values can be used for the non-alpha float data that you pass + in (0 to 1, -1 to 1, whatever). However, if you are inputting float values + but *outputting* bytes or shorts, you must use a range of 0 to 1 so that we + scale back properly. The alpha channel must also be 0 to 1 for any format + that does premultiplication prior to resizing. + + Note also that with float output, using filters with negative lobes, the + output filtered values might go slightly out of range. You can define + STBIR_FLOAT_LOW_CLAMP and/or STBIR_FLOAT_HIGH_CLAMP to specify the range + to clamp to on output, if that's important. + + MAX/MIN SCALE FACTORS + The input pixel resolutions are in integers, and we do the internal pointer + resolution in size_t sized integers. However, the scale ratio from input + resolution to output resolution is calculated in float form. This means + the effective possible scale ratio is limited to 24 bits (or 16 million + to 1). 
As you get close to the size of the float resolution (again, 16 + million pixels wide or high), you might start seeing float inaccuracy + issues in general in the pipeline. If you have to do extreme resizes, + you can usually do this is multiple stages (using float intermediate + buffers). + + FLIPPED IMAGES + Stride is just the delta from one scanline to the next. This means you can + use a negative stride to handle inverted images (point to the final + scanline and use a negative stride). You can invert the input or output, + using negative strides. + + DEFAULT FILTERS + For functions which don't provide explicit control over what filters to + use, you can change the compile-time defaults with: + + #define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_something + #define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_something + + See stbir_filter in the header-file section for the list of filters. + + NEW FILTERS + A number of 1D filter kernels are supplied. For a list of supported + filters, see the stbir_filter enum. You can install your own filters by + using the stbir_set_filter_callbacks function. + + PROGRESS + For interactive use with slow resize operations, you can use the + scanline callbacks in the extended API. It would have to be a *very* large + image resample to need progress though - we're very fast. + + CEIL and FLOOR + In scalar mode, the only functions we use from math.h are ceilf and floorf, + but if you have your own versions, you can define the STBIR_CEILF(v) and + STBIR_FLOORF(v) macros and we'll use them instead. In SIMD, we just use + our own versions. + + ASSERT + Define STBIR_ASSERT(boolval) to override assert() and not use assert.h + + PORTING FROM VERSION 1 + The API has changed. You can continue to use the old version of stb_image_resize.h, + which is available in the "deprecated/" directory. + + If you're using the old simple-to-use API, porting is straightforward. + (For more advanced APIs, read the documentation.) 
+ + stbir_resize_uint8(): + - call `stbir_resize_uint8_linear`, cast channel count to `stbir_pixel_layout` + + stbir_resize_float(): + - call `stbir_resize_float_linear`, cast channel count to `stbir_pixel_layout` + + stbir_resize_uint8_srgb(): + - function name is unchanged + - cast channel count to `stbir_pixel_layout` + - above is sufficient unless your image has alpha and it's not RGBA/BGRA + - in that case, follow the below instructions for stbir_resize_uint8_srgb_edgemode + + stbir_resize_uint8_srgb_edgemode() + - switch to the "medium complexity" API + - stbir_resize(), very similar API but a few more parameters: + - pixel_layout: cast channel count to `stbir_pixel_layout` + - data_type: STBIR_TYPE_UINT8_SRGB + - edge: unchanged (STBIR_EDGE_WRAP, etc.) + - filter: STBIR_FILTER_DEFAULT + - which channel is alpha is specified in stbir_pixel_layout, see enum for details + + FUTURE TODOS + * For polyphase integral filters, we just memcpy the coeffs to dupe + them, but we should indirect and use the same coeff memory. + * Add pixel layout conversions for sensible different channel counts + (maybe, 1->3/4, 3->4, 4->1, 3->1). + * For SIMD encode and decode scanline routines, do any pre-aligning + for bad input/output buffer alignments and pitch? + * For very wide scanlines, we should we do vertical strips to stay within + L2 cache. Maybe do chunks of 1K pixels at a time. There would be + some pixel reconversion, but probably dwarfed by things falling out + of cache. Probably also something possible with alternating between + scattering and gathering at high resize scales? + * Should we have a multiple MIPs at the same time function (could keep + more memory in cache during multiple resizes)? + * Rewrite the coefficient generator to do many at once. + * AVX-512 vertical kernels - worried about downclocking here. + * Convert the reincludes to macros when we know they aren't changing. 
+ * Experiment with pivoting the horizontal and always using the + vertical filters (which are faster, but perhaps not enough to overcome + the pivot cost and the extra memory touches). Need to buffer the whole + image so have to balance memory use. + * Most of our code is internally function pointers, should we compile + all the SIMD stuff always and dynamically dispatch? + + CONTRIBUTORS + Jeff Roberts: 2.0 implementation, optimizations, SIMD + Martins Mozeiko: NEON simd, WASM simd, clang and GCC whisperer + Fabian Giesen: half float and srgb converters + Sean Barrett: API design, optimizations + Jorge L Rodriguez: Original 1.0 implementation + Aras Pranckevicius: bugfixes + Nathan Reed: warning fixes for 1.0 + + REVISIONS + 2.15 (2025-07-17) fixed an assert in debug mode when using floats with input + callbacks, work around GCC warning when adding to null ptr + (thanks Johannes Spohr and Pyry Kovanen). + 2.14 (2025-05-09) fixed a bug using downsampling gather horizontal first, and + scatter with vertical first. + 2.13 (2025-02-27) fixed a bug when using input callbacks, turned off simd for + tiny-c, fixed some variables that should have been static, + fixes a bug when calculating temp memory with resizes that + exceed 2GB of temp memory (very large resizes). + 2.12 (2024-10-18) fix incorrect use of user_data with STBIR_FREE + 2.11 (2024-09-08) fix harmless asan warnings in 2-channel and 3-channel mode + with AVX-2, fix some weird scaling edge conditions with + point sample mode. + 2.10 (2024-07-27) fix the defines GCC and mingw for loop unroll control, + fix MSVC 32-bit arm half float routines. + 2.09 (2024-06-19) fix the defines for 32-bit ARM GCC builds (was selecting + hardware half floats). + 2.08 (2024-06-10) fix for RGB->BGR three channel flips and add SIMD (thanks + to Ryan Salsbury), fix for sub-rect resizes, use the + pragmas to control unrolling when they are available. 
+ 2.07 (2024-05-24) fix for slow final split during threaded conversions of very + wide scanlines when downsampling (caused by extra input + converting), fix for wide scanline resamples with many + splits (int overflow), fix GCC warning. + 2.06 (2024-02-10) fix for identical width/height 3x or more down-scaling + undersampling a single row on rare resize ratios (about 1%). + 2.05 (2024-02-07) fix for 2 pixel to 1 pixel resizes with wrap (thanks Aras), + fix for output callback (thanks Julien Koenen). + 2.04 (2023-11-17) fix for rare AVX bug, shadowed symbol (thanks Nikola Smiljanic). + 2.03 (2023-11-01) ASAN and TSAN warnings fixed, minor tweaks. + 2.00 (2023-10-10) mostly new source: new api, optimizations, simd, vertical-first, etc + 2x-5x faster without simd, 4x-12x faster with simd, + in some cases, 20x to 40x faster esp resizing large to very small. + 0.96 (2019-03-04) fixed warnings + 0.95 (2017-07-23) fixed warnings + 0.94 (2017-03-18) fixed warnings + 0.93 (2017-03-03) fixed bug with certain combinations of heights + 0.92 (2017-01-02) fix integer overflow on large (>2GB) images + 0.91 (2016-04-02) fix warnings; fix handling of subpixel regions + 0.90 (2014-09-17) first released version + + LICENSE + See end of file for license information. 
+*/
+
+#if !defined(STB_IMAGE_RESIZE_DO_HORIZONTALS) && !defined(STB_IMAGE_RESIZE_DO_VERTICALS) && !defined(STB_IMAGE_RESIZE_DO_CODERS) // for internal re-includes
+
+#ifndef STBIR_INCLUDE_STB_IMAGE_RESIZE2_H
+#define STBIR_INCLUDE_STB_IMAGE_RESIZE2_H
+
+#include <stddef.h>
+#ifdef _MSC_VER
+typedef unsigned char stbir_uint8;
+typedef unsigned short stbir_uint16;
+typedef unsigned int stbir_uint32;
+typedef unsigned __int64 stbir_uint64;
+#else
+#include <stdint.h>
+typedef uint8_t stbir_uint8;
+typedef uint16_t stbir_uint16;
+typedef uint32_t stbir_uint32;
+typedef uint64_t stbir_uint64;
+#endif
+
+#ifndef STBIRDEF
+#ifdef STB_IMAGE_RESIZE_STATIC
+#define STBIRDEF static
+#else
+#ifdef __cplusplus
+#define STBIRDEF extern "C"
+#else
+#define STBIRDEF extern
+#endif
+#endif
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//// start "header file" ///////////////////////////////////////////////////
+//
+// Easy-to-use API:
+//
+// * stride is the offset between successive rows of image data
+// in memory, in bytes. specify 0 for packed continuously in memory
+// * colorspace is linear or sRGB as specified by function name
+// * Uses the default filters
+// * Uses edge mode clamped
+// * returned result is 1 for success or 0 in case of an error.
+ + +// stbir_pixel_layout specifies: +// number of channels +// order of channels +// whether color is premultiplied by alpha +// for back compatibility, you can cast the old channel count to an stbir_pixel_layout +typedef enum +{ + STBIR_1CHANNEL = 1, + STBIR_2CHANNEL = 2, + STBIR_RGB = 3, // 3-chan, with order specified (for channel flipping) + STBIR_BGR = 0, // 3-chan, with order specified (for channel flipping) + STBIR_4CHANNEL = 5, + + STBIR_RGBA = 4, // alpha formats, where alpha is NOT premultiplied into color channels + STBIR_BGRA = 6, + STBIR_ARGB = 7, + STBIR_ABGR = 8, + STBIR_RA = 9, + STBIR_AR = 10, + + STBIR_RGBA_PM = 11, // alpha formats, where alpha is premultiplied into color channels + STBIR_BGRA_PM = 12, + STBIR_ARGB_PM = 13, + STBIR_ABGR_PM = 14, + STBIR_RA_PM = 15, + STBIR_AR_PM = 16, + + STBIR_RGBA_NO_AW = 11, // alpha formats, where NO alpha weighting is applied at all! + STBIR_BGRA_NO_AW = 12, // these are just synonyms for the _PM flags (which also do + STBIR_ARGB_NO_AW = 13, // no alpha weighting). These names just make it more clear + STBIR_ABGR_NO_AW = 14, // for some folks). + STBIR_RA_NO_AW = 15, + STBIR_AR_NO_AW = 16, + +} stbir_pixel_layout; + +//=============================================================== +// Simple-complexity API +// +// If output_pixels is NULL (0), then we will allocate the buffer and return it to you. 
+//-------------------------------- + +STBIRDEF unsigned char * stbir_resize_uint8_srgb( const unsigned char *input_pixels , int input_w , int input_h, int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_pixel_layout pixel_type ); + +STBIRDEF unsigned char * stbir_resize_uint8_linear( const unsigned char *input_pixels , int input_w , int input_h, int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_pixel_layout pixel_type ); + +STBIRDEF float * stbir_resize_float_linear( const float *input_pixels , int input_w , int input_h, int input_stride_in_bytes, + float *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_pixel_layout pixel_type ); +//=============================================================== + +//=============================================================== +// Medium-complexity API +// +// This extends the easy-to-use API as follows: +// +// * Can specify the datatype - U8, U8_SRGB, U16, FLOAT, HALF_FLOAT +// * Edge wrap can selected explicitly +// * Filter can be selected explicitly +//-------------------------------- + +typedef enum +{ + STBIR_EDGE_CLAMP = 0, + STBIR_EDGE_REFLECT = 1, + STBIR_EDGE_WRAP = 2, // this edge mode is slower and uses more memory + STBIR_EDGE_ZERO = 3, +} stbir_edge; + +typedef enum +{ + STBIR_FILTER_DEFAULT = 0, // use same filter type that easy-to-use API chooses + STBIR_FILTER_BOX = 1, // A trapezoid w/1-pixel wide ramps, same result as box for integer scale ratios + STBIR_FILTER_TRIANGLE = 2, // On upsampling, produces same results as bilinear texture filtering + STBIR_FILTER_CUBICBSPLINE = 3, // The cubic b-spline (aka Mitchell-Netrevalli with B=1,C=0), gaussian-esque + STBIR_FILTER_CATMULLROM = 4, // An interpolating cubic spline + STBIR_FILTER_MITCHELL = 5, // Mitchell-Netrevalli filter with B=1/3, C=1/3 + STBIR_FILTER_POINT_SAMPLE = 6, // Simple 
point sampling + STBIR_FILTER_OTHER = 7, // User callback specified +} stbir_filter; + +typedef enum +{ + STBIR_TYPE_UINT8 = 0, + STBIR_TYPE_UINT8_SRGB = 1, + STBIR_TYPE_UINT8_SRGB_ALPHA = 2, // alpha channel, when present, should also be SRGB (this is very unusual) + STBIR_TYPE_UINT16 = 3, + STBIR_TYPE_FLOAT = 4, + STBIR_TYPE_HALF_FLOAT = 5 +} stbir_datatype; + +// medium api +STBIRDEF void * stbir_resize( const void *input_pixels , int input_w , int input_h, int input_stride_in_bytes, + void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_pixel_layout pixel_layout, stbir_datatype data_type, + stbir_edge edge, stbir_filter filter ); +//=============================================================== + + + +//=============================================================== +// Extended-complexity API +// +// This API exposes all resize functionality. +// +// * Separate filter types for each axis +// * Separate edge modes for each axis +// * Separate input and output data types +// * Can specify regions with subpixel correctness +// * Can specify alpha flags +// * Can specify a memory callback +// * Can specify a callback data type for pixel input and output +// * Can be threaded for a single resize +// * Can be used to resize many frames without recalculating the sampler info +// +// Use this API as follows: +// 1) Call the stbir_resize_init function on a local STBIR_RESIZE structure +// 2) Call any of the stbir_set functions +// 3) Optionally call stbir_build_samplers() if you are going to resample multiple times +// with the same input and output dimensions (like resizing video frames) +// 4) Resample by calling stbir_resize_extended(). 
+// 5) Call stbir_free_samplers() if you called stbir_build_samplers() +//-------------------------------- + + +// Types: + +// INPUT CALLBACK: this callback is used for input scanlines +typedef void const * stbir_input_callback( void * optional_output, void const * input_ptr, int num_pixels, int x, int y, void * context ); + +// OUTPUT CALLBACK: this callback is used for output scanlines +typedef void stbir_output_callback( void const * output_ptr, int num_pixels, int y, void * context ); + +// callbacks for user installed filters +typedef float stbir__kernel_callback( float x, float scale, void * user_data ); // centered at zero +typedef float stbir__support_callback( float scale, void * user_data ); + +// internal structure with precomputed scaling +typedef struct stbir__info stbir__info; + +typedef struct STBIR_RESIZE // use the stbir_resize_init and stbir_override functions to set these values for future compatibility +{ + void * user_data; + void const * input_pixels; + int input_w, input_h; + double input_s0, input_t0, input_s1, input_t1; + stbir_input_callback * input_cb; + void * output_pixels; + int output_w, output_h; + int output_subx, output_suby, output_subw, output_subh; + stbir_output_callback * output_cb; + int input_stride_in_bytes; + int output_stride_in_bytes; + int splits; + int fast_alpha; + int needs_rebuild; + int called_alloc; + stbir_pixel_layout input_pixel_layout_public; + stbir_pixel_layout output_pixel_layout_public; + stbir_datatype input_data_type; + stbir_datatype output_data_type; + stbir_filter horizontal_filter, vertical_filter; + stbir_edge horizontal_edge, vertical_edge; + stbir__kernel_callback * horizontal_filter_kernel; stbir__support_callback * horizontal_filter_support; + stbir__kernel_callback * vertical_filter_kernel; stbir__support_callback * vertical_filter_support; + stbir__info * samplers; +} STBIR_RESIZE; + +// extended complexity api + + +// First off, you must ALWAYS call stbir_resize_init on your resize structure 
before any of the other calls! +STBIRDEF void stbir_resize_init( STBIR_RESIZE * resize, + const void *input_pixels, int input_w, int input_h, int input_stride_in_bytes, // stride can be zero + void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, // stride can be zero + stbir_pixel_layout pixel_layout, stbir_datatype data_type ); + +//=============================================================== +// You can update these parameters any time after resize_init and there is no cost +//-------------------------------- + +STBIRDEF void stbir_set_datatypes( STBIR_RESIZE * resize, stbir_datatype input_type, stbir_datatype output_type ); +STBIRDEF void stbir_set_pixel_callbacks( STBIR_RESIZE * resize, stbir_input_callback * input_cb, stbir_output_callback * output_cb ); // no callbacks by default +STBIRDEF void stbir_set_user_data( STBIR_RESIZE * resize, void * user_data ); // pass back STBIR_RESIZE* by default +STBIRDEF void stbir_set_buffer_ptrs( STBIR_RESIZE * resize, const void * input_pixels, int input_stride_in_bytes, void * output_pixels, int output_stride_in_bytes ); + +//=============================================================== + + +//=============================================================== +// If you call any of these functions, you will trigger a sampler rebuild! 
+//-------------------------------- + +STBIRDEF int stbir_set_pixel_layouts( STBIR_RESIZE * resize, stbir_pixel_layout input_pixel_layout, stbir_pixel_layout output_pixel_layout ); // sets new buffer layouts +STBIRDEF int stbir_set_edgemodes( STBIR_RESIZE * resize, stbir_edge horizontal_edge, stbir_edge vertical_edge ); // CLAMP by default + +STBIRDEF int stbir_set_filters( STBIR_RESIZE * resize, stbir_filter horizontal_filter, stbir_filter vertical_filter ); // STBIR_DEFAULT_FILTER_UPSAMPLE/DOWNSAMPLE by default +STBIRDEF int stbir_set_filter_callbacks( STBIR_RESIZE * resize, stbir__kernel_callback * horizontal_filter, stbir__support_callback * horizontal_support, stbir__kernel_callback * vertical_filter, stbir__support_callback * vertical_support ); + +STBIRDEF int stbir_set_pixel_subrect( STBIR_RESIZE * resize, int subx, int suby, int subw, int subh ); // sets both sub-regions (full regions by default) +STBIRDEF int stbir_set_input_subrect( STBIR_RESIZE * resize, double s0, double t0, double s1, double t1 ); // sets input sub-region (full region by default) +STBIRDEF int stbir_set_output_pixel_subrect( STBIR_RESIZE * resize, int subx, int suby, int subw, int subh ); // sets output sub-region (full region by default) + +// when inputting AND outputting non-premultiplied alpha pixels, we use a slower but higher quality technique +// that fills the zero alpha pixel's RGB values with something plausible. If you don't care about areas of +// zero alpha, you can call this function to get about a 25% speed improvement for STBIR_RGBA to STBIR_RGBA +// types of resizes. +STBIRDEF int stbir_set_non_pm_alpha_speed_over_quality( STBIR_RESIZE * resize, int non_pma_alpha_speed_over_quality ); +//=============================================================== + + +//=============================================================== +// You can call build_samplers to prebuild all the internal data we need to resample. 
+// Then, if you call resize_extended many times with the same resize, you only pay the +// cost once. +// If you do call build_samplers, you MUST call free_samplers eventually. +//-------------------------------- + +// This builds the samplers and does one allocation +STBIRDEF int stbir_build_samplers( STBIR_RESIZE * resize ); + +// You MUST call this, if you call stbir_build_samplers or stbir_build_samplers_with_splits +STBIRDEF void stbir_free_samplers( STBIR_RESIZE * resize ); +//=============================================================== + + +// And this is the main function to perform the resize synchronously on one thread. +STBIRDEF int stbir_resize_extended( STBIR_RESIZE * resize ); + + +//=============================================================== +// Use these functions for multithreading. +// 1) You call stbir_build_samplers_with_splits first on the main thread +// 2) Then stbir_resize_with_split on each thread +// 3) stbir_free_samplers when done on the main thread +//-------------------------------- + +// This will build samplers for threading. +// You can pass in the number of threads you'd like to use (try_splits). +// It returns the number of splits (threads) that you can call it with. +/// It might be less if the image resize can't be split up that many ways. + +STBIRDEF int stbir_build_samplers_with_splits( STBIR_RESIZE * resize, int try_splits ); + +// This function does a split of the resizing (you call this fuction for each +// split, on multiple threads). A split is a piece of the output resize pixel space. + +// Note that you MUST call stbir_build_samplers_with_splits before stbir_resize_extended_split! + +// Usually, you will always call stbir_resize_split with split_start as the thread_index +// and "1" for the split_count. 
+// But, if you have a weird situation where you MIGHT want 8 threads, but sometimes +// only 4 threads, you can use 0,2,4,6 for the split_start's and use "2" for the +// split_count each time to turn in into a 4 thread resize. (This is unusual). + +STBIRDEF int stbir_resize_extended_split( STBIR_RESIZE * resize, int split_start, int split_count ); +//=============================================================== + + +//=============================================================== +// Pixel Callbacks info: +//-------------------------------- + +// The input callback is super flexible - it calls you with the input address +// (based on the stride and base pointer), it gives you an optional_output +// pointer that you can fill, or you can just return your own pointer into +// your own data. +// +// You can also do conversion from non-supported data types if necessary - in +// this case, you ignore the input_ptr and just use the x and y parameters to +// calculate your own input_ptr based on the size of each non-supported pixel. +// (Something like the third example below.) +// +// You can also install just an input or just an output callback by setting the +// callback that you don't want to zero. 
+// +// First example, progress: (getting a callback that you can monitor the progress): +// void const * my_callback( void * optional_output, void const * input_ptr, int num_pixels, int x, int y, void * context ) +// { +// percentage_done = y / input_height; +// return input_ptr; // use buffer from call +// } +// +// Next example, copying: (copy from some other buffer or stream): +// void const * my_callback( void * optional_output, void const * input_ptr, int num_pixels, int x, int y, void * context ) +// { +// CopyOrStreamData( optional_output, other_data_src, num_pixels * pixel_width_in_bytes ); +// return optional_output; // return the optional buffer that we filled +// } +// +// Third example, input another buffer without copying: (zero-copy from other buffer): +// void const * my_callback( void * optional_output, void const * input_ptr, int num_pixels, int x, int y, void * context ) +// { +// void * pixels = ( (char*) other_image_base ) + ( y * other_image_stride ) + ( x * other_pixel_width_in_bytes ); +// return pixels; // return pointer to your data without copying +// } +// +// +// The output callback is considerably simpler - it just calls you so that you can dump +// out each scanline. You could even directly copy out to disk if you have a simple format +// like TGA or BMP. You can also convert to other output types here if you want. 
+//
+// Simple example:
+// void const * my_output( void * output_ptr, int num_pixels, int y, void * context )
+// {
+// percentage_done = y / output_height;
+// fwrite( output_ptr, pixel_width_in_bytes, num_pixels, output_file );
+// }
+//===============================================================
+
+
+
+
+//===============================================================
+// optional built-in profiling API
+//--------------------------------
+
+#ifdef STBIR_PROFILE
+
+typedef struct STBIR_PROFILE_INFO
+{
+ stbir_uint64 total_clocks;
+
+ // how many clocks spent (of total_clocks) in the various resize routines, along with a string description
+ // there are "resize_count" number of zones
+ stbir_uint64 clocks[ 8 ];
+ char const ** descriptions;
+
+ // count of clocks and descriptions
+ stbir_uint32 count;
+} STBIR_PROFILE_INFO;
+
+// use after calling stbir_resize_extended (or stbir_build_samplers or stbir_build_samplers_with_splits)
+STBIRDEF void stbir_resize_build_profile_info( STBIR_PROFILE_INFO * out_info, STBIR_RESIZE const * resize );
+
+// use after calling stbir_resize_extended
+STBIRDEF void stbir_resize_extended_profile_info( STBIR_PROFILE_INFO * out_info, STBIR_RESIZE const * resize );
+
+// use after calling stbir_resize_extended_split
+STBIRDEF void stbir_resize_split_profile_info( STBIR_PROFILE_INFO * out_info, STBIR_RESIZE const * resize, int split_start, int split_num );
+
+//===============================================================
+
+#endif
+
+
+//// end header file /////////////////////////////////////////////////////
+#endif // STBIR_INCLUDE_STB_IMAGE_RESIZE2_H
+
+#if defined(STB_IMAGE_RESIZE_IMPLEMENTATION) || defined(STB_IMAGE_RESIZE2_IMPLEMENTATION)
+
+#ifndef STBIR_ASSERT
+#include <assert.h>
+#define STBIR_ASSERT(x) assert(x)
+#endif
+
+#ifndef STBIR_MALLOC
+#include <stdlib.h>
+#define STBIR_MALLOC(size,user_data) ((void)(user_data), malloc(size))
+#define STBIR_FREE(ptr,user_data) ((void)(user_data), free(ptr))
+// (we used the comma operator to
evaluate user_data, to avoid "unused parameter" warnings) +#endif + +#ifdef _MSC_VER + +#define stbir__inline __forceinline + +#else + +#define stbir__inline __inline__ + +// Clang address sanitizer +#if defined(__has_feature) + #if __has_feature(address_sanitizer) || __has_feature(memory_sanitizer) + #ifndef STBIR__SEPARATE_ALLOCATIONS + #define STBIR__SEPARATE_ALLOCATIONS + #endif + #endif +#endif + +#endif + +// GCC and MSVC +#if defined(__SANITIZE_ADDRESS__) + #ifndef STBIR__SEPARATE_ALLOCATIONS + #define STBIR__SEPARATE_ALLOCATIONS + #endif +#endif + +// Always turn off automatic FMA use - use STBIR_USE_FMA if you want. +// Otherwise, this is a determinism disaster. +#ifndef STBIR_DONT_CHANGE_FP_CONTRACT // override in case you don't want this behavior +#if defined(_MSC_VER) && !defined(__clang__) +#if _MSC_VER > 1200 +#pragma fp_contract(off) +#endif +#elif defined(__GNUC__) && !defined(__clang__) +#pragma GCC optimize("fp-contract=off") +#else +#pragma STDC FP_CONTRACT OFF +#endif +#endif + +#ifdef _MSC_VER +#define STBIR__UNUSED(v) (void)(v) +#else +#define STBIR__UNUSED(v) (void)sizeof(v) +#endif + +#define STBIR__ARRAY_SIZE(a) (sizeof((a))/sizeof((a)[0])) + + +#ifndef STBIR_DEFAULT_FILTER_UPSAMPLE +#define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_CATMULLROM +#endif + +#ifndef STBIR_DEFAULT_FILTER_DOWNSAMPLE +#define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_MITCHELL +#endif + + +#ifndef STBIR__HEADER_FILENAME +#define STBIR__HEADER_FILENAME "stb_image_resize2.h" +#endif + +// the internal pixel layout enums are in a different order, so we can easily do range comparisons of types +// the public pixel layout is ordered in a way that if you cast num_channels (1-4) to the enum, you get something sensible +typedef enum +{ + STBIRI_1CHANNEL = 0, + STBIRI_2CHANNEL = 1, + STBIRI_RGB = 2, + STBIRI_BGR = 3, + STBIRI_4CHANNEL = 4, + + STBIRI_RGBA = 5, + STBIRI_BGRA = 6, + STBIRI_ARGB = 7, + STBIRI_ABGR = 8, + STBIRI_RA = 9, + STBIRI_AR = 10, + + STBIRI_RGBA_PM 
= 11, + STBIRI_BGRA_PM = 12, + STBIRI_ARGB_PM = 13, + STBIRI_ABGR_PM = 14, + STBIRI_RA_PM = 15, + STBIRI_AR_PM = 16, +} stbir_internal_pixel_layout; + +// define the public pixel layouts to not compile inside the implementation (to avoid accidental use) +#define STBIR_BGR bad_dont_use_in_implementation +#define STBIR_1CHANNEL STBIR_BGR +#define STBIR_2CHANNEL STBIR_BGR +#define STBIR_RGB STBIR_BGR +#define STBIR_RGBA STBIR_BGR +#define STBIR_4CHANNEL STBIR_BGR +#define STBIR_BGRA STBIR_BGR +#define STBIR_ARGB STBIR_BGR +#define STBIR_ABGR STBIR_BGR +#define STBIR_RA STBIR_BGR +#define STBIR_AR STBIR_BGR +#define STBIR_RGBA_PM STBIR_BGR +#define STBIR_BGRA_PM STBIR_BGR +#define STBIR_ARGB_PM STBIR_BGR +#define STBIR_ABGR_PM STBIR_BGR +#define STBIR_RA_PM STBIR_BGR +#define STBIR_AR_PM STBIR_BGR + +// must match stbir_datatype +static unsigned char stbir__type_size[] = { + 1,1,1,2,4,2 // STBIR_TYPE_UINT8,STBIR_TYPE_UINT8_SRGB,STBIR_TYPE_UINT8_SRGB_ALPHA,STBIR_TYPE_UINT16,STBIR_TYPE_FLOAT,STBIR_TYPE_HALF_FLOAT +}; + +// When gathering, the contributors are which source pixels contribute. +// When scattering, the contributors are which destination pixels are contributed to. 
+typedef struct +{ + int n0; // First contributing pixel + int n1; // Last contributing pixel +} stbir__contributors; + +typedef struct +{ + int lowest; // First sample index for whole filter + int highest; // Last sample index for whole filter + int widest; // widest single set of samples for an output +} stbir__filter_extent_info; + +typedef struct +{ + int n0; // First pixel of decode buffer to write to + int n1; // Last pixel of decode that will be written to + int pixel_offset_for_input; // Pixel offset into input_scanline +} stbir__span; + +typedef struct stbir__scale_info +{ + int input_full_size; + int output_sub_size; + float scale; + float inv_scale; + float pixel_shift; // starting shift in output pixel space (in pixels) + int scale_is_rational; + stbir_uint32 scale_numerator, scale_denominator; +} stbir__scale_info; + +typedef struct +{ + stbir__contributors * contributors; + float* coefficients; + stbir__contributors * gather_prescatter_contributors; + float * gather_prescatter_coefficients; + stbir__scale_info scale_info; + float support; + stbir_filter filter_enum; + stbir__kernel_callback * filter_kernel; + stbir__support_callback * filter_support; + stbir_edge edge; + int coefficient_width; + int filter_pixel_width; + int filter_pixel_margin; + int num_contributors; + int contributors_size; + int coefficients_size; + stbir__filter_extent_info extent_info; + int is_gather; // 0 = scatter, 1 = gather with scale >= 1, 2 = gather with scale < 1 + int gather_prescatter_num_contributors; + int gather_prescatter_coefficient_width; + int gather_prescatter_contributors_size; + int gather_prescatter_coefficients_size; +} stbir__sampler; + +typedef struct +{ + stbir__contributors conservative; + int edge_sizes[2]; // this can be less than filter_pixel_margin, if the filter and scaling falls off + stbir__span spans[2]; // can be two spans, if doing input subrect with clamp mode WRAP +} stbir__extents; + +typedef struct +{ +#ifdef STBIR_PROFILE + union + { + 
struct { stbir_uint64 total, looping, vertical, horizontal, decode, encode, alpha, unalpha; } named; + stbir_uint64 array[8]; + } profile; + stbir_uint64 * current_zone_excluded_ptr; +#endif + float* decode_buffer; + + int ring_buffer_first_scanline; + int ring_buffer_last_scanline; + int ring_buffer_begin_index; // first_scanline is at this index in the ring buffer + int start_output_y, end_output_y; + int start_input_y, end_input_y; // used in scatter only + + #ifdef STBIR__SEPARATE_ALLOCATIONS + float** ring_buffers; // one pointer for each ring buffer + #else + float* ring_buffer; // one big buffer that we index into + #endif + + float* vertical_buffer; + + char no_cache_straddle[64]; +} stbir__per_split_info; + +typedef float * stbir__decode_pixels_func( float * decode, int width_times_channels, void const * input ); +typedef void stbir__alpha_weight_func( float * decode_buffer, int width_times_channels ); +typedef void stbir__horizontal_gather_channels_func( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, + stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ); +typedef void stbir__alpha_unweight_func(float * encode_buffer, int width_times_channels ); +typedef void stbir__encode_pixels_func( void * output, int width_times_channels, float const * encode ); + +struct stbir__info +{ +#ifdef STBIR_PROFILE + union + { + struct { stbir_uint64 total, build, alloc, horizontal, vertical, cleanup, pivot; } named; + stbir_uint64 array[7]; + } profile; + stbir_uint64 * current_zone_excluded_ptr; +#endif + stbir__sampler horizontal; + stbir__sampler vertical; + + void const * input_data; + void * output_data; + + int input_stride_bytes; + int output_stride_bytes; + int ring_buffer_length_bytes; // The length of an individual entry in the ring buffer. 
The total number of ring buffers is stbir__get_filter_pixel_width(filter) + int ring_buffer_num_entries; // Total number of entries in the ring buffer. + + stbir_datatype input_type; + stbir_datatype output_type; + + stbir_input_callback * in_pixels_cb; + void * user_data; + stbir_output_callback * out_pixels_cb; + + stbir__extents scanline_extents; + + void * alloced_mem; + stbir__per_split_info * split_info; // by default 1, but there will be N of these allocated based on the thread init you did + + stbir__decode_pixels_func * decode_pixels; + stbir__alpha_weight_func * alpha_weight; + stbir__horizontal_gather_channels_func * horizontal_gather_channels; + stbir__alpha_unweight_func * alpha_unweight; + stbir__encode_pixels_func * encode_pixels; + + int alloc_ring_buffer_num_entries; // Number of entries in the ring buffer that will be allocated + int splits; // count of splits + + stbir_internal_pixel_layout input_pixel_layout_internal; + stbir_internal_pixel_layout output_pixel_layout_internal; + + int input_color_and_type; + int offset_x, offset_y; // offset within output_data + int vertical_first; + int channels; + int effective_channels; // same as channels, except on RGBA/ARGB (7), or XA/AX (3) + size_t alloced_total; +}; + + +#define stbir__max_uint8_as_float 255.0f +#define stbir__max_uint16_as_float 65535.0f +#define stbir__max_uint8_as_float_inverted 3.9215689e-03f // (1.0f/255.0f) +#define stbir__max_uint16_as_float_inverted 1.5259022e-05f // (1.0f/65535.0f) +#define stbir__small_float ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20)) + +// min/max friendly +#define STBIR_CLAMP(x, xmin, xmax) for(;;) { \ + if ( (x) < (xmin) ) (x) = (xmin); \ + if ( (x) > (xmax) ) (x) = (xmax); \ + break; \ +} + +static stbir__inline int stbir__min(int a, int b) +{ + return a < b ? a : b; +} + +static stbir__inline int stbir__max(int a, int b) +{ + return a > b ? 
a : b; +} + +static float stbir__srgb_uchar_to_linear_float[256] = { + 0.000000f, 0.000304f, 0.000607f, 0.000911f, 0.001214f, 0.001518f, 0.001821f, 0.002125f, 0.002428f, 0.002732f, 0.003035f, + 0.003347f, 0.003677f, 0.004025f, 0.004391f, 0.004777f, 0.005182f, 0.005605f, 0.006049f, 0.006512f, 0.006995f, 0.007499f, + 0.008023f, 0.008568f, 0.009134f, 0.009721f, 0.010330f, 0.010960f, 0.011612f, 0.012286f, 0.012983f, 0.013702f, 0.014444f, + 0.015209f, 0.015996f, 0.016807f, 0.017642f, 0.018500f, 0.019382f, 0.020289f, 0.021219f, 0.022174f, 0.023153f, 0.024158f, + 0.025187f, 0.026241f, 0.027321f, 0.028426f, 0.029557f, 0.030713f, 0.031896f, 0.033105f, 0.034340f, 0.035601f, 0.036889f, + 0.038204f, 0.039546f, 0.040915f, 0.042311f, 0.043735f, 0.045186f, 0.046665f, 0.048172f, 0.049707f, 0.051269f, 0.052861f, + 0.054480f, 0.056128f, 0.057805f, 0.059511f, 0.061246f, 0.063010f, 0.064803f, 0.066626f, 0.068478f, 0.070360f, 0.072272f, + 0.074214f, 0.076185f, 0.078187f, 0.080220f, 0.082283f, 0.084376f, 0.086500f, 0.088656f, 0.090842f, 0.093059f, 0.095307f, + 0.097587f, 0.099899f, 0.102242f, 0.104616f, 0.107023f, 0.109462f, 0.111932f, 0.114435f, 0.116971f, 0.119538f, 0.122139f, + 0.124772f, 0.127438f, 0.130136f, 0.132868f, 0.135633f, 0.138432f, 0.141263f, 0.144128f, 0.147027f, 0.149960f, 0.152926f, + 0.155926f, 0.158961f, 0.162029f, 0.165132f, 0.168269f, 0.171441f, 0.174647f, 0.177888f, 0.181164f, 0.184475f, 0.187821f, + 0.191202f, 0.194618f, 0.198069f, 0.201556f, 0.205079f, 0.208637f, 0.212231f, 0.215861f, 0.219526f, 0.223228f, 0.226966f, + 0.230740f, 0.234551f, 0.238398f, 0.242281f, 0.246201f, 0.250158f, 0.254152f, 0.258183f, 0.262251f, 0.266356f, 0.270498f, + 0.274677f, 0.278894f, 0.283149f, 0.287441f, 0.291771f, 0.296138f, 0.300544f, 0.304987f, 0.309469f, 0.313989f, 0.318547f, + 0.323143f, 0.327778f, 0.332452f, 0.337164f, 0.341914f, 0.346704f, 0.351533f, 0.356400f, 0.361307f, 0.366253f, 0.371238f, + 0.376262f, 0.381326f, 0.386430f, 0.391573f, 0.396755f, 0.401978f, 0.407240f, 
0.412543f, 0.417885f, 0.423268f, 0.428691f, + 0.434154f, 0.439657f, 0.445201f, 0.450786f, 0.456411f, 0.462077f, 0.467784f, 0.473532f, 0.479320f, 0.485150f, 0.491021f, + 0.496933f, 0.502887f, 0.508881f, 0.514918f, 0.520996f, 0.527115f, 0.533276f, 0.539480f, 0.545725f, 0.552011f, 0.558340f, + 0.564712f, 0.571125f, 0.577581f, 0.584078f, 0.590619f, 0.597202f, 0.603827f, 0.610496f, 0.617207f, 0.623960f, 0.630757f, + 0.637597f, 0.644480f, 0.651406f, 0.658375f, 0.665387f, 0.672443f, 0.679543f, 0.686685f, 0.693872f, 0.701102f, 0.708376f, + 0.715694f, 0.723055f, 0.730461f, 0.737911f, 0.745404f, 0.752942f, 0.760525f, 0.768151f, 0.775822f, 0.783538f, 0.791298f, + 0.799103f, 0.806952f, 0.814847f, 0.822786f, 0.830770f, 0.838799f, 0.846873f, 0.854993f, 0.863157f, 0.871367f, 0.879622f, + 0.887923f, 0.896269f, 0.904661f, 0.913099f, 0.921582f, 0.930111f, 0.938686f, 0.947307f, 0.955974f, 0.964686f, 0.973445f, + 0.982251f, 0.991102f, 1.0f +}; + +typedef union +{ + unsigned int u; + float f; +} stbir__FP32; + +// From https://gist.github.com/rygorous/2203834 + +static const stbir_uint32 fp32_to_srgb8_tab4[104] = { + 0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d, 0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d, + 0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a, 0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a, + 0x010e0033, 0x01280033, 0x01410033, 0x015b0033, 0x01750033, 0x018f0033, 0x01a80033, 0x01c20033, + 0x01dc0067, 0x020f0067, 0x02430067, 0x02760067, 0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067, + 0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce, 0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5, + 0x06970158, 0x07420142, 0x07e30130, 0x087b0120, 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2, + 0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, 0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143, + 0x11070264, 0x1238023e, 0x1357021d, 0x14660201, 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af, + 0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad, 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240, + 
0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392, 0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300, + 0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5, 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401, + 0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d, 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559, + 0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, 0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723, +}; + +static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in) +{ + static const stbir__FP32 almostone = { 0x3f7fffff }; // 1-eps + static const stbir__FP32 minval = { (127-13) << 23 }; + stbir_uint32 tab,bias,scale,t; + stbir__FP32 f; + + // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively. + // The tests are carefully written so that NaNs map to 0, same as in the reference + // implementation. + if (!(in > minval.f)) // written this way to catch NaNs + return 0; + if (in > almostone.f) + return 255; + + // Do the table lookup and unpack bias, scale + f.f = in; + tab = fp32_to_srgb8_tab4[(f.u - minval.u) >> 20]; + bias = (tab >> 16) << 9; + scale = tab & 0xffff; + + // Grab next-highest mantissa bits and perform linear interpolation + t = (f.u >> 12) & 0xff; + return (unsigned char) ((bias + scale*t) >> 16); +} + +#ifndef STBIR_FORCE_GATHER_FILTER_SCANLINES_AMOUNT +#define STBIR_FORCE_GATHER_FILTER_SCANLINES_AMOUNT 32 // when downsampling and <= 32 scanlines of buffering, use gather. gather used down to 1/8th scaling for 25% win. +#endif + +#ifndef STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS +#define STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS 4 // when threading, what is the minimum number of scanlines for a split? 
+#endif + +#define STBIR_INPUT_CALLBACK_PADDING 3 + +#ifdef _M_IX86_FP +#if ( _M_IX86_FP >= 1 ) +#ifndef STBIR_SSE +#define STBIR_SSE +#endif +#endif +#endif + +#ifdef __TINYC__ + // tiny c has no intrinsics yet - this can become a version check if they add them + #define STBIR_NO_SIMD +#endif + +#if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(_M_AMD64) || defined(__SSE2__) || defined(STBIR_SSE) || defined(STBIR_SSE2) + #ifndef STBIR_SSE2 + #define STBIR_SSE2 + #endif + #if defined(__AVX__) || defined(STBIR_AVX2) + #ifndef STBIR_AVX + #ifndef STBIR_NO_AVX + #define STBIR_AVX + #endif + #endif + #endif + #if defined(__AVX2__) || defined(STBIR_AVX2) + #ifndef STBIR_NO_AVX2 + #ifndef STBIR_AVX2 + #define STBIR_AVX2 + #endif + #if defined( _MSC_VER ) && !defined(__clang__) + #ifndef STBIR_FP16C // FP16C instructions are on all AVX2 cpus, so we can autoselect it here on microsoft - clang needs -m16c + #define STBIR_FP16C + #endif + #endif + #endif + #endif + #ifdef __F16C__ + #ifndef STBIR_FP16C // turn on FP16C instructions if the define is set (for clang and gcc) + #define STBIR_FP16C + #endif + #endif +#endif + +#if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || ((__ARM_NEON_FP & 4) != 0) || defined(__ARM_NEON__) +#ifndef STBIR_NEON +#define STBIR_NEON +#endif +#endif + +#if defined(_M_ARM) || defined(__arm__) +#ifdef STBIR_USE_FMA +#undef STBIR_USE_FMA // no FMA for 32-bit arm on MSVC +#endif +#endif + +#if defined(__wasm__) && defined(__wasm_simd128__) +#ifndef STBIR_WASM +#define STBIR_WASM +#endif +#endif + +// restrict pointers for the output pointers, other loop and unroll control +#if defined( _MSC_VER ) && !defined(__clang__) + #define STBIR_STREAMOUT_PTR( star ) star __restrict + #define STBIR_NO_UNROLL( ptr ) __assume(ptr) // this oddly keeps msvc from unrolling a loop + #if _MSC_VER >= 1900 + #define STBIR_NO_UNROLL_LOOP_START __pragma(loop( no_vector )) + #else + #define 
STBIR_NO_UNROLL_LOOP_START + #endif +#elif defined( __clang__ ) + #define STBIR_STREAMOUT_PTR( star ) star __restrict__ + #define STBIR_NO_UNROLL( ptr ) __asm__ (""::"r"(ptr)) + #if ( __clang_major__ >= 4 ) || ( ( __clang_major__ >= 3 ) && ( __clang_minor__ >= 5 ) ) + #define STBIR_NO_UNROLL_LOOP_START _Pragma("clang loop unroll(disable)") _Pragma("clang loop vectorize(disable)") + #else + #define STBIR_NO_UNROLL_LOOP_START + #endif +#elif defined( __GNUC__ ) + #define STBIR_STREAMOUT_PTR( star ) star __restrict__ + #define STBIR_NO_UNROLL( ptr ) __asm__ (""::"r"(ptr)) + #if __GNUC__ >= 14 + #define STBIR_NO_UNROLL_LOOP_START _Pragma("GCC unroll 0") _Pragma("GCC novector") + #else + #define STBIR_NO_UNROLL_LOOP_START + #endif + #define STBIR_NO_UNROLL_LOOP_START_INF_FOR +#else + #define STBIR_STREAMOUT_PTR( star ) star + #define STBIR_NO_UNROLL( ptr ) + #define STBIR_NO_UNROLL_LOOP_START +#endif + +#ifndef STBIR_NO_UNROLL_LOOP_START_INF_FOR +#define STBIR_NO_UNROLL_LOOP_START_INF_FOR STBIR_NO_UNROLL_LOOP_START +#endif + +#ifdef STBIR_NO_SIMD // force simd off for whatever reason + +// force simd off overrides everything else, so clear it all + +#ifdef STBIR_SSE2 +#undef STBIR_SSE2 +#endif + +#ifdef STBIR_AVX +#undef STBIR_AVX +#endif + +#ifdef STBIR_NEON +#undef STBIR_NEON +#endif + +#ifdef STBIR_AVX2 +#undef STBIR_AVX2 +#endif + +#ifdef STBIR_FP16C +#undef STBIR_FP16C +#endif + +#ifdef STBIR_WASM +#undef STBIR_WASM +#endif + +#ifdef STBIR_SIMD +#undef STBIR_SIMD +#endif + +#else // STBIR_SIMD + +#ifdef STBIR_SSE2 + #include + + #define stbir__simdf __m128 + #define stbir__simdi __m128i + + #define stbir_simdi_castf( reg ) _mm_castps_si128(reg) + #define stbir_simdf_casti( reg ) _mm_castsi128_ps(reg) + + #define stbir__simdf_load( reg, ptr ) (reg) = _mm_loadu_ps( (float const*)(ptr) ) + #define stbir__simdi_load( reg, ptr ) (reg) = _mm_loadu_si128 ( (stbir__simdi const*)(ptr) ) + #define stbir__simdf_load1( out, ptr ) (out) = _mm_load_ss( (float const*)(ptr) ) // 
top values can be random (not denormal or nan for perf) + #define stbir__simdi_load1( out, ptr ) (out) = _mm_castps_si128( _mm_load_ss( (float const*)(ptr) )) + #define stbir__simdf_load1z( out, ptr ) (out) = _mm_load_ss( (float const*)(ptr) ) // top values must be zero + #define stbir__simdf_frep4( fvar ) _mm_set_ps1( fvar ) + #define stbir__simdf_load1frep4( out, fvar ) (out) = _mm_set_ps1( fvar ) + #define stbir__simdf_load2( out, ptr ) (out) = _mm_castsi128_ps( _mm_loadl_epi64( (__m128i*)(ptr)) ) // top values can be random (not denormal or nan for perf) + #define stbir__simdf_load2z( out, ptr ) (out) = _mm_castsi128_ps( _mm_loadl_epi64( (__m128i*)(ptr)) ) // top values must be zero + #define stbir__simdf_load2hmerge( out, reg, ptr ) (out) = _mm_castpd_ps(_mm_loadh_pd( _mm_castps_pd(reg), (double*)(ptr) )) + + #define stbir__simdf_zeroP() _mm_setzero_ps() + #define stbir__simdf_zero( reg ) (reg) = _mm_setzero_ps() + + #define stbir__simdf_store( ptr, reg ) _mm_storeu_ps( (float*)(ptr), reg ) + #define stbir__simdf_store1( ptr, reg ) _mm_store_ss( (float*)(ptr), reg ) + #define stbir__simdf_store2( ptr, reg ) _mm_storel_epi64( (__m128i*)(ptr), _mm_castps_si128(reg) ) + #define stbir__simdf_store2h( ptr, reg ) _mm_storeh_pd( (double*)(ptr), _mm_castps_pd(reg) ) + + #define stbir__simdi_store( ptr, reg ) _mm_storeu_si128( (__m128i*)(ptr), reg ) + #define stbir__simdi_store1( ptr, reg ) _mm_store_ss( (float*)(ptr), _mm_castsi128_ps(reg) ) + #define stbir__simdi_store2( ptr, reg ) _mm_storel_epi64( (__m128i*)(ptr), (reg) ) + + #define stbir__prefetch( ptr ) _mm_prefetch((char*)(ptr), _MM_HINT_T0 ) + + #define stbir__simdi_expand_u8_to_u32(out0,out1,out2,out3,ireg) \ + { \ + stbir__simdi zero = _mm_setzero_si128(); \ + out2 = _mm_unpacklo_epi8( ireg, zero ); \ + out3 = _mm_unpackhi_epi8( ireg, zero ); \ + out0 = _mm_unpacklo_epi16( out2, zero ); \ + out1 = _mm_unpackhi_epi16( out2, zero ); \ + out2 = _mm_unpacklo_epi16( out3, zero ); \ + out3 = _mm_unpackhi_epi16( 
out3, zero ); \ + } + +#define stbir__simdi_expand_u8_to_1u32(out,ireg) \ + { \ + stbir__simdi zero = _mm_setzero_si128(); \ + out = _mm_unpacklo_epi8( ireg, zero ); \ + out = _mm_unpacklo_epi16( out, zero ); \ + } + + #define stbir__simdi_expand_u16_to_u32(out0,out1,ireg) \ + { \ + stbir__simdi zero = _mm_setzero_si128(); \ + out0 = _mm_unpacklo_epi16( ireg, zero ); \ + out1 = _mm_unpackhi_epi16( ireg, zero ); \ + } + + #define stbir__simdf_convert_float_to_i32( i, f ) (i) = _mm_cvttps_epi32(f) + #define stbir__simdf_convert_float_to_int( f ) _mm_cvtt_ss2si(f) + #define stbir__simdf_convert_float_to_uint8( f ) ((unsigned char)_mm_cvtsi128_si32(_mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(f,STBIR__CONSTF(STBIR_max_uint8_as_float)),_mm_setzero_ps())))) + #define stbir__simdf_convert_float_to_short( f ) ((unsigned short)_mm_cvtsi128_si32(_mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(f,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())))) + + #define stbir__simdi_to_int( i ) _mm_cvtsi128_si32(i) + #define stbir__simdi_convert_i32_to_float(out, ireg) (out) = _mm_cvtepi32_ps( ireg ) + #define stbir__simdf_add( out, reg0, reg1 ) (out) = _mm_add_ps( reg0, reg1 ) + #define stbir__simdf_mult( out, reg0, reg1 ) (out) = _mm_mul_ps( reg0, reg1 ) + #define stbir__simdf_mult_mem( out, reg, ptr ) (out) = _mm_mul_ps( reg, _mm_loadu_ps( (float const*)(ptr) ) ) + #define stbir__simdf_mult1_mem( out, reg, ptr ) (out) = _mm_mul_ss( reg, _mm_load_ss( (float const*)(ptr) ) ) + #define stbir__simdf_add_mem( out, reg, ptr ) (out) = _mm_add_ps( reg, _mm_loadu_ps( (float const*)(ptr) ) ) + #define stbir__simdf_add1_mem( out, reg, ptr ) (out) = _mm_add_ss( reg, _mm_load_ss( (float const*)(ptr) ) ) + + #ifdef STBIR_USE_FMA // not on by default to maintain bit identical simd to non-simd + #include + #define stbir__simdf_madd( out, add, mul1, mul2 ) (out) = _mm_fmadd_ps( mul1, mul2, add ) + #define stbir__simdf_madd1( out, add, mul1, mul2 ) (out) = _mm_fmadd_ss( mul1, mul2, add ) + #define 
stbir__simdf_madd_mem( out, add, mul, ptr ) (out) = _mm_fmadd_ps( mul, _mm_loadu_ps( (float const*)(ptr) ), add ) + #define stbir__simdf_madd1_mem( out, add, mul, ptr ) (out) = _mm_fmadd_ss( mul, _mm_load_ss( (float const*)(ptr) ), add ) + #else + #define stbir__simdf_madd( out, add, mul1, mul2 ) (out) = _mm_add_ps( add, _mm_mul_ps( mul1, mul2 ) ) + #define stbir__simdf_madd1( out, add, mul1, mul2 ) (out) = _mm_add_ss( add, _mm_mul_ss( mul1, mul2 ) ) + #define stbir__simdf_madd_mem( out, add, mul, ptr ) (out) = _mm_add_ps( add, _mm_mul_ps( mul, _mm_loadu_ps( (float const*)(ptr) ) ) ) + #define stbir__simdf_madd1_mem( out, add, mul, ptr ) (out) = _mm_add_ss( add, _mm_mul_ss( mul, _mm_load_ss( (float const*)(ptr) ) ) ) + #endif + + #define stbir__simdf_add1( out, reg0, reg1 ) (out) = _mm_add_ss( reg0, reg1 ) + #define stbir__simdf_mult1( out, reg0, reg1 ) (out) = _mm_mul_ss( reg0, reg1 ) + + #define stbir__simdf_and( out, reg0, reg1 ) (out) = _mm_and_ps( reg0, reg1 ) + #define stbir__simdf_or( out, reg0, reg1 ) (out) = _mm_or_ps( reg0, reg1 ) + + #define stbir__simdf_min( out, reg0, reg1 ) (out) = _mm_min_ps( reg0, reg1 ) + #define stbir__simdf_max( out, reg0, reg1 ) (out) = _mm_max_ps( reg0, reg1 ) + #define stbir__simdf_min1( out, reg0, reg1 ) (out) = _mm_min_ss( reg0, reg1 ) + #define stbir__simdf_max1( out, reg0, reg1 ) (out) = _mm_max_ss( reg0, reg1 ) + + #define stbir__simdf_0123ABCDto3ABx( out, reg0, reg1 ) (out)=_mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( _mm_shuffle_ps( reg1,reg0, (0<<0) + (1<<2) + (2<<4) + (3<<6) )), (3<<0) + (0<<2) + (1<<4) + (2<<6) ) ) + #define stbir__simdf_0123ABCDto23Ax( out, reg0, reg1 ) (out)=_mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( _mm_shuffle_ps( reg1,reg0, (0<<0) + (1<<2) + (2<<4) + (3<<6) )), (2<<0) + (3<<2) + (0<<4) + (1<<6) ) ) + + static const stbir__simdf STBIR_zeroones = { 0.0f,1.0f,0.0f,1.0f }; + static const stbir__simdf STBIR_onezeros = { 1.0f,0.0f,1.0f,0.0f }; + #define stbir__simdf_aaa1( out, 
alp, ones ) (out)=_mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( _mm_movehl_ps( ones, alp ) ), (1<<0) + (1<<2) + (1<<4) + (2<<6) ) ) + #define stbir__simdf_1aaa( out, alp, ones ) (out)=_mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( _mm_movelh_ps( ones, alp ) ), (0<<0) + (2<<2) + (2<<4) + (2<<6) ) ) + #define stbir__simdf_a1a1( out, alp, ones) (out) = _mm_or_ps( _mm_castsi128_ps( _mm_srli_epi64( _mm_castps_si128(alp), 32 ) ), STBIR_zeroones ) + #define stbir__simdf_1a1a( out, alp, ones) (out) = _mm_or_ps( _mm_castsi128_ps( _mm_slli_epi64( _mm_castps_si128(alp), 32 ) ), STBIR_onezeros ) + + #define stbir__simdf_swiz( reg, one, two, three, four ) _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( reg ), (one<<0) + (two<<2) + (three<<4) + (four<<6) ) ) + + #define stbir__simdi_and( out, reg0, reg1 ) (out) = _mm_and_si128( reg0, reg1 ) + #define stbir__simdi_or( out, reg0, reg1 ) (out) = _mm_or_si128( reg0, reg1 ) + #define stbir__simdi_16madd( out, reg0, reg1 ) (out) = _mm_madd_epi16( reg0, reg1 ) + + #define stbir__simdf_pack_to_8bytes(out,aa,bb) \ + { \ + stbir__simdf af,bf; \ + stbir__simdi a,b; \ + af = _mm_min_ps( aa, STBIR_max_uint8_as_float ); \ + bf = _mm_min_ps( bb, STBIR_max_uint8_as_float ); \ + af = _mm_max_ps( af, _mm_setzero_ps() ); \ + bf = _mm_max_ps( bf, _mm_setzero_ps() ); \ + a = _mm_cvttps_epi32( af ); \ + b = _mm_cvttps_epi32( bf ); \ + a = _mm_packs_epi32( a, b ); \ + out = _mm_packus_epi16( a, a ); \ + } + + #define stbir__simdf_load4_transposed( o0, o1, o2, o3, ptr ) \ + stbir__simdf_load( o0, (ptr) ); \ + stbir__simdf_load( o1, (ptr)+4 ); \ + stbir__simdf_load( o2, (ptr)+8 ); \ + stbir__simdf_load( o3, (ptr)+12 ); \ + { \ + __m128 tmp0, tmp1, tmp2, tmp3; \ + tmp0 = _mm_unpacklo_ps(o0, o1); \ + tmp2 = _mm_unpacklo_ps(o2, o3); \ + tmp1 = _mm_unpackhi_ps(o0, o1); \ + tmp3 = _mm_unpackhi_ps(o2, o3); \ + o0 = _mm_movelh_ps(tmp0, tmp2); \ + o1 = _mm_movehl_ps(tmp2, tmp0); \ + o2 = _mm_movelh_ps(tmp1, tmp3); \ + o3 = 
_mm_movehl_ps(tmp3, tmp1); \ + } + + #define stbir__interleave_pack_and_store_16_u8( ptr, r0, r1, r2, r3 ) \ + r0 = _mm_packs_epi32( r0, r1 ); \ + r2 = _mm_packs_epi32( r2, r3 ); \ + r1 = _mm_unpacklo_epi16( r0, r2 ); \ + r3 = _mm_unpackhi_epi16( r0, r2 ); \ + r0 = _mm_unpacklo_epi16( r1, r3 ); \ + r2 = _mm_unpackhi_epi16( r1, r3 ); \ + r0 = _mm_packus_epi16( r0, r2 ); \ + stbir__simdi_store( ptr, r0 ); \ + + #define stbir__simdi_32shr( out, reg, imm ) out = _mm_srli_epi32( reg, imm ) + + #if defined(_MSC_VER) && !defined(__clang__) + // msvc inits with 8 bytes + #define STBIR__CONST_32_TO_8( v ) (char)(unsigned char)((v)&255),(char)(unsigned char)(((v)>>8)&255),(char)(unsigned char)(((v)>>16)&255),(char)(unsigned char)(((v)>>24)&255) + #define STBIR__CONST_4_32i( v ) STBIR__CONST_32_TO_8( v ), STBIR__CONST_32_TO_8( v ), STBIR__CONST_32_TO_8( v ), STBIR__CONST_32_TO_8( v ) + #define STBIR__CONST_4d_32i( v0, v1, v2, v3 ) STBIR__CONST_32_TO_8( v0 ), STBIR__CONST_32_TO_8( v1 ), STBIR__CONST_32_TO_8( v2 ), STBIR__CONST_32_TO_8( v3 ) + #else + // everything else inits with long long's + #define STBIR__CONST_4_32i( v ) (long long)((((stbir_uint64)(stbir_uint32)(v))<<32)|((stbir_uint64)(stbir_uint32)(v))),(long long)((((stbir_uint64)(stbir_uint32)(v))<<32)|((stbir_uint64)(stbir_uint32)(v))) + #define STBIR__CONST_4d_32i( v0, v1, v2, v3 ) (long long)((((stbir_uint64)(stbir_uint32)(v1))<<32)|((stbir_uint64)(stbir_uint32)(v0))),(long long)((((stbir_uint64)(stbir_uint32)(v3))<<32)|((stbir_uint64)(stbir_uint32)(v2))) + #endif + + #define STBIR__SIMDF_CONST(var, x) stbir__simdf var = { x, x, x, x } + #define STBIR__SIMDI_CONST(var, x) stbir__simdi var = { STBIR__CONST_4_32i(x) } + #define STBIR__CONSTF(var) (var) + #define STBIR__CONSTI(var) (var) + + #if defined(STBIR_AVX) || defined(__SSE4_1__) + #include + #define stbir__simdf_pack_to_8words(out,reg0,reg1) out = 
_mm_packus_epi32(_mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg0,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())), _mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg1,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps()))) + #else + static STBIR__SIMDI_CONST(stbir__s32_32768, 32768); + static STBIR__SIMDI_CONST(stbir__s16_32768, ((32768<<16)|32768)); + + #define stbir__simdf_pack_to_8words(out,reg0,reg1) \ + { \ + stbir__simdi tmp0,tmp1; \ + tmp0 = _mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg0,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())); \ + tmp1 = _mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg1,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())); \ + tmp0 = _mm_sub_epi32( tmp0, stbir__s32_32768 ); \ + tmp1 = _mm_sub_epi32( tmp1, stbir__s32_32768 ); \ + out = _mm_packs_epi32( tmp0, tmp1 ); \ + out = _mm_sub_epi16( out, stbir__s16_32768 ); \ + } + + #endif + + #define STBIR_SIMD + + // if we detect AVX, set the simd8 defines + #ifdef STBIR_AVX + #include + #define STBIR_SIMD8 + #define stbir__simdf8 __m256 + #define stbir__simdi8 __m256i + #define stbir__simdf8_load( out, ptr ) (out) = _mm256_loadu_ps( (float const *)(ptr) ) + #define stbir__simdi8_load( out, ptr ) (out) = _mm256_loadu_si256( (__m256i const *)(ptr) ) + #define stbir__simdf8_mult( out, a, b ) (out) = _mm256_mul_ps( (a), (b) ) + #define stbir__simdf8_store( ptr, out ) _mm256_storeu_ps( (float*)(ptr), out ) + #define stbir__simdi8_store( ptr, reg ) _mm256_storeu_si256( (__m256i*)(ptr), reg ) + #define stbir__simdf8_frep8( fval ) _mm256_set1_ps( fval ) + + #define stbir__simdf8_min( out, reg0, reg1 ) (out) = _mm256_min_ps( reg0, reg1 ) + #define stbir__simdf8_max( out, reg0, reg1 ) (out) = _mm256_max_ps( reg0, reg1 ) + + #define stbir__simdf8_add4halves( out, bot4, top8 ) (out) = _mm_add_ps( bot4, _mm256_extractf128_ps( top8, 1 ) ) + #define stbir__simdf8_mult_mem( out, reg, ptr ) (out) = _mm256_mul_ps( reg, _mm256_loadu_ps( (float const*)(ptr) ) ) + #define 
stbir__simdf8_add_mem( out, reg, ptr ) (out) = _mm256_add_ps( reg, _mm256_loadu_ps( (float const*)(ptr) ) ) + #define stbir__simdf8_add( out, a, b ) (out) = _mm256_add_ps( a, b ) + #define stbir__simdf8_load1b( out, ptr ) (out) = _mm256_broadcast_ss( ptr ) + #define stbir__simdf_load1rep4( out, ptr ) (out) = _mm_broadcast_ss( ptr ) // avx load instruction + + #define stbir__simdi8_convert_i32_to_float(out, ireg) (out) = _mm256_cvtepi32_ps( ireg ) + #define stbir__simdf8_convert_float_to_i32( i, f ) (i) = _mm256_cvttps_epi32(f) + + #define stbir__simdf8_bot4s( out, a, b ) (out) = _mm256_permute2f128_ps(a,b, (0<<0)+(2<<4) ) + #define stbir__simdf8_top4s( out, a, b ) (out) = _mm256_permute2f128_ps(a,b, (1<<0)+(3<<4) ) + + #define stbir__simdf8_gettop4( reg ) _mm256_extractf128_ps(reg,1) + + #ifdef STBIR_AVX2 + + #define stbir__simdi8_expand_u8_to_u32(out0,out1,ireg) \ + { \ + stbir__simdi8 a, zero =_mm256_setzero_si256();\ + a = _mm256_permute4x64_epi64( _mm256_unpacklo_epi8( _mm256_permute4x64_epi64(_mm256_castsi128_si256(ireg),(0<<0)+(2<<2)+(1<<4)+(3<<6)), zero ),(0<<0)+(2<<2)+(1<<4)+(3<<6)); \ + out0 = _mm256_unpacklo_epi16( a, zero ); \ + out1 = _mm256_unpackhi_epi16( a, zero ); \ + } + + #define stbir__simdf8_pack_to_16bytes(out,aa,bb) \ + { \ + stbir__simdi8 t; \ + stbir__simdf8 af,bf; \ + stbir__simdi8 a,b; \ + af = _mm256_min_ps( aa, STBIR_max_uint8_as_floatX ); \ + bf = _mm256_min_ps( bb, STBIR_max_uint8_as_floatX ); \ + af = _mm256_max_ps( af, _mm256_setzero_ps() ); \ + bf = _mm256_max_ps( bf, _mm256_setzero_ps() ); \ + a = _mm256_cvttps_epi32( af ); \ + b = _mm256_cvttps_epi32( bf ); \ + t = _mm256_permute4x64_epi64( _mm256_packs_epi32( a, b ), (0<<0)+(2<<2)+(1<<4)+(3<<6) ); \ + out = _mm256_castsi256_si128( _mm256_permute4x64_epi64( _mm256_packus_epi16( t, t ), (0<<0)+(2<<2)+(1<<4)+(3<<6) ) ); \ + } + + #define stbir__simdi8_expand_u16_to_u32(out,ireg) out = _mm256_unpacklo_epi16( 
_mm256_permute4x64_epi64(_mm256_castsi128_si256(ireg),(0<<0)+(2<<2)+(1<<4)+(3<<6)), _mm256_setzero_si256() ); + + #define stbir__simdf8_pack_to_16words(out,aa,bb) \ + { \ + stbir__simdf8 af,bf; \ + stbir__simdi8 a,b; \ + af = _mm256_min_ps( aa, STBIR_max_uint16_as_floatX ); \ + bf = _mm256_min_ps( bb, STBIR_max_uint16_as_floatX ); \ + af = _mm256_max_ps( af, _mm256_setzero_ps() ); \ + bf = _mm256_max_ps( bf, _mm256_setzero_ps() ); \ + a = _mm256_cvttps_epi32( af ); \ + b = _mm256_cvttps_epi32( bf ); \ + (out) = _mm256_permute4x64_epi64( _mm256_packus_epi32(a, b), (0<<0)+(2<<2)+(1<<4)+(3<<6) ); \ + } + + #else + + #define stbir__simdi8_expand_u8_to_u32(out0,out1,ireg) \ + { \ + stbir__simdi a,zero = _mm_setzero_si128(); \ + a = _mm_unpacklo_epi8( ireg, zero ); \ + out0 = _mm256_setr_m128i( _mm_unpacklo_epi16( a, zero ), _mm_unpackhi_epi16( a, zero ) ); \ + a = _mm_unpackhi_epi8( ireg, zero ); \ + out1 = _mm256_setr_m128i( _mm_unpacklo_epi16( a, zero ), _mm_unpackhi_epi16( a, zero ) ); \ + } + + #define stbir__simdf8_pack_to_16bytes(out,aa,bb) \ + { \ + stbir__simdi t; \ + stbir__simdf8 af,bf; \ + stbir__simdi8 a,b; \ + af = _mm256_min_ps( aa, STBIR_max_uint8_as_floatX ); \ + bf = _mm256_min_ps( bb, STBIR_max_uint8_as_floatX ); \ + af = _mm256_max_ps( af, _mm256_setzero_ps() ); \ + bf = _mm256_max_ps( bf, _mm256_setzero_ps() ); \ + a = _mm256_cvttps_epi32( af ); \ + b = _mm256_cvttps_epi32( bf ); \ + out = _mm_packs_epi32( _mm256_castsi256_si128(a), _mm256_extractf128_si256( a, 1 ) ); \ + out = _mm_packus_epi16( out, out ); \ + t = _mm_packs_epi32( _mm256_castsi256_si128(b), _mm256_extractf128_si256( b, 1 ) ); \ + t = _mm_packus_epi16( t, t ); \ + out = _mm_castps_si128( _mm_shuffle_ps( _mm_castsi128_ps(out), _mm_castsi128_ps(t), (0<<0)+(1<<2)+(0<<4)+(1<<6) ) ); \ + } + + #define stbir__simdi8_expand_u16_to_u32(out,ireg) \ + { \ + stbir__simdi a,b,zero = _mm_setzero_si128(); \ + a = _mm_unpacklo_epi16( ireg, zero ); \ + b = _mm_unpackhi_epi16( ireg, zero ); \ + out = 
_mm256_insertf128_si256( _mm256_castsi128_si256( a ), b, 1 ); \ + } + + #define stbir__simdf8_pack_to_16words(out,aa,bb) \ + { \ + stbir__simdi t0,t1; \ + stbir__simdf8 af,bf; \ + stbir__simdi8 a,b; \ + af = _mm256_min_ps( aa, STBIR_max_uint16_as_floatX ); \ + bf = _mm256_min_ps( bb, STBIR_max_uint16_as_floatX ); \ + af = _mm256_max_ps( af, _mm256_setzero_ps() ); \ + bf = _mm256_max_ps( bf, _mm256_setzero_ps() ); \ + a = _mm256_cvttps_epi32( af ); \ + b = _mm256_cvttps_epi32( bf ); \ + t0 = _mm_packus_epi32( _mm256_castsi256_si128(a), _mm256_extractf128_si256( a, 1 ) ); \ + t1 = _mm_packus_epi32( _mm256_castsi256_si128(b), _mm256_extractf128_si256( b, 1 ) ); \ + out = _mm256_setr_m128i( t0, t1 ); \ + } + + #endif + + static __m256i stbir_00001111 = { STBIR__CONST_4d_32i( 0, 0, 0, 0 ), STBIR__CONST_4d_32i( 1, 1, 1, 1 ) }; + #define stbir__simdf8_0123to00001111( out, in ) (out) = _mm256_permutevar_ps ( in, stbir_00001111 ) + + static __m256i stbir_22223333 = { STBIR__CONST_4d_32i( 2, 2, 2, 2 ), STBIR__CONST_4d_32i( 3, 3, 3, 3 ) }; + #define stbir__simdf8_0123to22223333( out, in ) (out) = _mm256_permutevar_ps ( in, stbir_22223333 ) + + #define stbir__simdf8_0123to2222( out, in ) (out) = stbir__simdf_swiz(_mm256_castps256_ps128(in), 2,2,2,2 ) + + #define stbir__simdf8_load4b( out, ptr ) (out) = _mm256_broadcast_ps( (__m128 const *)(ptr) ) + + static __m256i stbir_00112233 = { STBIR__CONST_4d_32i( 0, 0, 1, 1 ), STBIR__CONST_4d_32i( 2, 2, 3, 3 ) }; + #define stbir__simdf8_0123to00112233( out, in ) (out) = _mm256_permutevar_ps ( in, stbir_00112233 ) + #define stbir__simdf8_add4( out, a8, b ) (out) = _mm256_add_ps( a8, _mm256_castps128_ps256( b ) ) + + static __m256i stbir_load6 = { STBIR__CONST_4_32i( 0x80000000 ), STBIR__CONST_4d_32i( 0x80000000, 0x80000000, 0, 0 ) }; + #define stbir__simdf8_load6z( out, ptr ) (out) = _mm256_maskload_ps( ptr, stbir_load6 ) + + #define stbir__simdf8_0123to00000000( out, in ) (out) = _mm256_shuffle_ps ( in, in, (0<<0)+(0<<2)+(0<<4)+(0<<6) 
) + #define stbir__simdf8_0123to11111111( out, in ) (out) = _mm256_shuffle_ps ( in, in, (1<<0)+(1<<2)+(1<<4)+(1<<6) ) + #define stbir__simdf8_0123to22222222( out, in ) (out) = _mm256_shuffle_ps ( in, in, (2<<0)+(2<<2)+(2<<4)+(2<<6) ) + #define stbir__simdf8_0123to33333333( out, in ) (out) = _mm256_shuffle_ps ( in, in, (3<<0)+(3<<2)+(3<<4)+(3<<6) ) + #define stbir__simdf8_0123to21032103( out, in ) (out) = _mm256_shuffle_ps ( in, in, (2<<0)+(1<<2)+(0<<4)+(3<<6) ) + #define stbir__simdf8_0123to32103210( out, in ) (out) = _mm256_shuffle_ps ( in, in, (3<<0)+(2<<2)+(1<<4)+(0<<6) ) + #define stbir__simdf8_0123to12301230( out, in ) (out) = _mm256_shuffle_ps ( in, in, (1<<0)+(2<<2)+(3<<4)+(0<<6) ) + #define stbir__simdf8_0123to10321032( out, in ) (out) = _mm256_shuffle_ps ( in, in, (1<<0)+(0<<2)+(3<<4)+(2<<6) ) + #define stbir__simdf8_0123to30123012( out, in ) (out) = _mm256_shuffle_ps ( in, in, (3<<0)+(0<<2)+(1<<4)+(2<<6) ) + + #define stbir__simdf8_0123to11331133( out, in ) (out) = _mm256_shuffle_ps ( in, in, (1<<0)+(1<<2)+(3<<4)+(3<<6) ) + #define stbir__simdf8_0123to00220022( out, in ) (out) = _mm256_shuffle_ps ( in, in, (0<<0)+(0<<2)+(2<<4)+(2<<6) ) + + #define stbir__simdf8_aaa1( out, alp, ones ) (out) = _mm256_blend_ps( alp, ones, (1<<0)+(1<<1)+(1<<2)+(0<<3)+(1<<4)+(1<<5)+(1<<6)+(0<<7)); (out)=_mm256_shuffle_ps( out,out, (3<<0) + (3<<2) + (3<<4) + (0<<6) ) + #define stbir__simdf8_1aaa( out, alp, ones ) (out) = _mm256_blend_ps( alp, ones, (0<<0)+(1<<1)+(1<<2)+(1<<3)+(0<<4)+(1<<5)+(1<<6)+(1<<7)); (out)=_mm256_shuffle_ps( out,out, (1<<0) + (0<<2) + (0<<4) + (0<<6) ) + #define stbir__simdf8_a1a1( out, alp, ones) (out) = _mm256_blend_ps( alp, ones, (1<<0)+(0<<1)+(1<<2)+(0<<3)+(1<<4)+(0<<5)+(1<<6)+(0<<7)); (out)=_mm256_shuffle_ps( out,out, (1<<0) + (0<<2) + (3<<4) + (2<<6) ) + #define stbir__simdf8_1a1a( out, alp, ones) (out) = _mm256_blend_ps( alp, ones, (0<<0)+(1<<1)+(0<<2)+(1<<3)+(0<<4)+(1<<5)+(0<<6)+(1<<7)); (out)=_mm256_shuffle_ps( out,out, (1<<0) + (0<<2) + (3<<4) + 
(2<<6) ) + + #define stbir__simdf8_zero( reg ) (reg) = _mm256_setzero_ps() + + #ifdef STBIR_USE_FMA // not on by default to maintain bit identical simd to non-simd + #define stbir__simdf8_madd( out, add, mul1, mul2 ) (out) = _mm256_fmadd_ps( mul1, mul2, add ) + #define stbir__simdf8_madd_mem( out, add, mul, ptr ) (out) = _mm256_fmadd_ps( mul, _mm256_loadu_ps( (float const*)(ptr) ), add ) + #define stbir__simdf8_madd_mem4( out, add, mul, ptr )(out) = _mm256_fmadd_ps( _mm256_setr_m128( mul, _mm_setzero_ps() ), _mm256_setr_m128( _mm_loadu_ps( (float const*)(ptr) ), _mm_setzero_ps() ), add ) + #else + #define stbir__simdf8_madd( out, add, mul1, mul2 ) (out) = _mm256_add_ps( add, _mm256_mul_ps( mul1, mul2 ) ) + #define stbir__simdf8_madd_mem( out, add, mul, ptr ) (out) = _mm256_add_ps( add, _mm256_mul_ps( mul, _mm256_loadu_ps( (float const*)(ptr) ) ) ) + #define stbir__simdf8_madd_mem4( out, add, mul, ptr ) (out) = _mm256_add_ps( add, _mm256_setr_m128( _mm_mul_ps( mul, _mm_loadu_ps( (float const*)(ptr) ) ), _mm_setzero_ps() ) ) + #endif + #define stbir__if_simdf8_cast_to_simdf4( val ) _mm256_castps256_ps128( val ) + + #endif + + #ifdef STBIR_FLOORF + #undef STBIR_FLOORF + #endif + #define STBIR_FLOORF stbir_simd_floorf + static stbir__inline float stbir_simd_floorf(float x) // martins floorf + { + #if defined(STBIR_AVX) || defined(__SSE4_1__) || defined(STBIR_SSE41) + __m128 t = _mm_set_ss(x); + return _mm_cvtss_f32( _mm_floor_ss(t, t) ); + #else + __m128 f = _mm_set_ss(x); + __m128 t = _mm_cvtepi32_ps(_mm_cvttps_epi32(f)); + __m128 r = _mm_add_ss(t, _mm_and_ps(_mm_cmplt_ss(f, t), _mm_set_ss(-1.0f))); + return _mm_cvtss_f32(r); + #endif + } + + #ifdef STBIR_CEILF + #undef STBIR_CEILF + #endif + #define STBIR_CEILF stbir_simd_ceilf + static stbir__inline float stbir_simd_ceilf(float x) // martins ceilf + { + #if defined(STBIR_AVX) || defined(__SSE4_1__) || defined(STBIR_SSE41) + __m128 t = _mm_set_ss(x); + return _mm_cvtss_f32( _mm_ceil_ss(t, t) ); + #else + __m128 f = 
_mm_set_ss(x); + __m128 t = _mm_cvtepi32_ps(_mm_cvttps_epi32(f)); + __m128 r = _mm_add_ss(t, _mm_and_ps(_mm_cmplt_ss(t, f), _mm_set_ss(1.0f))); + return _mm_cvtss_f32(r); + #endif + } + +#elif defined(STBIR_NEON) + + #include <arm_neon.h> + + #define stbir__simdf float32x4_t + #define stbir__simdi uint32x4_t + + #define stbir_simdi_castf( reg ) vreinterpretq_u32_f32(reg) + #define stbir_simdf_casti( reg ) vreinterpretq_f32_u32(reg) + + #define stbir__simdf_load( reg, ptr ) (reg) = vld1q_f32( (float const*)(ptr) ) + #define stbir__simdi_load( reg, ptr ) (reg) = vld1q_u32( (uint32_t const*)(ptr) ) + #define stbir__simdf_load1( out, ptr ) (out) = vld1q_dup_f32( (float const*)(ptr) ) // top values can be random (not denormal or nan for perf) + #define stbir__simdi_load1( out, ptr ) (out) = vld1q_dup_u32( (uint32_t const*)(ptr) ) + #define stbir__simdf_load1z( out, ptr ) (out) = vld1q_lane_f32( (float const*)(ptr), vdupq_n_f32(0), 0 ) // top values must be zero + #define stbir__simdf_frep4( fvar ) vdupq_n_f32( fvar ) + #define stbir__simdf_load1frep4( out, fvar ) (out) = vdupq_n_f32( fvar ) + #define stbir__simdf_load2( out, ptr ) (out) = vcombine_f32( vld1_f32( (float const*)(ptr) ), vcreate_f32(0) ) // top values can be random (not denormal or nan for perf) + #define stbir__simdf_load2z( out, ptr ) (out) = vcombine_f32( vld1_f32( (float const*)(ptr) ), vcreate_f32(0) ) // top values must be zero + #define stbir__simdf_load2hmerge( out, reg, ptr ) (out) = vcombine_f32( vget_low_f32(reg), vld1_f32( (float const*)(ptr) ) ) + + #define stbir__simdf_zeroP() vdupq_n_f32(0) + #define stbir__simdf_zero( reg ) (reg) = vdupq_n_f32(0) + + #define stbir__simdf_store( ptr, reg ) vst1q_f32( (float*)(ptr), reg ) + #define stbir__simdf_store1( ptr, reg ) vst1q_lane_f32( (float*)(ptr), reg, 0) + #define stbir__simdf_store2( ptr, reg ) vst1_f32( (float*)(ptr), vget_low_f32(reg) ) + #define stbir__simdf_store2h( ptr, reg ) vst1_f32( (float*)(ptr), vget_high_f32(reg) ) + + #define stbir__simdi_store( 
ptr, reg ) vst1q_u32( (uint32_t*)(ptr), reg ) + #define stbir__simdi_store1( ptr, reg ) vst1q_lane_u32( (uint32_t*)(ptr), reg, 0 ) + #define stbir__simdi_store2( ptr, reg ) vst1_u32( (uint32_t*)(ptr), vget_low_u32(reg) ) + + #define stbir__prefetch( ptr ) + + #define stbir__simdi_expand_u8_to_u32(out0,out1,out2,out3,ireg) \ + { \ + uint16x8_t l = vmovl_u8( vget_low_u8 ( vreinterpretq_u8_u32(ireg) ) ); \ + uint16x8_t h = vmovl_u8( vget_high_u8( vreinterpretq_u8_u32(ireg) ) ); \ + out0 = vmovl_u16( vget_low_u16 ( l ) ); \ + out1 = vmovl_u16( vget_high_u16( l ) ); \ + out2 = vmovl_u16( vget_low_u16 ( h ) ); \ + out3 = vmovl_u16( vget_high_u16( h ) ); \ + } + + #define stbir__simdi_expand_u8_to_1u32(out,ireg) \ + { \ + uint16x8_t tmp = vmovl_u8( vget_low_u8( vreinterpretq_u8_u32(ireg) ) ); \ + out = vmovl_u16( vget_low_u16( tmp ) ); \ + } + + #define stbir__simdi_expand_u16_to_u32(out0,out1,ireg) \ + { \ + uint16x8_t tmp = vreinterpretq_u16_u32(ireg); \ + out0 = vmovl_u16( vget_low_u16 ( tmp ) ); \ + out1 = vmovl_u16( vget_high_u16( tmp ) ); \ + } + + #define stbir__simdf_convert_float_to_i32( i, f ) (i) = vreinterpretq_u32_s32( vcvtq_s32_f32(f) ) + #define stbir__simdf_convert_float_to_int( f ) vgetq_lane_s32(vcvtq_s32_f32(f), 0) + #define stbir__simdi_to_int( i ) (int)vgetq_lane_u32(i, 0) + #define stbir__simdf_convert_float_to_uint8( f ) ((unsigned char)vgetq_lane_s32(vcvtq_s32_f32(vmaxq_f32(vminq_f32(f,STBIR__CONSTF(STBIR_max_uint8_as_float)),vdupq_n_f32(0))), 0)) + #define stbir__simdf_convert_float_to_short( f ) ((unsigned short)vgetq_lane_s32(vcvtq_s32_f32(vmaxq_f32(vminq_f32(f,STBIR__CONSTF(STBIR_max_uint16_as_float)),vdupq_n_f32(0))), 0)) + #define stbir__simdi_convert_i32_to_float(out, ireg) (out) = vcvtq_f32_s32( vreinterpretq_s32_u32(ireg) ) + #define stbir__simdf_add( out, reg0, reg1 ) (out) = vaddq_f32( reg0, reg1 ) + #define stbir__simdf_mult( out, reg0, reg1 ) (out) = vmulq_f32( reg0, reg1 ) + #define stbir__simdf_mult_mem( out, reg, ptr ) (out) = 
vmulq_f32( reg, vld1q_f32( (float const*)(ptr) ) ) + #define stbir__simdf_mult1_mem( out, reg, ptr ) (out) = vmulq_f32( reg, vld1q_dup_f32( (float const*)(ptr) ) ) + #define stbir__simdf_add_mem( out, reg, ptr ) (out) = vaddq_f32( reg, vld1q_f32( (float const*)(ptr) ) ) + #define stbir__simdf_add1_mem( out, reg, ptr ) (out) = vaddq_f32( reg, vld1q_dup_f32( (float const*)(ptr) ) ) + + #ifdef STBIR_USE_FMA // not on by default to maintain bit identical simd to non-simd (and also x64 no madd to arm madd) + #define stbir__simdf_madd( out, add, mul1, mul2 ) (out) = vfmaq_f32( add, mul1, mul2 ) + #define stbir__simdf_madd1( out, add, mul1, mul2 ) (out) = vfmaq_f32( add, mul1, mul2 ) + #define stbir__simdf_madd_mem( out, add, mul, ptr ) (out) = vfmaq_f32( add, mul, vld1q_f32( (float const*)(ptr) ) ) + #define stbir__simdf_madd1_mem( out, add, mul, ptr ) (out) = vfmaq_f32( add, mul, vld1q_dup_f32( (float const*)(ptr) ) ) + #else + #define stbir__simdf_madd( out, add, mul1, mul2 ) (out) = vaddq_f32( add, vmulq_f32( mul1, mul2 ) ) + #define stbir__simdf_madd1( out, add, mul1, mul2 ) (out) = vaddq_f32( add, vmulq_f32( mul1, mul2 ) ) + #define stbir__simdf_madd_mem( out, add, mul, ptr ) (out) = vaddq_f32( add, vmulq_f32( mul, vld1q_f32( (float const*)(ptr) ) ) ) + #define stbir__simdf_madd1_mem( out, add, mul, ptr ) (out) = vaddq_f32( add, vmulq_f32( mul, vld1q_dup_f32( (float const*)(ptr) ) ) ) + #endif + + #define stbir__simdf_add1( out, reg0, reg1 ) (out) = vaddq_f32( reg0, reg1 ) + #define stbir__simdf_mult1( out, reg0, reg1 ) (out) = vmulq_f32( reg0, reg1 ) + + #define stbir__simdf_and( out, reg0, reg1 ) (out) = vreinterpretq_f32_u32( vandq_u32( vreinterpretq_u32_f32(reg0), vreinterpretq_u32_f32(reg1) ) ) + #define stbir__simdf_or( out, reg0, reg1 ) (out) = vreinterpretq_f32_u32( vorrq_u32( vreinterpretq_u32_f32(reg0), vreinterpretq_u32_f32(reg1) ) ) + + #define stbir__simdf_min( out, reg0, reg1 ) (out) = vminq_f32( reg0, reg1 ) + #define stbir__simdf_max( out, reg0, reg1 
) (out) = vmaxq_f32( reg0, reg1 ) + #define stbir__simdf_min1( out, reg0, reg1 ) (out) = vminq_f32( reg0, reg1 ) + #define stbir__simdf_max1( out, reg0, reg1 ) (out) = vmaxq_f32( reg0, reg1 ) + + #define stbir__simdf_0123ABCDto3ABx( out, reg0, reg1 ) (out) = vextq_f32( reg0, reg1, 3 ) + #define stbir__simdf_0123ABCDto23Ax( out, reg0, reg1 ) (out) = vextq_f32( reg0, reg1, 2 ) + + #define stbir__simdf_a1a1( out, alp, ones ) (out) = vzipq_f32(vuzpq_f32(alp, alp).val[1], ones).val[0] + #define stbir__simdf_1a1a( out, alp, ones ) (out) = vzipq_f32(ones, vuzpq_f32(alp, alp).val[0]).val[0] + + #if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) + + #define stbir__simdf_aaa1( out, alp, ones ) (out) = vcopyq_laneq_f32(vdupq_n_f32(vgetq_lane_f32(alp, 3)), 3, ones, 3) + #define stbir__simdf_1aaa( out, alp, ones ) (out) = vcopyq_laneq_f32(vdupq_n_f32(vgetq_lane_f32(alp, 0)), 0, ones, 0) + + #if defined( _MSC_VER ) && !defined(__clang__) + #define stbir_make16(a,b,c,d) vcombine_u8( \ + vcreate_u8( (4*a+0) | ((4*a+1)<<8) | ((4*a+2)<<16) | ((4*a+3)<<24) | \ + ((stbir_uint64)(4*b+0)<<32) | ((stbir_uint64)(4*b+1)<<40) | ((stbir_uint64)(4*b+2)<<48) | ((stbir_uint64)(4*b+3)<<56)), \ + vcreate_u8( (4*c+0) | ((4*c+1)<<8) | ((4*c+2)<<16) | ((4*c+3)<<24) | \ + ((stbir_uint64)(4*d+0)<<32) | ((stbir_uint64)(4*d+1)<<40) | ((stbir_uint64)(4*d+2)<<48) | ((stbir_uint64)(4*d+3)<<56) ) ) + + static stbir__inline uint8x16x2_t stbir_make16x2(float32x4_t rega,float32x4_t regb) + { + uint8x16x2_t r = { vreinterpretq_u8_f32(rega), vreinterpretq_u8_f32(regb) }; + return r; + } + #else + #define stbir_make16(a,b,c,d) (uint8x16_t){4*a+0,4*a+1,4*a+2,4*a+3,4*b+0,4*b+1,4*b+2,4*b+3,4*c+0,4*c+1,4*c+2,4*c+3,4*d+0,4*d+1,4*d+2,4*d+3} + #define stbir_make16x2(a,b) (uint8x16x2_t){{vreinterpretq_u8_f32(a),vreinterpretq_u8_f32(b)}} + #endif + + #define stbir__simdf_swiz( reg, one, two, three, four ) vreinterpretq_f32_u8( vqtbl1q_u8( vreinterpretq_u8_f32(reg), stbir_make16(one, two, three, 
four) ) ) + #define stbir__simdf_swiz2( rega, regb, one, two, three, four ) vreinterpretq_f32_u8( vqtbl2q_u8( stbir_make16x2(rega,regb), stbir_make16(one, two, three, four) ) ) + + #define stbir__simdi_16madd( out, reg0, reg1 ) \ + { \ + int16x8_t r0 = vreinterpretq_s16_u32(reg0); \ + int16x8_t r1 = vreinterpretq_s16_u32(reg1); \ + int32x4_t tmp0 = vmull_s16( vget_low_s16(r0), vget_low_s16(r1) ); \ + int32x4_t tmp1 = vmull_s16( vget_high_s16(r0), vget_high_s16(r1) ); \ + (out) = vreinterpretq_u32_s32( vpaddq_s32(tmp0, tmp1) ); \ + } + + #else + + #define stbir__simdf_aaa1( out, alp, ones ) (out) = vsetq_lane_f32(1.0f, vdupq_n_f32(vgetq_lane_f32(alp, 3)), 3) + #define stbir__simdf_1aaa( out, alp, ones ) (out) = vsetq_lane_f32(1.0f, vdupq_n_f32(vgetq_lane_f32(alp, 0)), 0) + + #if defined( _MSC_VER ) && !defined(__clang__) + static stbir__inline uint8x8x2_t stbir_make8x2(float32x4_t reg) + { + uint8x8x2_t r = { { vget_low_u8(vreinterpretq_u8_f32(reg)), vget_high_u8(vreinterpretq_u8_f32(reg)) } }; + return r; + } + #define stbir_make8(a,b) vcreate_u8( \ + (4*a+0) | ((4*a+1)<<8) | ((4*a+2)<<16) | ((4*a+3)<<24) | \ + ((stbir_uint64)(4*b+0)<<32) | ((stbir_uint64)(4*b+1)<<40) | ((stbir_uint64)(4*b+2)<<48) | ((stbir_uint64)(4*b+3)<<56) ) + #else + #define stbir_make8x2(reg) (uint8x8x2_t){ { vget_low_u8(vreinterpretq_u8_f32(reg)), vget_high_u8(vreinterpretq_u8_f32(reg)) } } + #define stbir_make8(a,b) (uint8x8_t){4*a+0,4*a+1,4*a+2,4*a+3,4*b+0,4*b+1,4*b+2,4*b+3} + #endif + + #define stbir__simdf_swiz( reg, one, two, three, four ) vreinterpretq_f32_u8( vcombine_u8( \ + vtbl2_u8( stbir_make8x2( reg ), stbir_make8( one, two ) ), \ + vtbl2_u8( stbir_make8x2( reg ), stbir_make8( three, four ) ) ) ) + + #define stbir__simdi_16madd( out, reg0, reg1 ) \ + { \ + int16x8_t r0 = vreinterpretq_s16_u32(reg0); \ + int16x8_t r1 = vreinterpretq_s16_u32(reg1); \ + int32x4_t tmp0 = vmull_s16( vget_low_s16(r0), vget_low_s16(r1) ); \ + int32x4_t tmp1 = vmull_s16( vget_high_s16(r0), 
vget_high_s16(r1) ); \ + int32x2_t out0 = vpadd_s32( vget_low_s32(tmp0), vget_high_s32(tmp0) ); \ + int32x2_t out1 = vpadd_s32( vget_low_s32(tmp1), vget_high_s32(tmp1) ); \ + (out) = vreinterpretq_u32_s32( vcombine_s32(out0, out1) ); \ + } + + #endif + + #define stbir__simdi_and( out, reg0, reg1 ) (out) = vandq_u32( reg0, reg1 ) + #define stbir__simdi_or( out, reg0, reg1 ) (out) = vorrq_u32( reg0, reg1 ) + + #define stbir__simdf_pack_to_8bytes(out,aa,bb) \ + { \ + float32x4_t af = vmaxq_f32( vminq_f32(aa,STBIR__CONSTF(STBIR_max_uint8_as_float) ), vdupq_n_f32(0) ); \ + float32x4_t bf = vmaxq_f32( vminq_f32(bb,STBIR__CONSTF(STBIR_max_uint8_as_float) ), vdupq_n_f32(0) ); \ + int16x4_t ai = vqmovn_s32( vcvtq_s32_f32( af ) ); \ + int16x4_t bi = vqmovn_s32( vcvtq_s32_f32( bf ) ); \ + uint8x8_t out8 = vqmovun_s16( vcombine_s16(ai, bi) ); \ + out = vreinterpretq_u32_u8( vcombine_u8(out8, out8) ); \ + } + + #define stbir__simdf_pack_to_8words(out,aa,bb) \ + { \ + float32x4_t af = vmaxq_f32( vminq_f32(aa,STBIR__CONSTF(STBIR_max_uint16_as_float) ), vdupq_n_f32(0) ); \ + float32x4_t bf = vmaxq_f32( vminq_f32(bb,STBIR__CONSTF(STBIR_max_uint16_as_float) ), vdupq_n_f32(0) ); \ + int32x4_t ai = vcvtq_s32_f32( af ); \ + int32x4_t bi = vcvtq_s32_f32( bf ); \ + out = vreinterpretq_u32_u16( vcombine_u16(vqmovun_s32(ai), vqmovun_s32(bi)) ); \ + } + + #define stbir__interleave_pack_and_store_16_u8( ptr, r0, r1, r2, r3 ) \ + { \ + int16x4x2_t tmp0 = vzip_s16( vqmovn_s32(vreinterpretq_s32_u32(r0)), vqmovn_s32(vreinterpretq_s32_u32(r2)) ); \ + int16x4x2_t tmp1 = vzip_s16( vqmovn_s32(vreinterpretq_s32_u32(r1)), vqmovn_s32(vreinterpretq_s32_u32(r3)) ); \ + uint8x8x2_t out = \ + { { \ + vqmovun_s16( vcombine_s16(tmp0.val[0], tmp0.val[1]) ), \ + vqmovun_s16( vcombine_s16(tmp1.val[0], tmp1.val[1]) ), \ + } }; \ + vst2_u8(ptr, out); \ + } + + #define stbir__simdf_load4_transposed( o0, o1, o2, o3, ptr ) \ + { \ + float32x4x4_t tmp = vld4q_f32(ptr); \ + o0 = tmp.val[0]; \ + o1 = tmp.val[1]; \ + o2 
= tmp.val[2]; \ + o3 = tmp.val[3]; \ + } + + #define stbir__simdi_32shr( out, reg, imm ) out = vshrq_n_u32( reg, imm ) + + #if defined( _MSC_VER ) && !defined(__clang__) + #define STBIR__SIMDF_CONST(var, x) __declspec(align(8)) float var[] = { x, x, x, x } + #define STBIR__SIMDI_CONST(var, x) __declspec(align(8)) uint32_t var[] = { x, x, x, x } + #define STBIR__CONSTF(var) (*(const float32x4_t*)var) + #define STBIR__CONSTI(var) (*(const uint32x4_t*)var) + #else + #define STBIR__SIMDF_CONST(var, x) stbir__simdf var = { x, x, x, x } + #define STBIR__SIMDI_CONST(var, x) stbir__simdi var = { x, x, x, x } + #define STBIR__CONSTF(var) (var) + #define STBIR__CONSTI(var) (var) + #endif + + #ifdef STBIR_FLOORF + #undef STBIR_FLOORF + #endif + #define STBIR_FLOORF stbir_simd_floorf + static stbir__inline float stbir_simd_floorf(float x) + { + #if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) + return vget_lane_f32( vrndm_f32( vdup_n_f32(x) ), 0); + #else + float32x2_t f = vdup_n_f32(x); + float32x2_t t = vcvt_f32_s32(vcvt_s32_f32(f)); + uint32x2_t a = vclt_f32(f, t); + uint32x2_t b = vreinterpret_u32_f32(vdup_n_f32(-1.0f)); + float32x2_t r = vadd_f32(t, vreinterpret_f32_u32(vand_u32(a, b))); + return vget_lane_f32(r, 0); + #endif + } + + #ifdef STBIR_CEILF + #undef STBIR_CEILF + #endif + #define STBIR_CEILF stbir_simd_ceilf + static stbir__inline float stbir_simd_ceilf(float x) + { + #if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) + return vget_lane_f32( vrndp_f32( vdup_n_f32(x) ), 0); + #else + float32x2_t f = vdup_n_f32(x); + float32x2_t t = vcvt_f32_s32(vcvt_s32_f32(f)); + uint32x2_t a = vclt_f32(t, f); + uint32x2_t b = vreinterpret_u32_f32(vdup_n_f32(1.0f)); + float32x2_t r = vadd_f32(t, vreinterpret_f32_u32(vand_u32(a, b))); + return vget_lane_f32(r, 0); + #endif + } + + #define STBIR_SIMD + +#elif defined(STBIR_WASM) + + #include <wasm_simd128.h> + + #define stbir__simdf v128_t + #define stbir__simdi v128_t + + #define 
stbir_simdi_castf( reg ) (reg) + #define stbir_simdf_casti( reg ) (reg) + + #define stbir__simdf_load( reg, ptr ) (reg) = wasm_v128_load( (void const*)(ptr) ) + #define stbir__simdi_load( reg, ptr ) (reg) = wasm_v128_load( (void const*)(ptr) ) + #define stbir__simdf_load1( out, ptr ) (out) = wasm_v128_load32_splat( (void const*)(ptr) ) // top values can be random (not denormal or nan for perf) + #define stbir__simdi_load1( out, ptr ) (out) = wasm_v128_load32_splat( (void const*)(ptr) ) + #define stbir__simdf_load1z( out, ptr ) (out) = wasm_v128_load32_zero( (void const*)(ptr) ) // top values must be zero + #define stbir__simdf_frep4( fvar ) wasm_f32x4_splat( fvar ) + #define stbir__simdf_load1frep4( out, fvar ) (out) = wasm_f32x4_splat( fvar ) + #define stbir__simdf_load2( out, ptr ) (out) = wasm_v128_load64_splat( (void const*)(ptr) ) // top values can be random (not denormal or nan for perf) + #define stbir__simdf_load2z( out, ptr ) (out) = wasm_v128_load64_zero( (void const*)(ptr) ) // top values must be zero + #define stbir__simdf_load2hmerge( out, reg, ptr ) (out) = wasm_v128_load64_lane( (void const*)(ptr), reg, 1 ) + + #define stbir__simdf_zeroP() wasm_f32x4_const_splat(0) + #define stbir__simdf_zero( reg ) (reg) = wasm_f32x4_const_splat(0) + + #define stbir__simdf_store( ptr, reg ) wasm_v128_store( (void*)(ptr), reg ) + #define stbir__simdf_store1( ptr, reg ) wasm_v128_store32_lane( (void*)(ptr), reg, 0 ) + #define stbir__simdf_store2( ptr, reg ) wasm_v128_store64_lane( (void*)(ptr), reg, 0 ) + #define stbir__simdf_store2h( ptr, reg ) wasm_v128_store64_lane( (void*)(ptr), reg, 1 ) + + #define stbir__simdi_store( ptr, reg ) wasm_v128_store( (void*)(ptr), reg ) + #define stbir__simdi_store1( ptr, reg ) wasm_v128_store32_lane( (void*)(ptr), reg, 0 ) + #define stbir__simdi_store2( ptr, reg ) wasm_v128_store64_lane( (void*)(ptr), reg, 0 ) + + #define stbir__prefetch( ptr ) + + #define stbir__simdi_expand_u8_to_u32(out0,out1,out2,out3,ireg) \ + { \ + v128_t l = 
wasm_u16x8_extend_low_u8x16 ( ireg ); \ + v128_t h = wasm_u16x8_extend_high_u8x16( ireg ); \ + out0 = wasm_u32x4_extend_low_u16x8 ( l ); \ + out1 = wasm_u32x4_extend_high_u16x8( l ); \ + out2 = wasm_u32x4_extend_low_u16x8 ( h ); \ + out3 = wasm_u32x4_extend_high_u16x8( h ); \ + } + + #define stbir__simdi_expand_u8_to_1u32(out,ireg) \ + { \ + v128_t tmp = wasm_u16x8_extend_low_u8x16(ireg); \ + out = wasm_u32x4_extend_low_u16x8(tmp); \ + } + + #define stbir__simdi_expand_u16_to_u32(out0,out1,ireg) \ + { \ + out0 = wasm_u32x4_extend_low_u16x8 ( ireg ); \ + out1 = wasm_u32x4_extend_high_u16x8( ireg ); \ + } + + #define stbir__simdf_convert_float_to_i32( i, f ) (i) = wasm_i32x4_trunc_sat_f32x4(f) + #define stbir__simdf_convert_float_to_int( f ) wasm_i32x4_extract_lane(wasm_i32x4_trunc_sat_f32x4(f), 0) + #define stbir__simdi_to_int( i ) wasm_i32x4_extract_lane(i, 0) + #define stbir__simdf_convert_float_to_uint8( f ) ((unsigned char)wasm_i32x4_extract_lane(wasm_i32x4_trunc_sat_f32x4(wasm_f32x4_max(wasm_f32x4_min(f,STBIR_max_uint8_as_float),wasm_f32x4_const_splat(0))), 0)) + #define stbir__simdf_convert_float_to_short( f ) ((unsigned short)wasm_i32x4_extract_lane(wasm_i32x4_trunc_sat_f32x4(wasm_f32x4_max(wasm_f32x4_min(f,STBIR_max_uint16_as_float),wasm_f32x4_const_splat(0))), 0)) + #define stbir__simdi_convert_i32_to_float(out, ireg) (out) = wasm_f32x4_convert_i32x4(ireg) + #define stbir__simdf_add( out, reg0, reg1 ) (out) = wasm_f32x4_add( reg0, reg1 ) + #define stbir__simdf_mult( out, reg0, reg1 ) (out) = wasm_f32x4_mul( reg0, reg1 ) + #define stbir__simdf_mult_mem( out, reg, ptr ) (out) = wasm_f32x4_mul( reg, wasm_v128_load( (void const*)(ptr) ) ) + #define stbir__simdf_mult1_mem( out, reg, ptr ) (out) = wasm_f32x4_mul( reg, wasm_v128_load32_splat( (void const*)(ptr) ) ) + #define stbir__simdf_add_mem( out, reg, ptr ) (out) = wasm_f32x4_add( reg, wasm_v128_load( (void const*)(ptr) ) ) + #define stbir__simdf_add1_mem( out, reg, ptr ) (out) = wasm_f32x4_add( reg, 
wasm_v128_load32_splat( (void const*)(ptr) ) ) + + #define stbir__simdf_madd( out, add, mul1, mul2 ) (out) = wasm_f32x4_add( add, wasm_f32x4_mul( mul1, mul2 ) ) + #define stbir__simdf_madd1( out, add, mul1, mul2 ) (out) = wasm_f32x4_add( add, wasm_f32x4_mul( mul1, mul2 ) ) + #define stbir__simdf_madd_mem( out, add, mul, ptr ) (out) = wasm_f32x4_add( add, wasm_f32x4_mul( mul, wasm_v128_load( (void const*)(ptr) ) ) ) + #define stbir__simdf_madd1_mem( out, add, mul, ptr ) (out) = wasm_f32x4_add( add, wasm_f32x4_mul( mul, wasm_v128_load32_splat( (void const*)(ptr) ) ) ) + + #define stbir__simdf_add1( out, reg0, reg1 ) (out) = wasm_f32x4_add( reg0, reg1 ) + #define stbir__simdf_mult1( out, reg0, reg1 ) (out) = wasm_f32x4_mul( reg0, reg1 ) + + #define stbir__simdf_and( out, reg0, reg1 ) (out) = wasm_v128_and( reg0, reg1 ) + #define stbir__simdf_or( out, reg0, reg1 ) (out) = wasm_v128_or( reg0, reg1 ) + + #define stbir__simdf_min( out, reg0, reg1 ) (out) = wasm_f32x4_min( reg0, reg1 ) + #define stbir__simdf_max( out, reg0, reg1 ) (out) = wasm_f32x4_max( reg0, reg1 ) + #define stbir__simdf_min1( out, reg0, reg1 ) (out) = wasm_f32x4_min( reg0, reg1 ) + #define stbir__simdf_max1( out, reg0, reg1 ) (out) = wasm_f32x4_max( reg0, reg1 ) + + #define stbir__simdf_0123ABCDto3ABx( out, reg0, reg1 ) (out) = wasm_i32x4_shuffle( reg0, reg1, 3, 4, 5, -1 ) + #define stbir__simdf_0123ABCDto23Ax( out, reg0, reg1 ) (out) = wasm_i32x4_shuffle( reg0, reg1, 2, 3, 4, -1 ) + + #define stbir__simdf_aaa1(out,alp,ones) (out) = wasm_i32x4_shuffle(alp, ones, 3, 3, 3, 4) + #define stbir__simdf_1aaa(out,alp,ones) (out) = wasm_i32x4_shuffle(alp, ones, 4, 0, 0, 0) + #define stbir__simdf_a1a1(out,alp,ones) (out) = wasm_i32x4_shuffle(alp, ones, 1, 4, 3, 4) + #define stbir__simdf_1a1a(out,alp,ones) (out) = wasm_i32x4_shuffle(alp, ones, 4, 0, 4, 2) + + #define stbir__simdf_swiz( reg, one, two, three, four ) wasm_i32x4_shuffle(reg, reg, one, two, three, four) + + #define stbir__simdi_and( out, reg0, reg1 ) 
(out) = wasm_v128_and( reg0, reg1 ) + #define stbir__simdi_or( out, reg0, reg1 ) (out) = wasm_v128_or( reg0, reg1 ) + #define stbir__simdi_16madd( out, reg0, reg1 ) (out) = wasm_i32x4_dot_i16x8( reg0, reg1 ) + + #define stbir__simdf_pack_to_8bytes(out,aa,bb) \ + { \ + v128_t af = wasm_f32x4_max( wasm_f32x4_min(aa, STBIR_max_uint8_as_float), wasm_f32x4_const_splat(0) ); \ + v128_t bf = wasm_f32x4_max( wasm_f32x4_min(bb, STBIR_max_uint8_as_float), wasm_f32x4_const_splat(0) ); \ + v128_t ai = wasm_i32x4_trunc_sat_f32x4( af ); \ + v128_t bi = wasm_i32x4_trunc_sat_f32x4( bf ); \ + v128_t out16 = wasm_i16x8_narrow_i32x4( ai, bi ); \ + out = wasm_u8x16_narrow_i16x8( out16, out16 ); \ + } + + #define stbir__simdf_pack_to_8words(out,aa,bb) \ + { \ + v128_t af = wasm_f32x4_max( wasm_f32x4_min(aa, STBIR_max_uint16_as_float), wasm_f32x4_const_splat(0)); \ + v128_t bf = wasm_f32x4_max( wasm_f32x4_min(bb, STBIR_max_uint16_as_float), wasm_f32x4_const_splat(0)); \ + v128_t ai = wasm_i32x4_trunc_sat_f32x4( af ); \ + v128_t bi = wasm_i32x4_trunc_sat_f32x4( bf ); \ + out = wasm_u16x8_narrow_i32x4( ai, bi ); \ + } + + #define stbir__interleave_pack_and_store_16_u8( ptr, r0, r1, r2, r3 ) \ + { \ + v128_t tmp0 = wasm_i16x8_narrow_i32x4(r0, r1); \ + v128_t tmp1 = wasm_i16x8_narrow_i32x4(r2, r3); \ + v128_t tmp = wasm_u8x16_narrow_i16x8(tmp0, tmp1); \ + tmp = wasm_i8x16_shuffle(tmp, tmp, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); \ + wasm_v128_store( (void*)(ptr), tmp); \ + } + + #define stbir__simdf_load4_transposed( o0, o1, o2, o3, ptr ) \ + { \ + v128_t t0 = wasm_v128_load( ptr ); \ + v128_t t1 = wasm_v128_load( ptr+4 ); \ + v128_t t2 = wasm_v128_load( ptr+8 ); \ + v128_t t3 = wasm_v128_load( ptr+12 ); \ + v128_t s0 = wasm_i32x4_shuffle(t0, t1, 0, 4, 2, 6); \ + v128_t s1 = wasm_i32x4_shuffle(t0, t1, 1, 5, 3, 7); \ + v128_t s2 = wasm_i32x4_shuffle(t2, t3, 0, 4, 2, 6); \ + v128_t s3 = wasm_i32x4_shuffle(t2, t3, 1, 5, 3, 7); \ + o0 = wasm_i32x4_shuffle(s0, s2, 0, 1, 4, 5); \ + 
o1 = wasm_i32x4_shuffle(s1, s3, 0, 1, 4, 5); \ + o2 = wasm_i32x4_shuffle(s0, s2, 2, 3, 6, 7); \ + o3 = wasm_i32x4_shuffle(s1, s3, 2, 3, 6, 7); \ + } + + #define stbir__simdi_32shr( out, reg, imm ) out = wasm_u32x4_shr( reg, imm ) + + typedef float stbir__f32x4 __attribute__((__vector_size__(16), __aligned__(16))); + #define STBIR__SIMDF_CONST(var, x) stbir__simdf var = (v128_t)(stbir__f32x4){ x, x, x, x } + #define STBIR__SIMDI_CONST(var, x) stbir__simdi var = { x, x, x, x } + #define STBIR__CONSTF(var) (var) + #define STBIR__CONSTI(var) (var) + + #ifdef STBIR_FLOORF + #undef STBIR_FLOORF + #endif + #define STBIR_FLOORF stbir_simd_floorf + static stbir__inline float stbir_simd_floorf(float x) + { + return wasm_f32x4_extract_lane( wasm_f32x4_floor( wasm_f32x4_splat(x) ), 0); + } + + #ifdef STBIR_CEILF + #undef STBIR_CEILF + #endif + #define STBIR_CEILF stbir_simd_ceilf + static stbir__inline float stbir_simd_ceilf(float x) + { + return wasm_f32x4_extract_lane( wasm_f32x4_ceil( wasm_f32x4_splat(x) ), 0); + } + + #define STBIR_SIMD + +#endif // SSE2/NEON/WASM + +#endif // NO SIMD + +#ifdef STBIR_SIMD8 + #define stbir__simdfX stbir__simdf8 + #define stbir__simdiX stbir__simdi8 + #define stbir__simdfX_load stbir__simdf8_load + #define stbir__simdiX_load stbir__simdi8_load + #define stbir__simdfX_mult stbir__simdf8_mult + #define stbir__simdfX_add_mem stbir__simdf8_add_mem + #define stbir__simdfX_madd_mem stbir__simdf8_madd_mem + #define stbir__simdfX_store stbir__simdf8_store + #define stbir__simdiX_store stbir__simdi8_store + #define stbir__simdf_frepX stbir__simdf8_frep8 + #define stbir__simdfX_madd stbir__simdf8_madd + #define stbir__simdfX_min stbir__simdf8_min + #define stbir__simdfX_max stbir__simdf8_max + #define stbir__simdfX_aaa1 stbir__simdf8_aaa1 + #define stbir__simdfX_1aaa stbir__simdf8_1aaa + #define stbir__simdfX_a1a1 stbir__simdf8_a1a1 + #define stbir__simdfX_1a1a stbir__simdf8_1a1a + #define stbir__simdfX_convert_float_to_i32 
stbir__simdf8_convert_float_to_i32 + #define stbir__simdfX_pack_to_words stbir__simdf8_pack_to_16words + #define stbir__simdfX_zero stbir__simdf8_zero + #define STBIR_onesX STBIR_ones8 + #define STBIR_max_uint8_as_floatX STBIR_max_uint8_as_float8 + #define STBIR_max_uint16_as_floatX STBIR_max_uint16_as_float8 + #define STBIR_simd_point5X STBIR_simd_point58 + #define stbir__simdfX_float_count 8 + #define stbir__simdfX_0123to1230 stbir__simdf8_0123to12301230 + #define stbir__simdfX_0123to2103 stbir__simdf8_0123to21032103 + static const stbir__simdf8 STBIR_max_uint16_as_float_inverted8 = { stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted }; + static const stbir__simdf8 STBIR_max_uint8_as_float_inverted8 = { stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted }; + static const stbir__simdf8 STBIR_ones8 = { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 }; + static const stbir__simdf8 STBIR_simd_point58 = { 0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5 }; + static const stbir__simdf8 STBIR_max_uint8_as_float8 = { stbir__max_uint8_as_float,stbir__max_uint8_as_float,stbir__max_uint8_as_float,stbir__max_uint8_as_float, stbir__max_uint8_as_float,stbir__max_uint8_as_float,stbir__max_uint8_as_float,stbir__max_uint8_as_float }; + static const stbir__simdf8 STBIR_max_uint16_as_float8 = { stbir__max_uint16_as_float,stbir__max_uint16_as_float,stbir__max_uint16_as_float,stbir__max_uint16_as_float, stbir__max_uint16_as_float,stbir__max_uint16_as_float,stbir__max_uint16_as_float,stbir__max_uint16_as_float }; +#else + #define stbir__simdfX stbir__simdf + 
#define stbir__simdiX stbir__simdi + #define stbir__simdfX_load stbir__simdf_load + #define stbir__simdiX_load stbir__simdi_load + #define stbir__simdfX_mult stbir__simdf_mult + #define stbir__simdfX_add_mem stbir__simdf_add_mem + #define stbir__simdfX_madd_mem stbir__simdf_madd_mem + #define stbir__simdfX_store stbir__simdf_store + #define stbir__simdiX_store stbir__simdi_store + #define stbir__simdf_frepX stbir__simdf_frep4 + #define stbir__simdfX_madd stbir__simdf_madd + #define stbir__simdfX_min stbir__simdf_min + #define stbir__simdfX_max stbir__simdf_max + #define stbir__simdfX_aaa1 stbir__simdf_aaa1 + #define stbir__simdfX_1aaa stbir__simdf_1aaa + #define stbir__simdfX_a1a1 stbir__simdf_a1a1 + #define stbir__simdfX_1a1a stbir__simdf_1a1a + #define stbir__simdfX_convert_float_to_i32 stbir__simdf_convert_float_to_i32 + #define stbir__simdfX_pack_to_words stbir__simdf_pack_to_8words + #define stbir__simdfX_zero stbir__simdf_zero + #define STBIR_onesX STBIR__CONSTF(STBIR_ones) + #define STBIR_simd_point5X STBIR__CONSTF(STBIR_simd_point5) + #define STBIR_max_uint8_as_floatX STBIR__CONSTF(STBIR_max_uint8_as_float) + #define STBIR_max_uint16_as_floatX STBIR__CONSTF(STBIR_max_uint16_as_float) + #define stbir__simdfX_float_count 4 + #define stbir__if_simdf8_cast_to_simdf4( val ) ( val ) + #define stbir__simdfX_0123to1230 stbir__simdf_0123to1230 + #define stbir__simdfX_0123to2103 stbir__simdf_0123to2103 +#endif + + +#if defined(STBIR_NEON) && !defined(_M_ARM) && !defined(__arm__) + + #if defined( _MSC_VER ) && !defined(__clang__) + typedef __int16 stbir__FP16; + #else + typedef float16_t stbir__FP16; + #endif + +#else // no NEON, or 32-bit ARM for MSVC + + typedef union stbir__FP16 + { + unsigned short u; + } stbir__FP16; + +#endif + +#if (!defined(STBIR_NEON) && !defined(STBIR_FP16C)) || (defined(STBIR_NEON) && defined(_M_ARM)) || (defined(STBIR_NEON) && defined(__arm__)) + + // Fabian's half float routines, see: https://gist.github.com/rygorous/2156668 + + static 
stbir__inline float stbir__half_to_float( stbir__FP16 h ) + { + static const stbir__FP32 magic = { (254 - 15) << 23 }; + static const stbir__FP32 was_infnan = { (127 + 16) << 23 }; + stbir__FP32 o; + + o.u = (h.u & 0x7fff) << 13; // exponent/mantissa bits + o.f *= magic.f; // exponent adjust + if (o.f >= was_infnan.f) // make sure Inf/NaN survive + o.u |= 255 << 23; + o.u |= (h.u & 0x8000) << 16; // sign bit + return o.f; + } + + static stbir__inline stbir__FP16 stbir__float_to_half(float val) + { + stbir__FP32 f32infty = { 255 << 23 }; + stbir__FP32 f16max = { (127 + 16) << 23 }; + stbir__FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 }; + unsigned int sign_mask = 0x80000000u; + stbir__FP16 o = { 0 }; + stbir__FP32 f; + unsigned int sign; + + f.f = val; + sign = f.u & sign_mask; + f.u ^= sign; + + if (f.u >= f16max.u) // result is Inf or NaN (all exponent bits set) + o.u = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf + else // (De)normalized number or zero + { + if (f.u < (113 << 23)) // resulting FP16 is subnormal or zero + { + // use a magic value to align our 10 mantissa bits at the bottom of + // the float. as long as FP addition is round-to-nearest-even this + // just works. + f.f += denorm_magic.f; + // and one integer subtract of the bias later, we have our final float! + o.u = (unsigned short) ( f.u - denorm_magic.u ); + } + else + { + unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd + // update exponent, rounding bias part 1 + f.u = f.u + ((15u - 127) << 23) + 0xfff; + // rounding bias part 2 + f.u += mant_odd; + // take the bits! 
+ o.u = (unsigned short) ( f.u >> 13 ); + } + } + + o.u |= sign >> 16; + return o; + } + +#endif + + +#if defined(STBIR_FP16C) + + #include + + static stbir__inline void stbir__half_to_float_SIMD(float * output, stbir__FP16 const * input) + { + _mm256_storeu_ps( (float*)output, _mm256_cvtph_ps( _mm_loadu_si128( (__m128i const* )input ) ) ); + } + + static stbir__inline void stbir__float_to_half_SIMD(stbir__FP16 * output, float const * input) + { + _mm_storeu_si128( (__m128i*)output, _mm256_cvtps_ph( _mm256_loadu_ps( input ), 0 ) ); + } + + static stbir__inline float stbir__half_to_float( stbir__FP16 h ) + { + return _mm_cvtss_f32( _mm_cvtph_ps( _mm_cvtsi32_si128( (int)h.u ) ) ); + } + + static stbir__inline stbir__FP16 stbir__float_to_half( float f ) + { + stbir__FP16 h; + h.u = (unsigned short) _mm_cvtsi128_si32( _mm_cvtps_ph( _mm_set_ss( f ), 0 ) ); + return h; + } + +#elif defined(STBIR_SSE2) + + // Fabian's half float routines, see: https://gist.github.com/rygorous/2156668 + stbir__inline static void stbir__half_to_float_SIMD(float * output, void const * input) + { + static const STBIR__SIMDI_CONST(mask_nosign, 0x7fff); + static const STBIR__SIMDI_CONST(smallest_normal, 0x0400); + static const STBIR__SIMDI_CONST(infinity, 0x7c00); + static const STBIR__SIMDI_CONST(expadjust_normal, (127 - 15) << 23); + static const STBIR__SIMDI_CONST(magic_denorm, 113 << 23); + + __m128i i = _mm_loadu_si128 ( (__m128i const*)(input) ); + __m128i h = _mm_unpacklo_epi16 ( i, _mm_setzero_si128() ); + __m128i mnosign = STBIR__CONSTI(mask_nosign); + __m128i eadjust = STBIR__CONSTI(expadjust_normal); + __m128i smallest = STBIR__CONSTI(smallest_normal); + __m128i infty = STBIR__CONSTI(infinity); + __m128i expmant = _mm_and_si128(mnosign, h); + __m128i justsign = _mm_xor_si128(h, expmant); + __m128i b_notinfnan = _mm_cmpgt_epi32(infty, expmant); + __m128i b_isdenorm = _mm_cmpgt_epi32(smallest, expmant); + __m128i shifted = _mm_slli_epi32(expmant, 13); + __m128i adj_infnan = 
_mm_andnot_si128(b_notinfnan, eadjust); + __m128i adjusted = _mm_add_epi32(eadjust, shifted); + __m128i den1 = _mm_add_epi32(shifted, STBIR__CONSTI(magic_denorm)); + __m128i adjusted2 = _mm_add_epi32(adjusted, adj_infnan); + __m128 den2 = _mm_sub_ps(_mm_castsi128_ps(den1), *(const __m128 *)&magic_denorm); + __m128 adjusted3 = _mm_and_ps(den2, _mm_castsi128_ps(b_isdenorm)); + __m128 adjusted4 = _mm_andnot_ps(_mm_castsi128_ps(b_isdenorm), _mm_castsi128_ps(adjusted2)); + __m128 adjusted5 = _mm_or_ps(adjusted3, adjusted4); + __m128i sign = _mm_slli_epi32(justsign, 16); + __m128 final = _mm_or_ps(adjusted5, _mm_castsi128_ps(sign)); + stbir__simdf_store( output + 0, final ); + + h = _mm_unpackhi_epi16 ( i, _mm_setzero_si128() ); + expmant = _mm_and_si128(mnosign, h); + justsign = _mm_xor_si128(h, expmant); + b_notinfnan = _mm_cmpgt_epi32(infty, expmant); + b_isdenorm = _mm_cmpgt_epi32(smallest, expmant); + shifted = _mm_slli_epi32(expmant, 13); + adj_infnan = _mm_andnot_si128(b_notinfnan, eadjust); + adjusted = _mm_add_epi32(eadjust, shifted); + den1 = _mm_add_epi32(shifted, STBIR__CONSTI(magic_denorm)); + adjusted2 = _mm_add_epi32(adjusted, adj_infnan); + den2 = _mm_sub_ps(_mm_castsi128_ps(den1), *(const __m128 *)&magic_denorm); + adjusted3 = _mm_and_ps(den2, _mm_castsi128_ps(b_isdenorm)); + adjusted4 = _mm_andnot_ps(_mm_castsi128_ps(b_isdenorm), _mm_castsi128_ps(adjusted2)); + adjusted5 = _mm_or_ps(adjusted3, adjusted4); + sign = _mm_slli_epi32(justsign, 16); + final = _mm_or_ps(adjusted5, _mm_castsi128_ps(sign)); + stbir__simdf_store( output + 4, final ); + + // ~38 SSE2 ops for 8 values + } + + // Fabian's round-to-nearest-even float to half + // ~48 SSE2 ops for 8 output + stbir__inline static void stbir__float_to_half_SIMD(void * output, float const * input) + { + static const STBIR__SIMDI_CONST(mask_sign, 0x80000000u); + static const STBIR__SIMDI_CONST(c_f16max, (127 + 16) << 23); // all FP32 values >=this round to +inf + static const STBIR__SIMDI_CONST(c_nanbit, 
0x200); + static const STBIR__SIMDI_CONST(c_infty_as_fp16, 0x7c00); + static const STBIR__SIMDI_CONST(c_min_normal, (127 - 14) << 23); // smallest FP32 that yields a normalized FP16 + static const STBIR__SIMDI_CONST(c_subnorm_magic, ((127 - 15) + (23 - 10) + 1) << 23); + static const STBIR__SIMDI_CONST(c_normal_bias, 0xfff - ((127 - 15) << 23)); // adjust exponent and add mantissa rounding + + __m128 f = _mm_loadu_ps(input); + __m128 msign = _mm_castsi128_ps(STBIR__CONSTI(mask_sign)); + __m128 justsign = _mm_and_ps(msign, f); + __m128 absf = _mm_xor_ps(f, justsign); + __m128i absf_int = _mm_castps_si128(absf); // the cast is "free" (extra bypass latency, but no thruput hit) + __m128i f16max = STBIR__CONSTI(c_f16max); + __m128 b_isnan = _mm_cmpunord_ps(absf, absf); // is this a NaN? + __m128i b_isregular = _mm_cmpgt_epi32(f16max, absf_int); // (sub)normalized or special? + __m128i nanbit = _mm_and_si128(_mm_castps_si128(b_isnan), STBIR__CONSTI(c_nanbit)); + __m128i inf_or_nan = _mm_or_si128(nanbit, STBIR__CONSTI(c_infty_as_fp16)); // output for specials + + __m128i min_normal = STBIR__CONSTI(c_min_normal); + __m128i b_issub = _mm_cmpgt_epi32(min_normal, absf_int); + + // "result is subnormal" path + __m128 subnorm1 = _mm_add_ps(absf, _mm_castsi128_ps(STBIR__CONSTI(c_subnorm_magic))); // magic value to round output mantissa + __m128i subnorm2 = _mm_sub_epi32(_mm_castps_si128(subnorm1), STBIR__CONSTI(c_subnorm_magic)); // subtract out bias + + // "result is normal" path + __m128i mantoddbit = _mm_slli_epi32(absf_int, 31 - 13); // shift bit 13 (mantissa LSB) to sign + __m128i mantodd = _mm_srai_epi32(mantoddbit, 31); // -1 if FP16 mantissa odd, else 0 + + __m128i round1 = _mm_add_epi32(absf_int, STBIR__CONSTI(c_normal_bias)); + __m128i round2 = _mm_sub_epi32(round1, mantodd); // if mantissa LSB odd, bias towards rounding up (RTNE) + __m128i normal = _mm_srli_epi32(round2, 13); // rounded result + + // combine the two non-specials + __m128i nonspecial = 
_mm_or_si128(_mm_and_si128(subnorm2, b_issub), _mm_andnot_si128(b_issub, normal)); + + // merge in specials as well + __m128i joined = _mm_or_si128(_mm_and_si128(nonspecial, b_isregular), _mm_andnot_si128(b_isregular, inf_or_nan)); + + __m128i sign_shift = _mm_srai_epi32(_mm_castps_si128(justsign), 16); + __m128i final2, final= _mm_or_si128(joined, sign_shift); + + f = _mm_loadu_ps(input+4); + justsign = _mm_and_ps(msign, f); + absf = _mm_xor_ps(f, justsign); + absf_int = _mm_castps_si128(absf); // the cast is "free" (extra bypass latency, but no thruput hit) + b_isnan = _mm_cmpunord_ps(absf, absf); // is this a NaN? + b_isregular = _mm_cmpgt_epi32(f16max, absf_int); // (sub)normalized or special? + nanbit = _mm_and_si128(_mm_castps_si128(b_isnan), c_nanbit); + inf_or_nan = _mm_or_si128(nanbit, STBIR__CONSTI(c_infty_as_fp16)); // output for specials + + b_issub = _mm_cmpgt_epi32(min_normal, absf_int); + + // "result is subnormal" path + subnorm1 = _mm_add_ps(absf, _mm_castsi128_ps(STBIR__CONSTI(c_subnorm_magic))); // magic value to round output mantissa + subnorm2 = _mm_sub_epi32(_mm_castps_si128(subnorm1), STBIR__CONSTI(c_subnorm_magic)); // subtract out bias + + // "result is normal" path + mantoddbit = _mm_slli_epi32(absf_int, 31 - 13); // shift bit 13 (mantissa LSB) to sign + mantodd = _mm_srai_epi32(mantoddbit, 31); // -1 if FP16 mantissa odd, else 0 + + round1 = _mm_add_epi32(absf_int, STBIR__CONSTI(c_normal_bias)); + round2 = _mm_sub_epi32(round1, mantodd); // if mantissa LSB odd, bias towards rounding up (RTNE) + normal = _mm_srli_epi32(round2, 13); // rounded result + + // combine the two non-specials + nonspecial = _mm_or_si128(_mm_and_si128(subnorm2, b_issub), _mm_andnot_si128(b_issub, normal)); + + // merge in specials as well + joined = _mm_or_si128(_mm_and_si128(nonspecial, b_isregular), _mm_andnot_si128(b_isregular, inf_or_nan)); + + sign_shift = _mm_srai_epi32(_mm_castps_si128(justsign), 16); + final2 = _mm_or_si128(joined, sign_shift); + final = 
_mm_packs_epi32(final, final2); + stbir__simdi_store( output,final ); + } + +#elif defined(STBIR_NEON) && defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__) // 64-bit ARM on MSVC (not clang) + + static stbir__inline void stbir__half_to_float_SIMD(float * output, stbir__FP16 const * input) + { + float16x4_t in0 = vld1_f16(input + 0); + float16x4_t in1 = vld1_f16(input + 4); + vst1q_f32(output + 0, vcvt_f32_f16(in0)); + vst1q_f32(output + 4, vcvt_f32_f16(in1)); + } + + static stbir__inline void stbir__float_to_half_SIMD(stbir__FP16 * output, float const * input) + { + float16x4_t out0 = vcvt_f16_f32(vld1q_f32(input + 0)); + float16x4_t out1 = vcvt_f16_f32(vld1q_f32(input + 4)); + vst1_f16(output+0, out0); + vst1_f16(output+4, out1); + } + + static stbir__inline float stbir__half_to_float( stbir__FP16 h ) + { + return vgetq_lane_f32(vcvt_f32_f16(vld1_dup_f16(&h)), 0); + } + + static stbir__inline stbir__FP16 stbir__float_to_half( float f ) + { + return vget_lane_f16(vcvt_f16_f32(vdupq_n_f32(f)), 0).n16_u16[0]; + } + +#elif defined(STBIR_NEON) && ( defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) ) // 64-bit ARM + + static stbir__inline void stbir__half_to_float_SIMD(float * output, stbir__FP16 const * input) + { + float16x8_t in = vld1q_f16(input); + vst1q_f32(output + 0, vcvt_f32_f16(vget_low_f16(in))); + vst1q_f32(output + 4, vcvt_f32_f16(vget_high_f16(in))); + } + + static stbir__inline void stbir__float_to_half_SIMD(stbir__FP16 * output, float const * input) + { + float16x4_t out0 = vcvt_f16_f32(vld1q_f32(input + 0)); + float16x4_t out1 = vcvt_f16_f32(vld1q_f32(input + 4)); + vst1q_f16(output, vcombine_f16(out0, out1)); + } + + static stbir__inline float stbir__half_to_float( stbir__FP16 h ) + { + return vgetq_lane_f32(vcvt_f32_f16(vdup_n_f16(h)), 0); + } + + static stbir__inline stbir__FP16 stbir__float_to_half( float f ) + { + return vget_lane_f16(vcvt_f16_f32(vdupq_n_f32(f)), 0); + } + +#elif defined(STBIR_WASM) || 
(defined(STBIR_NEON) && (defined(_MSC_VER) || defined(_M_ARM) || defined(__arm__))) // WASM or 32-bit ARM on MSVC/clang + + static stbir__inline void stbir__half_to_float_SIMD(float * output, stbir__FP16 const * input) + { + for (int i=0; i<8; i++) + { + output[i] = stbir__half_to_float(input[i]); + } + } + static stbir__inline void stbir__float_to_half_SIMD(stbir__FP16 * output, float const * input) + { + for (int i=0; i<8; i++) + { + output[i] = stbir__float_to_half(input[i]); + } + } + +#endif + + +#ifdef STBIR_SIMD + +#define stbir__simdf_0123to3333( out, reg ) (out) = stbir__simdf_swiz( reg, 3,3,3,3 ) +#define stbir__simdf_0123to2222( out, reg ) (out) = stbir__simdf_swiz( reg, 2,2,2,2 ) +#define stbir__simdf_0123to1111( out, reg ) (out) = stbir__simdf_swiz( reg, 1,1,1,1 ) +#define stbir__simdf_0123to0000( out, reg ) (out) = stbir__simdf_swiz( reg, 0,0,0,0 ) +#define stbir__simdf_0123to0003( out, reg ) (out) = stbir__simdf_swiz( reg, 0,0,0,3 ) +#define stbir__simdf_0123to0001( out, reg ) (out) = stbir__simdf_swiz( reg, 0,0,0,1 ) +#define stbir__simdf_0123to1122( out, reg ) (out) = stbir__simdf_swiz( reg, 1,1,2,2 ) +#define stbir__simdf_0123to2333( out, reg ) (out) = stbir__simdf_swiz( reg, 2,3,3,3 ) +#define stbir__simdf_0123to0023( out, reg ) (out) = stbir__simdf_swiz( reg, 0,0,2,3 ) +#define stbir__simdf_0123to1230( out, reg ) (out) = stbir__simdf_swiz( reg, 1,2,3,0 ) +#define stbir__simdf_0123to2103( out, reg ) (out) = stbir__simdf_swiz( reg, 2,1,0,3 ) +#define stbir__simdf_0123to3210( out, reg ) (out) = stbir__simdf_swiz( reg, 3,2,1,0 ) +#define stbir__simdf_0123to2301( out, reg ) (out) = stbir__simdf_swiz( reg, 2,3,0,1 ) +#define stbir__simdf_0123to3012( out, reg ) (out) = stbir__simdf_swiz( reg, 3,0,1,2 ) +#define stbir__simdf_0123to0011( out, reg ) (out) = stbir__simdf_swiz( reg, 0,0,1,1 ) +#define stbir__simdf_0123to1100( out, reg ) (out) = stbir__simdf_swiz( reg, 1,1,0,0 ) +#define stbir__simdf_0123to2233( out, reg ) (out) = stbir__simdf_swiz( reg, 
2,2,3,3 ) +#define stbir__simdf_0123to1133( out, reg ) (out) = stbir__simdf_swiz( reg, 1,1,3,3 ) +#define stbir__simdf_0123to0022( out, reg ) (out) = stbir__simdf_swiz( reg, 0,0,2,2 ) +#define stbir__simdf_0123to1032( out, reg ) (out) = stbir__simdf_swiz( reg, 1,0,3,2 ) + +typedef union stbir__simdi_u32 +{ + stbir_uint32 m128i_u32[4]; + int m128i_i32[4]; + stbir__simdi m128i_i128; +} stbir__simdi_u32; + +static const int STBIR_mask[9] = { 0,0,0,-1,-1,-1,0,0,0 }; + +static const STBIR__SIMDF_CONST(STBIR_max_uint8_as_float, stbir__max_uint8_as_float); +static const STBIR__SIMDF_CONST(STBIR_max_uint16_as_float, stbir__max_uint16_as_float); +static const STBIR__SIMDF_CONST(STBIR_max_uint8_as_float_inverted, stbir__max_uint8_as_float_inverted); +static const STBIR__SIMDF_CONST(STBIR_max_uint16_as_float_inverted, stbir__max_uint16_as_float_inverted); + +static const STBIR__SIMDF_CONST(STBIR_simd_point5, 0.5f); +static const STBIR__SIMDF_CONST(STBIR_ones, 1.0f); +static const STBIR__SIMDI_CONST(STBIR_almost_zero, (127 - 13) << 23); +static const STBIR__SIMDI_CONST(STBIR_almost_one, 0x3f7fffff); +static const STBIR__SIMDI_CONST(STBIR_mastissa_mask, 0xff); +static const STBIR__SIMDI_CONST(STBIR_topscale, 0x02000000); + +// Basically, in simd mode, we unroll the proper amount, and we don't want +// the non-simd remnant loops to be unroll because they only run a few times +// Adding this switch saves about 5K on clang which is Captain Unroll the 3rd. 
+#define STBIR_SIMD_STREAMOUT_PTR( star ) STBIR_STREAMOUT_PTR( star ) +#define STBIR_SIMD_NO_UNROLL(ptr) STBIR_NO_UNROLL(ptr) +#define STBIR_SIMD_NO_UNROLL_LOOP_START STBIR_NO_UNROLL_LOOP_START +#define STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR STBIR_NO_UNROLL_LOOP_START_INF_FOR + +#ifdef STBIR_MEMCPY +#undef STBIR_MEMCPY +#endif +#define STBIR_MEMCPY stbir_simd_memcpy + +// override normal use of memcpy with much simpler copy (faster and smaller with our sized copies) +static void stbir_simd_memcpy( void * dest, void const * src, size_t bytes ) +{ + char STBIR_SIMD_STREAMOUT_PTR (*) d = (char*) dest; + char STBIR_SIMD_STREAMOUT_PTR( * ) d_end = ((char*) dest) + bytes; + ptrdiff_t ofs_to_src = (char*)src - (char*)dest; + + // check overlaps + STBIR_ASSERT( ( ( d >= ( (char*)src) + bytes ) ) || ( ( d + bytes ) <= (char*)src ) ); + + if ( bytes < (16*stbir__simdfX_float_count) ) + { + if ( bytes < 16 ) + { + if ( bytes ) + { + STBIR_SIMD_NO_UNROLL_LOOP_START + do + { + STBIR_SIMD_NO_UNROLL(d); + d[ 0 ] = d[ ofs_to_src ]; + ++d; + } while ( d < d_end ); + } + } + else + { + stbir__simdf x; + // do one unaligned to get us aligned for the stream out below + stbir__simdf_load( x, ( d + ofs_to_src ) ); + stbir__simdf_store( d, x ); + d = (char*)( ( ( (size_t)d ) + 16 ) & ~15 ); + + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + STBIR_SIMD_NO_UNROLL(d); + + if ( d > ( d_end - 16 ) ) + { + if ( d == d_end ) + return; + d = d_end - 16; + } + + stbir__simdf_load( x, ( d + ofs_to_src ) ); + stbir__simdf_store( d, x ); + d += 16; + } + } + } + else + { + stbir__simdfX x0,x1,x2,x3; + + // do one unaligned to get us aligned for the stream out below + stbir__simdfX_load( x0, ( d + ofs_to_src ) + 0*stbir__simdfX_float_count ); + stbir__simdfX_load( x1, ( d + ofs_to_src ) + 4*stbir__simdfX_float_count ); + stbir__simdfX_load( x2, ( d + ofs_to_src ) + 8*stbir__simdfX_float_count ); + stbir__simdfX_load( x3, ( d + ofs_to_src ) + 12*stbir__simdfX_float_count ); + 
stbir__simdfX_store( d + 0*stbir__simdfX_float_count, x0 ); + stbir__simdfX_store( d + 4*stbir__simdfX_float_count, x1 ); + stbir__simdfX_store( d + 8*stbir__simdfX_float_count, x2 ); + stbir__simdfX_store( d + 12*stbir__simdfX_float_count, x3 ); + d = (char*)( ( ( (size_t)d ) + (16*stbir__simdfX_float_count) ) & ~((16*stbir__simdfX_float_count)-1) ); + + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + STBIR_SIMD_NO_UNROLL(d); + + if ( d > ( d_end - (16*stbir__simdfX_float_count) ) ) + { + if ( d == d_end ) + return; + d = d_end - (16*stbir__simdfX_float_count); + } + + stbir__simdfX_load( x0, ( d + ofs_to_src ) + 0*stbir__simdfX_float_count ); + stbir__simdfX_load( x1, ( d + ofs_to_src ) + 4*stbir__simdfX_float_count ); + stbir__simdfX_load( x2, ( d + ofs_to_src ) + 8*stbir__simdfX_float_count ); + stbir__simdfX_load( x3, ( d + ofs_to_src ) + 12*stbir__simdfX_float_count ); + stbir__simdfX_store( d + 0*stbir__simdfX_float_count, x0 ); + stbir__simdfX_store( d + 4*stbir__simdfX_float_count, x1 ); + stbir__simdfX_store( d + 8*stbir__simdfX_float_count, x2 ); + stbir__simdfX_store( d + 12*stbir__simdfX_float_count, x3 ); + d += (16*stbir__simdfX_float_count); + } + } +} + +// memcpy that is specically intentionally overlapping (src is smaller then dest, so can be +// a normal forward copy, bytes is divisible by 4 and bytes is greater than or equal to +// the diff between dest and src) +static void stbir_overlapping_memcpy( void * dest, void const * src, size_t bytes ) +{ + char STBIR_SIMD_STREAMOUT_PTR (*) sd = (char*) src; + char STBIR_SIMD_STREAMOUT_PTR( * ) s_end = ((char*) src) + bytes; + ptrdiff_t ofs_to_dest = (char*)dest - (char*)src; + + if ( ofs_to_dest >= 16 ) // is the overlap more than 16 away? 
+ { + char STBIR_SIMD_STREAMOUT_PTR( * ) s_end16 = ((char*) src) + (bytes&~15); + STBIR_SIMD_NO_UNROLL_LOOP_START + do + { + stbir__simdf x; + STBIR_SIMD_NO_UNROLL(sd); + stbir__simdf_load( x, sd ); + stbir__simdf_store( ( sd + ofs_to_dest ), x ); + sd += 16; + } while ( sd < s_end16 ); + + if ( sd == s_end ) + return; + } + + do + { + STBIR_SIMD_NO_UNROLL(sd); + *(int*)( sd + ofs_to_dest ) = *(int*) sd; + sd += 4; + } while ( sd < s_end ); +} + +#else // no SSE2 + +// when in scalar mode, we let unrolling happen, so this macro just does the __restrict +#define STBIR_SIMD_STREAMOUT_PTR( star ) STBIR_STREAMOUT_PTR( star ) +#define STBIR_SIMD_NO_UNROLL(ptr) +#define STBIR_SIMD_NO_UNROLL_LOOP_START +#define STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR + +#endif // SSE2 + + +#ifdef STBIR_PROFILE + +#ifndef STBIR_PROFILE_FUNC + +#if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(__SSE2__) || defined(STBIR_SSE) || defined( _M_IX86_FP ) || defined(__i386) || defined( __i386__ ) || defined( _M_IX86 ) || defined( _X86_ ) + +#ifdef _MSC_VER + + STBIRDEF stbir_uint64 __rdtsc(); + #define STBIR_PROFILE_FUNC() __rdtsc() + +#else // non msvc + + static stbir__inline stbir_uint64 STBIR_PROFILE_FUNC() + { + stbir_uint32 lo, hi; + asm volatile ("rdtsc" : "=a" (lo), "=d" (hi) ); + return ( ( (stbir_uint64) hi ) << 32 ) | ( (stbir_uint64) lo ); + } + +#endif // msvc + +#elif defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || defined(__ARM_NEON__) + +#if defined( _MSC_VER ) && !defined(__clang__) + + #define STBIR_PROFILE_FUNC() _ReadStatusReg(ARM64_CNTVCT) + +#else + + static stbir__inline stbir_uint64 STBIR_PROFILE_FUNC() + { + stbir_uint64 tsc; + asm volatile("mrs %0, cntvct_el0" : "=r" (tsc)); + return tsc; + } + +#endif + +#else // x64, arm + +#error Unknown platform for profiling. 
+ +#endif // x64, arm + +#endif // STBIR_PROFILE_FUNC + +#define STBIR_ONLY_PROFILE_GET_SPLIT_INFO ,stbir__per_split_info * split_info +#define STBIR_ONLY_PROFILE_SET_SPLIT_INFO ,split_info + +#define STBIR_ONLY_PROFILE_BUILD_GET_INFO ,stbir__info * profile_info +#define STBIR_ONLY_PROFILE_BUILD_SET_INFO ,profile_info + +// super light-weight micro profiler +#define STBIR_PROFILE_START_ll( info, wh ) { stbir_uint64 wh##thiszonetime = STBIR_PROFILE_FUNC(); stbir_uint64 * wh##save_parent_excluded_ptr = info->current_zone_excluded_ptr; stbir_uint64 wh##current_zone_excluded = 0; info->current_zone_excluded_ptr = &wh##current_zone_excluded; +#define STBIR_PROFILE_END_ll( info, wh ) wh##thiszonetime = STBIR_PROFILE_FUNC() - wh##thiszonetime; info->profile.named.wh += wh##thiszonetime - wh##current_zone_excluded; *wh##save_parent_excluded_ptr += wh##thiszonetime; info->current_zone_excluded_ptr = wh##save_parent_excluded_ptr; } +#define STBIR_PROFILE_FIRST_START_ll( info, wh ) { int i; info->current_zone_excluded_ptr = &info->profile.named.total; for(i=0;iprofile.array);i++) info->profile.array[i]=0; } STBIR_PROFILE_START_ll( info, wh ); +#define STBIR_PROFILE_CLEAR_EXTRAS_ll( info, num ) { int extra; for(extra=1;extra<(num);extra++) { int i; for(i=0;iprofile.array);i++) (info)[extra].profile.array[i]=0; } } + +// for thread data +#define STBIR_PROFILE_START( wh ) STBIR_PROFILE_START_ll( split_info, wh ) +#define STBIR_PROFILE_END( wh ) STBIR_PROFILE_END_ll( split_info, wh ) +#define STBIR_PROFILE_FIRST_START( wh ) STBIR_PROFILE_FIRST_START_ll( split_info, wh ) +#define STBIR_PROFILE_CLEAR_EXTRAS() STBIR_PROFILE_CLEAR_EXTRAS_ll( split_info, split_count ) + +// for build data +#define STBIR_PROFILE_BUILD_START( wh ) STBIR_PROFILE_START_ll( profile_info, wh ) +#define STBIR_PROFILE_BUILD_END( wh ) STBIR_PROFILE_END_ll( profile_info, wh ) +#define STBIR_PROFILE_BUILD_FIRST_START( wh ) STBIR_PROFILE_FIRST_START_ll( profile_info, wh ) +#define STBIR_PROFILE_BUILD_CLEAR( info 
) { int i; for(i=0;iprofile.array);i++) info->profile.array[i]=0; } + +#else // no profile + +#define STBIR_ONLY_PROFILE_GET_SPLIT_INFO +#define STBIR_ONLY_PROFILE_SET_SPLIT_INFO + +#define STBIR_ONLY_PROFILE_BUILD_GET_INFO +#define STBIR_ONLY_PROFILE_BUILD_SET_INFO + +#define STBIR_PROFILE_START( wh ) +#define STBIR_PROFILE_END( wh ) +#define STBIR_PROFILE_FIRST_START( wh ) +#define STBIR_PROFILE_CLEAR_EXTRAS( ) + +#define STBIR_PROFILE_BUILD_START( wh ) +#define STBIR_PROFILE_BUILD_END( wh ) +#define STBIR_PROFILE_BUILD_FIRST_START( wh ) +#define STBIR_PROFILE_BUILD_CLEAR( info ) + +#endif // stbir_profile + +#ifndef STBIR_CEILF +#include +#if _MSC_VER <= 1200 // support VC6 for Sean +#define STBIR_CEILF(x) ((float)ceil((float)(x))) +#define STBIR_FLOORF(x) ((float)floor((float)(x))) +#else +#define STBIR_CEILF(x) ceilf(x) +#define STBIR_FLOORF(x) floorf(x) +#endif +#endif + +#ifndef STBIR_MEMCPY +// For memcpy +#include +#define STBIR_MEMCPY( dest, src, len ) memcpy( dest, src, len ) +#endif + +#ifndef STBIR_SIMD + +// memcpy that is specifically intentionally overlapping (src is smaller then dest, so can be +// a normal forward copy, bytes is divisible by 4 and bytes is greater than or equal to +// the diff between dest and src) +static void stbir_overlapping_memcpy( void * dest, void const * src, size_t bytes ) +{ + char STBIR_SIMD_STREAMOUT_PTR (*) sd = (char*) src; + char STBIR_SIMD_STREAMOUT_PTR( * ) s_end = ((char*) src) + bytes; + ptrdiff_t ofs_to_dest = (char*)dest - (char*)src; + + if ( ofs_to_dest >= 8 ) // is the overlap more than 8 away? 
+ { + char STBIR_SIMD_STREAMOUT_PTR( * ) s_end8 = ((char*) src) + (bytes&~7); + STBIR_NO_UNROLL_LOOP_START + do + { + STBIR_NO_UNROLL(sd); + *(stbir_uint64*)( sd + ofs_to_dest ) = *(stbir_uint64*) sd; + sd += 8; + } while ( sd < s_end8 ); + + if ( sd == s_end ) + return; + } + + STBIR_NO_UNROLL_LOOP_START + do + { + STBIR_NO_UNROLL(sd); + *(int*)( sd + ofs_to_dest ) = *(int*) sd; + sd += 4; + } while ( sd < s_end ); +} + +#endif + +static float stbir__filter_trapezoid(float x, float scale, void * user_data) +{ + float halfscale = scale / 2; + float t = 0.5f + halfscale; + STBIR_ASSERT(scale <= 1); + STBIR__UNUSED(user_data); + + if ( x < 0.0f ) x = -x; + + if (x >= t) + return 0.0f; + else + { + float r = 0.5f - halfscale; + if (x <= r) + return 1.0f; + else + return (t - x) / scale; + } +} + +static float stbir__support_trapezoid(float scale, void * user_data) +{ + STBIR__UNUSED(user_data); + return 0.5f + scale / 2.0f; +} + +static float stbir__filter_triangle(float x, float s, void * user_data) +{ + STBIR__UNUSED(s); + STBIR__UNUSED(user_data); + + if ( x < 0.0f ) x = -x; + + if (x <= 1.0f) + return 1.0f - x; + else + return 0.0f; +} + +static float stbir__filter_point(float x, float s, void * user_data) +{ + STBIR__UNUSED(x); + STBIR__UNUSED(s); + STBIR__UNUSED(user_data); + + return 1.0f; +} + +static float stbir__filter_cubic(float x, float s, void * user_data) +{ + STBIR__UNUSED(s); + STBIR__UNUSED(user_data); + + if ( x < 0.0f ) x = -x; + + if (x < 1.0f) + return (4.0f + x*x*(3.0f*x - 6.0f))/6.0f; + else if (x < 2.0f) + return (8.0f + x*(-12.0f + x*(6.0f - x)))/6.0f; + + return (0.0f); +} + +static float stbir__filter_catmullrom(float x, float s, void * user_data) +{ + STBIR__UNUSED(s); + STBIR__UNUSED(user_data); + + if ( x < 0.0f ) x = -x; + + if (x < 1.0f) + return 1.0f - x*x*(2.5f - 1.5f*x); + else if (x < 2.0f) + return 2.0f - x*(4.0f + x*(0.5f*x - 2.5f)); + + return (0.0f); +} + +static float stbir__filter_mitchell(float x, float s, void * user_data) 
+{ + STBIR__UNUSED(s); + STBIR__UNUSED(user_data); + + if ( x < 0.0f ) x = -x; + + if (x < 1.0f) + return (16.0f + x*x*(21.0f * x - 36.0f))/18.0f; + else if (x < 2.0f) + return (32.0f + x*(-60.0f + x*(36.0f - 7.0f*x)))/18.0f; + + return (0.0f); +} + +static float stbir__support_zeropoint5(float s, void * user_data) +{ + STBIR__UNUSED(s); + STBIR__UNUSED(user_data); + return 0.5f; +} + +static float stbir__support_one(float s, void * user_data) +{ + STBIR__UNUSED(s); + STBIR__UNUSED(user_data); + return 1; +} + +static float stbir__support_two(float s, void * user_data) +{ + STBIR__UNUSED(s); + STBIR__UNUSED(user_data); + return 2; +} + +// This is the maximum number of input samples that can affect an output sample +// with the given filter from the output pixel's perspective +static int stbir__get_filter_pixel_width(stbir__support_callback * support, float scale, void * user_data) +{ + STBIR_ASSERT(support != 0); + + if ( scale >= ( 1.0f-stbir__small_float ) ) // upscale + return (int)STBIR_CEILF(support(1.0f/scale,user_data) * 2.0f); + else + return (int)STBIR_CEILF(support(scale,user_data) * 2.0f / scale); +} + +// this is how many coefficents per run of the filter (which is different +// from the filter_pixel_width depending on if we are scattering or gathering) +static int stbir__get_coefficient_width(stbir__sampler * samp, int is_gather, void * user_data) +{ + float scale = samp->scale_info.scale; + stbir__support_callback * support = samp->filter_support; + + switch( is_gather ) + { + case 1: + return (int)STBIR_CEILF(support(1.0f / scale, user_data) * 2.0f); + case 2: + return (int)STBIR_CEILF(support(scale, user_data) * 2.0f / scale); + case 0: + return (int)STBIR_CEILF(support(scale, user_data) * 2.0f); + default: + STBIR_ASSERT( (is_gather >= 0 ) && (is_gather <= 2 ) ); + return 0; + } +} + +static int stbir__get_contributors(stbir__sampler * samp, int is_gather) +{ + if (is_gather) + return samp->scale_info.output_sub_size; + else + return 
(samp->scale_info.input_full_size + samp->filter_pixel_margin * 2); +} + +static int stbir__edge_zero_full( int n, int max ) +{ + STBIR__UNUSED(n); + STBIR__UNUSED(max); + return 0; // NOTREACHED +} + +static int stbir__edge_clamp_full( int n, int max ) +{ + if (n < 0) + return 0; + + if (n >= max) + return max - 1; + + return n; // NOTREACHED +} + +static int stbir__edge_reflect_full( int n, int max ) +{ + if (n < 0) + { + if (n > -max) + return -n; + else + return max - 1; + } + + if (n >= max) + { + int max2 = max * 2; + if (n >= max2) + return 0; + else + return max2 - n - 1; + } + + return n; // NOTREACHED +} + +static int stbir__edge_wrap_full( int n, int max ) +{ + if (n >= 0) + return (n % max); + else + { + int m = (-n) % max; + + if (m != 0) + m = max - m; + + return (m); + } +} + +typedef int stbir__edge_wrap_func( int n, int max ); +static stbir__edge_wrap_func * stbir__edge_wrap_slow[] = +{ + stbir__edge_clamp_full, // STBIR_EDGE_CLAMP + stbir__edge_reflect_full, // STBIR_EDGE_REFLECT + stbir__edge_wrap_full, // STBIR_EDGE_WRAP + stbir__edge_zero_full, // STBIR_EDGE_ZERO +}; + +stbir__inline static int stbir__edge_wrap(stbir_edge edge, int n, int max) +{ + // avoid per-pixel switch + if (n >= 0 && n < max) + return n; + return stbir__edge_wrap_slow[edge]( n, max ); +} + +#define STBIR__MERGE_RUNS_PIXEL_THRESHOLD 16 + +// get information on the extents of a sampler +static void stbir__get_extents( stbir__sampler * samp, stbir__extents * scanline_extents ) +{ + int j, stop; + int left_margin, right_margin; + int min_n = 0x7fffffff, max_n = -0x7fffffff; + int min_left = 0x7fffffff, max_left = -0x7fffffff; + int min_right = 0x7fffffff, max_right = -0x7fffffff; + stbir_edge edge = samp->edge; + stbir__contributors* contributors = samp->contributors; + int output_sub_size = samp->scale_info.output_sub_size; + int input_full_size = samp->scale_info.input_full_size; + int filter_pixel_margin = samp->filter_pixel_margin; + + STBIR_ASSERT( samp->is_gather ); + + 
stop = output_sub_size; + for (j = 0; j < stop; j++ ) + { + STBIR_ASSERT( contributors[j].n1 >= contributors[j].n0 ); + if ( contributors[j].n0 < min_n ) + { + min_n = contributors[j].n0; + stop = j + filter_pixel_margin; // if we find a new min, only scan another filter width + if ( stop > output_sub_size ) stop = output_sub_size; + } + } + + stop = 0; + for (j = output_sub_size - 1; j >= stop; j-- ) + { + STBIR_ASSERT( contributors[j].n1 >= contributors[j].n0 ); + if ( contributors[j].n1 > max_n ) + { + max_n = contributors[j].n1; + stop = j - filter_pixel_margin; // if we find a new max, only scan another filter width + if (stop<0) stop = 0; + } + } + + STBIR_ASSERT( scanline_extents->conservative.n0 <= min_n ); + STBIR_ASSERT( scanline_extents->conservative.n1 >= max_n ); + + // now calculate how much into the margins we really read + left_margin = 0; + if ( min_n < 0 ) + { + left_margin = -min_n; + min_n = 0; + } + + right_margin = 0; + if ( max_n >= input_full_size ) + { + right_margin = max_n - input_full_size + 1; + max_n = input_full_size - 1; + } + + // index 1 is margin pixel extents (how many pixels we hang over the edge) + scanline_extents->edge_sizes[0] = left_margin; + scanline_extents->edge_sizes[1] = right_margin; + + // index 2 is pixels read from the input + scanline_extents->spans[0].n0 = min_n; + scanline_extents->spans[0].n1 = max_n; + scanline_extents->spans[0].pixel_offset_for_input = min_n; + + // default to no other input range + scanline_extents->spans[1].n0 = 0; + scanline_extents->spans[1].n1 = -1; + scanline_extents->spans[1].pixel_offset_for_input = 0; + + // don't have to do edge calc for zero clamp + if ( edge == STBIR_EDGE_ZERO ) + return; + + // convert margin pixels to the pixels within the input (min and max) + for( j = -left_margin ; j < 0 ; j++ ) + { + int p = stbir__edge_wrap( edge, j, input_full_size ); + if ( p < min_left ) + min_left = p; + if ( p > max_left ) + max_left = p; + } + + for( j = input_full_size ; j < 
(input_full_size + right_margin) ; j++ ) + { + int p = stbir__edge_wrap( edge, j, input_full_size ); + if ( p < min_right ) + min_right = p; + if ( p > max_right ) + max_right = p; + } + + // merge the left margin pixel region if it connects within 4 pixels of main pixel region + if ( min_left != 0x7fffffff ) + { + if ( ( ( min_left <= min_n ) && ( ( max_left + STBIR__MERGE_RUNS_PIXEL_THRESHOLD ) >= min_n ) ) || + ( ( min_n <= min_left ) && ( ( max_n + STBIR__MERGE_RUNS_PIXEL_THRESHOLD ) >= max_left ) ) ) + { + scanline_extents->spans[0].n0 = min_n = stbir__min( min_n, min_left ); + scanline_extents->spans[0].n1 = max_n = stbir__max( max_n, max_left ); + scanline_extents->spans[0].pixel_offset_for_input = min_n; + left_margin = 0; + } + } + + // merge the right margin pixel region if it connects within 4 pixels of main pixel region + if ( min_right != 0x7fffffff ) + { + if ( ( ( min_right <= min_n ) && ( ( max_right + STBIR__MERGE_RUNS_PIXEL_THRESHOLD ) >= min_n ) ) || + ( ( min_n <= min_right ) && ( ( max_n + STBIR__MERGE_RUNS_PIXEL_THRESHOLD ) >= max_right ) ) ) + { + scanline_extents->spans[0].n0 = min_n = stbir__min( min_n, min_right ); + scanline_extents->spans[0].n1 = max_n = stbir__max( max_n, max_right ); + scanline_extents->spans[0].pixel_offset_for_input = min_n; + right_margin = 0; + } + } + + STBIR_ASSERT( scanline_extents->conservative.n0 <= min_n ); + STBIR_ASSERT( scanline_extents->conservative.n1 >= max_n ); + + // you get two ranges when you have the WRAP edge mode and you are doing just the a piece of the resize + // so you need to get a second run of pixels from the opposite side of the scanline (which you + // wouldn't need except for WRAP) + + + // if we can't merge the min_left range, add it as a second range + if ( ( left_margin ) && ( min_left != 0x7fffffff ) ) + { + stbir__span * newspan = scanline_extents->spans + 1; + STBIR_ASSERT( right_margin == 0 ); + if ( min_left < scanline_extents->spans[0].n0 ) + { + 
scanline_extents->spans[1].pixel_offset_for_input = scanline_extents->spans[0].n0; + scanline_extents->spans[1].n0 = scanline_extents->spans[0].n0; + scanline_extents->spans[1].n1 = scanline_extents->spans[0].n1; + --newspan; + } + newspan->pixel_offset_for_input = min_left; + newspan->n0 = -left_margin; + newspan->n1 = ( max_left - min_left ) - left_margin; + scanline_extents->edge_sizes[0] = 0; // don't need to copy the left margin, since we are directly decoding into the margin + } + // if we can't merge the min_left range, add it as a second range + else + if ( ( right_margin ) && ( min_right != 0x7fffffff ) ) + { + stbir__span * newspan = scanline_extents->spans + 1; + if ( min_right < scanline_extents->spans[0].n0 ) + { + scanline_extents->spans[1].pixel_offset_for_input = scanline_extents->spans[0].n0; + scanline_extents->spans[1].n0 = scanline_extents->spans[0].n0; + scanline_extents->spans[1].n1 = scanline_extents->spans[0].n1; + --newspan; + } + newspan->pixel_offset_for_input = min_right; + newspan->n0 = scanline_extents->spans[1].n1 + 1; + newspan->n1 = scanline_extents->spans[1].n1 + 1 + ( max_right - min_right ); + scanline_extents->edge_sizes[1] = 0; // don't need to copy the right margin, since we are directly decoding into the margin + } + + // sort the spans into write output order + if ( ( scanline_extents->spans[1].n1 > scanline_extents->spans[1].n0 ) && ( scanline_extents->spans[0].n0 > scanline_extents->spans[1].n0 ) ) + { + stbir__span tspan = scanline_extents->spans[0]; + scanline_extents->spans[0] = scanline_extents->spans[1]; + scanline_extents->spans[1] = tspan; + } +} + +static void stbir__calculate_in_pixel_range( int * first_pixel, int * last_pixel, float out_pixel_center, float out_filter_radius, float inv_scale, float out_shift, int input_size, stbir_edge edge ) +{ + int first, last; + float out_pixel_influence_lowerbound = out_pixel_center - out_filter_radius; + float out_pixel_influence_upperbound = out_pixel_center + 
out_filter_radius; + + float in_pixel_influence_lowerbound = (out_pixel_influence_lowerbound + out_shift) * inv_scale; + float in_pixel_influence_upperbound = (out_pixel_influence_upperbound + out_shift) * inv_scale; + + first = (int)(STBIR_FLOORF(in_pixel_influence_lowerbound + 0.5f)); + last = (int)(STBIR_FLOORF(in_pixel_influence_upperbound - 0.5f)); + if ( last < first ) last = first; // point sample mode can span a value *right* at 0.5, and cause these to cross + + if ( edge == STBIR_EDGE_WRAP ) + { + if ( first < -input_size ) + first = -input_size; + if ( last >= (input_size*2)) + last = (input_size*2) - 1; + } + + *first_pixel = first; + *last_pixel = last; +} + +static void stbir__calculate_coefficients_for_gather_upsample( float out_filter_radius, stbir__kernel_callback * kernel, stbir__scale_info * scale_info, int num_contributors, stbir__contributors* contributors, float* coefficient_group, int coefficient_width, stbir_edge edge, void * user_data ) +{ + int n, end; + float inv_scale = scale_info->inv_scale; + float out_shift = scale_info->pixel_shift; + int input_size = scale_info->input_full_size; + int numerator = scale_info->scale_numerator; + int polyphase = ( ( scale_info->scale_is_rational ) && ( numerator < num_contributors ) ); + + // Looping through out pixels + end = num_contributors; if ( polyphase ) end = numerator; + for (n = 0; n < end; n++) + { + int i; + int last_non_zero; + float out_pixel_center = (float)n + 0.5f; + float in_center_of_out = (out_pixel_center + out_shift) * inv_scale; + + int in_first_pixel, in_last_pixel; + + stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, out_pixel_center, out_filter_radius, inv_scale, out_shift, input_size, edge ); + + // make sure we never generate a range larger than our precalculated coeff width + // this only happens in point sample mode, but it's a good safe thing to do anyway + if ( ( in_last_pixel - in_first_pixel + 1 ) > coefficient_width ) + in_last_pixel = in_first_pixel + 
coefficient_width - 1; + + last_non_zero = -1; + for (i = 0; i <= in_last_pixel - in_first_pixel; i++) + { + float in_pixel_center = (float)(i + in_first_pixel) + 0.5f; + float coeff = kernel(in_center_of_out - in_pixel_center, inv_scale, user_data); + + // kill denormals + if ( ( ( coeff < stbir__small_float ) && ( coeff > -stbir__small_float ) ) ) + { + if ( i == 0 ) // if we're at the front, just eat zero contributors + { + STBIR_ASSERT ( ( in_last_pixel - in_first_pixel ) != 0 ); // there should be at least one contrib + ++in_first_pixel; + i--; + continue; + } + coeff = 0; // make sure is fully zero (should keep denormals away) + } + else + last_non_zero = i; + + coefficient_group[i] = coeff; + } + + in_last_pixel = last_non_zero+in_first_pixel; // kills trailing zeros + contributors->n0 = in_first_pixel; + contributors->n1 = in_last_pixel; + + STBIR_ASSERT(contributors->n1 >= contributors->n0); + + ++contributors; + coefficient_group += coefficient_width; + } +} + +static void stbir__insert_coeff( stbir__contributors * contribs, float * coeffs, int new_pixel, float new_coeff, int max_width ) +{ + if ( new_pixel <= contribs->n1 ) // before the end + { + if ( new_pixel < contribs->n0 ) // before the front? 
+ { + if ( ( contribs->n1 - new_pixel + 1 ) <= max_width ) + { + int j, o = contribs->n0 - new_pixel; + for ( j = contribs->n1 - contribs->n0 ; j <= 0 ; j-- ) + coeffs[ j + o ] = coeffs[ j ]; + for ( j = 1 ; j < o ; j-- ) + coeffs[ j ] = coeffs[ 0 ]; + coeffs[ 0 ] = new_coeff; + contribs->n0 = new_pixel; + } + } + else + { + coeffs[ new_pixel - contribs->n0 ] += new_coeff; + } + } + else + { + if ( ( new_pixel - contribs->n0 + 1 ) <= max_width ) + { + int j, e = new_pixel - contribs->n0; + for( j = ( contribs->n1 - contribs->n0 ) + 1 ; j < e ; j++ ) // clear in-betweens coeffs if there are any + coeffs[j] = 0; + + coeffs[ e ] = new_coeff; + contribs->n1 = new_pixel; + } + } +} + +static void stbir__calculate_out_pixel_range( int * first_pixel, int * last_pixel, float in_pixel_center, float in_pixels_radius, float scale, float out_shift, int out_size ) +{ + float in_pixel_influence_lowerbound = in_pixel_center - in_pixels_radius; + float in_pixel_influence_upperbound = in_pixel_center + in_pixels_radius; + float out_pixel_influence_lowerbound = in_pixel_influence_lowerbound * scale - out_shift; + float out_pixel_influence_upperbound = in_pixel_influence_upperbound * scale - out_shift; + int out_first_pixel = (int)(STBIR_FLOORF(out_pixel_influence_lowerbound + 0.5f)); + int out_last_pixel = (int)(STBIR_FLOORF(out_pixel_influence_upperbound - 0.5f)); + + if ( out_first_pixel < 0 ) + out_first_pixel = 0; + if ( out_last_pixel >= out_size ) + out_last_pixel = out_size - 1; + *first_pixel = out_first_pixel; + *last_pixel = out_last_pixel; +} + +static void stbir__calculate_coefficients_for_gather_downsample( int start, int end, float in_pixels_radius, stbir__kernel_callback * kernel, stbir__scale_info * scale_info, int coefficient_width, int num_contributors, stbir__contributors * contributors, float * coefficient_group, void * user_data ) +{ + int in_pixel; + int i; + int first_out_inited = -1; + float scale = scale_info->scale; + float out_shift = 
scale_info->pixel_shift; + int out_size = scale_info->output_sub_size; + int numerator = scale_info->scale_numerator; + int polyphase = ( ( scale_info->scale_is_rational ) && ( numerator < out_size ) ); + + STBIR__UNUSED(num_contributors); + + // Loop through the input pixels + for (in_pixel = start; in_pixel < end; in_pixel++) + { + float in_pixel_center = (float)in_pixel + 0.5f; + float out_center_of_in = in_pixel_center * scale - out_shift; + int out_first_pixel, out_last_pixel; + + stbir__calculate_out_pixel_range( &out_first_pixel, &out_last_pixel, in_pixel_center, in_pixels_radius, scale, out_shift, out_size ); + + if ( out_first_pixel > out_last_pixel ) + continue; + + // clamp or exit if we are using polyphase filtering, and the limit is up + if ( polyphase ) + { + // when polyphase, you only have to do coeffs up to the numerator count + if ( out_first_pixel == numerator ) + break; + + // don't do any extra work, clamp last pixel at numerator too + if ( out_last_pixel >= numerator ) + out_last_pixel = numerator - 1; + } + + for (i = 0; i <= out_last_pixel - out_first_pixel; i++) + { + float out_pixel_center = (float)(i + out_first_pixel) + 0.5f; + float x = out_pixel_center - out_center_of_in; + float coeff = kernel(x, scale, user_data) * scale; + + // kill the coeff if it's too small (avoid denormals) + if ( ( ( coeff < stbir__small_float ) && ( coeff > -stbir__small_float ) ) ) + coeff = 0.0f; + + { + int out = i + out_first_pixel; + float * coeffs = coefficient_group + out * coefficient_width; + stbir__contributors * contribs = contributors + out; + + // is this the first time this output pixel has been seen? Init it. 
+ if ( out > first_out_inited ) + { + STBIR_ASSERT( out == ( first_out_inited + 1 ) ); // ensure we have only advanced one at time + first_out_inited = out; + contribs->n0 = in_pixel; + contribs->n1 = in_pixel; + coeffs[0] = coeff; + } + else + { + // insert on end (always in order) + if ( coeffs[0] == 0.0f ) // if the first coefficent is zero, then zap it for this coeffs + { + STBIR_ASSERT( ( in_pixel - contribs->n0 ) == 1 ); // ensure that when we zap, we're at the 2nd pos + contribs->n0 = in_pixel; + } + contribs->n1 = in_pixel; + STBIR_ASSERT( ( in_pixel - contribs->n0 ) < coefficient_width ); + coeffs[in_pixel - contribs->n0] = coeff; + } + } + } + } +} + +#ifdef STBIR_RENORMALIZE_IN_FLOAT +#define STBIR_RENORM_TYPE float +#else +#define STBIR_RENORM_TYPE double +#endif + +static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter_extent_info* filter_info, stbir__scale_info * scale_info, int num_contributors, stbir__contributors* contributors, float * coefficient_group, int coefficient_width ) +{ + int input_size = scale_info->input_full_size; + int input_last_n1 = input_size - 1; + int n, end; + int lowest = 0x7fffffff; + int highest = -0x7fffffff; + int widest = -1; + int numerator = scale_info->scale_numerator; + int denominator = scale_info->scale_denominator; + int polyphase = ( ( scale_info->scale_is_rational ) && ( numerator < num_contributors ) ); + float * coeffs; + stbir__contributors * contribs; + + // weight all the coeffs for each sample + coeffs = coefficient_group; + contribs = contributors; + end = num_contributors; if ( polyphase ) end = numerator; + for (n = 0; n < end; n++) + { + int i; + STBIR_RENORM_TYPE filter_scale, total_filter = 0; + int e; + + // add all contribs + e = contribs->n1 - contribs->n0; + for( i = 0 ; i <= e ; i++ ) + { + total_filter += (STBIR_RENORM_TYPE) coeffs[i]; + STBIR_ASSERT( ( coeffs[i] >= -2.0f ) && ( coeffs[i] <= 2.0f ) ); // check for wonky weights + } + + // rescale + if ( ( total_filter < 
stbir__small_float ) && ( total_filter > -stbir__small_float ) ) + { + // all coeffs are extremely small, just zero it + contribs->n1 = contribs->n0; + coeffs[0] = 0.0f; + } + else + { + // if the total isn't 1.0, rescale everything + if ( ( total_filter < (1.0f-stbir__small_float) ) || ( total_filter > (1.0f+stbir__small_float) ) ) + { + filter_scale = ((STBIR_RENORM_TYPE)1.0) / total_filter; + + // scale them all + for (i = 0; i <= e; i++) + coeffs[i] = (float) ( coeffs[i] * filter_scale ); + } + } + ++contribs; + coeffs += coefficient_width; + } + + // if we have a rational for the scale, we can exploit the polyphaseness to not calculate + // most of the coefficients, so we copy them here + if ( polyphase ) + { + stbir__contributors * prev_contribs = contributors; + stbir__contributors * cur_contribs = contributors + numerator; + + for( n = numerator ; n < num_contributors ; n++ ) + { + cur_contribs->n0 = prev_contribs->n0 + denominator; + cur_contribs->n1 = prev_contribs->n1 + denominator; + ++cur_contribs; + ++prev_contribs; + } + stbir_overlapping_memcpy( coefficient_group + numerator * coefficient_width, coefficient_group, ( num_contributors - numerator ) * coefficient_width * sizeof( coeffs[ 0 ] ) ); + } + + coeffs = coefficient_group; + contribs = contributors; + + for (n = 0; n < num_contributors; n++) + { + int i; + + // in zero edge mode, just remove out of bounds contribs completely (since their weights are accounted for now) + if ( edge == STBIR_EDGE_ZERO ) + { + // shrink the right side if necessary + if ( contribs->n1 > input_last_n1 ) + contribs->n1 = input_last_n1; + + // shrink the left side + if ( contribs->n0 < 0 ) + { + int j, left, skips = 0; + + skips = -contribs->n0; + contribs->n0 = 0; + + // now move down the weights + left = contribs->n1 - contribs->n0 + 1; + if ( left > 0 ) + { + for( j = 0 ; j < left ; j++ ) + coeffs[ j ] = coeffs[ j + skips ]; + } + } + } + else if ( ( edge == STBIR_EDGE_CLAMP ) || ( edge == STBIR_EDGE_REFLECT ) ) + { 
+ // for clamp and reflect, calculate the true inbounds position (based on edge type) and just add that to the existing weight + + // right hand side first + if ( contribs->n1 > input_last_n1 ) + { + int start = contribs->n0; + int endi = contribs->n1; + contribs->n1 = input_last_n1; + for( i = input_size; i <= endi; i++ ) + stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), coeffs[i-start], coefficient_width ); + } + + // now check left hand edge + if ( contribs->n0 < 0 ) + { + int save_n0; + float save_n0_coeff; + float * c = coeffs - ( contribs->n0 + 1 ); + + // reinsert the coeffs with it reflected or clamped (insert accumulates, if the coeffs exist) + for( i = -1 ; i > contribs->n0 ; i-- ) + stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), *c--, coefficient_width ); + save_n0 = contribs->n0; + save_n0_coeff = c[0]; // save it, since we didn't do the final one (i==n0), because there might be too many coeffs to hold (before we resize)! 
+ + // now slide all the coeffs down (since we have accumulated them in the positive contribs) and reset the first contrib + contribs->n0 = 0; + for(i = 0 ; i <= contribs->n1 ; i++ ) + coeffs[i] = coeffs[i-save_n0]; + + // now that we have shrunk down the contribs, we insert the first one safely + stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( save_n0, input_size ), save_n0_coeff, coefficient_width ); + } + } + + if ( contribs->n0 <= contribs->n1 ) + { + int diff = contribs->n1 - contribs->n0 + 1; + while ( diff && ( coeffs[ diff-1 ] == 0.0f ) ) + --diff; + + contribs->n1 = contribs->n0 + diff - 1; + + if ( contribs->n0 <= contribs->n1 ) + { + if ( contribs->n0 < lowest ) + lowest = contribs->n0; + if ( contribs->n1 > highest ) + highest = contribs->n1; + if ( diff > widest ) + widest = diff; + } + + // re-zero out unused coefficients (if any) + for( i = diff ; i < coefficient_width ; i++ ) + coeffs[i] = 0.0f; + } + + ++contribs; + coeffs += coefficient_width; + } + filter_info->lowest = lowest; + filter_info->highest = highest; + filter_info->widest = widest; +} + +#undef STBIR_RENORM_TYPE + +static int stbir__pack_coefficients( int num_contributors, stbir__contributors* contributors, float * coefficents, int coefficient_width, int widest, int row0, int row1 ) +{ + #define STBIR_MOVE_1( dest, src ) { STBIR_NO_UNROLL(dest); ((stbir_uint32*)(dest))[0] = ((stbir_uint32*)(src))[0]; } + #define STBIR_MOVE_2( dest, src ) { STBIR_NO_UNROLL(dest); ((stbir_uint64*)(dest))[0] = ((stbir_uint64*)(src))[0]; } + #ifdef STBIR_SIMD + #define STBIR_MOVE_4( dest, src ) { stbir__simdf t; STBIR_NO_UNROLL(dest); stbir__simdf_load( t, src ); stbir__simdf_store( dest, t ); } + #else + #define STBIR_MOVE_4( dest, src ) { STBIR_NO_UNROLL(dest); ((stbir_uint64*)(dest))[0] = ((stbir_uint64*)(src))[0]; ((stbir_uint64*)(dest))[1] = ((stbir_uint64*)(src))[1]; } + #endif + + int row_end = row1 + 1; + STBIR__UNUSED( row0 ); // only used in an assert + + if ( 
coefficient_width != widest ) + { + float * pc = coefficents; + float * coeffs = coefficents; + float * pc_end = coefficents + num_contributors * widest; + switch( widest ) + { + case 1: + STBIR_NO_UNROLL_LOOP_START + do { + STBIR_MOVE_1( pc, coeffs ); + ++pc; + coeffs += coefficient_width; + } while ( pc < pc_end ); + break; + case 2: + STBIR_NO_UNROLL_LOOP_START + do { + STBIR_MOVE_2( pc, coeffs ); + pc += 2; + coeffs += coefficient_width; + } while ( pc < pc_end ); + break; + case 3: + STBIR_NO_UNROLL_LOOP_START + do { + STBIR_MOVE_2( pc, coeffs ); + STBIR_MOVE_1( pc+2, coeffs+2 ); + pc += 3; + coeffs += coefficient_width; + } while ( pc < pc_end ); + break; + case 4: + STBIR_NO_UNROLL_LOOP_START + do { + STBIR_MOVE_4( pc, coeffs ); + pc += 4; + coeffs += coefficient_width; + } while ( pc < pc_end ); + break; + case 5: + STBIR_NO_UNROLL_LOOP_START + do { + STBIR_MOVE_4( pc, coeffs ); + STBIR_MOVE_1( pc+4, coeffs+4 ); + pc += 5; + coeffs += coefficient_width; + } while ( pc < pc_end ); + break; + case 6: + STBIR_NO_UNROLL_LOOP_START + do { + STBIR_MOVE_4( pc, coeffs ); + STBIR_MOVE_2( pc+4, coeffs+4 ); + pc += 6; + coeffs += coefficient_width; + } while ( pc < pc_end ); + break; + case 7: + STBIR_NO_UNROLL_LOOP_START + do { + STBIR_MOVE_4( pc, coeffs ); + STBIR_MOVE_2( pc+4, coeffs+4 ); + STBIR_MOVE_1( pc+6, coeffs+6 ); + pc += 7; + coeffs += coefficient_width; + } while ( pc < pc_end ); + break; + case 8: + STBIR_NO_UNROLL_LOOP_START + do { + STBIR_MOVE_4( pc, coeffs ); + STBIR_MOVE_4( pc+4, coeffs+4 ); + pc += 8; + coeffs += coefficient_width; + } while ( pc < pc_end ); + break; + case 9: + STBIR_NO_UNROLL_LOOP_START + do { + STBIR_MOVE_4( pc, coeffs ); + STBIR_MOVE_4( pc+4, coeffs+4 ); + STBIR_MOVE_1( pc+8, coeffs+8 ); + pc += 9; + coeffs += coefficient_width; + } while ( pc < pc_end ); + break; + case 10: + STBIR_NO_UNROLL_LOOP_START + do { + STBIR_MOVE_4( pc, coeffs ); + STBIR_MOVE_4( pc+4, coeffs+4 ); + STBIR_MOVE_2( pc+8, coeffs+8 ); + pc += 10; + coeffs 
+= coefficient_width; + } while ( pc < pc_end ); + break; + case 11: + STBIR_NO_UNROLL_LOOP_START + do { + STBIR_MOVE_4( pc, coeffs ); + STBIR_MOVE_4( pc+4, coeffs+4 ); + STBIR_MOVE_2( pc+8, coeffs+8 ); + STBIR_MOVE_1( pc+10, coeffs+10 ); + pc += 11; + coeffs += coefficient_width; + } while ( pc < pc_end ); + break; + case 12: + STBIR_NO_UNROLL_LOOP_START + do { + STBIR_MOVE_4( pc, coeffs ); + STBIR_MOVE_4( pc+4, coeffs+4 ); + STBIR_MOVE_4( pc+8, coeffs+8 ); + pc += 12; + coeffs += coefficient_width; + } while ( pc < pc_end ); + break; + default: + STBIR_NO_UNROLL_LOOP_START + do { + float * copy_end = pc + widest - 4; + float * c = coeffs; + do { + STBIR_NO_UNROLL( pc ); + STBIR_MOVE_4( pc, c ); + pc += 4; + c += 4; + } while ( pc <= copy_end ); + copy_end += 4; + STBIR_NO_UNROLL_LOOP_START + while ( pc < copy_end ) + { + STBIR_MOVE_1( pc, c ); + ++pc; ++c; + } + coeffs += coefficient_width; + } while ( pc < pc_end ); + break; + } + } + + // some horizontal routines read one float off the end (which is then masked off), so put in a sentinal so we don't read an snan or denormal + coefficents[ widest * num_contributors ] = 8888.0f; + + // the minimum we might read for unrolled filters widths is 12. So, we need to + // make sure we never read outside the decode buffer, by possibly moving + // the sample area back into the scanline, and putting zeros weights first. + // we start on the right edge and check until we're well past the possible + // clip area (2*widest). + { + stbir__contributors * contribs = contributors + num_contributors - 1; + float * coeffs = coefficents + widest * ( num_contributors - 1 ); + + // go until no chance of clipping (this is usually less than 8 lops) + while ( ( contribs >= contributors ) && ( ( contribs->n0 + widest*2 ) >= row_end ) ) + { + // might we clip?? 
+ if ( ( contribs->n0 + widest ) > row_end ) + { + int stop_range = widest; + + // if range is larger than 12, it will be handled by generic loops that can terminate on the exact length + // of this contrib n1, instead of a fixed widest amount - so calculate this + if ( widest > 12 ) + { + int mod; + + // how far will be read in the n_coeff loop (which depends on the widest count mod4); + mod = widest & 3; + stop_range = ( ( ( contribs->n1 - contribs->n0 + 1 ) - mod + 3 ) & ~3 ) + mod; + + // the n_coeff loops do a minimum amount of coeffs, so factor that in! + if ( stop_range < ( 8 + mod ) ) stop_range = 8 + mod; + } + + // now see if we still clip with the refined range + if ( ( contribs->n0 + stop_range ) > row_end ) + { + int new_n0 = row_end - stop_range; + int num = contribs->n1 - contribs->n0 + 1; + int backup = contribs->n0 - new_n0; + float * from_co = coeffs + num - 1; + float * to_co = from_co + backup; + + STBIR_ASSERT( ( new_n0 >= row0 ) && ( new_n0 < contribs->n0 ) ); + + // move the coeffs over + while( num ) + { + *to_co-- = *from_co--; + --num; + } + // zero new positions + while ( to_co >= coeffs ) + *to_co-- = 0; + // set new start point + contribs->n0 = new_n0; + if ( widest > 12 ) + { + int mod; + + // how far will be read in the n_coeff loop (which depends on the widest count mod4); + mod = widest & 3; + stop_range = ( ( ( contribs->n1 - contribs->n0 + 1 ) - mod + 3 ) & ~3 ) + mod; + + // the n_coeff loops do a minimum amount of coeffs, so factor that in! 
+ if ( stop_range < ( 8 + mod ) ) stop_range = 8 + mod; + } + } + } + --contribs; + coeffs -= widest; + } + } + + return widest; + #undef STBIR_MOVE_1 + #undef STBIR_MOVE_2 + #undef STBIR_MOVE_4 +} + +static void stbir__calculate_filters( stbir__sampler * samp, stbir__sampler * other_axis_for_pivot, void * user_data STBIR_ONLY_PROFILE_BUILD_GET_INFO ) +{ + int n; + float scale = samp->scale_info.scale; + stbir__kernel_callback * kernel = samp->filter_kernel; + stbir__support_callback * support = samp->filter_support; + float inv_scale = samp->scale_info.inv_scale; + int input_full_size = samp->scale_info.input_full_size; + int gather_num_contributors = samp->num_contributors; + stbir__contributors* gather_contributors = samp->contributors; + float * gather_coeffs = samp->coefficients; + int gather_coefficient_width = samp->coefficient_width; + + switch ( samp->is_gather ) + { + case 1: // gather upsample + { + float out_pixels_radius = support(inv_scale,user_data) * scale; + + stbir__calculate_coefficients_for_gather_upsample( out_pixels_radius, kernel, &samp->scale_info, gather_num_contributors, gather_contributors, gather_coeffs, gather_coefficient_width, samp->edge, user_data ); + + STBIR_PROFILE_BUILD_START( cleanup ); + stbir__cleanup_gathered_coefficients( samp->edge, &samp->extent_info, &samp->scale_info, gather_num_contributors, gather_contributors, gather_coeffs, gather_coefficient_width ); + STBIR_PROFILE_BUILD_END( cleanup ); + } + break; + + case 0: // scatter downsample (only on vertical) + case 2: // gather downsample + { + float in_pixels_radius = support(scale,user_data) * inv_scale; + int filter_pixel_margin = samp->filter_pixel_margin; + int input_end = input_full_size + filter_pixel_margin; + + // if this is a scatter, we do a downsample gather to get the coeffs, and then pivot after + if ( !samp->is_gather ) + { + // check if we are using the same gather downsample on the horizontal as this vertical, + // if so, then we don't have to generate 
them, we can just pivot from the horizontal. + if ( other_axis_for_pivot ) + { + gather_contributors = other_axis_for_pivot->contributors; + gather_coeffs = other_axis_for_pivot->coefficients; + gather_coefficient_width = other_axis_for_pivot->coefficient_width; + gather_num_contributors = other_axis_for_pivot->num_contributors; + samp->extent_info.lowest = other_axis_for_pivot->extent_info.lowest; + samp->extent_info.highest = other_axis_for_pivot->extent_info.highest; + samp->extent_info.widest = other_axis_for_pivot->extent_info.widest; + goto jump_right_to_pivot; + } + + gather_contributors = samp->gather_prescatter_contributors; + gather_coeffs = samp->gather_prescatter_coefficients; + gather_coefficient_width = samp->gather_prescatter_coefficient_width; + gather_num_contributors = samp->gather_prescatter_num_contributors; + } + + stbir__calculate_coefficients_for_gather_downsample( -filter_pixel_margin, input_end, in_pixels_radius, kernel, &samp->scale_info, gather_coefficient_width, gather_num_contributors, gather_contributors, gather_coeffs, user_data ); + + STBIR_PROFILE_BUILD_START( cleanup ); + stbir__cleanup_gathered_coefficients( samp->edge, &samp->extent_info, &samp->scale_info, gather_num_contributors, gather_contributors, gather_coeffs, gather_coefficient_width ); + STBIR_PROFILE_BUILD_END( cleanup ); + + if ( !samp->is_gather ) + { + // if this is a scatter (vertical only), then we need to pivot the coeffs + stbir__contributors * scatter_contributors; + int highest_set; + + jump_right_to_pivot: + + STBIR_PROFILE_BUILD_START( pivot ); + + highest_set = (-filter_pixel_margin) - 1; + for (n = 0; n < gather_num_contributors; n++) + { + int k; + int gn0 = gather_contributors->n0, gn1 = gather_contributors->n1; + int scatter_coefficient_width = samp->coefficient_width; + float * scatter_coeffs = samp->coefficients + ( gn0 + filter_pixel_margin ) * scatter_coefficient_width; + float * g_coeffs = gather_coeffs; + scatter_contributors = samp->contributors + 
( gn0 + filter_pixel_margin ); + + for (k = gn0 ; k <= gn1 ; k++ ) + { + float gc = *g_coeffs++; + + // skip zero and denormals - must skip zeros to avoid adding coeffs beyond scatter_coefficient_width + // (which happens when pivoting from horizontal, which might have dummy zeros) + if ( ( ( gc >= stbir__small_float ) || ( gc <= -stbir__small_float ) ) ) + { + if ( ( k > highest_set ) || ( scatter_contributors->n0 > scatter_contributors->n1 ) ) + { + { + // if we are skipping over several contributors, we need to clear the skipped ones + stbir__contributors * clear_contributors = samp->contributors + ( highest_set + filter_pixel_margin + 1); + while ( clear_contributors < scatter_contributors ) + { + clear_contributors->n0 = 0; + clear_contributors->n1 = -1; + ++clear_contributors; + } + } + scatter_contributors->n0 = n; + scatter_contributors->n1 = n; + scatter_coeffs[0] = gc; + highest_set = k; + } + else + { + stbir__insert_coeff( scatter_contributors, scatter_coeffs, n, gc, scatter_coefficient_width ); + } + STBIR_ASSERT( ( scatter_contributors->n1 - scatter_contributors->n0 + 1 ) <= scatter_coefficient_width ); + } + ++scatter_contributors; + scatter_coeffs += scatter_coefficient_width; + } + + ++gather_contributors; + gather_coeffs += gather_coefficient_width; + } + + // now clear any unset contribs + { + stbir__contributors * clear_contributors = samp->contributors + ( highest_set + filter_pixel_margin + 1); + stbir__contributors * end_contributors = samp->contributors + samp->num_contributors; + while ( clear_contributors < end_contributors ) + { + clear_contributors->n0 = 0; + clear_contributors->n1 = -1; + ++clear_contributors; + } + } + + STBIR_PROFILE_BUILD_END( pivot ); + } + } + break; + } +} + + +//======================================================================================================== +// scanline decoders and encoders + +#define stbir__coder_min_num 1 +#define STB_IMAGE_RESIZE_DO_CODERS +#include STBIR__HEADER_FILENAME + +#define 
stbir__decode_suffix BGRA +#define stbir__decode_swizzle +#define stbir__decode_order0 2 +#define stbir__decode_order1 1 +#define stbir__decode_order2 0 +#define stbir__decode_order3 3 +#define stbir__encode_order0 2 +#define stbir__encode_order1 1 +#define stbir__encode_order2 0 +#define stbir__encode_order3 3 +#define stbir__coder_min_num 4 +#define STB_IMAGE_RESIZE_DO_CODERS +#include STBIR__HEADER_FILENAME + +#define stbir__decode_suffix ARGB +#define stbir__decode_swizzle +#define stbir__decode_order0 1 +#define stbir__decode_order1 2 +#define stbir__decode_order2 3 +#define stbir__decode_order3 0 +#define stbir__encode_order0 3 +#define stbir__encode_order1 0 +#define stbir__encode_order2 1 +#define stbir__encode_order3 2 +#define stbir__coder_min_num 4 +#define STB_IMAGE_RESIZE_DO_CODERS +#include STBIR__HEADER_FILENAME + +#define stbir__decode_suffix ABGR +#define stbir__decode_swizzle +#define stbir__decode_order0 3 +#define stbir__decode_order1 2 +#define stbir__decode_order2 1 +#define stbir__decode_order3 0 +#define stbir__encode_order0 3 +#define stbir__encode_order1 2 +#define stbir__encode_order2 1 +#define stbir__encode_order3 0 +#define stbir__coder_min_num 4 +#define STB_IMAGE_RESIZE_DO_CODERS +#include STBIR__HEADER_FILENAME + +#define stbir__decode_suffix AR +#define stbir__decode_swizzle +#define stbir__decode_order0 1 +#define stbir__decode_order1 0 +#define stbir__decode_order2 3 +#define stbir__decode_order3 2 +#define stbir__encode_order0 1 +#define stbir__encode_order1 0 +#define stbir__encode_order2 3 +#define stbir__encode_order3 2 +#define stbir__coder_min_num 2 +#define STB_IMAGE_RESIZE_DO_CODERS +#include STBIR__HEADER_FILENAME + + +// fancy alpha means we expand to keep both premultipied and non-premultiplied color channels +static void stbir__fancy_alpha_weight_4ch( float * out_buffer, int width_times_channels ) +{ + float STBIR_STREAMOUT_PTR(*) out = out_buffer; + float const * end_decode = out_buffer + ( width_times_channels / 4 ) 
* 7; // decode buffer aligned to end of out_buffer + float STBIR_STREAMOUT_PTR(*) decode = (float*)end_decode - width_times_channels; + + // fancy alpha is stored internally as R G B A Rpm Gpm Bpm + + #ifdef STBIR_SIMD + + #ifdef STBIR_SIMD8 + decode += 16; + STBIR_NO_UNROLL_LOOP_START + while ( decode <= end_decode ) + { + stbir__simdf8 d0,d1,a0,a1,p0,p1; + STBIR_NO_UNROLL(decode); + stbir__simdf8_load( d0, decode-16 ); + stbir__simdf8_load( d1, decode-16+8 ); + stbir__simdf8_0123to33333333( a0, d0 ); + stbir__simdf8_0123to33333333( a1, d1 ); + stbir__simdf8_mult( p0, a0, d0 ); + stbir__simdf8_mult( p1, a1, d1 ); + stbir__simdf8_bot4s( a0, d0, p0 ); + stbir__simdf8_bot4s( a1, d1, p1 ); + stbir__simdf8_top4s( d0, d0, p0 ); + stbir__simdf8_top4s( d1, d1, p1 ); + stbir__simdf8_store ( out, a0 ); + stbir__simdf8_store ( out+7, d0 ); + stbir__simdf8_store ( out+14, a1 ); + stbir__simdf8_store ( out+21, d1 ); + decode += 16; + out += 28; + } + decode -= 16; + #else + decode += 8; + STBIR_NO_UNROLL_LOOP_START + while ( decode <= end_decode ) + { + stbir__simdf d0,a0,d1,a1,p0,p1; + STBIR_NO_UNROLL(decode); + stbir__simdf_load( d0, decode-8 ); + stbir__simdf_load( d1, decode-8+4 ); + stbir__simdf_0123to3333( a0, d0 ); + stbir__simdf_0123to3333( a1, d1 ); + stbir__simdf_mult( p0, a0, d0 ); + stbir__simdf_mult( p1, a1, d1 ); + stbir__simdf_store ( out, d0 ); + stbir__simdf_store ( out+4, p0 ); + stbir__simdf_store ( out+7, d1 ); + stbir__simdf_store ( out+7+4, p1 ); + decode += 8; + out += 14; + } + decode -= 8; + #endif + + // might be one last odd pixel + #ifdef STBIR_SIMD8 + STBIR_NO_UNROLL_LOOP_START + while ( decode < end_decode ) + #else + if ( decode < end_decode ) + #endif + { + stbir__simdf d,a,p; + STBIR_NO_UNROLL(decode); + stbir__simdf_load( d, decode ); + stbir__simdf_0123to3333( a, d ); + stbir__simdf_mult( p, a, d ); + stbir__simdf_store ( out, d ); + stbir__simdf_store ( out+4, p ); + decode += 4; + out += 7; + } + + #else + + while( decode < end_decode ) + { 
+ float r = decode[0], g = decode[1], b = decode[2], alpha = decode[3]; + out[0] = r; + out[1] = g; + out[2] = b; + out[3] = alpha; + out[4] = r * alpha; + out[5] = g * alpha; + out[6] = b * alpha; + out += 7; + decode += 4; + } + + #endif +} + +static void stbir__fancy_alpha_weight_2ch( float * out_buffer, int width_times_channels ) +{ + float STBIR_STREAMOUT_PTR(*) out = out_buffer; + float const * end_decode = out_buffer + ( width_times_channels / 2 ) * 3; + float STBIR_STREAMOUT_PTR(*) decode = (float*)end_decode - width_times_channels; + + // for fancy alpha, turns into: [X A Xpm][X A Xpm],etc + + #ifdef STBIR_SIMD + + decode += 8; + if ( decode <= end_decode ) + { + STBIR_NO_UNROLL_LOOP_START + do { + #ifdef STBIR_SIMD8 + stbir__simdf8 d0,a0,p0; + STBIR_NO_UNROLL(decode); + stbir__simdf8_load( d0, decode-8 ); + stbir__simdf8_0123to11331133( p0, d0 ); + stbir__simdf8_0123to00220022( a0, d0 ); + stbir__simdf8_mult( p0, p0, a0 ); + + stbir__simdf_store2( out, stbir__if_simdf8_cast_to_simdf4( d0 ) ); + stbir__simdf_store( out+2, stbir__if_simdf8_cast_to_simdf4( p0 ) ); + stbir__simdf_store2h( out+3, stbir__if_simdf8_cast_to_simdf4( d0 ) ); + + stbir__simdf_store2( out+6, stbir__simdf8_gettop4( d0 ) ); + stbir__simdf_store( out+8, stbir__simdf8_gettop4( p0 ) ); + stbir__simdf_store2h( out+9, stbir__simdf8_gettop4( d0 ) ); + #else + stbir__simdf d0,a0,d1,a1,p0,p1; + STBIR_NO_UNROLL(decode); + stbir__simdf_load( d0, decode-8 ); + stbir__simdf_load( d1, decode-8+4 ); + stbir__simdf_0123to1133( p0, d0 ); + stbir__simdf_0123to1133( p1, d1 ); + stbir__simdf_0123to0022( a0, d0 ); + stbir__simdf_0123to0022( a1, d1 ); + stbir__simdf_mult( p0, p0, a0 ); + stbir__simdf_mult( p1, p1, a1 ); + + stbir__simdf_store2( out, d0 ); + stbir__simdf_store( out+2, p0 ); + stbir__simdf_store2h( out+3, d0 ); + + stbir__simdf_store2( out+6, d1 ); + stbir__simdf_store( out+8, p1 ); + stbir__simdf_store2h( out+9, d1 ); + #endif + decode += 8; + out += 12; + } while ( decode <= end_decode ); 
+ } + decode -= 8; + #endif + + STBIR_SIMD_NO_UNROLL_LOOP_START + while( decode < end_decode ) + { + float x = decode[0], y = decode[1]; + STBIR_SIMD_NO_UNROLL(decode); + out[0] = x; + out[1] = y; + out[2] = x * y; + out += 3; + decode += 2; + } +} + +static void stbir__fancy_alpha_unweight_4ch( float * encode_buffer, int width_times_channels ) +{ + float STBIR_SIMD_STREAMOUT_PTR(*) encode = encode_buffer; + float STBIR_SIMD_STREAMOUT_PTR(*) input = encode_buffer; + float const * end_output = encode_buffer + width_times_channels; + + // fancy RGBA is stored internally as R G B A Rpm Gpm Bpm + + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float alpha = input[3]; +#ifdef STBIR_SIMD + stbir__simdf i,ia; + STBIR_SIMD_NO_UNROLL(encode); + if ( alpha < stbir__small_float ) + { + stbir__simdf_load( i, input ); + stbir__simdf_store( encode, i ); + } + else + { + stbir__simdf_load1frep4( ia, 1.0f / alpha ); + stbir__simdf_load( i, input+4 ); + stbir__simdf_mult( i, i, ia ); + stbir__simdf_store( encode, i ); + encode[3] = alpha; + } +#else + if ( alpha < stbir__small_float ) + { + encode[0] = input[0]; + encode[1] = input[1]; + encode[2] = input[2]; + } + else + { + float ialpha = 1.0f / alpha; + encode[0] = input[4] * ialpha; + encode[1] = input[5] * ialpha; + encode[2] = input[6] * ialpha; + } + encode[3] = alpha; +#endif + + input += 7; + encode += 4; + } while ( encode < end_output ); +} + +// format: [X A Xpm][X A Xpm] etc +static void stbir__fancy_alpha_unweight_2ch( float * encode_buffer, int width_times_channels ) +{ + float STBIR_SIMD_STREAMOUT_PTR(*) encode = encode_buffer; + float STBIR_SIMD_STREAMOUT_PTR(*) input = encode_buffer; + float const * end_output = encode_buffer + width_times_channels; + + do { + float alpha = input[1]; + encode[0] = input[0]; + if ( alpha >= stbir__small_float ) + encode[0] = input[2] / alpha; + encode[1] = alpha; + + input += 3; + encode += 2; + } while ( encode < end_output ); +} + +static void stbir__simple_alpha_weight_4ch( float * 
decode_buffer, int width_times_channels ) +{ + float STBIR_STREAMOUT_PTR(*) decode = decode_buffer; + float const * end_decode = decode_buffer + width_times_channels; + + #ifdef STBIR_SIMD + { + decode += 2 * stbir__simdfX_float_count; + STBIR_NO_UNROLL_LOOP_START + while ( decode <= end_decode ) + { + stbir__simdfX d0,a0,d1,a1; + STBIR_NO_UNROLL(decode); + stbir__simdfX_load( d0, decode-2*stbir__simdfX_float_count ); + stbir__simdfX_load( d1, decode-2*stbir__simdfX_float_count+stbir__simdfX_float_count ); + stbir__simdfX_aaa1( a0, d0, STBIR_onesX ); + stbir__simdfX_aaa1( a1, d1, STBIR_onesX ); + stbir__simdfX_mult( d0, d0, a0 ); + stbir__simdfX_mult( d1, d1, a1 ); + stbir__simdfX_store ( decode-2*stbir__simdfX_float_count, d0 ); + stbir__simdfX_store ( decode-2*stbir__simdfX_float_count+stbir__simdfX_float_count, d1 ); + decode += 2 * stbir__simdfX_float_count; + } + decode -= 2 * stbir__simdfX_float_count; + + // few last pixels remnants + #ifdef STBIR_SIMD8 + STBIR_NO_UNROLL_LOOP_START + while ( decode < end_decode ) + #else + if ( decode < end_decode ) + #endif + { + stbir__simdf d,a; + stbir__simdf_load( d, decode ); + stbir__simdf_aaa1( a, d, STBIR__CONSTF(STBIR_ones) ); + stbir__simdf_mult( d, d, a ); + stbir__simdf_store ( decode, d ); + decode += 4; + } + } + + #else + + while( decode < end_decode ) + { + float alpha = decode[3]; + decode[0] *= alpha; + decode[1] *= alpha; + decode[2] *= alpha; + decode += 4; + } + + #endif +} + +static void stbir__simple_alpha_weight_2ch( float * decode_buffer, int width_times_channels ) +{ + float STBIR_STREAMOUT_PTR(*) decode = decode_buffer; + float const * end_decode = decode_buffer + width_times_channels; + + #ifdef STBIR_SIMD + decode += 2 * stbir__simdfX_float_count; + STBIR_NO_UNROLL_LOOP_START + while ( decode <= end_decode ) + { + stbir__simdfX d0,a0,d1,a1; + STBIR_NO_UNROLL(decode); + stbir__simdfX_load( d0, decode-2*stbir__simdfX_float_count ); + stbir__simdfX_load( d1, 
decode-2*stbir__simdfX_float_count+stbir__simdfX_float_count ); + stbir__simdfX_a1a1( a0, d0, STBIR_onesX ); + stbir__simdfX_a1a1( a1, d1, STBIR_onesX ); + stbir__simdfX_mult( d0, d0, a0 ); + stbir__simdfX_mult( d1, d1, a1 ); + stbir__simdfX_store ( decode-2*stbir__simdfX_float_count, d0 ); + stbir__simdfX_store ( decode-2*stbir__simdfX_float_count+stbir__simdfX_float_count, d1 ); + decode += 2 * stbir__simdfX_float_count; + } + decode -= 2 * stbir__simdfX_float_count; + #endif + + STBIR_SIMD_NO_UNROLL_LOOP_START + while( decode < end_decode ) + { + float alpha = decode[1]; + STBIR_SIMD_NO_UNROLL(decode); + decode[0] *= alpha; + decode += 2; + } +} + +static void stbir__simple_alpha_unweight_4ch( float * encode_buffer, int width_times_channels ) +{ + float STBIR_SIMD_STREAMOUT_PTR(*) encode = encode_buffer; + float const * end_output = encode_buffer + width_times_channels; + + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float alpha = encode[3]; + +#ifdef STBIR_SIMD + stbir__simdf i,ia; + STBIR_SIMD_NO_UNROLL(encode); + if ( alpha >= stbir__small_float ) + { + stbir__simdf_load1frep4( ia, 1.0f / alpha ); + stbir__simdf_load( i, encode ); + stbir__simdf_mult( i, i, ia ); + stbir__simdf_store( encode, i ); + encode[3] = alpha; + } +#else + if ( alpha >= stbir__small_float ) + { + float ialpha = 1.0f / alpha; + encode[0] *= ialpha; + encode[1] *= ialpha; + encode[2] *= ialpha; + } +#endif + encode += 4; + } while ( encode < end_output ); +} + +static void stbir__simple_alpha_unweight_2ch( float * encode_buffer, int width_times_channels ) +{ + float STBIR_SIMD_STREAMOUT_PTR(*) encode = encode_buffer; + float const * end_output = encode_buffer + width_times_channels; + + do { + float alpha = encode[1]; + if ( alpha >= stbir__small_float ) + encode[0] /= alpha; + encode += 2; + } while ( encode < end_output ); +} + + +// only used in RGB->BGR or BGR->RGB +static void stbir__simple_flip_3ch( float * decode_buffer, int width_times_channels ) +{ + float STBIR_STREAMOUT_PTR(*) 
decode = decode_buffer; + float const * end_decode = decode_buffer + width_times_channels; + +#ifdef STBIR_SIMD + #ifdef stbir__simdf_swiz2 // do we have two argument swizzles? + end_decode -= 12; + STBIR_NO_UNROLL_LOOP_START + while( decode <= end_decode ) + { + // on arm64 8 instructions, no overlapping stores + stbir__simdf a,b,c,na,nb; + STBIR_SIMD_NO_UNROLL(decode); + stbir__simdf_load( a, decode ); + stbir__simdf_load( b, decode+4 ); + stbir__simdf_load( c, decode+8 ); + + na = stbir__simdf_swiz2( a, b, 2, 1, 0, 5 ); + b = stbir__simdf_swiz2( a, b, 4, 3, 6, 7 ); + nb = stbir__simdf_swiz2( b, c, 0, 1, 4, 3 ); + c = stbir__simdf_swiz2( b, c, 2, 7, 6, 5 ); + + stbir__simdf_store( decode, na ); + stbir__simdf_store( decode+4, nb ); + stbir__simdf_store( decode+8, c ); + decode += 12; + } + end_decode += 12; + #else + end_decode -= 24; + STBIR_NO_UNROLL_LOOP_START + while( decode <= end_decode ) + { + // 26 instructions on x64 + stbir__simdf a,b,c,d,e,f,g; + float i21, i23; + STBIR_SIMD_NO_UNROLL(decode); + stbir__simdf_load( a, decode ); + stbir__simdf_load( b, decode+3 ); + stbir__simdf_load( c, decode+6 ); + stbir__simdf_load( d, decode+9 ); + stbir__simdf_load( e, decode+12 ); + stbir__simdf_load( f, decode+15 ); + stbir__simdf_load( g, decode+18 ); + + a = stbir__simdf_swiz( a, 2, 1, 0, 3 ); + b = stbir__simdf_swiz( b, 2, 1, 0, 3 ); + c = stbir__simdf_swiz( c, 2, 1, 0, 3 ); + d = stbir__simdf_swiz( d, 2, 1, 0, 3 ); + e = stbir__simdf_swiz( e, 2, 1, 0, 3 ); + f = stbir__simdf_swiz( f, 2, 1, 0, 3 ); + g = stbir__simdf_swiz( g, 2, 1, 0, 3 ); + + // stores overlap, need to be in order, + stbir__simdf_store( decode, a ); + i21 = decode[21]; + stbir__simdf_store( decode+3, b ); + i23 = decode[23]; + stbir__simdf_store( decode+6, c ); + stbir__simdf_store( decode+9, d ); + stbir__simdf_store( decode+12, e ); + stbir__simdf_store( decode+15, f ); + stbir__simdf_store( decode+18, g ); + decode[21] = i23; + decode[23] = i21; + decode += 24; + } + end_decode += 24; + 
#endif +#else + end_decode -= 12; + STBIR_NO_UNROLL_LOOP_START + while( decode <= end_decode ) + { + // 16 instructions + float t0,t1,t2,t3; + STBIR_NO_UNROLL(decode); + t0 = decode[0]; t1 = decode[3]; t2 = decode[6]; t3 = decode[9]; + decode[0] = decode[2]; decode[3] = decode[5]; decode[6] = decode[8]; decode[9] = decode[11]; + decode[2] = t0; decode[5] = t1; decode[8] = t2; decode[11] = t3; + decode += 12; + } + end_decode += 12; +#endif + + STBIR_NO_UNROLL_LOOP_START + while( decode < end_decode ) + { + float t = decode[0]; + STBIR_NO_UNROLL(decode); + decode[0] = decode[2]; + decode[2] = t; + decode += 3; + } +} + + + +static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float * output_buffer STBIR_ONLY_PROFILE_GET_SPLIT_INFO ) +{ + int channels = stbir_info->channels; + int effective_channels = stbir_info->effective_channels; + int input_sample_in_bytes = stbir__type_size[stbir_info->input_type] * channels; + stbir_edge edge_horizontal = stbir_info->horizontal.edge; + stbir_edge edge_vertical = stbir_info->vertical.edge; + int row = stbir__edge_wrap(edge_vertical, n, stbir_info->vertical.scale_info.input_full_size); + const void* input_plane_data = ( (char *) stbir_info->input_data ) + (size_t)row * (size_t) stbir_info->input_stride_bytes; + stbir__span const * spans = stbir_info->scanline_extents.spans; + float * full_decode_buffer = output_buffer - stbir_info->scanline_extents.conservative.n0 * effective_channels; + float * last_decoded = 0; + + // if we are on edge_zero, and we get in here with an out of bounds n, then the calculate filters has failed + STBIR_ASSERT( !(edge_vertical == STBIR_EDGE_ZERO && (n < 0 || n >= stbir_info->vertical.scale_info.input_full_size)) ); + + do + { + float * decode_buffer; + void const * input_data; + float * end_decode; + int width_times_channels; + int width; + + if ( spans->n1 < spans->n0 ) + break; + + width = spans->n1 + 1 - spans->n0; + decode_buffer = full_decode_buffer + spans->n0 * 
effective_channels; + end_decode = full_decode_buffer + ( spans->n1 + 1 ) * effective_channels; + width_times_channels = width * channels; + + // read directly out of input plane by default + input_data = ( (char*)input_plane_data ) + spans->pixel_offset_for_input * input_sample_in_bytes; + + // if we have an input callback, call it to get the input data + if ( stbir_info->in_pixels_cb ) + { + // call the callback with a temp buffer (that they can choose to use or not). the temp is just right aligned memory in the decode_buffer itself + input_data = stbir_info->in_pixels_cb( ( (char*) end_decode ) - ( width * input_sample_in_bytes ) + ( ( stbir_info->input_type != STBIR_TYPE_FLOAT ) ? ( sizeof(float)*STBIR_INPUT_CALLBACK_PADDING ) : 0 ), input_plane_data, width, spans->pixel_offset_for_input, row, stbir_info->user_data ); + } + + STBIR_PROFILE_START( decode ); + // convert the pixels info the float decode_buffer, (we index from end_decode, so that when channels<effective_channels, we are right justified in the buffer) + stbir_info->decode_pixels( (float*)end_decode - width_times_channels, width_times_channels, input_data ); + STBIR_PROFILE_END( decode ); + + if (stbir_info->alpha_weight) + { + STBIR_PROFILE_START( alpha ); + stbir_info->alpha_weight( decode_buffer, width_times_channels ); + STBIR_PROFILE_END( alpha ); + } + + ++spans; + } while ( spans <= ( &stbir_info->scanline_extents.spans[1] ) ); + + // handle the edge_wrap filter (all other types are handled back out at the calculate_filter stage) + // basically the idea here is that if we have the whole scanline in memory, we don't redecode the + // wrapped edge pixels, and instead just memcpy them from the scanline into the edge positions + if ( ( edge_horizontal == STBIR_EDGE_WRAP ) && ( stbir_info->scanline_extents.edge_sizes[0] | stbir_info->scanline_extents.edge_sizes[1] ) ) + { + // this code only runs if we're in edge_wrap, and we're doing the entire scanline + int e, start_x[2]; + int input_full_size = stbir_info->horizontal.scale_info.input_full_size; + + start_x[0] =
-stbir_info->scanline_extents.edge_sizes[0]; // left edge start x + start_x[1] = input_full_size; // right edge + + for( e = 0; e < 2 ; e++ ) + { + // do each margin + int margin = stbir_info->scanline_extents.edge_sizes[e]; + if ( margin ) + { + int x = start_x[e]; + float * marg = full_decode_buffer + x * effective_channels; + float const * src = full_decode_buffer + stbir__edge_wrap(edge_horizontal, x, input_full_size) * effective_channels; + STBIR_MEMCPY( marg, src, margin * effective_channels * sizeof(float) ); + if ( e == 1 ) last_decoded = marg + margin * effective_channels; + } + } + } + + // some of the horizontal gathers read one float off the edge (which is masked out), but we force a zero here to make sure no NaNs leak in + // (we can't pre-zero it, because the input callback can use that area as padding) + last_decoded[0] = 0.0f; + + // we clear this extra float, because the final output pixel filter kernel might have used one less coeff than the max filter width + // when this happens, we do read that pixel from the input, so it too could be Nan, so just zero an extra one. 
+ // this fits because each scanline is padded by three floats (STBIR_INPUT_CALLBACK_PADDING) + last_decoded[1] = 0.0f; +} + + +//================= +// Do 1 channel horizontal routines + +#ifdef STBIR_SIMD + +#define stbir__1_coeff_only() \ + stbir__simdf tot,c; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load1( c, hc ); \ + stbir__simdf_mult1_mem( tot, c, decode ); + +#define stbir__2_coeff_only() \ + stbir__simdf tot,c,d; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load2z( c, hc ); \ + stbir__simdf_load2( d, decode ); \ + stbir__simdf_mult( tot, c, d ); \ + stbir__simdf_0123to1230( c, tot ); \ + stbir__simdf_add1( tot, tot, c ); + +#define stbir__3_coeff_only() \ + stbir__simdf tot,c,t; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( c, hc ); \ + stbir__simdf_mult_mem( tot, c, decode ); \ + stbir__simdf_0123to1230( c, tot ); \ + stbir__simdf_0123to2301( t, tot ); \ + stbir__simdf_add1( tot, tot, c ); \ + stbir__simdf_add1( tot, tot, t ); + +#define stbir__store_output_tiny() \ + stbir__simdf_store1( output, tot ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 1; + +#define stbir__4_coeff_start() \ + stbir__simdf tot,c; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( c, hc ); \ + stbir__simdf_mult_mem( tot, c, decode ); \ + +#define stbir__4_coeff_continue_from_4( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( c, hc + (ofs) ); \ + stbir__simdf_madd_mem( tot, tot, c, decode+(ofs) ); + +#define stbir__1_coeff_remnant( ofs ) \ + { stbir__simdf d; \ + stbir__simdf_load1z( c, hc + (ofs) ); \ + stbir__simdf_load1( d, decode + (ofs) ); \ + stbir__simdf_madd( tot, tot, d, c ); } + +#define stbir__2_coeff_remnant( ofs ) \ + { stbir__simdf d; \ + stbir__simdf_load2z( c, hc+(ofs) ); \ + stbir__simdf_load2( d, decode+(ofs) ); \ + stbir__simdf_madd( tot, tot, d, c ); } + +#define stbir__3_coeff_setup() \ + stbir__simdf mask; \ + stbir__simdf_load( mask, STBIR_mask + 3 ); + +#define 
stbir__3_coeff_remnant( ofs ) \ + stbir__simdf_load( c, hc+(ofs) ); \ + stbir__simdf_and( c, c, mask ); \ + stbir__simdf_madd_mem( tot, tot, c, decode+(ofs) ); + +#define stbir__store_output() \ + stbir__simdf_0123to2301( c, tot ); \ + stbir__simdf_add( tot, tot, c ); \ + stbir__simdf_0123to1230( c, tot ); \ + stbir__simdf_add1( tot, tot, c ); \ + stbir__simdf_store1( output, tot ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 1; + +#else + +#define stbir__1_coeff_only() \ + float tot; \ + tot = decode[0]*hc[0]; + +#define stbir__2_coeff_only() \ + float tot; \ + tot = decode[0] * hc[0]; \ + tot += decode[1] * hc[1]; + +#define stbir__3_coeff_only() \ + float tot; \ + tot = decode[0] * hc[0]; \ + tot += decode[1] * hc[1]; \ + tot += decode[2] * hc[2]; + +#define stbir__store_output_tiny() \ + output[0] = tot; \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 1; + +#define stbir__4_coeff_start() \ + float tot0,tot1,tot2,tot3; \ + tot0 = decode[0] * hc[0]; \ + tot1 = decode[1] * hc[1]; \ + tot2 = decode[2] * hc[2]; \ + tot3 = decode[3] * hc[3]; + +#define stbir__4_coeff_continue_from_4( ofs ) \ + tot0 += decode[0+(ofs)] * hc[0+(ofs)]; \ + tot1 += decode[1+(ofs)] * hc[1+(ofs)]; \ + tot2 += decode[2+(ofs)] * hc[2+(ofs)]; \ + tot3 += decode[3+(ofs)] * hc[3+(ofs)]; + +#define stbir__1_coeff_remnant( ofs ) \ + tot0 += decode[0+(ofs)] * hc[0+(ofs)]; + +#define stbir__2_coeff_remnant( ofs ) \ + tot0 += decode[0+(ofs)] * hc[0+(ofs)]; \ + tot1 += decode[1+(ofs)] * hc[1+(ofs)]; \ + +#define stbir__3_coeff_remnant( ofs ) \ + tot0 += decode[0+(ofs)] * hc[0+(ofs)]; \ + tot1 += decode[1+(ofs)] * hc[1+(ofs)]; \ + tot2 += decode[2+(ofs)] * hc[2+(ofs)]; + +#define stbir__store_output() \ + output[0] = (tot0+tot2)+(tot1+tot3); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 1; + +#endif + +#define STBIR__horizontal_channels 1 +#define 
STB_IMAGE_RESIZE_DO_HORIZONTALS +#include STBIR__HEADER_FILENAME + + +//================= +// Do 2 channel horizontal routines + +#ifdef STBIR_SIMD + +#define stbir__1_coeff_only() \ + stbir__simdf tot,c,d; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load1z( c, hc ); \ + stbir__simdf_0123to0011( c, c ); \ + stbir__simdf_load2( d, decode ); \ + stbir__simdf_mult( tot, d, c ); + +#define stbir__2_coeff_only() \ + stbir__simdf tot,c; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load2( c, hc ); \ + stbir__simdf_0123to0011( c, c ); \ + stbir__simdf_mult_mem( tot, c, decode ); + +#define stbir__3_coeff_only() \ + stbir__simdf tot,c,cs,d; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc ); \ + stbir__simdf_0123to0011( c, cs ); \ + stbir__simdf_mult_mem( tot, c, decode ); \ + stbir__simdf_0123to2222( c, cs ); \ + stbir__simdf_load2z( d, decode+4 ); \ + stbir__simdf_madd( tot, tot, d, c ); + +#define stbir__store_output_tiny() \ + stbir__simdf_0123to2301( c, tot ); \ + stbir__simdf_add( tot, tot, c ); \ + stbir__simdf_store2( output, tot ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 2; + +#ifdef STBIR_SIMD8 + +#define stbir__4_coeff_start() \ + stbir__simdf8 tot0,c,cs; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load4b( cs, hc ); \ + stbir__simdf8_0123to00112233( c, cs ); \ + stbir__simdf8_mult_mem( tot0, c, decode ); + +#define stbir__4_coeff_continue_from_4( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load4b( cs, hc + (ofs) ); \ + stbir__simdf8_0123to00112233( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*2 ); + +#define stbir__1_coeff_remnant( ofs ) \ + { stbir__simdf t,d; \ + stbir__simdf_load1z( t, hc + (ofs) ); \ + stbir__simdf_load2( d, decode + (ofs) * 2 ); \ + stbir__simdf_0123to0011( t, t ); \ + stbir__simdf_mult( t, t, d ); \ + stbir__simdf8_add4( tot0, tot0, t ); } + +#define stbir__2_coeff_remnant( ofs ) \ + { stbir__simdf t; \ + 
stbir__simdf_load2( t, hc + (ofs) ); \ + stbir__simdf_0123to0011( t, t ); \ + stbir__simdf_mult_mem( t, t, decode+(ofs)*2 ); \ + stbir__simdf8_add4( tot0, tot0, t ); } + +#define stbir__3_coeff_remnant( ofs ) \ + { stbir__simdf8 d; \ + stbir__simdf8_load4b( cs, hc + (ofs) ); \ + stbir__simdf8_0123to00112233( c, cs ); \ + stbir__simdf8_load6z( d, decode+(ofs)*2 ); \ + stbir__simdf8_madd( tot0, tot0, c, d ); } + +#define stbir__store_output() \ + { stbir__simdf t,d; \ + stbir__simdf8_add4halves( t, stbir__if_simdf8_cast_to_simdf4(tot0), tot0 ); \ + stbir__simdf_0123to2301( d, t ); \ + stbir__simdf_add( t, t, d ); \ + stbir__simdf_store2( output, t ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 2; } + +#else + +#define stbir__4_coeff_start() \ + stbir__simdf tot0,tot1,c,cs; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc ); \ + stbir__simdf_0123to0011( c, cs ); \ + stbir__simdf_mult_mem( tot0, c, decode ); \ + stbir__simdf_0123to2233( c, cs ); \ + stbir__simdf_mult_mem( tot1, c, decode+4 ); + +#define stbir__4_coeff_continue_from_4( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc + (ofs) ); \ + stbir__simdf_0123to0011( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*2 ); \ + stbir__simdf_0123to2233( c, cs ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*2+4 ); + +#define stbir__1_coeff_remnant( ofs ) \ + { stbir__simdf d; \ + stbir__simdf_load1z( cs, hc + (ofs) ); \ + stbir__simdf_0123to0011( c, cs ); \ + stbir__simdf_load2( d, decode + (ofs) * 2 ); \ + stbir__simdf_madd( tot0, tot0, d, c ); } + +#define stbir__2_coeff_remnant( ofs ) \ + stbir__simdf_load2( cs, hc + (ofs) ); \ + stbir__simdf_0123to0011( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*2 ); + +#define stbir__3_coeff_remnant( ofs ) \ + { stbir__simdf d; \ + stbir__simdf_load( cs, hc + (ofs) ); \ + stbir__simdf_0123to0011( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, 
decode+(ofs)*2 ); \ + stbir__simdf_0123to2222( c, cs ); \ + stbir__simdf_load2z( d, decode + (ofs) * 2 + 4 ); \ + stbir__simdf_madd( tot1, tot1, d, c ); } + +#define stbir__store_output() \ + stbir__simdf_add( tot0, tot0, tot1 ); \ + stbir__simdf_0123to2301( c, tot0 ); \ + stbir__simdf_add( tot0, tot0, c ); \ + stbir__simdf_store2( output, tot0 ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 2; + +#endif + +#else + +#define stbir__1_coeff_only() \ + float tota,totb,c; \ + c = hc[0]; \ + tota = decode[0]*c; \ + totb = decode[1]*c; + +#define stbir__2_coeff_only() \ + float tota,totb,c; \ + c = hc[0]; \ + tota = decode[0]*c; \ + totb = decode[1]*c; \ + c = hc[1]; \ + tota += decode[2]*c; \ + totb += decode[3]*c; + +// this weird order of add matches the simd +#define stbir__3_coeff_only() \ + float tota,totb,c; \ + c = hc[0]; \ + tota = decode[0]*c; \ + totb = decode[1]*c; \ + c = hc[2]; \ + tota += decode[4]*c; \ + totb += decode[5]*c; \ + c = hc[1]; \ + tota += decode[2]*c; \ + totb += decode[3]*c; + +#define stbir__store_output_tiny() \ + output[0] = tota; \ + output[1] = totb; \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 2; + +#define stbir__4_coeff_start() \ + float tota0,tota1,tota2,tota3,totb0,totb1,totb2,totb3,c; \ + c = hc[0]; \ + tota0 = decode[0]*c; \ + totb0 = decode[1]*c; \ + c = hc[1]; \ + tota1 = decode[2]*c; \ + totb1 = decode[3]*c; \ + c = hc[2]; \ + tota2 = decode[4]*c; \ + totb2 = decode[5]*c; \ + c = hc[3]; \ + tota3 = decode[6]*c; \ + totb3 = decode[7]*c; + +#define stbir__4_coeff_continue_from_4( ofs ) \ + c = hc[0+(ofs)]; \ + tota0 += decode[0+(ofs)*2]*c; \ + totb0 += decode[1+(ofs)*2]*c; \ + c = hc[1+(ofs)]; \ + tota1 += decode[2+(ofs)*2]*c; \ + totb1 += decode[3+(ofs)*2]*c; \ + c = hc[2+(ofs)]; \ + tota2 += decode[4+(ofs)*2]*c; \ + totb2 += decode[5+(ofs)*2]*c; \ + c = hc[3+(ofs)]; \ + tota3 += decode[6+(ofs)*2]*c; \ + totb3 += 
decode[7+(ofs)*2]*c; + +#define stbir__1_coeff_remnant( ofs ) \ + c = hc[0+(ofs)]; \ + tota0 += decode[0+(ofs)*2] * c; \ + totb0 += decode[1+(ofs)*2] * c; + +#define stbir__2_coeff_remnant( ofs ) \ + c = hc[0+(ofs)]; \ + tota0 += decode[0+(ofs)*2] * c; \ + totb0 += decode[1+(ofs)*2] * c; \ + c = hc[1+(ofs)]; \ + tota1 += decode[2+(ofs)*2] * c; \ + totb1 += decode[3+(ofs)*2] * c; + +#define stbir__3_coeff_remnant( ofs ) \ + c = hc[0+(ofs)]; \ + tota0 += decode[0+(ofs)*2] * c; \ + totb0 += decode[1+(ofs)*2] * c; \ + c = hc[1+(ofs)]; \ + tota1 += decode[2+(ofs)*2] * c; \ + totb1 += decode[3+(ofs)*2] * c; \ + c = hc[2+(ofs)]; \ + tota2 += decode[4+(ofs)*2] * c; \ + totb2 += decode[5+(ofs)*2] * c; + +#define stbir__store_output() \ + output[0] = (tota0+tota2)+(tota1+tota3); \ + output[1] = (totb0+totb2)+(totb1+totb3); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 2; + +#endif + +#define STBIR__horizontal_channels 2 +#define STB_IMAGE_RESIZE_DO_HORIZONTALS +#include STBIR__HEADER_FILENAME + + +//================= +// Do 3 channel horizontal routines + +#ifdef STBIR_SIMD + +#define stbir__1_coeff_only() \ + stbir__simdf tot,c,d; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load1z( c, hc ); \ + stbir__simdf_0123to0001( c, c ); \ + stbir__simdf_load( d, decode ); \ + stbir__simdf_mult( tot, d, c ); + +#define stbir__2_coeff_only() \ + stbir__simdf tot,c,cs,d; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load2( cs, hc ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_load( d, decode ); \ + stbir__simdf_mult( tot, d, c ); \ + stbir__simdf_0123to1111( c, cs ); \ + stbir__simdf_load( d, decode+3 ); \ + stbir__simdf_madd( tot, tot, d, c ); + +#define stbir__3_coeff_only() \ + stbir__simdf tot,c,d,cs; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_load( d, decode ); \ + stbir__simdf_mult( tot, d, c ); \ + stbir__simdf_0123to1111( c, cs ); 
\ + stbir__simdf_load( d, decode+3 ); \ + stbir__simdf_madd( tot, tot, d, c ); \ + stbir__simdf_0123to2222( c, cs ); \ + stbir__simdf_load( d, decode+6 ); \ + stbir__simdf_madd( tot, tot, d, c ); + +#define stbir__store_output_tiny() \ + stbir__simdf_store2( output, tot ); \ + stbir__simdf_0123to2301( tot, tot ); \ + stbir__simdf_store1( output+2, tot ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 3; + +#ifdef STBIR_SIMD8 + +// we're loading from the XXXYYY decode by -1 to get the XXXYYY into different halves of the AVX reg fyi +#define stbir__4_coeff_start() \ + stbir__simdf8 tot0,tot1,c,cs; stbir__simdf t; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load4b( cs, hc ); \ + stbir__simdf8_0123to00001111( c, cs ); \ + stbir__simdf8_mult_mem( tot0, c, decode - 1 ); \ + stbir__simdf8_0123to22223333( c, cs ); \ + stbir__simdf8_mult_mem( tot1, c, decode+6 - 1 ); + +#define stbir__4_coeff_continue_from_4( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load4b( cs, hc + (ofs) ); \ + stbir__simdf8_0123to00001111( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*3 - 1 ); \ + stbir__simdf8_0123to22223333( c, cs ); \ + stbir__simdf8_madd_mem( tot1, tot1, c, decode+(ofs)*3 + 6 - 1 ); + +#define stbir__1_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load1rep4( t, hc + (ofs) ); \ + stbir__simdf8_madd_mem4( tot0, tot0, t, decode+(ofs)*3 - 1 ); + +#define stbir__2_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load4b( cs, hc + (ofs) - 2 ); \ + stbir__simdf8_0123to22223333( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*3 - 1 ); + + #define stbir__3_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load4b( cs, hc + (ofs) ); \ + stbir__simdf8_0123to00001111( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*3 - 1 ); \ + stbir__simdf8_0123to2222( t, cs ); \ + stbir__simdf8_madd_mem4( tot1, tot1, 
t, decode+(ofs)*3 + 6 - 1 ); + +#define stbir__store_output() \ + stbir__simdf8_add( tot0, tot0, tot1 ); \ + stbir__simdf_0123to1230( t, stbir__if_simdf8_cast_to_simdf4( tot0 ) ); \ + stbir__simdf8_add4halves( t, t, tot0 ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 3; \ + if ( output < output_end ) \ + { \ + stbir__simdf_store( output-3, t ); \ + continue; \ + } \ + { stbir__simdf tt; stbir__simdf_0123to2301( tt, t ); \ + stbir__simdf_store2( output-3, t ); \ + stbir__simdf_store1( output+2-3, tt ); } \ + break; + + +#else + +#define stbir__4_coeff_start() \ + stbir__simdf tot0,tot1,tot2,c,cs; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc ); \ + stbir__simdf_0123to0001( c, cs ); \ + stbir__simdf_mult_mem( tot0, c, decode ); \ + stbir__simdf_0123to1122( c, cs ); \ + stbir__simdf_mult_mem( tot1, c, decode+4 ); \ + stbir__simdf_0123to2333( c, cs ); \ + stbir__simdf_mult_mem( tot2, c, decode+8 ); + +#define stbir__4_coeff_continue_from_4( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc + (ofs) ); \ + stbir__simdf_0123to0001( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*3 ); \ + stbir__simdf_0123to1122( c, cs ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*3+4 ); \ + stbir__simdf_0123to2333( c, cs ); \ + stbir__simdf_madd_mem( tot2, tot2, c, decode+(ofs)*3+8 ); + +#define stbir__1_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load1z( c, hc + (ofs) ); \ + stbir__simdf_0123to0001( c, c ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*3 ); + +#define stbir__2_coeff_remnant( ofs ) \ + { stbir__simdf d; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load2z( cs, hc + (ofs) ); \ + stbir__simdf_0123to0001( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*3 ); \ + stbir__simdf_0123to1122( c, cs ); \ + stbir__simdf_load2z( d, decode+(ofs)*3+4 ); \ + stbir__simdf_madd( tot1, tot1, c, d ); } + +#define 
stbir__3_coeff_remnant( ofs ) \ + { stbir__simdf d; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc + (ofs) ); \ + stbir__simdf_0123to0001( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*3 ); \ + stbir__simdf_0123to1122( c, cs ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*3+4 ); \ + stbir__simdf_0123to2222( c, cs ); \ + stbir__simdf_load1z( d, decode+(ofs)*3+8 ); \ + stbir__simdf_madd( tot2, tot2, c, d ); } + +#define stbir__store_output() \ + stbir__simdf_0123ABCDto3ABx( c, tot0, tot1 ); \ + stbir__simdf_0123ABCDto23Ax( cs, tot1, tot2 ); \ + stbir__simdf_0123to1230( tot2, tot2 ); \ + stbir__simdf_add( tot0, tot0, cs ); \ + stbir__simdf_add( c, c, tot2 ); \ + stbir__simdf_add( tot0, tot0, c ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 3; \ + if ( output < output_end ) \ + { \ + stbir__simdf_store( output-3, tot0 ); \ + continue; \ + } \ + stbir__simdf_0123to2301( tot1, tot0 ); \ + stbir__simdf_store2( output-3, tot0 ); \ + stbir__simdf_store1( output+2-3, tot1 ); \ + break; + +#endif + +#else + +#define stbir__1_coeff_only() \ + float tot0, tot1, tot2, c; \ + c = hc[0]; \ + tot0 = decode[0]*c; \ + tot1 = decode[1]*c; \ + tot2 = decode[2]*c; + +#define stbir__2_coeff_only() \ + float tot0, tot1, tot2, c; \ + c = hc[0]; \ + tot0 = decode[0]*c; \ + tot1 = decode[1]*c; \ + tot2 = decode[2]*c; \ + c = hc[1]; \ + tot0 += decode[3]*c; \ + tot1 += decode[4]*c; \ + tot2 += decode[5]*c; + +#define stbir__3_coeff_only() \ + float tot0, tot1, tot2, c; \ + c = hc[0]; \ + tot0 = decode[0]*c; \ + tot1 = decode[1]*c; \ + tot2 = decode[2]*c; \ + c = hc[1]; \ + tot0 += decode[3]*c; \ + tot1 += decode[4]*c; \ + tot2 += decode[5]*c; \ + c = hc[2]; \ + tot0 += decode[6]*c; \ + tot1 += decode[7]*c; \ + tot2 += decode[8]*c; + +#define stbir__store_output_tiny() \ + output[0] = tot0; \ + output[1] = tot1; \ + output[2] = tot2; \ + horizontal_coefficients += coefficient_width; \ + 
++horizontal_contributors; \ + output += 3; + +#define stbir__4_coeff_start() \ + float tota0,tota1,tota2,totb0,totb1,totb2,totc0,totc1,totc2,totd0,totd1,totd2,c; \ + c = hc[0]; \ + tota0 = decode[0]*c; \ + tota1 = decode[1]*c; \ + tota2 = decode[2]*c; \ + c = hc[1]; \ + totb0 = decode[3]*c; \ + totb1 = decode[4]*c; \ + totb2 = decode[5]*c; \ + c = hc[2]; \ + totc0 = decode[6]*c; \ + totc1 = decode[7]*c; \ + totc2 = decode[8]*c; \ + c = hc[3]; \ + totd0 = decode[9]*c; \ + totd1 = decode[10]*c; \ + totd2 = decode[11]*c; + +#define stbir__4_coeff_continue_from_4( ofs ) \ + c = hc[0+(ofs)]; \ + tota0 += decode[0+(ofs)*3]*c; \ + tota1 += decode[1+(ofs)*3]*c; \ + tota2 += decode[2+(ofs)*3]*c; \ + c = hc[1+(ofs)]; \ + totb0 += decode[3+(ofs)*3]*c; \ + totb1 += decode[4+(ofs)*3]*c; \ + totb2 += decode[5+(ofs)*3]*c; \ + c = hc[2+(ofs)]; \ + totc0 += decode[6+(ofs)*3]*c; \ + totc1 += decode[7+(ofs)*3]*c; \ + totc2 += decode[8+(ofs)*3]*c; \ + c = hc[3+(ofs)]; \ + totd0 += decode[9+(ofs)*3]*c; \ + totd1 += decode[10+(ofs)*3]*c; \ + totd2 += decode[11+(ofs)*3]*c; + +#define stbir__1_coeff_remnant( ofs ) \ + c = hc[0+(ofs)]; \ + tota0 += decode[0+(ofs)*3]*c; \ + tota1 += decode[1+(ofs)*3]*c; \ + tota2 += decode[2+(ofs)*3]*c; + +#define stbir__2_coeff_remnant( ofs ) \ + c = hc[0+(ofs)]; \ + tota0 += decode[0+(ofs)*3]*c; \ + tota1 += decode[1+(ofs)*3]*c; \ + tota2 += decode[2+(ofs)*3]*c; \ + c = hc[1+(ofs)]; \ + totb0 += decode[3+(ofs)*3]*c; \ + totb1 += decode[4+(ofs)*3]*c; \ + totb2 += decode[5+(ofs)*3]*c; \ + +#define stbir__3_coeff_remnant( ofs ) \ + c = hc[0+(ofs)]; \ + tota0 += decode[0+(ofs)*3]*c; \ + tota1 += decode[1+(ofs)*3]*c; \ + tota2 += decode[2+(ofs)*3]*c; \ + c = hc[1+(ofs)]; \ + totb0 += decode[3+(ofs)*3]*c; \ + totb1 += decode[4+(ofs)*3]*c; \ + totb2 += decode[5+(ofs)*3]*c; \ + c = hc[2+(ofs)]; \ + totc0 += decode[6+(ofs)*3]*c; \ + totc1 += decode[7+(ofs)*3]*c; \ + totc2 += decode[8+(ofs)*3]*c; + +#define stbir__store_output() \ + output[0] = 
(tota0+totc0)+(totb0+totd0); \ + output[1] = (tota1+totc1)+(totb1+totd1); \ + output[2] = (tota2+totc2)+(totb2+totd2); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 3; + +#endif + +#define STBIR__horizontal_channels 3 +#define STB_IMAGE_RESIZE_DO_HORIZONTALS +#include STBIR__HEADER_FILENAME + +//================= +// Do 4 channel horizontal routines + +#ifdef STBIR_SIMD + +#define stbir__1_coeff_only() \ + stbir__simdf tot,c; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load1( c, hc ); \ + stbir__simdf_0123to0000( c, c ); \ + stbir__simdf_mult_mem( tot, c, decode ); + +#define stbir__2_coeff_only() \ + stbir__simdf tot,c,cs; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load2( cs, hc ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_mult_mem( tot, c, decode ); \ + stbir__simdf_0123to1111( c, cs ); \ + stbir__simdf_madd_mem( tot, tot, c, decode+4 ); + +#define stbir__3_coeff_only() \ + stbir__simdf tot,c,cs; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_mult_mem( tot, c, decode ); \ + stbir__simdf_0123to1111( c, cs ); \ + stbir__simdf_madd_mem( tot, tot, c, decode+4 ); \ + stbir__simdf_0123to2222( c, cs ); \ + stbir__simdf_madd_mem( tot, tot, c, decode+8 ); + +#define stbir__store_output_tiny() \ + stbir__simdf_store( output, tot ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 4; + +#ifdef STBIR_SIMD8 + +#define stbir__4_coeff_start() \ + stbir__simdf8 tot0,c,cs; stbir__simdf t; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load4b( cs, hc ); \ + stbir__simdf8_0123to00001111( c, cs ); \ + stbir__simdf8_mult_mem( tot0, c, decode ); \ + stbir__simdf8_0123to22223333( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+8 ); + +#define stbir__4_coeff_continue_from_4( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load4b( cs, hc + (ofs) ); \ + 
stbir__simdf8_0123to00001111( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*4 ); \ + stbir__simdf8_0123to22223333( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*4+8 ); + +#define stbir__1_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load1rep4( t, hc + (ofs) ); \ + stbir__simdf8_madd_mem4( tot0, tot0, t, decode+(ofs)*4 ); + +#define stbir__2_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load4b( cs, hc + (ofs) - 2 ); \ + stbir__simdf8_0123to22223333( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*4 ); + + #define stbir__3_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load4b( cs, hc + (ofs) ); \ + stbir__simdf8_0123to00001111( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*4 ); \ + stbir__simdf8_0123to2222( t, cs ); \ + stbir__simdf8_madd_mem4( tot0, tot0, t, decode+(ofs)*4+8 ); + +#define stbir__store_output() \ + stbir__simdf8_add4halves( t, stbir__if_simdf8_cast_to_simdf4(tot0), tot0 ); \ + stbir__simdf_store( output, t ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 4; + +#else + +#define stbir__4_coeff_start() \ + stbir__simdf tot0,tot1,c,cs; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_mult_mem( tot0, c, decode ); \ + stbir__simdf_0123to1111( c, cs ); \ + stbir__simdf_mult_mem( tot1, c, decode+4 ); \ + stbir__simdf_0123to2222( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+8 ); \ + stbir__simdf_0123to3333( c, cs ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+12 ); + +#define stbir__4_coeff_continue_from_4( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc + (ofs) ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*4 ); \ + stbir__simdf_0123to1111( c, cs ); \ + stbir__simdf_madd_mem( tot1, tot1, c, 
decode+(ofs)*4+4 ); \ + stbir__simdf_0123to2222( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*4+8 ); \ + stbir__simdf_0123to3333( c, cs ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*4+12 ); + +#define stbir__1_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load1( c, hc + (ofs) ); \ + stbir__simdf_0123to0000( c, c ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*4 ); + +#define stbir__2_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load2( cs, hc + (ofs) ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*4 ); \ + stbir__simdf_0123to1111( c, cs ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*4+4 ); + +#define stbir__3_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc + (ofs) ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*4 ); \ + stbir__simdf_0123to1111( c, cs ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*4+4 ); \ + stbir__simdf_0123to2222( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*4+8 ); + +#define stbir__store_output() \ + stbir__simdf_add( tot0, tot0, tot1 ); \ + stbir__simdf_store( output, tot0 ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 4; + +#endif + +#else + +#define stbir__1_coeff_only() \ + float p0,p1,p2,p3,c; \ + STBIR_SIMD_NO_UNROLL(decode); \ + c = hc[0]; \ + p0 = decode[0] * c; \ + p1 = decode[1] * c; \ + p2 = decode[2] * c; \ + p3 = decode[3] * c; + +#define stbir__2_coeff_only() \ + float p0,p1,p2,p3,c; \ + STBIR_SIMD_NO_UNROLL(decode); \ + c = hc[0]; \ + p0 = decode[0] * c; \ + p1 = decode[1] * c; \ + p2 = decode[2] * c; \ + p3 = decode[3] * c; \ + c = hc[1]; \ + p0 += decode[4] * c; \ + p1 += decode[5] * c; \ + p2 += decode[6] * c; \ + p3 += decode[7] * c; + +#define stbir__3_coeff_only() \ + float p0,p1,p2,p3,c; \ + 
STBIR_SIMD_NO_UNROLL(decode); \ + c = hc[0]; \ + p0 = decode[0] * c; \ + p1 = decode[1] * c; \ + p2 = decode[2] * c; \ + p3 = decode[3] * c; \ + c = hc[1]; \ + p0 += decode[4] * c; \ + p1 += decode[5] * c; \ + p2 += decode[6] * c; \ + p3 += decode[7] * c; \ + c = hc[2]; \ + p0 += decode[8] * c; \ + p1 += decode[9] * c; \ + p2 += decode[10] * c; \ + p3 += decode[11] * c; + +#define stbir__store_output_tiny() \ + output[0] = p0; \ + output[1] = p1; \ + output[2] = p2; \ + output[3] = p3; \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 4; + +#define stbir__4_coeff_start() \ + float x0,x1,x2,x3,y0,y1,y2,y3,c; \ + STBIR_SIMD_NO_UNROLL(decode); \ + c = hc[0]; \ + x0 = decode[0] * c; \ + x1 = decode[1] * c; \ + x2 = decode[2] * c; \ + x3 = decode[3] * c; \ + c = hc[1]; \ + y0 = decode[4] * c; \ + y1 = decode[5] * c; \ + y2 = decode[6] * c; \ + y3 = decode[7] * c; \ + c = hc[2]; \ + x0 += decode[8] * c; \ + x1 += decode[9] * c; \ + x2 += decode[10] * c; \ + x3 += decode[11] * c; \ + c = hc[3]; \ + y0 += decode[12] * c; \ + y1 += decode[13] * c; \ + y2 += decode[14] * c; \ + y3 += decode[15] * c; + +#define stbir__4_coeff_continue_from_4( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + c = hc[0+(ofs)]; \ + x0 += decode[0+(ofs)*4] * c; \ + x1 += decode[1+(ofs)*4] * c; \ + x2 += decode[2+(ofs)*4] * c; \ + x3 += decode[3+(ofs)*4] * c; \ + c = hc[1+(ofs)]; \ + y0 += decode[4+(ofs)*4] * c; \ + y1 += decode[5+(ofs)*4] * c; \ + y2 += decode[6+(ofs)*4] * c; \ + y3 += decode[7+(ofs)*4] * c; \ + c = hc[2+(ofs)]; \ + x0 += decode[8+(ofs)*4] * c; \ + x1 += decode[9+(ofs)*4] * c; \ + x2 += decode[10+(ofs)*4] * c; \ + x3 += decode[11+(ofs)*4] * c; \ + c = hc[3+(ofs)]; \ + y0 += decode[12+(ofs)*4] * c; \ + y1 += decode[13+(ofs)*4] * c; \ + y2 += decode[14+(ofs)*4] * c; \ + y3 += decode[15+(ofs)*4] * c; + +#define stbir__1_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + c = hc[0+(ofs)]; \ + x0 += decode[0+(ofs)*4] * c; \ + x1 += 
decode[1+(ofs)*4] * c; \ + x2 += decode[2+(ofs)*4] * c; \ + x3 += decode[3+(ofs)*4] * c; + +#define stbir__2_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + c = hc[0+(ofs)]; \ + x0 += decode[0+(ofs)*4] * c; \ + x1 += decode[1+(ofs)*4] * c; \ + x2 += decode[2+(ofs)*4] * c; \ + x3 += decode[3+(ofs)*4] * c; \ + c = hc[1+(ofs)]; \ + y0 += decode[4+(ofs)*4] * c; \ + y1 += decode[5+(ofs)*4] * c; \ + y2 += decode[6+(ofs)*4] * c; \ + y3 += decode[7+(ofs)*4] * c; + +#define stbir__3_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + c = hc[0+(ofs)]; \ + x0 += decode[0+(ofs)*4] * c; \ + x1 += decode[1+(ofs)*4] * c; \ + x2 += decode[2+(ofs)*4] * c; \ + x3 += decode[3+(ofs)*4] * c; \ + c = hc[1+(ofs)]; \ + y0 += decode[4+(ofs)*4] * c; \ + y1 += decode[5+(ofs)*4] * c; \ + y2 += decode[6+(ofs)*4] * c; \ + y3 += decode[7+(ofs)*4] * c; \ + c = hc[2+(ofs)]; \ + x0 += decode[8+(ofs)*4] * c; \ + x1 += decode[9+(ofs)*4] * c; \ + x2 += decode[10+(ofs)*4] * c; \ + x3 += decode[11+(ofs)*4] * c; + +#define stbir__store_output() \ + output[0] = x0 + y0; \ + output[1] = x1 + y1; \ + output[2] = x2 + y2; \ + output[3] = x3 + y3; \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 4; + +#endif + +#define STBIR__horizontal_channels 4 +#define STB_IMAGE_RESIZE_DO_HORIZONTALS +#include STBIR__HEADER_FILENAME + + + +//================= +// Do 7 channel horizontal routines + +#ifdef STBIR_SIMD + +#define stbir__1_coeff_only() \ + stbir__simdf tot0,tot1,c; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load1( c, hc ); \ + stbir__simdf_0123to0000( c, c ); \ + stbir__simdf_mult_mem( tot0, c, decode ); \ + stbir__simdf_mult_mem( tot1, c, decode+3 ); + +#define stbir__2_coeff_only() \ + stbir__simdf tot0,tot1,c,cs; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load2( cs, hc ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_mult_mem( tot0, c, decode ); \ + stbir__simdf_mult_mem( tot1, c, decode+3 ); \ + 
stbir__simdf_0123to1111( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+7 ); \ + stbir__simdf_madd_mem( tot1, tot1, c,decode+10 ); + +#define stbir__3_coeff_only() \ + stbir__simdf tot0,tot1,c,cs; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_mult_mem( tot0, c, decode ); \ + stbir__simdf_mult_mem( tot1, c, decode+3 ); \ + stbir__simdf_0123to1111( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+7 ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+10 ); \ + stbir__simdf_0123to2222( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+14 ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+17 ); + +#define stbir__store_output_tiny() \ + stbir__simdf_store( output+3, tot1 ); \ + stbir__simdf_store( output, tot0 ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 7; + +#ifdef STBIR_SIMD8 + +#define stbir__4_coeff_start() \ + stbir__simdf8 tot0,tot1,c,cs; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load4b( cs, hc ); \ + stbir__simdf8_0123to00000000( c, cs ); \ + stbir__simdf8_mult_mem( tot0, c, decode ); \ + stbir__simdf8_0123to11111111( c, cs ); \ + stbir__simdf8_mult_mem( tot1, c, decode+7 ); \ + stbir__simdf8_0123to22222222( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+14 ); \ + stbir__simdf8_0123to33333333( c, cs ); \ + stbir__simdf8_madd_mem( tot1, tot1, c, decode+21 ); + +#define stbir__4_coeff_continue_from_4( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load4b( cs, hc + (ofs) ); \ + stbir__simdf8_0123to00000000( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \ + stbir__simdf8_0123to11111111( c, cs ); \ + stbir__simdf8_madd_mem( tot1, tot1, c, decode+(ofs)*7+7 ); \ + stbir__simdf8_0123to22222222( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*7+14 ); \ + stbir__simdf8_0123to33333333( c, cs ); \ + stbir__simdf8_madd_mem( tot1, tot1, c, 
decode+(ofs)*7+21 ); + +#define stbir__1_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load1b( c, hc + (ofs) ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); + +#define stbir__2_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load1b( c, hc + (ofs) ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \ + stbir__simdf8_load1b( c, hc + (ofs)+1 ); \ + stbir__simdf8_madd_mem( tot1, tot1, c, decode+(ofs)*7+7 ); + +#define stbir__3_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf8_load4b( cs, hc + (ofs) ); \ + stbir__simdf8_0123to00000000( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \ + stbir__simdf8_0123to11111111( c, cs ); \ + stbir__simdf8_madd_mem( tot1, tot1, c, decode+(ofs)*7+7 ); \ + stbir__simdf8_0123to22222222( c, cs ); \ + stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*7+14 ); + +#define stbir__store_output() \ + stbir__simdf8_add( tot0, tot0, tot1 ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 7; \ + if ( output < output_end ) \ + { \ + stbir__simdf8_store( output-7, tot0 ); \ + continue; \ + } \ + stbir__simdf_store( output-7+3, stbir__simdf_swiz(stbir__simdf8_gettop4(tot0),0,0,1,2) ); \ + stbir__simdf_store( output-7, stbir__if_simdf8_cast_to_simdf4(tot0) ); \ + break; + +#else + +#define stbir__4_coeff_start() \ + stbir__simdf tot0,tot1,tot2,tot3,c,cs; \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_mult_mem( tot0, c, decode ); \ + stbir__simdf_mult_mem( tot1, c, decode+3 ); \ + stbir__simdf_0123to1111( c, cs ); \ + stbir__simdf_mult_mem( tot2, c, decode+7 ); \ + stbir__simdf_mult_mem( tot3, c, decode+10 ); \ + stbir__simdf_0123to2222( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+14 ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+17 ); \ + stbir__simdf_0123to3333( c, cs ); \ + 
stbir__simdf_madd_mem( tot2, tot2, c, decode+21 ); \ + stbir__simdf_madd_mem( tot3, tot3, c, decode+24 ); + +#define stbir__4_coeff_continue_from_4( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc + (ofs) ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*7+3 ); \ + stbir__simdf_0123to1111( c, cs ); \ + stbir__simdf_madd_mem( tot2, tot2, c, decode+(ofs)*7+7 ); \ + stbir__simdf_madd_mem( tot3, tot3, c, decode+(ofs)*7+10 ); \ + stbir__simdf_0123to2222( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*7+14 ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*7+17 ); \ + stbir__simdf_0123to3333( c, cs ); \ + stbir__simdf_madd_mem( tot2, tot2, c, decode+(ofs)*7+21 ); \ + stbir__simdf_madd_mem( tot3, tot3, c, decode+(ofs)*7+24 ); + +#define stbir__1_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load1( c, hc + (ofs) ); \ + stbir__simdf_0123to0000( c, c ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*7+3 ); \ + +#define stbir__2_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load2( cs, hc + (ofs) ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*7+3 ); \ + stbir__simdf_0123to1111( c, cs ); \ + stbir__simdf_madd_mem( tot2, tot2, c, decode+(ofs)*7+7 ); \ + stbir__simdf_madd_mem( tot3, tot3, c, decode+(ofs)*7+10 ); + +#define stbir__3_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + stbir__simdf_load( cs, hc + (ofs) ); \ + stbir__simdf_0123to0000( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*7+3 ); \ + stbir__simdf_0123to1111( c, cs ); \ + stbir__simdf_madd_mem( tot2, tot2, c, decode+(ofs)*7+7 ); \ + 
stbir__simdf_madd_mem( tot3, tot3, c, decode+(ofs)*7+10 ); \ + stbir__simdf_0123to2222( c, cs ); \ + stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*7+14 ); \ + stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*7+17 ); + +#define stbir__store_output() \ + stbir__simdf_add( tot0, tot0, tot2 ); \ + stbir__simdf_add( tot1, tot1, tot3 ); \ + stbir__simdf_store( output+3, tot1 ); \ + stbir__simdf_store( output, tot0 ); \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 7; + +#endif + +#else + +#define stbir__1_coeff_only() \ + float tot0, tot1, tot2, tot3, tot4, tot5, tot6, c; \ + c = hc[0]; \ + tot0 = decode[0]*c; \ + tot1 = decode[1]*c; \ + tot2 = decode[2]*c; \ + tot3 = decode[3]*c; \ + tot4 = decode[4]*c; \ + tot5 = decode[5]*c; \ + tot6 = decode[6]*c; + +#define stbir__2_coeff_only() \ + float tot0, tot1, tot2, tot3, tot4, tot5, tot6, c; \ + c = hc[0]; \ + tot0 = decode[0]*c; \ + tot1 = decode[1]*c; \ + tot2 = decode[2]*c; \ + tot3 = decode[3]*c; \ + tot4 = decode[4]*c; \ + tot5 = decode[5]*c; \ + tot6 = decode[6]*c; \ + c = hc[1]; \ + tot0 += decode[7]*c; \ + tot1 += decode[8]*c; \ + tot2 += decode[9]*c; \ + tot3 += decode[10]*c; \ + tot4 += decode[11]*c; \ + tot5 += decode[12]*c; \ + tot6 += decode[13]*c; \ + +#define stbir__3_coeff_only() \ + float tot0, tot1, tot2, tot3, tot4, tot5, tot6, c; \ + c = hc[0]; \ + tot0 = decode[0]*c; \ + tot1 = decode[1]*c; \ + tot2 = decode[2]*c; \ + tot3 = decode[3]*c; \ + tot4 = decode[4]*c; \ + tot5 = decode[5]*c; \ + tot6 = decode[6]*c; \ + c = hc[1]; \ + tot0 += decode[7]*c; \ + tot1 += decode[8]*c; \ + tot2 += decode[9]*c; \ + tot3 += decode[10]*c; \ + tot4 += decode[11]*c; \ + tot5 += decode[12]*c; \ + tot6 += decode[13]*c; \ + c = hc[2]; \ + tot0 += decode[14]*c; \ + tot1 += decode[15]*c; \ + tot2 += decode[16]*c; \ + tot3 += decode[17]*c; \ + tot4 += decode[18]*c; \ + tot5 += decode[19]*c; \ + tot6 += decode[20]*c; \ + +#define stbir__store_output_tiny() \ + output[0] = 
tot0; \ + output[1] = tot1; \ + output[2] = tot2; \ + output[3] = tot3; \ + output[4] = tot4; \ + output[5] = tot5; \ + output[6] = tot6; \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 7; + +#define stbir__4_coeff_start() \ + float x0,x1,x2,x3,x4,x5,x6,y0,y1,y2,y3,y4,y5,y6,c; \ + STBIR_SIMD_NO_UNROLL(decode); \ + c = hc[0]; \ + x0 = decode[0] * c; \ + x1 = decode[1] * c; \ + x2 = decode[2] * c; \ + x3 = decode[3] * c; \ + x4 = decode[4] * c; \ + x5 = decode[5] * c; \ + x6 = decode[6] * c; \ + c = hc[1]; \ + y0 = decode[7] * c; \ + y1 = decode[8] * c; \ + y2 = decode[9] * c; \ + y3 = decode[10] * c; \ + y4 = decode[11] * c; \ + y5 = decode[12] * c; \ + y6 = decode[13] * c; \ + c = hc[2]; \ + x0 += decode[14] * c; \ + x1 += decode[15] * c; \ + x2 += decode[16] * c; \ + x3 += decode[17] * c; \ + x4 += decode[18] * c; \ + x5 += decode[19] * c; \ + x6 += decode[20] * c; \ + c = hc[3]; \ + y0 += decode[21] * c; \ + y1 += decode[22] * c; \ + y2 += decode[23] * c; \ + y3 += decode[24] * c; \ + y4 += decode[25] * c; \ + y5 += decode[26] * c; \ + y6 += decode[27] * c; + +#define stbir__4_coeff_continue_from_4( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + c = hc[0+(ofs)]; \ + x0 += decode[0+(ofs)*7] * c; \ + x1 += decode[1+(ofs)*7] * c; \ + x2 += decode[2+(ofs)*7] * c; \ + x3 += decode[3+(ofs)*7] * c; \ + x4 += decode[4+(ofs)*7] * c; \ + x5 += decode[5+(ofs)*7] * c; \ + x6 += decode[6+(ofs)*7] * c; \ + c = hc[1+(ofs)]; \ + y0 += decode[7+(ofs)*7] * c; \ + y1 += decode[8+(ofs)*7] * c; \ + y2 += decode[9+(ofs)*7] * c; \ + y3 += decode[10+(ofs)*7] * c; \ + y4 += decode[11+(ofs)*7] * c; \ + y5 += decode[12+(ofs)*7] * c; \ + y6 += decode[13+(ofs)*7] * c; \ + c = hc[2+(ofs)]; \ + x0 += decode[14+(ofs)*7] * c; \ + x1 += decode[15+(ofs)*7] * c; \ + x2 += decode[16+(ofs)*7] * c; \ + x3 += decode[17+(ofs)*7] * c; \ + x4 += decode[18+(ofs)*7] * c; \ + x5 += decode[19+(ofs)*7] * c; \ + x6 += decode[20+(ofs)*7] * c; \ + c = hc[3+(ofs)]; \ + 
y0 += decode[21+(ofs)*7] * c; \ + y1 += decode[22+(ofs)*7] * c; \ + y2 += decode[23+(ofs)*7] * c; \ + y3 += decode[24+(ofs)*7] * c; \ + y4 += decode[25+(ofs)*7] * c; \ + y5 += decode[26+(ofs)*7] * c; \ + y6 += decode[27+(ofs)*7] * c; + +#define stbir__1_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + c = hc[0+(ofs)]; \ + x0 += decode[0+(ofs)*7] * c; \ + x1 += decode[1+(ofs)*7] * c; \ + x2 += decode[2+(ofs)*7] * c; \ + x3 += decode[3+(ofs)*7] * c; \ + x4 += decode[4+(ofs)*7] * c; \ + x5 += decode[5+(ofs)*7] * c; \ + x6 += decode[6+(ofs)*7] * c; \ + +#define stbir__2_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + c = hc[0+(ofs)]; \ + x0 += decode[0+(ofs)*7] * c; \ + x1 += decode[1+(ofs)*7] * c; \ + x2 += decode[2+(ofs)*7] * c; \ + x3 += decode[3+(ofs)*7] * c; \ + x4 += decode[4+(ofs)*7] * c; \ + x5 += decode[5+(ofs)*7] * c; \ + x6 += decode[6+(ofs)*7] * c; \ + c = hc[1+(ofs)]; \ + y0 += decode[7+(ofs)*7] * c; \ + y1 += decode[8+(ofs)*7] * c; \ + y2 += decode[9+(ofs)*7] * c; \ + y3 += decode[10+(ofs)*7] * c; \ + y4 += decode[11+(ofs)*7] * c; \ + y5 += decode[12+(ofs)*7] * c; \ + y6 += decode[13+(ofs)*7] * c; \ + +#define stbir__3_coeff_remnant( ofs ) \ + STBIR_SIMD_NO_UNROLL(decode); \ + c = hc[0+(ofs)]; \ + x0 += decode[0+(ofs)*7] * c; \ + x1 += decode[1+(ofs)*7] * c; \ + x2 += decode[2+(ofs)*7] * c; \ + x3 += decode[3+(ofs)*7] * c; \ + x4 += decode[4+(ofs)*7] * c; \ + x5 += decode[5+(ofs)*7] * c; \ + x6 += decode[6+(ofs)*7] * c; \ + c = hc[1+(ofs)]; \ + y0 += decode[7+(ofs)*7] * c; \ + y1 += decode[8+(ofs)*7] * c; \ + y2 += decode[9+(ofs)*7] * c; \ + y3 += decode[10+(ofs)*7] * c; \ + y4 += decode[11+(ofs)*7] * c; \ + y5 += decode[12+(ofs)*7] * c; \ + y6 += decode[13+(ofs)*7] * c; \ + c = hc[2+(ofs)]; \ + x0 += decode[14+(ofs)*7] * c; \ + x1 += decode[15+(ofs)*7] * c; \ + x2 += decode[16+(ofs)*7] * c; \ + x3 += decode[17+(ofs)*7] * c; \ + x4 += decode[18+(ofs)*7] * c; \ + x5 += decode[19+(ofs)*7] * c; \ + x6 += decode[20+(ofs)*7] * c; \ + 
+#define stbir__store_output() \ + output[0] = x0 + y0; \ + output[1] = x1 + y1; \ + output[2] = x2 + y2; \ + output[3] = x3 + y3; \ + output[4] = x4 + y4; \ + output[5] = x5 + y5; \ + output[6] = x6 + y6; \ + horizontal_coefficients += coefficient_width; \ + ++horizontal_contributors; \ + output += 7; + +#endif + +#define STBIR__horizontal_channels 7 +#define STB_IMAGE_RESIZE_DO_HORIZONTALS +#include STBIR__HEADER_FILENAME + + +// include all of the vertical resamplers (both scatter and gather versions) + +#define STBIR__vertical_channels 1 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 1 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#define STB_IMAGE_RESIZE_VERTICAL_CONTINUE +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 2 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 2 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#define STB_IMAGE_RESIZE_VERTICAL_CONTINUE +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 3 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 3 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#define STB_IMAGE_RESIZE_VERTICAL_CONTINUE +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 4 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 4 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#define STB_IMAGE_RESIZE_VERTICAL_CONTINUE +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 5 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 5 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#define STB_IMAGE_RESIZE_VERTICAL_CONTINUE +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 6 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 6 +#define 
STB_IMAGE_RESIZE_DO_VERTICALS +#define STB_IMAGE_RESIZE_VERTICAL_CONTINUE +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 7 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 7 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#define STB_IMAGE_RESIZE_VERTICAL_CONTINUE +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 8 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#include STBIR__HEADER_FILENAME + +#define STBIR__vertical_channels 8 +#define STB_IMAGE_RESIZE_DO_VERTICALS +#define STB_IMAGE_RESIZE_VERTICAL_CONTINUE +#include STBIR__HEADER_FILENAME + +typedef void STBIR_VERTICAL_GATHERFUNC( float * output, float const * coeffs, float const ** inputs, float const * input0_end ); + +static STBIR_VERTICAL_GATHERFUNC * stbir__vertical_gathers[ 8 ] = +{ + stbir__vertical_gather_with_1_coeffs,stbir__vertical_gather_with_2_coeffs,stbir__vertical_gather_with_3_coeffs,stbir__vertical_gather_with_4_coeffs,stbir__vertical_gather_with_5_coeffs,stbir__vertical_gather_with_6_coeffs,stbir__vertical_gather_with_7_coeffs,stbir__vertical_gather_with_8_coeffs +}; + +static STBIR_VERTICAL_GATHERFUNC * stbir__vertical_gathers_continues[ 8 ] = +{ + stbir__vertical_gather_with_1_coeffs_cont,stbir__vertical_gather_with_2_coeffs_cont,stbir__vertical_gather_with_3_coeffs_cont,stbir__vertical_gather_with_4_coeffs_cont,stbir__vertical_gather_with_5_coeffs_cont,stbir__vertical_gather_with_6_coeffs_cont,stbir__vertical_gather_with_7_coeffs_cont,stbir__vertical_gather_with_8_coeffs_cont +}; + +typedef void STBIR_VERTICAL_SCATTERFUNC( float ** outputs, float const * coeffs, float const * input, float const * input_end ); + +static STBIR_VERTICAL_SCATTERFUNC * stbir__vertical_scatter_sets[ 8 ] = +{ + 
stbir__vertical_scatter_with_1_coeffs,stbir__vertical_scatter_with_2_coeffs,stbir__vertical_scatter_with_3_coeffs,stbir__vertical_scatter_with_4_coeffs,stbir__vertical_scatter_with_5_coeffs,stbir__vertical_scatter_with_6_coeffs,stbir__vertical_scatter_with_7_coeffs,stbir__vertical_scatter_with_8_coeffs +}; + +static STBIR_VERTICAL_SCATTERFUNC * stbir__vertical_scatter_blends[ 8 ] = +{ + stbir__vertical_scatter_with_1_coeffs_cont,stbir__vertical_scatter_with_2_coeffs_cont,stbir__vertical_scatter_with_3_coeffs_cont,stbir__vertical_scatter_with_4_coeffs_cont,stbir__vertical_scatter_with_5_coeffs_cont,stbir__vertical_scatter_with_6_coeffs_cont,stbir__vertical_scatter_with_7_coeffs_cont,stbir__vertical_scatter_with_8_coeffs_cont +}; + + +static void stbir__encode_scanline( stbir__info const * stbir_info, void *output_buffer_data, float * encode_buffer, int row STBIR_ONLY_PROFILE_GET_SPLIT_INFO ) +{ + int num_pixels = stbir_info->horizontal.scale_info.output_sub_size; + int channels = stbir_info->channels; + int width_times_channels = num_pixels * channels; + void * output_buffer; + + // un-alpha weight if we need to + if ( stbir_info->alpha_unweight ) + { + STBIR_PROFILE_START( unalpha ); + stbir_info->alpha_unweight( encode_buffer, width_times_channels ); + STBIR_PROFILE_END( unalpha ); + } + + // write directly into output by default + output_buffer = output_buffer_data; + + // if we have an output callback, we first convert the decode buffer in place (and then hand that to the callback) + if ( stbir_info->out_pixels_cb ) + output_buffer = encode_buffer; + + STBIR_PROFILE_START( encode ); + // convert into the output buffer + stbir_info->encode_pixels( output_buffer, width_times_channels, encode_buffer ); + STBIR_PROFILE_END( encode ); + + // if we have an output callback, call it to send the data + if ( stbir_info->out_pixels_cb ) + stbir_info->out_pixels_cb( output_buffer, num_pixels, row, stbir_info->user_data ); +} + + +// Get the ring buffer pointer for an index 
+static float* stbir__get_ring_buffer_entry(stbir__info const * stbir_info, stbir__per_split_info const * split_info, int index ) +{ + STBIR_ASSERT( index < stbir_info->ring_buffer_num_entries ); + + #ifdef STBIR__SEPARATE_ALLOCATIONS + return split_info->ring_buffers[ index ]; + #else + return (float*) ( ( (char*) split_info->ring_buffer ) + ( index * stbir_info->ring_buffer_length_bytes ) ); + #endif +} + +// Get the specified scan line from the ring buffer +static float* stbir__get_ring_buffer_scanline(stbir__info const * stbir_info, stbir__per_split_info const * split_info, int get_scanline) +{ + int ring_buffer_index = (split_info->ring_buffer_begin_index + (get_scanline - split_info->ring_buffer_first_scanline)) % stbir_info->ring_buffer_num_entries; + return stbir__get_ring_buffer_entry( stbir_info, split_info, ring_buffer_index ); +} + +static void stbir__resample_horizontal_gather(stbir__info const * stbir_info, float* output_buffer, float const * input_buffer STBIR_ONLY_PROFILE_GET_SPLIT_INFO ) +{ + float const * decode_buffer = input_buffer - ( stbir_info->scanline_extents.conservative.n0 * stbir_info->effective_channels ); + + STBIR_PROFILE_START( horizontal ); + if ( ( stbir_info->horizontal.filter_enum == STBIR_FILTER_POINT_SAMPLE ) && ( stbir_info->horizontal.scale_info.scale == 1.0f ) ) + STBIR_MEMCPY( output_buffer, input_buffer, stbir_info->horizontal.scale_info.output_sub_size * sizeof( float ) * stbir_info->effective_channels ); + else + stbir_info->horizontal_gather_channels( output_buffer, stbir_info->horizontal.scale_info.output_sub_size, decode_buffer, stbir_info->horizontal.contributors, stbir_info->horizontal.coefficients, stbir_info->horizontal.coefficient_width ); + STBIR_PROFILE_END( horizontal ); +} + +static void stbir__resample_vertical_gather(stbir__info const * stbir_info, stbir__per_split_info* split_info, int n, int contrib_n0, int contrib_n1, float const * vertical_coefficients ) +{ + float* encode_buffer = 
split_info->vertical_buffer; + float* decode_buffer = split_info->decode_buffer; + int vertical_first = stbir_info->vertical_first; + int width = (vertical_first) ? ( stbir_info->scanline_extents.conservative.n1-stbir_info->scanline_extents.conservative.n0+1 ) : stbir_info->horizontal.scale_info.output_sub_size; + int width_times_channels = stbir_info->effective_channels * width; + + STBIR_ASSERT( stbir_info->vertical.is_gather ); + + // loop over the contributing scanlines and scale into the buffer + STBIR_PROFILE_START( vertical ); + { + int k = 0, total = contrib_n1 - contrib_n0 + 1; + STBIR_ASSERT( total > 0 ); + do { + float const * inputs[8]; + int i, cnt = total; if ( cnt > 8 ) cnt = 8; + for( i = 0 ; i < cnt ; i++ ) + inputs[ i ] = stbir__get_ring_buffer_scanline(stbir_info, split_info, k+i+contrib_n0 ); + + // call the N scanlines at a time function (up to 8 scanlines of blending at once) + ((k==0)?stbir__vertical_gathers:stbir__vertical_gathers_continues)[cnt-1]( (vertical_first) ? 
decode_buffer : encode_buffer, vertical_coefficients + k, inputs, inputs[0] + width_times_channels ); + k += cnt; + total -= cnt; + } while ( total ); + } + STBIR_PROFILE_END( vertical ); + + if ( vertical_first ) + { + // Now resample the gathered vertical data in the horizontal axis into the encode buffer + decode_buffer[ width_times_channels ] = 0.0f; // clear two over for horizontals with a remnant of 3 + decode_buffer[ width_times_channels+1 ] = 0.0f; + stbir__resample_horizontal_gather(stbir_info, encode_buffer, decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); + } + + stbir__encode_scanline( stbir_info, ( (char *) stbir_info->output_data ) + ((size_t)n * (size_t)stbir_info->output_stride_bytes), + encode_buffer, n STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); +} + +static void stbir__decode_and_resample_for_vertical_gather_loop(stbir__info const * stbir_info, stbir__per_split_info* split_info, int n) +{ + int ring_buffer_index; + float* ring_buffer; + + // Decode the nth scanline from the source image into the decode buffer. + stbir__decode_scanline( stbir_info, n, split_info->decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); + + // update new end scanline + split_info->ring_buffer_last_scanline = n; + + // get ring buffer + ring_buffer_index = (split_info->ring_buffer_begin_index + (split_info->ring_buffer_last_scanline - split_info->ring_buffer_first_scanline)) % stbir_info->ring_buffer_num_entries; + ring_buffer = stbir__get_ring_buffer_entry(stbir_info, split_info, ring_buffer_index); + + // Now resample it into the ring buffer. + stbir__resample_horizontal_gather( stbir_info, ring_buffer, split_info->decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); + + // Now it's sitting in the ring buffer ready to be used as source for the vertical sampling. 
+} + +static void stbir__vertical_gather_loop( stbir__info const * stbir_info, stbir__per_split_info* split_info, int split_count ) +{ + int y, start_output_y, end_output_y; + stbir__contributors* vertical_contributors = stbir_info->vertical.contributors; + float const * vertical_coefficients = stbir_info->vertical.coefficients; + + STBIR_ASSERT( stbir_info->vertical.is_gather ); + + start_output_y = split_info->start_output_y; + end_output_y = split_info[split_count-1].end_output_y; + + vertical_contributors += start_output_y; + vertical_coefficients += start_output_y * stbir_info->vertical.coefficient_width; + + // initialize the ring buffer for gathering + split_info->ring_buffer_begin_index = 0; + split_info->ring_buffer_first_scanline = vertical_contributors->n0; + split_info->ring_buffer_last_scanline = split_info->ring_buffer_first_scanline - 1; // means "empty" + + for (y = start_output_y; y < end_output_y; y++) + { + int in_first_scanline, in_last_scanline; + + in_first_scanline = vertical_contributors->n0; + in_last_scanline = vertical_contributors->n1; + + // make sure the indexing hasn't broken + STBIR_ASSERT( in_first_scanline >= split_info->ring_buffer_first_scanline ); + + // Load in new scanlines + while (in_last_scanline > split_info->ring_buffer_last_scanline) + { + STBIR_ASSERT( ( split_info->ring_buffer_last_scanline - split_info->ring_buffer_first_scanline + 1 ) <= stbir_info->ring_buffer_num_entries ); + + // make sure there was room in the ring buffer when we add new scanlines + if ( ( split_info->ring_buffer_last_scanline - split_info->ring_buffer_first_scanline + 1 ) == stbir_info->ring_buffer_num_entries ) + { + split_info->ring_buffer_first_scanline++; + split_info->ring_buffer_begin_index++; + } + + if ( stbir_info->vertical_first ) + { + float * ring_buffer = stbir__get_ring_buffer_scanline( stbir_info, split_info, ++split_info->ring_buffer_last_scanline ); + // Decode the nth scanline from the source image into the decode buffer. 
+ stbir__decode_scanline( stbir_info, split_info->ring_buffer_last_scanline, ring_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); + } + else + { + stbir__decode_and_resample_for_vertical_gather_loop(stbir_info, split_info, split_info->ring_buffer_last_scanline + 1); + } + } + + // Now all buffers should be ready to write a row of vertical sampling, so do it. + stbir__resample_vertical_gather(stbir_info, split_info, y, in_first_scanline, in_last_scanline, vertical_coefficients ); + + ++vertical_contributors; + vertical_coefficients += stbir_info->vertical.coefficient_width; + } +} + +#define STBIR__FLOAT_EMPTY_MARKER 3.0e+38F +#define STBIR__FLOAT_BUFFER_IS_EMPTY(ptr) ((ptr)[0]==STBIR__FLOAT_EMPTY_MARKER) + +static void stbir__encode_first_scanline_from_scatter(stbir__info const * stbir_info, stbir__per_split_info* split_info) +{ + // evict a scanline out into the output buffer + float* ring_buffer_entry = stbir__get_ring_buffer_entry(stbir_info, split_info, split_info->ring_buffer_begin_index ); + + // dump the scanline out + stbir__encode_scanline( stbir_info, ( (char *)stbir_info->output_data ) + ( (size_t)split_info->ring_buffer_first_scanline * (size_t)stbir_info->output_stride_bytes ), ring_buffer_entry, split_info->ring_buffer_first_scanline STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); + + // mark it as empty + ring_buffer_entry[ 0 ] = STBIR__FLOAT_EMPTY_MARKER; + + // advance the first scanline + split_info->ring_buffer_first_scanline++; + if ( ++split_info->ring_buffer_begin_index == stbir_info->ring_buffer_num_entries ) + split_info->ring_buffer_begin_index = 0; +} + +static void stbir__horizontal_resample_and_encode_first_scanline_from_scatter(stbir__info const * stbir_info, stbir__per_split_info* split_info) +{ + // evict a scanline out into the output buffer + + float* ring_buffer_entry = stbir__get_ring_buffer_entry(stbir_info, split_info, split_info->ring_buffer_begin_index ); + + // Now resample it into the buffer. 
+ stbir__resample_horizontal_gather( stbir_info, split_info->vertical_buffer, ring_buffer_entry STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); + + // dump the scanline out + stbir__encode_scanline( stbir_info, ( (char *)stbir_info->output_data ) + ( (size_t)split_info->ring_buffer_first_scanline * (size_t)stbir_info->output_stride_bytes ), split_info->vertical_buffer, split_info->ring_buffer_first_scanline STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); + + // mark it as empty + ring_buffer_entry[ 0 ] = STBIR__FLOAT_EMPTY_MARKER; + + // advance the first scanline + split_info->ring_buffer_first_scanline++; + if ( ++split_info->ring_buffer_begin_index == stbir_info->ring_buffer_num_entries ) + split_info->ring_buffer_begin_index = 0; +} + +static void stbir__resample_vertical_scatter(stbir__info const * stbir_info, stbir__per_split_info* split_info, int n0, int n1, float const * vertical_coefficients, float const * vertical_buffer, float const * vertical_buffer_end ) +{ + STBIR_ASSERT( !stbir_info->vertical.is_gather ); + + STBIR_PROFILE_START( vertical ); + { + int k = 0, total = n1 - n0 + 1; + STBIR_ASSERT( total > 0 ); + do { + float * outputs[8]; + int i, n = total; if ( n > 8 ) n = 8; + for( i = 0 ; i < n ; i++ ) + { + outputs[ i ] = stbir__get_ring_buffer_scanline(stbir_info, split_info, k+i+n0 ); + if ( ( i ) && ( STBIR__FLOAT_BUFFER_IS_EMPTY( outputs[i] ) != STBIR__FLOAT_BUFFER_IS_EMPTY( outputs[0] ) ) ) // make sure runs are of the same type + { + n = i; + break; + } + } + // call the scatter to N scanlines at a time function (up to 8 scanlines of scattering at once) + ((STBIR__FLOAT_BUFFER_IS_EMPTY( outputs[0] ))?stbir__vertical_scatter_sets:stbir__vertical_scatter_blends)[n-1]( outputs, vertical_coefficients + k, vertical_buffer, vertical_buffer_end ); + k += n; + total -= n; + } while ( total ); + } + + STBIR_PROFILE_END( vertical ); +} + +typedef void stbir__handle_scanline_for_scatter_func(stbir__info const * stbir_info, stbir__per_split_info* split_info); + +static void 
stbir__vertical_scatter_loop( stbir__info const * stbir_info, stbir__per_split_info* split_info, int split_count ) +{ + int y, start_output_y, end_output_y, start_input_y, end_input_y; + stbir__contributors* vertical_contributors = stbir_info->vertical.contributors; + float const * vertical_coefficients = stbir_info->vertical.coefficients; + stbir__handle_scanline_for_scatter_func * handle_scanline_for_scatter; + void * scanline_scatter_buffer; + void * scanline_scatter_buffer_end; + int on_first_input_y, last_input_y; + int width = (stbir_info->vertical_first) ? ( stbir_info->scanline_extents.conservative.n1-stbir_info->scanline_extents.conservative.n0+1 ) : stbir_info->horizontal.scale_info.output_sub_size; + int width_times_channels = stbir_info->effective_channels * width; + + STBIR_ASSERT( !stbir_info->vertical.is_gather ); + + start_output_y = split_info->start_output_y; + end_output_y = split_info[split_count-1].end_output_y; // may do multiple split counts + + start_input_y = split_info->start_input_y; + end_input_y = split_info[split_count-1].end_input_y; + + // adjust for starting offset start_input_y + y = start_input_y + stbir_info->vertical.filter_pixel_margin; + vertical_contributors += y ; + vertical_coefficients += stbir_info->vertical.coefficient_width * y; + + if ( stbir_info->vertical_first ) + { + handle_scanline_for_scatter = stbir__horizontal_resample_and_encode_first_scanline_from_scatter; + scanline_scatter_buffer = split_info->decode_buffer; + scanline_scatter_buffer_end = ( (char*) scanline_scatter_buffer ) + sizeof( float ) * stbir_info->effective_channels * (stbir_info->scanline_extents.conservative.n1-stbir_info->scanline_extents.conservative.n0+1); + } + else + { + handle_scanline_for_scatter = stbir__encode_first_scanline_from_scatter; + scanline_scatter_buffer = split_info->vertical_buffer; + scanline_scatter_buffer_end = ( (char*) scanline_scatter_buffer ) + sizeof( float ) * stbir_info->effective_channels * 
stbir_info->horizontal.scale_info.output_sub_size; + } + + // initialize the ring buffer for scattering + split_info->ring_buffer_first_scanline = start_output_y; + split_info->ring_buffer_last_scanline = -1; + split_info->ring_buffer_begin_index = -1; + + // mark all the buffers as empty to start + for( y = 0 ; y < stbir_info->ring_buffer_num_entries ; y++ ) + { + float * decode_buffer = stbir__get_ring_buffer_entry( stbir_info, split_info, y ); + decode_buffer[ width_times_channels ] = 0.0f; // clear two over for horizontals with a remnant of 3 + decode_buffer[ width_times_channels+1 ] = 0.0f; + decode_buffer[0] = STBIR__FLOAT_EMPTY_MARKER; // only used on scatter + } + + // do the loop in input space + on_first_input_y = 1; last_input_y = start_input_y; + for (y = start_input_y ; y < end_input_y; y++) + { + int out_first_scanline, out_last_scanline; + + out_first_scanline = vertical_contributors->n0; + out_last_scanline = vertical_contributors->n1; + + STBIR_ASSERT(out_last_scanline - out_first_scanline + 1 <= stbir_info->ring_buffer_num_entries); + + if ( ( out_last_scanline >= out_first_scanline ) && ( ( ( out_first_scanline >= start_output_y ) && ( out_first_scanline < end_output_y ) ) || ( ( out_last_scanline >= start_output_y ) && ( out_last_scanline < end_output_y ) ) ) ) + { + float const * vc = vertical_coefficients; + + // keep track of the range actually seen for the next resize + last_input_y = y; + if ( ( on_first_input_y ) && ( y > start_input_y ) ) + split_info->start_input_y = y; + on_first_input_y = 0; + + // clip the region + if ( out_first_scanline < start_output_y ) + { + vc += start_output_y - out_first_scanline; + out_first_scanline = start_output_y; + } + + if ( out_last_scanline >= end_output_y ) + out_last_scanline = end_output_y - 1; + + // if very first scanline, init the index + if (split_info->ring_buffer_begin_index < 0) + split_info->ring_buffer_begin_index = out_first_scanline - start_output_y; + + STBIR_ASSERT( 
split_info->ring_buffer_begin_index <= out_first_scanline ); + + // Decode the nth scanline from the source image into the decode buffer. + stbir__decode_scanline( stbir_info, y, split_info->decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); + + // When horizontal first, we resample horizontally into the vertical buffer before we scatter it out + if ( !stbir_info->vertical_first ) + stbir__resample_horizontal_gather( stbir_info, split_info->vertical_buffer, split_info->decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO ); + + // Now it's sitting in the buffer ready to be distributed into the ring buffers. + + // evict from the ringbuffer, if we need are full + if ( ( ( split_info->ring_buffer_last_scanline - split_info->ring_buffer_first_scanline + 1 ) == stbir_info->ring_buffer_num_entries ) && + ( out_last_scanline > split_info->ring_buffer_last_scanline ) ) + handle_scanline_for_scatter( stbir_info, split_info ); + + // Now the horizontal buffer is ready to write to all ring buffer rows, so do it. 
+ stbir__resample_vertical_scatter(stbir_info, split_info, out_first_scanline, out_last_scanline, vc, (float*)scanline_scatter_buffer, (float*)scanline_scatter_buffer_end ); + + // update the end of the buffer + if ( out_last_scanline > split_info->ring_buffer_last_scanline ) + split_info->ring_buffer_last_scanline = out_last_scanline; + } + ++vertical_contributors; + vertical_coefficients += stbir_info->vertical.coefficient_width; + } + + // now evict the scanlines that are left over in the ring buffer + while ( split_info->ring_buffer_first_scanline < end_output_y ) + handle_scanline_for_scatter(stbir_info, split_info); + + // update the end_input_y if we do multiple resizes with the same data + ++last_input_y; + for( y = 0 ; y < split_count; y++ ) + if ( split_info[y].end_input_y > last_input_y ) + split_info[y].end_input_y = last_input_y; +} + + +static stbir__kernel_callback * stbir__builtin_kernels[] = { 0, stbir__filter_trapezoid, stbir__filter_triangle, stbir__filter_cubic, stbir__filter_catmullrom, stbir__filter_mitchell, stbir__filter_point }; +static stbir__support_callback * stbir__builtin_supports[] = { 0, stbir__support_trapezoid, stbir__support_one, stbir__support_two, stbir__support_two, stbir__support_two, stbir__support_zeropoint5 }; + +static void stbir__set_sampler(stbir__sampler * samp, stbir_filter filter, stbir__kernel_callback * kernel, stbir__support_callback * support, stbir_edge edge, stbir__scale_info * scale_info, int always_gather, void * user_data ) +{ + // set filter + if (filter == 0) + { + filter = STBIR_DEFAULT_FILTER_DOWNSAMPLE; // default to downsample + if (scale_info->scale >= ( 1.0f - stbir__small_float ) ) + { + if ( (scale_info->scale <= ( 1.0f + stbir__small_float ) ) && ( STBIR_CEILF(scale_info->pixel_shift) == scale_info->pixel_shift ) ) + filter = STBIR_FILTER_POINT_SAMPLE; + else + filter = STBIR_DEFAULT_FILTER_UPSAMPLE; + } + } + samp->filter_enum = filter; + + STBIR_ASSERT(samp->filter_enum != 0); + 
STBIR_ASSERT((unsigned)samp->filter_enum < STBIR_FILTER_OTHER); + samp->filter_kernel = stbir__builtin_kernels[ filter ]; + samp->filter_support = stbir__builtin_supports[ filter ]; + + if ( kernel && support ) + { + samp->filter_kernel = kernel; + samp->filter_support = support; + samp->filter_enum = STBIR_FILTER_OTHER; + } + + samp->edge = edge; + samp->filter_pixel_width = stbir__get_filter_pixel_width (samp->filter_support, scale_info->scale, user_data ); + // Gather is always better, but in extreme downsamples, you have to most or all of the data in memory + // For horizontal, we always have all the pixels, so we always use gather here (always_gather==1). + // For vertical, we use gather if scaling up (which means we will have samp->filter_pixel_width + // scanlines in memory at once). + samp->is_gather = 0; + if ( scale_info->scale >= ( 1.0f - stbir__small_float ) ) + samp->is_gather = 1; + else if ( ( always_gather ) || ( samp->filter_pixel_width <= STBIR_FORCE_GATHER_FILTER_SCANLINES_AMOUNT ) ) + samp->is_gather = 2; + + // pre calculate stuff based on the above + samp->coefficient_width = stbir__get_coefficient_width(samp, samp->is_gather, user_data); + + // filter_pixel_width is the conservative size in pixels of input that affect an output pixel. + // In rare cases (only with 2 pix to 1 pix with the default filters), it's possible that the + // filter will extend before or after the scanline beyond just one extra entire copy of the + // scanline (we would hit the edge twice). We don't let you do that, so we clamp the total + // width to 3x the total of input pixel (once for the scanline, once for the left side + // overhang, and once for the right side). We only do this for edge mode, since the other + // modes can just re-edge clamp back in again. 
+ if ( edge == STBIR_EDGE_WRAP ) + if ( samp->filter_pixel_width > ( scale_info->input_full_size * 3 ) ) + samp->filter_pixel_width = scale_info->input_full_size * 3; + + // This is how much to expand buffers to account for filters seeking outside + // the image boundaries. + samp->filter_pixel_margin = samp->filter_pixel_width / 2; + + // filter_pixel_margin is the amount that this filter can overhang on just one side of either + // end of the scanline (left or the right). Since we only allow you to overhang 1 scanline's + // worth of pixels, we clamp this one side of overhang to the input scanline size. Again, + // this clamping only happens in rare cases with the default filters (2 pix to 1 pix). + if ( edge == STBIR_EDGE_WRAP ) + if ( samp->filter_pixel_margin > scale_info->input_full_size ) + samp->filter_pixel_margin = scale_info->input_full_size; + + samp->num_contributors = stbir__get_contributors(samp, samp->is_gather); + + samp->contributors_size = samp->num_contributors * sizeof(stbir__contributors); + samp->coefficients_size = samp->num_contributors * samp->coefficient_width * sizeof(float) + sizeof(float)*STBIR_INPUT_CALLBACK_PADDING; // extra sizeof(float) is padding + + samp->gather_prescatter_contributors = 0; + samp->gather_prescatter_coefficients = 0; + if ( samp->is_gather == 0 ) + { + samp->gather_prescatter_coefficient_width = samp->filter_pixel_width; + samp->gather_prescatter_num_contributors = stbir__get_contributors(samp, 2); + samp->gather_prescatter_contributors_size = samp->gather_prescatter_num_contributors * sizeof(stbir__contributors); + samp->gather_prescatter_coefficients_size = samp->gather_prescatter_num_contributors * samp->gather_prescatter_coefficient_width * sizeof(float); + } +} + +static void stbir__get_conservative_extents( stbir__sampler * samp, stbir__contributors * range, void * user_data ) +{ + float scale = samp->scale_info.scale; + float out_shift = samp->scale_info.pixel_shift; + stbir__support_callback * support = 
samp->filter_support; + int input_full_size = samp->scale_info.input_full_size; + stbir_edge edge = samp->edge; + float inv_scale = samp->scale_info.inv_scale; + + STBIR_ASSERT( samp->is_gather != 0 ); + + if ( samp->is_gather == 1 ) + { + int in_first_pixel, in_last_pixel; + float out_filter_radius = support(inv_scale, user_data) * scale; + + stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, 0.5, out_filter_radius, inv_scale, out_shift, input_full_size, edge ); + range->n0 = in_first_pixel; + stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, ( (float)(samp->scale_info.output_sub_size-1) ) + 0.5f, out_filter_radius, inv_scale, out_shift, input_full_size, edge ); + range->n1 = in_last_pixel; + } + else if ( samp->is_gather == 2 ) // downsample gather, refine + { + float in_pixels_radius = support(scale, user_data) * inv_scale; + int filter_pixel_margin = samp->filter_pixel_margin; + int output_sub_size = samp->scale_info.output_sub_size; + int input_end; + int n; + int in_first_pixel, in_last_pixel; + + // get a conservative area of the input range + stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, 0, 0, inv_scale, out_shift, input_full_size, edge ); + range->n0 = in_first_pixel; + stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, (float)output_sub_size, 0, inv_scale, out_shift, input_full_size, edge ); + range->n1 = in_last_pixel; + + // now go through the margin to the start of area to find bottom + n = range->n0 + 1; + input_end = -filter_pixel_margin; + while( n >= input_end ) + { + int out_first_pixel, out_last_pixel; + stbir__calculate_out_pixel_range( &out_first_pixel, &out_last_pixel, ((float)n)+0.5f, in_pixels_radius, scale, out_shift, output_sub_size ); + if ( out_first_pixel > out_last_pixel ) + break; + + if ( ( out_first_pixel < output_sub_size ) || ( out_last_pixel >= 0 ) ) + range->n0 = n; + --n; + } + + // now go through the end of the area through the margin to find top + n = range->n1 
- 1; + input_end = n + 1 + filter_pixel_margin; + while( n <= input_end ) + { + int out_first_pixel, out_last_pixel; + stbir__calculate_out_pixel_range( &out_first_pixel, &out_last_pixel, ((float)n)+0.5f, in_pixels_radius, scale, out_shift, output_sub_size ); + if ( out_first_pixel > out_last_pixel ) + break; + if ( ( out_first_pixel < output_sub_size ) || ( out_last_pixel >= 0 ) ) + range->n1 = n; + ++n; + } + } + + if ( samp->edge == STBIR_EDGE_WRAP ) + { + // if we are wrapping, and we are very close to the image size (so the edges might merge), just use the scanline up to the edge + if ( ( range->n0 > 0 ) && ( range->n1 >= input_full_size ) ) + { + int marg = range->n1 - input_full_size + 1; + if ( ( marg + STBIR__MERGE_RUNS_PIXEL_THRESHOLD ) >= range->n0 ) + range->n0 = 0; + } + if ( ( range->n0 < 0 ) && ( range->n1 < (input_full_size-1) ) ) + { + int marg = -range->n0; + if ( ( input_full_size - marg - STBIR__MERGE_RUNS_PIXEL_THRESHOLD - 1 ) <= range->n1 ) + range->n1 = input_full_size - 1; + } + } + else + { + // for non-edge-wrap modes, we never read over the edge, so clamp + if ( range->n0 < 0 ) + range->n0 = 0; + if ( range->n1 >= input_full_size ) + range->n1 = input_full_size - 1; + } +} + +static void stbir__get_split_info( stbir__per_split_info* split_info, int splits, int output_height, int vertical_pixel_margin, int input_full_height ) +{ + int i, cur; + int left = output_height; + + cur = 0; + for( i = 0 ; i < splits ; i++ ) + { + int each; + split_info[i].start_output_y = cur; + each = left / ( splits - i ); + split_info[i].end_output_y = cur + each; + cur += each; + left -= each; + + // scatter range (updated to minimum as you run it) + split_info[i].start_input_y = -vertical_pixel_margin; + split_info[i].end_input_y = input_full_height + vertical_pixel_margin; + } +} + +static void stbir__free_internal_mem( stbir__info *info ) +{ + #define STBIR__FREE_AND_CLEAR( ptr ) { if ( ptr ) { void * p = (ptr); (ptr) = 0; STBIR_FREE( p, info->user_data); } 
} + + if ( info ) + { + #ifndef STBIR__SEPARATE_ALLOCATIONS + STBIR__FREE_AND_CLEAR( info->alloced_mem ); + #else + int i,j; + + if ( ( info->vertical.gather_prescatter_contributors ) && ( (void*)info->vertical.gather_prescatter_contributors != (void*)info->split_info[0].decode_buffer ) ) + { + STBIR__FREE_AND_CLEAR( info->vertical.gather_prescatter_coefficients ); + STBIR__FREE_AND_CLEAR( info->vertical.gather_prescatter_contributors ); + } + for( i = 0 ; i < info->splits ; i++ ) + { + for( j = 0 ; j < info->alloc_ring_buffer_num_entries ; j++ ) + { + #ifdef STBIR_SIMD8 + if ( info->effective_channels == 3 ) + --info->split_info[i].ring_buffers[j]; // avx in 3 channel mode needs one float at the start of the buffer + #endif + STBIR__FREE_AND_CLEAR( info->split_info[i].ring_buffers[j] ); + } + + #ifdef STBIR_SIMD8 + if ( info->effective_channels == 3 ) + --info->split_info[i].decode_buffer; // avx in 3 channel mode needs one float at the start of the buffer + #endif + STBIR__FREE_AND_CLEAR( info->split_info[i].decode_buffer ); + STBIR__FREE_AND_CLEAR( info->split_info[i].ring_buffers ); + STBIR__FREE_AND_CLEAR( info->split_info[i].vertical_buffer ); + } + STBIR__FREE_AND_CLEAR( info->split_info ); + if ( info->vertical.coefficients != info->horizontal.coefficients ) + { + STBIR__FREE_AND_CLEAR( info->vertical.coefficients ); + STBIR__FREE_AND_CLEAR( info->vertical.contributors ); + } + STBIR__FREE_AND_CLEAR( info->horizontal.coefficients ); + STBIR__FREE_AND_CLEAR( info->horizontal.contributors ); + STBIR__FREE_AND_CLEAR( info->alloced_mem ); + STBIR_FREE( info, info->user_data ); + #endif + } + + #undef STBIR__FREE_AND_CLEAR +} + +static int stbir__get_max_split( int splits, int height ) +{ + int i; + int max = 0; + + for( i = 0 ; i < splits ; i++ ) + { + int each = height / ( splits - i ); + if ( each > max ) + max = each; + height -= each; + } + return max; +} + +static stbir__horizontal_gather_channels_func ** stbir__horizontal_gather_n_coeffs_funcs[8] = +{ + 
0, stbir__horizontal_gather_1_channels_with_n_coeffs_funcs, stbir__horizontal_gather_2_channels_with_n_coeffs_funcs, stbir__horizontal_gather_3_channels_with_n_coeffs_funcs, stbir__horizontal_gather_4_channels_with_n_coeffs_funcs, 0,0, stbir__horizontal_gather_7_channels_with_n_coeffs_funcs +}; + +static stbir__horizontal_gather_channels_func ** stbir__horizontal_gather_channels_funcs[8] = +{ + 0, stbir__horizontal_gather_1_channels_funcs, stbir__horizontal_gather_2_channels_funcs, stbir__horizontal_gather_3_channels_funcs, stbir__horizontal_gather_4_channels_funcs, 0,0, stbir__horizontal_gather_7_channels_funcs +}; + +// there are six resize classifications: 0 == vertical scatter, 1 == vertical gather < 1x scale, 2 == vertical gather 1x-2x scale, 4 == vertical gather < 3x scale, 4 == vertical gather > 3x scale, 5 == <=4 pixel height, 6 == <=4 pixel wide column +#define STBIR_RESIZE_CLASSIFICATIONS 8 + +static float stbir__compute_weights[5][STBIR_RESIZE_CLASSIFICATIONS][4]= // 5 = 0=1chan, 1=2chan, 2=3chan, 3=4chan, 4=7chan +{ + { + { 1.00000f, 1.00000f, 0.31250f, 1.00000f }, + { 0.56250f, 0.59375f, 0.00000f, 0.96875f }, + { 1.00000f, 0.06250f, 0.00000f, 1.00000f }, + { 0.00000f, 0.09375f, 1.00000f, 1.00000f }, + { 1.00000f, 1.00000f, 1.00000f, 1.00000f }, + { 0.03125f, 0.12500f, 1.00000f, 1.00000f }, + { 0.06250f, 0.12500f, 0.00000f, 1.00000f }, + { 0.00000f, 1.00000f, 0.00000f, 0.03125f }, + }, { + { 0.00000f, 0.84375f, 0.00000f, 0.03125f }, + { 0.09375f, 0.93750f, 0.00000f, 0.78125f }, + { 0.87500f, 0.21875f, 0.00000f, 0.96875f }, + { 0.09375f, 0.09375f, 1.00000f, 1.00000f }, + { 1.00000f, 1.00000f, 1.00000f, 1.00000f }, + { 0.03125f, 0.12500f, 1.00000f, 1.00000f }, + { 0.06250f, 0.12500f, 0.00000f, 1.00000f }, + { 0.00000f, 1.00000f, 0.00000f, 0.53125f }, + }, { + { 0.00000f, 0.53125f, 0.00000f, 0.03125f }, + { 0.06250f, 0.96875f, 0.00000f, 0.53125f }, + { 0.87500f, 0.18750f, 0.00000f, 0.93750f }, + { 0.00000f, 0.09375f, 1.00000f, 1.00000f }, + { 1.00000f, 
1.00000f, 1.00000f, 1.00000f }, + { 0.03125f, 0.12500f, 1.00000f, 1.00000f }, + { 0.06250f, 0.12500f, 0.00000f, 1.00000f }, + { 0.00000f, 1.00000f, 0.00000f, 0.56250f }, + }, { + { 0.00000f, 0.50000f, 0.00000f, 0.71875f }, + { 0.06250f, 0.84375f, 0.00000f, 0.87500f }, + { 1.00000f, 0.50000f, 0.50000f, 0.96875f }, + { 1.00000f, 0.09375f, 0.31250f, 0.50000f }, + { 1.00000f, 1.00000f, 1.00000f, 1.00000f }, + { 1.00000f, 0.03125f, 0.03125f, 0.53125f }, + { 0.18750f, 0.12500f, 0.00000f, 1.00000f }, + { 0.00000f, 1.00000f, 0.03125f, 0.18750f }, + }, { + { 0.00000f, 0.59375f, 0.00000f, 0.96875f }, + { 0.06250f, 0.81250f, 0.06250f, 0.59375f }, + { 0.75000f, 0.43750f, 0.12500f, 0.96875f }, + { 0.87500f, 0.06250f, 0.18750f, 0.43750f }, + { 1.00000f, 1.00000f, 1.00000f, 1.00000f }, + { 0.15625f, 0.12500f, 1.00000f, 1.00000f }, + { 0.06250f, 0.12500f, 0.00000f, 1.00000f }, + { 0.00000f, 1.00000f, 0.03125f, 0.34375f }, + } +}; + +// structure that allow us to query and override info for training the costs +typedef struct STBIR__V_FIRST_INFO +{ + double v_cost, h_cost; + int control_v_first; // 0 = no control, 1 = force hori, 2 = force vert + int v_first; + int v_resize_classification; + int is_gather; +} STBIR__V_FIRST_INFO; + +#ifdef STBIR__V_FIRST_INFO_BUFFER +static STBIR__V_FIRST_INFO STBIR__V_FIRST_INFO_BUFFER = {0}; +#define STBIR__V_FIRST_INFO_POINTER &STBIR__V_FIRST_INFO_BUFFER +#else +#define STBIR__V_FIRST_INFO_POINTER 0 +#endif + +// Figure out whether to scale along the horizontal or vertical first. +// This only *super* important when you are scaling by a massively +// different amount in the vertical vs the horizontal (for example, if +// you are scaling by 2x in the width, and 0.5x in the height, then you +// want to do the vertical scale first, because it's around 3x faster +// in that order. +// +// In more normal circumstances, this makes a 20-40% differences, so +// it's good to get right, but not critical. 
The normal way that you +// decide which direction goes first is just figuring out which +// direction does more multiplies. But with modern CPUs with their +// fancy caches and SIMD and high IPC abilities, so there's just a lot +// more that goes into it. +// +// My handwavy sort of solution is to have an app that does a whole +// bunch of timing for both vertical and horizontal first modes, +// and then another app that can read lots of these timing files +// and try to search for the best weights to use. Dotimings.c +// is the app that does a bunch of timings, and vf_train.c is the +// app that solves for the best weights (and shows how well it +// does currently). + +static int stbir__should_do_vertical_first( float weights_table[STBIR_RESIZE_CLASSIFICATIONS][4], int horizontal_filter_pixel_width, float horizontal_scale, int horizontal_output_size, int vertical_filter_pixel_width, float vertical_scale, int vertical_output_size, int is_gather, STBIR__V_FIRST_INFO * info ) +{ + double v_cost, h_cost; + float * weights; + int vertical_first; + int v_classification; + + // categorize the resize into buckets + if ( ( vertical_output_size <= 4 ) || ( horizontal_output_size <= 4 ) ) + v_classification = ( vertical_output_size < horizontal_output_size ) ? 6 : 7; + else if ( vertical_scale <= 1.0f ) + v_classification = ( is_gather ) ? 
1 : 0; + else if ( vertical_scale <= 2.0f) + v_classification = 2; + else if ( vertical_scale <= 3.0f) + v_classification = 3; + else if ( vertical_scale <= 4.0f) + v_classification = 5; + else + v_classification = 6; + + // use the right weights + weights = weights_table[ v_classification ]; + + // this is the costs when you don't take into account modern CPUs with high ipc and simd and caches - wish we had a better estimate + h_cost = (float)horizontal_filter_pixel_width * weights[0] + horizontal_scale * (float)vertical_filter_pixel_width * weights[1]; + v_cost = (float)vertical_filter_pixel_width * weights[2] + vertical_scale * (float)horizontal_filter_pixel_width * weights[3]; + + // use computation estimate to decide vertical first or not + vertical_first = ( v_cost <= h_cost ) ? 1 : 0; + + // save these, if requested + if ( info ) + { + info->h_cost = h_cost; + info->v_cost = v_cost; + info->v_resize_classification = v_classification; + info->v_first = vertical_first; + info->is_gather = is_gather; + } + + // and this allows us to override everything for testing (see dotiming.c) + if ( ( info ) && ( info->control_v_first ) ) + vertical_first = ( info->control_v_first == 2 ) ? 
1 : 0; + + return vertical_first; +} + +// layout lookups - must match stbir_internal_pixel_layout +static unsigned char stbir__pixel_channels[] = { + 1,2,3,3,4, // 1ch, 2ch, rgb, bgr, 4ch + 4,4,4,4,2,2, // RGBA,BGRA,ARGB,ABGR,RA,AR + 4,4,4,4,2,2, // RGBA_PM,BGRA_PM,ARGB_PM,ABGR_PM,RA_PM,AR_PM +}; + +// the internal pixel layout enums are in a different order, so we can easily do range comparisons of types +// the public pixel layout is ordered in a way that if you cast num_channels (1-4) to the enum, you get something sensible +static stbir_internal_pixel_layout stbir__pixel_layout_convert_public_to_internal[] = { + STBIRI_BGR, STBIRI_1CHANNEL, STBIRI_2CHANNEL, STBIRI_RGB, STBIRI_RGBA, + STBIRI_4CHANNEL, STBIRI_BGRA, STBIRI_ARGB, STBIRI_ABGR, STBIRI_RA, STBIRI_AR, + STBIRI_RGBA_PM, STBIRI_BGRA_PM, STBIRI_ARGB_PM, STBIRI_ABGR_PM, STBIRI_RA_PM, STBIRI_AR_PM, +}; + +static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sampler * horizontal, stbir__sampler * vertical, stbir__contributors * conservative, stbir_pixel_layout input_pixel_layout_public, stbir_pixel_layout output_pixel_layout_public, int splits, int new_x, int new_y, int fast_alpha, void * user_data STBIR_ONLY_PROFILE_BUILD_GET_INFO ) +{ + static char stbir_channel_count_index[8]={ 9,0,1,2, 3,9,9,4 }; + + stbir__info * info = 0; + void * alloced = 0; + size_t alloced_total = 0; + int vertical_first; + size_t decode_buffer_size, ring_buffer_length_bytes, ring_buffer_size, vertical_buffer_size; + int alloc_ring_buffer_num_entries; + + int alpha_weighting_type = 0; // 0=none, 1=simple, 2=fancy + int conservative_split_output_size = stbir__get_max_split( splits, vertical->scale_info.output_sub_size ); + stbir_internal_pixel_layout input_pixel_layout = stbir__pixel_layout_convert_public_to_internal[ input_pixel_layout_public ]; + stbir_internal_pixel_layout output_pixel_layout = stbir__pixel_layout_convert_public_to_internal[ output_pixel_layout_public ]; + int channels = 
stbir__pixel_channels[ input_pixel_layout ]; + int effective_channels = channels; + + // first figure out what type of alpha weighting to use (if any) + if ( ( horizontal->filter_enum != STBIR_FILTER_POINT_SAMPLE ) || ( vertical->filter_enum != STBIR_FILTER_POINT_SAMPLE ) ) // no alpha weighting on point sampling + { + if ( ( input_pixel_layout >= STBIRI_RGBA ) && ( input_pixel_layout <= STBIRI_AR ) && ( output_pixel_layout >= STBIRI_RGBA ) && ( output_pixel_layout <= STBIRI_AR ) ) + { + if ( fast_alpha ) + { + alpha_weighting_type = 4; + } + else + { + static int fancy_alpha_effective_cnts[6] = { 7, 7, 7, 7, 3, 3 }; + alpha_weighting_type = 2; + effective_channels = fancy_alpha_effective_cnts[ input_pixel_layout - STBIRI_RGBA ]; + } + } + else if ( ( input_pixel_layout >= STBIRI_RGBA_PM ) && ( input_pixel_layout <= STBIRI_AR_PM ) && ( output_pixel_layout >= STBIRI_RGBA ) && ( output_pixel_layout <= STBIRI_AR ) ) + { + // input premult, output non-premult + alpha_weighting_type = 3; + } + else if ( ( input_pixel_layout >= STBIRI_RGBA ) && ( input_pixel_layout <= STBIRI_AR ) && ( output_pixel_layout >= STBIRI_RGBA_PM ) && ( output_pixel_layout <= STBIRI_AR_PM ) ) + { + // input non-premult, output premult + alpha_weighting_type = 1; + } + } + + // channel in and out count must match currently + if ( channels != stbir__pixel_channels[ output_pixel_layout ] ) + return 0; + + // get vertical first + vertical_first = stbir__should_do_vertical_first( stbir__compute_weights[ (int)stbir_channel_count_index[ effective_channels ] ], horizontal->filter_pixel_width, horizontal->scale_info.scale, horizontal->scale_info.output_sub_size, vertical->filter_pixel_width, vertical->scale_info.scale, vertical->scale_info.output_sub_size, vertical->is_gather, STBIR__V_FIRST_INFO_POINTER ); + + // sometimes read one float off in some of the unrolled loops (with a weight of zero coeff, so it doesn't have an effect) + // we use a few extra floats instead of just 1, so that input callback 
buffer can overlap with the decode buffer without + // the conversion routines overwriting the callback input data. + decode_buffer_size = ( conservative->n1 - conservative->n0 + 1 ) * effective_channels * sizeof(float) + sizeof(float)*STBIR_INPUT_CALLBACK_PADDING; // extra floats for input callback stagger + +#if defined( STBIR__SEPARATE_ALLOCATIONS ) && defined(STBIR_SIMD8) + if ( effective_channels == 3 ) + decode_buffer_size += sizeof(float); // avx in 3 channel mode needs one float at the start of the buffer (only with separate allocations) +#endif + + ring_buffer_length_bytes = (size_t)horizontal->scale_info.output_sub_size * (size_t)effective_channels * sizeof(float) + sizeof(float)*STBIR_INPUT_CALLBACK_PADDING; // extra floats for padding + + // if we do vertical first, the ring buffer holds a whole decoded line + if ( vertical_first ) + ring_buffer_length_bytes = ( decode_buffer_size + 15 ) & ~15; + + if ( ( ring_buffer_length_bytes & 4095 ) == 0 ) ring_buffer_length_bytes += 64*3; // avoid 4k alias + + // One extra entry because floating point precision problems sometimes cause an extra to be necessary. + alloc_ring_buffer_num_entries = vertical->filter_pixel_width + 1; + + // we never need more ring buffer entries than the scanlines we're outputting when in scatter mode + if ( ( !vertical->is_gather ) && ( alloc_ring_buffer_num_entries > conservative_split_output_size ) ) + alloc_ring_buffer_num_entries = conservative_split_output_size; + + ring_buffer_size = (size_t)alloc_ring_buffer_num_entries * (size_t)ring_buffer_length_bytes; + + // The vertical buffer is used differently, depending on whether we are scattering + // the vertical scanlines, or gathering them. + // If scattering, it's used at the temp buffer to accumulate each output. + // If gathering, it's just the output buffer. 
+ vertical_buffer_size = (size_t)horizontal->scale_info.output_sub_size * (size_t)effective_channels * sizeof(float) + sizeof(float); // extra float for padding + + // we make two passes through this loop, 1st to add everything up, 2nd to allocate and init + for(;;) + { + int i; + void * advance_mem = alloced; + int copy_horizontal = 0; + stbir__sampler * possibly_use_horizontal_for_pivot = 0; + +#ifdef STBIR__SEPARATE_ALLOCATIONS + #define STBIR__NEXT_PTR( ptr, size, ntype ) if ( alloced ) { void * p = STBIR_MALLOC( size, user_data); if ( p == 0 ) { stbir__free_internal_mem( info ); return 0; } (ptr) = (ntype*)p; } +#else + #define STBIR__NEXT_PTR( ptr, size, ntype ) advance_mem = (void*) ( ( ((size_t)advance_mem) + 15 ) & ~15 ); if ( alloced ) ptr = (ntype*)advance_mem; advance_mem = (char*)(((size_t)advance_mem) + (size)); +#endif + + STBIR__NEXT_PTR( info, sizeof( stbir__info ), stbir__info ); + + STBIR__NEXT_PTR( info->split_info, sizeof( stbir__per_split_info ) * splits, stbir__per_split_info ); + + if ( info ) + { + static stbir__alpha_weight_func * fancy_alpha_weights[6] = { stbir__fancy_alpha_weight_4ch, stbir__fancy_alpha_weight_4ch, stbir__fancy_alpha_weight_4ch, stbir__fancy_alpha_weight_4ch, stbir__fancy_alpha_weight_2ch, stbir__fancy_alpha_weight_2ch }; + static stbir__alpha_unweight_func * fancy_alpha_unweights[6] = { stbir__fancy_alpha_unweight_4ch, stbir__fancy_alpha_unweight_4ch, stbir__fancy_alpha_unweight_4ch, stbir__fancy_alpha_unweight_4ch, stbir__fancy_alpha_unweight_2ch, stbir__fancy_alpha_unweight_2ch }; + static stbir__alpha_weight_func * simple_alpha_weights[6] = { stbir__simple_alpha_weight_4ch, stbir__simple_alpha_weight_4ch, stbir__simple_alpha_weight_4ch, stbir__simple_alpha_weight_4ch, stbir__simple_alpha_weight_2ch, stbir__simple_alpha_weight_2ch }; + static stbir__alpha_unweight_func * simple_alpha_unweights[6] = { stbir__simple_alpha_unweight_4ch, stbir__simple_alpha_unweight_4ch, stbir__simple_alpha_unweight_4ch, 
stbir__simple_alpha_unweight_4ch, stbir__simple_alpha_unweight_2ch, stbir__simple_alpha_unweight_2ch }; + + // initialize info fields + info->alloced_mem = alloced; + info->alloced_total = alloced_total; + + info->channels = channels; + info->effective_channels = effective_channels; + + info->offset_x = new_x; + info->offset_y = new_y; + info->alloc_ring_buffer_num_entries = (int)alloc_ring_buffer_num_entries; + info->ring_buffer_num_entries = 0; + info->ring_buffer_length_bytes = (int)ring_buffer_length_bytes; + info->splits = splits; + info->vertical_first = vertical_first; + + info->input_pixel_layout_internal = input_pixel_layout; + info->output_pixel_layout_internal = output_pixel_layout; + + // setup alpha weight functions + info->alpha_weight = 0; + info->alpha_unweight = 0; + + // handle alpha weighting functions and overrides + if ( alpha_weighting_type == 2 ) + { + // high quality alpha multiplying on the way in, dividing on the way out + info->alpha_weight = fancy_alpha_weights[ input_pixel_layout - STBIRI_RGBA ]; + info->alpha_unweight = fancy_alpha_unweights[ output_pixel_layout - STBIRI_RGBA ]; + } + else if ( alpha_weighting_type == 4 ) + { + // fast alpha multiplying on the way in, dividing on the way out + info->alpha_weight = simple_alpha_weights[ input_pixel_layout - STBIRI_RGBA ]; + info->alpha_unweight = simple_alpha_unweights[ output_pixel_layout - STBIRI_RGBA ]; + } + else if ( alpha_weighting_type == 1 ) + { + // fast alpha on the way in, leave in premultiplied form on way out + info->alpha_weight = simple_alpha_weights[ input_pixel_layout - STBIRI_RGBA ]; + } + else if ( alpha_weighting_type == 3 ) + { + // incoming is premultiplied, fast alpha dividing on the way out - non-premultiplied output + info->alpha_unweight = simple_alpha_unweights[ output_pixel_layout - STBIRI_RGBA ]; + } + + // handle 3-chan color flipping, using the alpha weight path + if ( ( ( input_pixel_layout == STBIRI_RGB ) && ( output_pixel_layout == STBIRI_BGR ) ) || + ( 
( input_pixel_layout == STBIRI_BGR ) && ( output_pixel_layout == STBIRI_RGB ) ) ) + { + // do the flipping on the smaller of the two ends + if ( horizontal->scale_info.scale < 1.0f ) + info->alpha_unweight = stbir__simple_flip_3ch; + else + info->alpha_weight = stbir__simple_flip_3ch; + } + + } + + // get all the per-split buffers + for( i = 0 ; i < splits ; i++ ) + { + STBIR__NEXT_PTR( info->split_info[i].decode_buffer, decode_buffer_size, float ); + +#ifdef STBIR__SEPARATE_ALLOCATIONS + + #ifdef STBIR_SIMD8 + if ( ( info ) && ( effective_channels == 3 ) ) + ++info->split_info[i].decode_buffer; // avx in 3 channel mode needs one float at the start of the buffer + #endif + + STBIR__NEXT_PTR( info->split_info[i].ring_buffers, alloc_ring_buffer_num_entries * sizeof(float*), float* ); + { + int j; + for( j = 0 ; j < alloc_ring_buffer_num_entries ; j++ ) + { + STBIR__NEXT_PTR( info->split_info[i].ring_buffers[j], ring_buffer_length_bytes, float ); + #ifdef STBIR_SIMD8 + if ( ( info ) && ( effective_channels == 3 ) ) + ++info->split_info[i].ring_buffers[j]; // avx in 3 channel mode needs one float at the start of the buffer + #endif + } + } +#else + STBIR__NEXT_PTR( info->split_info[i].ring_buffer, ring_buffer_size, float ); +#endif + STBIR__NEXT_PTR( info->split_info[i].vertical_buffer, vertical_buffer_size, float ); + } + + // alloc memory for to-be-pivoted coeffs (if necessary) + if ( vertical->is_gather == 0 ) + { + size_t both; + size_t temp_mem_amt; + + // when in vertical scatter mode, we first build the coefficients in gather mode, and then pivot after, + // that means we need two buffers, so we try to use the decode buffer and ring buffer for this. if that + // is too small, we just allocate extra memory to use as this temp. 
+ + both = (size_t)vertical->gather_prescatter_contributors_size + (size_t)vertical->gather_prescatter_coefficients_size; + +#ifdef STBIR__SEPARATE_ALLOCATIONS + temp_mem_amt = decode_buffer_size; + + #ifdef STBIR_SIMD8 + if ( effective_channels == 3 ) + --temp_mem_amt; // avx in 3 channel mode needs one float at the start of the buffer + #endif +#else + temp_mem_amt = (size_t)( decode_buffer_size + ring_buffer_size + vertical_buffer_size ) * (size_t)splits; +#endif + if ( temp_mem_amt >= both ) + { + if ( info ) + { + vertical->gather_prescatter_contributors = (stbir__contributors*)info->split_info[0].decode_buffer; + vertical->gather_prescatter_coefficients = (float*) ( ( (char*)info->split_info[0].decode_buffer ) + vertical->gather_prescatter_contributors_size ); + } + } + else + { + // ring+decode memory is too small, so allocate temp memory + STBIR__NEXT_PTR( vertical->gather_prescatter_contributors, vertical->gather_prescatter_contributors_size, stbir__contributors ); + STBIR__NEXT_PTR( vertical->gather_prescatter_coefficients, vertical->gather_prescatter_coefficients_size, float ); + } + } + + STBIR__NEXT_PTR( horizontal->contributors, horizontal->contributors_size, stbir__contributors ); + STBIR__NEXT_PTR( horizontal->coefficients, horizontal->coefficients_size, float ); + + // are the two filters identical?? 
(happens a lot with mipmap generation) + if ( ( horizontal->filter_kernel == vertical->filter_kernel ) && ( horizontal->filter_support == vertical->filter_support ) && ( horizontal->edge == vertical->edge ) && ( horizontal->scale_info.output_sub_size == vertical->scale_info.output_sub_size ) ) + { + float diff_scale = horizontal->scale_info.scale - vertical->scale_info.scale; + float diff_shift = horizontal->scale_info.pixel_shift - vertical->scale_info.pixel_shift; + if ( diff_scale < 0.0f ) diff_scale = -diff_scale; + if ( diff_shift < 0.0f ) diff_shift = -diff_shift; + if ( ( diff_scale <= stbir__small_float ) && ( diff_shift <= stbir__small_float ) ) + { + if ( horizontal->is_gather == vertical->is_gather ) + { + copy_horizontal = 1; + goto no_vert_alloc; + } + // everything matches, but vertical is scatter, horizontal is gather, use horizontal coeffs for vertical pivot coeffs + possibly_use_horizontal_for_pivot = horizontal; + } + } + + STBIR__NEXT_PTR( vertical->contributors, vertical->contributors_size, stbir__contributors ); + STBIR__NEXT_PTR( vertical->coefficients, vertical->coefficients_size, float ); + + no_vert_alloc: + + if ( info ) + { + STBIR_PROFILE_BUILD_START( horizontal ); + + stbir__calculate_filters( horizontal, 0, user_data STBIR_ONLY_PROFILE_BUILD_SET_INFO ); + + // setup the horizontal gather functions + // start with defaulting to the n_coeffs functions (specialized on channels and remnant leftover) + info->horizontal_gather_channels = stbir__horizontal_gather_n_coeffs_funcs[ effective_channels ][ horizontal->extent_info.widest & 3 ]; + // but if the number of coeffs <= 12, use another set of special cases. 
<=12 coeffs is any enlarging resize, or shrinking resize down to about 1/3 size + if ( horizontal->extent_info.widest <= 12 ) + info->horizontal_gather_channels = stbir__horizontal_gather_channels_funcs[ effective_channels ][ horizontal->extent_info.widest - 1 ]; + + info->scanline_extents.conservative.n0 = conservative->n0; + info->scanline_extents.conservative.n1 = conservative->n1; + + // get exact extents + stbir__get_extents( horizontal, &info->scanline_extents ); + + // pack the horizontal coeffs + horizontal->coefficient_width = stbir__pack_coefficients(horizontal->num_contributors, horizontal->contributors, horizontal->coefficients, horizontal->coefficient_width, horizontal->extent_info.widest, info->scanline_extents.conservative.n0, info->scanline_extents.conservative.n1 ); + + STBIR_MEMCPY( &info->horizontal, horizontal, sizeof( stbir__sampler ) ); + + STBIR_PROFILE_BUILD_END( horizontal ); + + if ( copy_horizontal ) + { + STBIR_MEMCPY( &info->vertical, horizontal, sizeof( stbir__sampler ) ); + } + else + { + STBIR_PROFILE_BUILD_START( vertical ); + + stbir__calculate_filters( vertical, possibly_use_horizontal_for_pivot, user_data STBIR_ONLY_PROFILE_BUILD_SET_INFO ); + STBIR_MEMCPY( &info->vertical, vertical, sizeof( stbir__sampler ) ); + + STBIR_PROFILE_BUILD_END( vertical ); + } + + // setup the vertical split ranges + stbir__get_split_info( info->split_info, info->splits, info->vertical.scale_info.output_sub_size, info->vertical.filter_pixel_margin, info->vertical.scale_info.input_full_size ); + + // now we know precisely how many entries we need + info->ring_buffer_num_entries = info->vertical.extent_info.widest; + + // we never need more ring buffer entries than the scanlines we're outputting + if ( ( !info->vertical.is_gather ) && ( info->ring_buffer_num_entries > conservative_split_output_size ) ) + info->ring_buffer_num_entries = conservative_split_output_size; + STBIR_ASSERT( info->ring_buffer_num_entries <= info->alloc_ring_buffer_num_entries ); 
+ } + #undef STBIR__NEXT_PTR + + + // is this the first time through loop? + if ( info == 0 ) + { + alloced_total = ( 15 + (size_t)advance_mem ); + alloced = STBIR_MALLOC( alloced_total, user_data ); + if ( alloced == 0 ) + return 0; + } + else + return info; // success + } +} + +static int stbir__perform_resize( stbir__info const * info, int split_start, int split_count ) +{ + stbir__per_split_info * split_info = info->split_info + split_start; + + STBIR_PROFILE_CLEAR_EXTRAS(); + + STBIR_PROFILE_FIRST_START( looping ); + if (info->vertical.is_gather) + stbir__vertical_gather_loop( info, split_info, split_count ); + else + stbir__vertical_scatter_loop( info, split_info, split_count ); + STBIR_PROFILE_END( looping ); + + return 1; +} + +static void stbir__update_info_from_resize( stbir__info * info, STBIR_RESIZE * resize ) +{ + static stbir__decode_pixels_func * decode_simple[STBIR_TYPE_HALF_FLOAT-STBIR_TYPE_UINT8_SRGB+1]= + { + /* 1ch-4ch */ stbir__decode_uint8_srgb, stbir__decode_uint8_srgb, 0, stbir__decode_float_linear, stbir__decode_half_float_linear, + }; + + static stbir__decode_pixels_func * decode_alphas[STBIRI_AR-STBIRI_RGBA+1][STBIR_TYPE_HALF_FLOAT-STBIR_TYPE_UINT8_SRGB+1]= + { + { /* RGBA */ stbir__decode_uint8_srgb4_linearalpha, stbir__decode_uint8_srgb, 0, stbir__decode_float_linear, stbir__decode_half_float_linear }, + { /* BGRA */ stbir__decode_uint8_srgb4_linearalpha_BGRA, stbir__decode_uint8_srgb_BGRA, 0, stbir__decode_float_linear_BGRA, stbir__decode_half_float_linear_BGRA }, + { /* ARGB */ stbir__decode_uint8_srgb4_linearalpha_ARGB, stbir__decode_uint8_srgb_ARGB, 0, stbir__decode_float_linear_ARGB, stbir__decode_half_float_linear_ARGB }, + { /* ABGR */ stbir__decode_uint8_srgb4_linearalpha_ABGR, stbir__decode_uint8_srgb_ABGR, 0, stbir__decode_float_linear_ABGR, stbir__decode_half_float_linear_ABGR }, + { /* RA */ stbir__decode_uint8_srgb2_linearalpha, stbir__decode_uint8_srgb, 0, stbir__decode_float_linear, stbir__decode_half_float_linear }, + { 
/* AR */ stbir__decode_uint8_srgb2_linearalpha_AR, stbir__decode_uint8_srgb_AR, 0, stbir__decode_float_linear_AR, stbir__decode_half_float_linear_AR }, + }; + + static stbir__decode_pixels_func * decode_simple_scaled_or_not[2][2]= + { + { stbir__decode_uint8_linear_scaled, stbir__decode_uint8_linear }, { stbir__decode_uint16_linear_scaled, stbir__decode_uint16_linear }, + }; + + static stbir__decode_pixels_func * decode_alphas_scaled_or_not[STBIRI_AR-STBIRI_RGBA+1][2][2]= + { + { /* RGBA */ { stbir__decode_uint8_linear_scaled, stbir__decode_uint8_linear }, { stbir__decode_uint16_linear_scaled, stbir__decode_uint16_linear } }, + { /* BGRA */ { stbir__decode_uint8_linear_scaled_BGRA, stbir__decode_uint8_linear_BGRA }, { stbir__decode_uint16_linear_scaled_BGRA, stbir__decode_uint16_linear_BGRA } }, + { /* ARGB */ { stbir__decode_uint8_linear_scaled_ARGB, stbir__decode_uint8_linear_ARGB }, { stbir__decode_uint16_linear_scaled_ARGB, stbir__decode_uint16_linear_ARGB } }, + { /* ABGR */ { stbir__decode_uint8_linear_scaled_ABGR, stbir__decode_uint8_linear_ABGR }, { stbir__decode_uint16_linear_scaled_ABGR, stbir__decode_uint16_linear_ABGR } }, + { /* RA */ { stbir__decode_uint8_linear_scaled, stbir__decode_uint8_linear }, { stbir__decode_uint16_linear_scaled, stbir__decode_uint16_linear } }, + { /* AR */ { stbir__decode_uint8_linear_scaled_AR, stbir__decode_uint8_linear_AR }, { stbir__decode_uint16_linear_scaled_AR, stbir__decode_uint16_linear_AR } } + }; + + static stbir__encode_pixels_func * encode_simple[STBIR_TYPE_HALF_FLOAT-STBIR_TYPE_UINT8_SRGB+1]= + { + /* 1ch-4ch */ stbir__encode_uint8_srgb, stbir__encode_uint8_srgb, 0, stbir__encode_float_linear, stbir__encode_half_float_linear, + }; + + static stbir__encode_pixels_func * encode_alphas[STBIRI_AR-STBIRI_RGBA+1][STBIR_TYPE_HALF_FLOAT-STBIR_TYPE_UINT8_SRGB+1]= + { + { /* RGBA */ stbir__encode_uint8_srgb4_linearalpha, stbir__encode_uint8_srgb, 0, stbir__encode_float_linear, stbir__encode_half_float_linear }, + { /* 
BGRA */ stbir__encode_uint8_srgb4_linearalpha_BGRA, stbir__encode_uint8_srgb_BGRA, 0, stbir__encode_float_linear_BGRA, stbir__encode_half_float_linear_BGRA }, + { /* ARGB */ stbir__encode_uint8_srgb4_linearalpha_ARGB, stbir__encode_uint8_srgb_ARGB, 0, stbir__encode_float_linear_ARGB, stbir__encode_half_float_linear_ARGB }, + { /* ABGR */ stbir__encode_uint8_srgb4_linearalpha_ABGR, stbir__encode_uint8_srgb_ABGR, 0, stbir__encode_float_linear_ABGR, stbir__encode_half_float_linear_ABGR }, + { /* RA */ stbir__encode_uint8_srgb2_linearalpha, stbir__encode_uint8_srgb, 0, stbir__encode_float_linear, stbir__encode_half_float_linear }, + { /* AR */ stbir__encode_uint8_srgb2_linearalpha_AR, stbir__encode_uint8_srgb_AR, 0, stbir__encode_float_linear_AR, stbir__encode_half_float_linear_AR } + }; + + static stbir__encode_pixels_func * encode_simple_scaled_or_not[2][2]= + { + { stbir__encode_uint8_linear_scaled, stbir__encode_uint8_linear }, { stbir__encode_uint16_linear_scaled, stbir__encode_uint16_linear }, + }; + + static stbir__encode_pixels_func * encode_alphas_scaled_or_not[STBIRI_AR-STBIRI_RGBA+1][2][2]= + { + { /* RGBA */ { stbir__encode_uint8_linear_scaled, stbir__encode_uint8_linear }, { stbir__encode_uint16_linear_scaled, stbir__encode_uint16_linear } }, + { /* BGRA */ { stbir__encode_uint8_linear_scaled_BGRA, stbir__encode_uint8_linear_BGRA }, { stbir__encode_uint16_linear_scaled_BGRA, stbir__encode_uint16_linear_BGRA } }, + { /* ARGB */ { stbir__encode_uint8_linear_scaled_ARGB, stbir__encode_uint8_linear_ARGB }, { stbir__encode_uint16_linear_scaled_ARGB, stbir__encode_uint16_linear_ARGB } }, + { /* ABGR */ { stbir__encode_uint8_linear_scaled_ABGR, stbir__encode_uint8_linear_ABGR }, { stbir__encode_uint16_linear_scaled_ABGR, stbir__encode_uint16_linear_ABGR } }, + { /* RA */ { stbir__encode_uint8_linear_scaled, stbir__encode_uint8_linear }, { stbir__encode_uint16_linear_scaled, stbir__encode_uint16_linear } }, + { /* AR */ { stbir__encode_uint8_linear_scaled_AR, 
stbir__encode_uint8_linear_AR }, { stbir__encode_uint16_linear_scaled_AR, stbir__encode_uint16_linear_AR } } + }; + + stbir__decode_pixels_func * decode_pixels = 0; + stbir__encode_pixels_func * encode_pixels = 0; + stbir_datatype input_type, output_type; + + input_type = resize->input_data_type; + output_type = resize->output_data_type; + info->input_data = resize->input_pixels; + info->input_stride_bytes = resize->input_stride_in_bytes; + info->output_stride_bytes = resize->output_stride_in_bytes; + + // if we're completely point sampling, then we can turn off SRGB + if ( ( info->horizontal.filter_enum == STBIR_FILTER_POINT_SAMPLE ) && ( info->vertical.filter_enum == STBIR_FILTER_POINT_SAMPLE ) ) + { + if ( ( ( input_type == STBIR_TYPE_UINT8_SRGB ) || ( input_type == STBIR_TYPE_UINT8_SRGB_ALPHA ) ) && + ( ( output_type == STBIR_TYPE_UINT8_SRGB ) || ( output_type == STBIR_TYPE_UINT8_SRGB_ALPHA ) ) ) + { + input_type = STBIR_TYPE_UINT8; + output_type = STBIR_TYPE_UINT8; + } + } + + // recalc the output and input strides + if ( info->input_stride_bytes == 0 ) + info->input_stride_bytes = info->channels * info->horizontal.scale_info.input_full_size * stbir__type_size[input_type]; + + if ( info->output_stride_bytes == 0 ) + info->output_stride_bytes = info->channels * info->horizontal.scale_info.output_sub_size * stbir__type_size[output_type]; + + // calc offset + info->output_data = ( (char*) resize->output_pixels ) + ( (size_t) info->offset_y * (size_t) resize->output_stride_in_bytes ) + ( info->offset_x * info->channels * stbir__type_size[output_type] ); + + info->in_pixels_cb = resize->input_cb; + info->user_data = resize->user_data; + info->out_pixels_cb = resize->output_cb; + + // setup the input format converters + if ( ( input_type == STBIR_TYPE_UINT8 ) || ( input_type == STBIR_TYPE_UINT16 ) ) + { + int non_scaled = 0; + + // check if we can run unscaled - 0-255.0/0-65535.0 instead of 0-1.0 (which is a tiny bit faster when doing linear 8->8 or 16->16) + if ( ( 
!info->alpha_weight ) && ( !info->alpha_unweight ) ) // don't short circuit when alpha weighting (get everything to 0-1.0 as usual) + if ( ( ( input_type == STBIR_TYPE_UINT8 ) && ( output_type == STBIR_TYPE_UINT8 ) ) || ( ( input_type == STBIR_TYPE_UINT16 ) && ( output_type == STBIR_TYPE_UINT16 ) ) ) + non_scaled = 1; + + if ( info->input_pixel_layout_internal <= STBIRI_4CHANNEL ) + decode_pixels = decode_simple_scaled_or_not[ input_type == STBIR_TYPE_UINT16 ][ non_scaled ]; + else + decode_pixels = decode_alphas_scaled_or_not[ ( info->input_pixel_layout_internal - STBIRI_RGBA ) % ( STBIRI_AR-STBIRI_RGBA+1 ) ][ input_type == STBIR_TYPE_UINT16 ][ non_scaled ]; + } + else + { + if ( info->input_pixel_layout_internal <= STBIRI_4CHANNEL ) + decode_pixels = decode_simple[ input_type - STBIR_TYPE_UINT8_SRGB ]; + else + decode_pixels = decode_alphas[ ( info->input_pixel_layout_internal - STBIRI_RGBA ) % ( STBIRI_AR-STBIRI_RGBA+1 ) ][ input_type - STBIR_TYPE_UINT8_SRGB ]; + } + + // setup the output format converters + if ( ( output_type == STBIR_TYPE_UINT8 ) || ( output_type == STBIR_TYPE_UINT16 ) ) + { + int non_scaled = 0; + + // check if we can run unscaled - 0-255.0/0-65535.0 instead of 0-1.0 (which is a tiny bit faster when doing linear 8->8 or 16->16) + if ( ( !info->alpha_weight ) && ( !info->alpha_unweight ) ) // don't short circuit when alpha weighting (get everything to 0-1.0 as usual) + if ( ( ( input_type == STBIR_TYPE_UINT8 ) && ( output_type == STBIR_TYPE_UINT8 ) ) || ( ( input_type == STBIR_TYPE_UINT16 ) && ( output_type == STBIR_TYPE_UINT16 ) ) ) + non_scaled = 1; + + if ( info->output_pixel_layout_internal <= STBIRI_4CHANNEL ) + encode_pixels = encode_simple_scaled_or_not[ output_type == STBIR_TYPE_UINT16 ][ non_scaled ]; + else + encode_pixels = encode_alphas_scaled_or_not[ ( info->output_pixel_layout_internal - STBIRI_RGBA ) % ( STBIRI_AR-STBIRI_RGBA+1 ) ][ output_type == STBIR_TYPE_UINT16 ][ non_scaled ]; + } + else + { + if ( 
info->output_pixel_layout_internal <= STBIRI_4CHANNEL ) + encode_pixels = encode_simple[ output_type - STBIR_TYPE_UINT8_SRGB ]; + else + encode_pixels = encode_alphas[ ( info->output_pixel_layout_internal - STBIRI_RGBA ) % ( STBIRI_AR-STBIRI_RGBA+1 ) ][ output_type - STBIR_TYPE_UINT8_SRGB ]; + } + + info->input_type = input_type; + info->output_type = output_type; + info->decode_pixels = decode_pixels; + info->encode_pixels = encode_pixels; +} + +static void stbir__clip( int * outx, int * outsubw, int outw, double * u0, double * u1 ) +{ + double per, adj; + int over; + + // do left/top edge + if ( *outx < 0 ) + { + per = ( (double)*outx ) / ( (double)*outsubw ); // is negative + adj = per * ( *u1 - *u0 ); + *u0 -= adj; // increases u0 + *outx = 0; + } + + // do right/bot edge + over = outw - ( *outx + *outsubw ); + if ( over < 0 ) + { + per = ( (double)over ) / ( (double)*outsubw ); // is negative + adj = per * ( *u1 - *u0 ); + *u1 += adj; // decrease u1 + *outsubw = outw - *outx; + } +} + +// converts a double to a rational that has less than one float bit of error (returns 0 if unable to do so) +static int stbir__double_to_rational(double f, stbir_uint32 limit, stbir_uint32 *numer, stbir_uint32 *denom, int limit_denom ) // limit_denom (1) or limit numer (0) +{ + double err; + stbir_uint64 top, bot; + stbir_uint64 numer_last = 0; + stbir_uint64 denom_last = 1; + stbir_uint64 numer_estimate = 1; + stbir_uint64 denom_estimate = 0; + + // scale to past float error range + top = (stbir_uint64)( f * (double)(1 << 25) ); + bot = 1 << 25; + + // keep refining, but usually stops in a few loops - usually 5 for bad cases + for(;;) + { + stbir_uint64 est, temp; + + // hit limit, break out and do best full range estimate + if ( ( ( limit_denom ) ? denom_estimate : numer_estimate ) >= limit ) + break; + + // is the current error less than 1 bit of a float? 
if so, we're done + if ( denom_estimate ) + { + err = ( (double)numer_estimate / (double)denom_estimate ) - f; + if ( err < 0.0 ) err = -err; + if ( err < ( 1.0 / (double)(1<<24) ) ) + { + // yup, found it + *numer = (stbir_uint32) numer_estimate; + *denom = (stbir_uint32) denom_estimate; + return 1; + } + } + + // no more refinement bits left? break out and do full range estimate + if ( bot == 0 ) + break; + + // gcd the estimate bits + est = top / bot; + temp = top % bot; + top = bot; + bot = temp; + + // move remainders + temp = est * denom_estimate + denom_last; + denom_last = denom_estimate; + denom_estimate = temp; + + // move remainders + temp = est * numer_estimate + numer_last; + numer_last = numer_estimate; + numer_estimate = temp; + } + + // we didn't fine anything good enough for float, use a full range estimate + if ( limit_denom ) + { + numer_estimate= (stbir_uint64)( f * (double)limit + 0.5 ); + denom_estimate = limit; + } + else + { + numer_estimate = limit; + denom_estimate = (stbir_uint64)( ( (double)limit / f ) + 0.5 ); + } + + *numer = (stbir_uint32) numer_estimate; + *denom = (stbir_uint32) denom_estimate; + + err = ( denom_estimate ) ? ( ( (double)(stbir_uint32)numer_estimate / (double)(stbir_uint32)denom_estimate ) - f ) : 1.0; + if ( err < 0.0 ) err = -err; + return ( err < ( 1.0 / (double)(1<<24) ) ) ? 1 : 0; +} + +static int stbir__calculate_region_transform( stbir__scale_info * scale_info, int output_full_range, int * output_offset, int output_sub_range, int input_full_range, double input_s0, double input_s1 ) +{ + double output_range, input_range, output_s, input_s, ratio, scale; + + input_s = input_s1 - input_s0; + + // null area + if ( ( output_full_range == 0 ) || ( input_full_range == 0 ) || + ( output_sub_range == 0 ) || ( input_s <= stbir__small_float ) ) + return 0; + + // are either of the ranges completely out of bounds? 
+ if ( ( *output_offset >= output_full_range ) || ( ( *output_offset + output_sub_range ) <= 0 ) || ( input_s0 >= (1.0f-stbir__small_float) ) || ( input_s1 <= stbir__small_float ) ) + return 0; + + output_range = (double)output_full_range; + input_range = (double)input_full_range; + + output_s = ( (double)output_sub_range) / output_range; + + // figure out the scaling to use + ratio = output_s / input_s; + + // save scale before clipping + scale = ( output_range / input_range ) * ratio; + scale_info->scale = (float)scale; + scale_info->inv_scale = (float)( 1.0 / scale ); + + // clip output area to left/right output edges (and adjust input area) + stbir__clip( output_offset, &output_sub_range, output_full_range, &input_s0, &input_s1 ); + + // recalc input area + input_s = input_s1 - input_s0; + + // after clipping do we have zero input area? + if ( input_s <= stbir__small_float ) + return 0; + + // calculate and store the starting source offsets in output pixel space + scale_info->pixel_shift = (float) ( input_s0 * ratio * output_range ); + + scale_info->scale_is_rational = stbir__double_to_rational( scale, ( scale <= 1.0 ) ? 
output_full_range : input_full_range, &scale_info->scale_numerator, &scale_info->scale_denominator, ( scale >= 1.0 ) ); + + scale_info->input_full_size = input_full_range; + scale_info->output_sub_size = output_sub_range; + + return 1; +} + + +static void stbir__init_and_set_layout( STBIR_RESIZE * resize, stbir_pixel_layout pixel_layout, stbir_datatype data_type ) +{ + resize->input_cb = 0; + resize->output_cb = 0; + resize->user_data = resize; + resize->samplers = 0; + resize->called_alloc = 0; + resize->horizontal_filter = STBIR_FILTER_DEFAULT; + resize->horizontal_filter_kernel = 0; resize->horizontal_filter_support = 0; + resize->vertical_filter = STBIR_FILTER_DEFAULT; + resize->vertical_filter_kernel = 0; resize->vertical_filter_support = 0; + resize->horizontal_edge = STBIR_EDGE_CLAMP; + resize->vertical_edge = STBIR_EDGE_CLAMP; + resize->input_s0 = 0; resize->input_t0 = 0; resize->input_s1 = 1; resize->input_t1 = 1; + resize->output_subx = 0; resize->output_suby = 0; resize->output_subw = resize->output_w; resize->output_subh = resize->output_h; + resize->input_data_type = data_type; + resize->output_data_type = data_type; + resize->input_pixel_layout_public = pixel_layout; + resize->output_pixel_layout_public = pixel_layout; + resize->needs_rebuild = 1; +} + +STBIRDEF void stbir_resize_init( STBIR_RESIZE * resize, + const void *input_pixels, int input_w, int input_h, int input_stride_in_bytes, // stride can be zero + void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, // stride can be zero + stbir_pixel_layout pixel_layout, stbir_datatype data_type ) +{ + resize->input_pixels = input_pixels; + resize->input_w = input_w; + resize->input_h = input_h; + resize->input_stride_in_bytes = input_stride_in_bytes; + resize->output_pixels = output_pixels; + resize->output_w = output_w; + resize->output_h = output_h; + resize->output_stride_in_bytes = output_stride_in_bytes; + resize->fast_alpha = 0; + + stbir__init_and_set_layout( resize, 
pixel_layout, data_type ); +} + +// You can update parameters any time after resize_init +STBIRDEF void stbir_set_datatypes( STBIR_RESIZE * resize, stbir_datatype input_type, stbir_datatype output_type ) // by default, datatype from resize_init +{ + resize->input_data_type = input_type; + resize->output_data_type = output_type; + if ( ( resize->samplers ) && ( !resize->needs_rebuild ) ) + stbir__update_info_from_resize( resize->samplers, resize ); +} + +STBIRDEF void stbir_set_pixel_callbacks( STBIR_RESIZE * resize, stbir_input_callback * input_cb, stbir_output_callback * output_cb ) // no callbacks by default +{ + resize->input_cb = input_cb; + resize->output_cb = output_cb; + + if ( ( resize->samplers ) && ( !resize->needs_rebuild ) ) + { + resize->samplers->in_pixels_cb = input_cb; + resize->samplers->out_pixels_cb = output_cb; + } +} + +STBIRDEF void stbir_set_user_data( STBIR_RESIZE * resize, void * user_data ) // pass back STBIR_RESIZE* by default +{ + resize->user_data = user_data; + if ( ( resize->samplers ) && ( !resize->needs_rebuild ) ) + resize->samplers->user_data = user_data; +} + +STBIRDEF void stbir_set_buffer_ptrs( STBIR_RESIZE * resize, const void * input_pixels, int input_stride_in_bytes, void * output_pixels, int output_stride_in_bytes ) +{ + resize->input_pixels = input_pixels; + resize->input_stride_in_bytes = input_stride_in_bytes; + resize->output_pixels = output_pixels; + resize->output_stride_in_bytes = output_stride_in_bytes; + if ( ( resize->samplers ) && ( !resize->needs_rebuild ) ) + stbir__update_info_from_resize( resize->samplers, resize ); +} + + +STBIRDEF int stbir_set_edgemodes( STBIR_RESIZE * resize, stbir_edge horizontal_edge, stbir_edge vertical_edge ) // CLAMP by default +{ + resize->horizontal_edge = horizontal_edge; + resize->vertical_edge = vertical_edge; + resize->needs_rebuild = 1; + return 1; +} + +STBIRDEF int stbir_set_filters( STBIR_RESIZE * resize, stbir_filter horizontal_filter, stbir_filter vertical_filter ) // 
STBIR_DEFAULT_FILTER_UPSAMPLE/DOWNSAMPLE by default +{ + resize->horizontal_filter = horizontal_filter; + resize->vertical_filter = vertical_filter; + resize->needs_rebuild = 1; + return 1; +} + +STBIRDEF int stbir_set_filter_callbacks( STBIR_RESIZE * resize, stbir__kernel_callback * horizontal_filter, stbir__support_callback * horizontal_support, stbir__kernel_callback * vertical_filter, stbir__support_callback * vertical_support ) +{ + resize->horizontal_filter_kernel = horizontal_filter; resize->horizontal_filter_support = horizontal_support; + resize->vertical_filter_kernel = vertical_filter; resize->vertical_filter_support = vertical_support; + resize->needs_rebuild = 1; + return 1; +} + +STBIRDEF int stbir_set_pixel_layouts( STBIR_RESIZE * resize, stbir_pixel_layout input_pixel_layout, stbir_pixel_layout output_pixel_layout ) // sets new pixel layouts +{ + resize->input_pixel_layout_public = input_pixel_layout; + resize->output_pixel_layout_public = output_pixel_layout; + resize->needs_rebuild = 1; + return 1; +} + + +STBIRDEF int stbir_set_non_pm_alpha_speed_over_quality( STBIR_RESIZE * resize, int non_pma_alpha_speed_over_quality ) // sets alpha speed +{ + resize->fast_alpha = non_pma_alpha_speed_over_quality; + resize->needs_rebuild = 1; + return 1; +} + +STBIRDEF int stbir_set_input_subrect( STBIR_RESIZE * resize, double s0, double t0, double s1, double t1 ) // sets input region (full region by default) +{ + resize->input_s0 = s0; + resize->input_t0 = t0; + resize->input_s1 = s1; + resize->input_t1 = t1; + resize->needs_rebuild = 1; + + // are we inbounds? 
+ if ( ( s1 < stbir__small_float ) || ( (s1-s0) < stbir__small_float ) || + ( t1 < stbir__small_float ) || ( (t1-t0) < stbir__small_float ) || + ( s0 > (1.0f-stbir__small_float) ) || + ( t0 > (1.0f-stbir__small_float) ) ) + return 0; + + return 1; +} + +STBIRDEF int stbir_set_output_pixel_subrect( STBIR_RESIZE * resize, int subx, int suby, int subw, int subh ) // sets input region (full region by default) +{ + resize->output_subx = subx; + resize->output_suby = suby; + resize->output_subw = subw; + resize->output_subh = subh; + resize->needs_rebuild = 1; + + // are we inbounds? + if ( ( subx >= resize->output_w ) || ( ( subx + subw ) <= 0 ) || ( suby >= resize->output_h ) || ( ( suby + subh ) <= 0 ) || ( subw == 0 ) || ( subh == 0 ) ) + return 0; + + return 1; +} + +STBIRDEF int stbir_set_pixel_subrect( STBIR_RESIZE * resize, int subx, int suby, int subw, int subh ) // sets both regions (full regions by default) +{ + double s0, t0, s1, t1; + + s0 = ( (double)subx ) / ( (double)resize->output_w ); + t0 = ( (double)suby ) / ( (double)resize->output_h ); + s1 = ( (double)(subx+subw) ) / ( (double)resize->output_w ); + t1 = ( (double)(suby+subh) ) / ( (double)resize->output_h ); + + resize->input_s0 = s0; + resize->input_t0 = t0; + resize->input_s1 = s1; + resize->input_t1 = t1; + resize->output_subx = subx; + resize->output_suby = suby; + resize->output_subw = subw; + resize->output_subh = subh; + resize->needs_rebuild = 1; + + // are we inbounds? 
+ if ( ( subx >= resize->output_w ) || ( ( subx + subw ) <= 0 ) || ( suby >= resize->output_h ) || ( ( suby + subh ) <= 0 ) || ( subw == 0 ) || ( subh == 0 ) ) + return 0; + + return 1; +} + +static int stbir__perform_build( STBIR_RESIZE * resize, int splits ) +{ + stbir__contributors conservative = { 0, 0 }; + stbir__sampler horizontal, vertical; + int new_output_subx, new_output_suby; + stbir__info * out_info; + #ifdef STBIR_PROFILE + stbir__info profile_infod; // used to contain building profile info before everything is allocated + stbir__info * profile_info = &profile_infod; + #endif + + // have we already built the samplers? + if ( resize->samplers ) + return 0; + + #define STBIR_RETURN_ERROR_AND_ASSERT( exp ) STBIR_ASSERT( !(exp) ); if (exp) return 0; + STBIR_RETURN_ERROR_AND_ASSERT( (unsigned)resize->horizontal_filter >= STBIR_FILTER_OTHER) + STBIR_RETURN_ERROR_AND_ASSERT( (unsigned)resize->vertical_filter >= STBIR_FILTER_OTHER) + #undef STBIR_RETURN_ERROR_AND_ASSERT + + if ( splits <= 0 ) + return 0; + + STBIR_PROFILE_BUILD_FIRST_START( build ); + + new_output_subx = resize->output_subx; + new_output_suby = resize->output_suby; + + // do horizontal clip and scale calcs + if ( !stbir__calculate_region_transform( &horizontal.scale_info, resize->output_w, &new_output_subx, resize->output_subw, resize->input_w, resize->input_s0, resize->input_s1 ) ) + return 0; + + // do vertical clip and scale calcs + if ( !stbir__calculate_region_transform( &vertical.scale_info, resize->output_h, &new_output_suby, resize->output_subh, resize->input_h, resize->input_t0, resize->input_t1 ) ) + return 0; + + // if nothing to do, just return + if ( ( horizontal.scale_info.output_sub_size == 0 ) || ( vertical.scale_info.output_sub_size == 0 ) ) + return 0; + + stbir__set_sampler(&horizontal, resize->horizontal_filter, resize->horizontal_filter_kernel, resize->horizontal_filter_support, resize->horizontal_edge, &horizontal.scale_info, 1, resize->user_data ); + 
stbir__get_conservative_extents( &horizontal, &conservative, resize->user_data ); + stbir__set_sampler(&vertical, resize->vertical_filter, resize->horizontal_filter_kernel, resize->vertical_filter_support, resize->vertical_edge, &vertical.scale_info, 0, resize->user_data ); + + if ( ( vertical.scale_info.output_sub_size / splits ) < STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS ) // each split should be a minimum of 4 scanlines (handwavey choice) + { + splits = vertical.scale_info.output_sub_size / STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS; + if ( splits == 0 ) splits = 1; + } + + STBIR_PROFILE_BUILD_START( alloc ); + out_info = stbir__alloc_internal_mem_and_build_samplers( &horizontal, &vertical, &conservative, resize->input_pixel_layout_public, resize->output_pixel_layout_public, splits, new_output_subx, new_output_suby, resize->fast_alpha, resize->user_data STBIR_ONLY_PROFILE_BUILD_SET_INFO ); + STBIR_PROFILE_BUILD_END( alloc ); + STBIR_PROFILE_BUILD_END( build ); + + if ( out_info ) + { + resize->splits = splits; + resize->samplers = out_info; + resize->needs_rebuild = 0; + #ifdef STBIR_PROFILE + STBIR_MEMCPY( &out_info->profile, &profile_infod.profile, sizeof( out_info->profile ) ); + #endif + + // update anything that can be changed without recalcing samplers + stbir__update_info_from_resize( out_info, resize ); + + return splits; + } + + return 0; +} + +void stbir_free_samplers( STBIR_RESIZE * resize ) +{ + if ( resize->samplers ) + { + stbir__free_internal_mem( resize->samplers ); + resize->samplers = 0; + resize->called_alloc = 0; + } +} + +STBIRDEF int stbir_build_samplers_with_splits( STBIR_RESIZE * resize, int splits ) +{ + if ( ( resize->samplers == 0 ) || ( resize->needs_rebuild ) ) + { + if ( resize->samplers ) + stbir_free_samplers( resize ); + + resize->called_alloc = 1; + return stbir__perform_build( resize, splits ); + } + + STBIR_PROFILE_BUILD_CLEAR( resize->samplers ); + + return 1; +} + +STBIRDEF int stbir_build_samplers( STBIR_RESIZE * resize ) +{ 
+ return stbir_build_samplers_with_splits( resize, 1 ); +} + +STBIRDEF int stbir_resize_extended( STBIR_RESIZE * resize ) +{ + int result; + + if ( ( resize->samplers == 0 ) || ( resize->needs_rebuild ) ) + { + int alloc_state = resize->called_alloc; // remember allocated state + + if ( resize->samplers ) + { + stbir__free_internal_mem( resize->samplers ); + resize->samplers = 0; + } + + if ( !stbir_build_samplers( resize ) ) + return 0; + + resize->called_alloc = alloc_state; + + // if build_samplers succeeded (above), but there are no samplers set, then + // the area to stretch into was zero pixels, so don't do anything and return + // success + if ( resize->samplers == 0 ) + return 1; + } + else + { + // didn't build anything - clear it + STBIR_PROFILE_BUILD_CLEAR( resize->samplers ); + } + + // do resize + result = stbir__perform_resize( resize->samplers, 0, resize->splits ); + + // if we alloced, then free + if ( !resize->called_alloc ) + { + stbir_free_samplers( resize ); + resize->samplers = 0; + } + + return result; +} + +STBIRDEF int stbir_resize_extended_split( STBIR_RESIZE * resize, int split_start, int split_count ) +{ + STBIR_ASSERT( resize->samplers ); + + // if we're just doing the whole thing, call full + if ( ( split_start == -1 ) || ( ( split_start == 0 ) && ( split_count == resize->splits ) ) ) + return stbir_resize_extended( resize ); + + // you **must** build samplers first when using split resize + if ( ( resize->samplers == 0 ) || ( resize->needs_rebuild ) ) + return 0; + + if ( ( split_start >= resize->splits ) || ( split_start < 0 ) || ( ( split_start + split_count ) > resize->splits ) || ( split_count <= 0 ) ) + return 0; + + // do resize + return stbir__perform_resize( resize->samplers, split_start, split_count ); +} + +static int stbir__check_output_stuff( void ** ret_ptr, int * ret_pitch, void * output_pixels, int type_size, int output_w, int output_h, int output_stride_in_bytes, stbir_internal_pixel_layout pixel_layout ) +{ + size_t 
size; + int pitch; + void * ptr; + + pitch = output_w * type_size * stbir__pixel_channels[ pixel_layout ]; + if ( pitch == 0 ) + return 0; + + if ( output_stride_in_bytes == 0 ) + output_stride_in_bytes = pitch; + + if ( output_stride_in_bytes < pitch ) + return 0; + + size = (size_t)output_stride_in_bytes * (size_t)output_h; + if ( size == 0 ) + return 0; + + *ret_ptr = 0; + *ret_pitch = output_stride_in_bytes; + + if ( output_pixels == 0 ) + { + ptr = STBIR_MALLOC( size, 0 ); + if ( ptr == 0 ) + return 0; + + *ret_ptr = ptr; + *ret_pitch = pitch; + } + + return 1; +} + + +STBIRDEF unsigned char * stbir_resize_uint8_linear( const unsigned char *input_pixels , int input_w , int input_h, int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_pixel_layout pixel_layout ) +{ + STBIR_RESIZE resize; + unsigned char * optr; + int opitch; + + if ( !stbir__check_output_stuff( (void**)&optr, &opitch, output_pixels, sizeof( unsigned char ), output_w, output_h, output_stride_in_bytes, stbir__pixel_layout_convert_public_to_internal[ pixel_layout ] ) ) + return 0; + + stbir_resize_init( &resize, + input_pixels, input_w, input_h, input_stride_in_bytes, + (optr) ? optr : output_pixels, output_w, output_h, opitch, + pixel_layout, STBIR_TYPE_UINT8 ); + + if ( !stbir_resize_extended( &resize ) ) + { + if ( optr ) + STBIR_FREE( optr, 0 ); + return 0; + } + + return (optr) ? 
optr : output_pixels; +} + +STBIRDEF unsigned char * stbir_resize_uint8_srgb( const unsigned char *input_pixels , int input_w , int input_h, int input_stride_in_bytes, + unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_pixel_layout pixel_layout ) +{ + STBIR_RESIZE resize; + unsigned char * optr; + int opitch; + + if ( !stbir__check_output_stuff( (void**)&optr, &opitch, output_pixels, sizeof( unsigned char ), output_w, output_h, output_stride_in_bytes, stbir__pixel_layout_convert_public_to_internal[ pixel_layout ] ) ) + return 0; + + stbir_resize_init( &resize, + input_pixels, input_w, input_h, input_stride_in_bytes, + (optr) ? optr : output_pixels, output_w, output_h, opitch, + pixel_layout, STBIR_TYPE_UINT8_SRGB ); + + if ( !stbir_resize_extended( &resize ) ) + { + if ( optr ) + STBIR_FREE( optr, 0 ); + return 0; + } + + return (optr) ? optr : output_pixels; +} + + +STBIRDEF float * stbir_resize_float_linear( const float *input_pixels , int input_w , int input_h, int input_stride_in_bytes, + float *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_pixel_layout pixel_layout ) +{ + STBIR_RESIZE resize; + float * optr; + int opitch; + + if ( !stbir__check_output_stuff( (void**)&optr, &opitch, output_pixels, sizeof( float ), output_w, output_h, output_stride_in_bytes, stbir__pixel_layout_convert_public_to_internal[ pixel_layout ] ) ) + return 0; + + stbir_resize_init( &resize, + input_pixels, input_w, input_h, input_stride_in_bytes, + (optr) ? optr : output_pixels, output_w, output_h, opitch, + pixel_layout, STBIR_TYPE_FLOAT ); + + if ( !stbir_resize_extended( &resize ) ) + { + if ( optr ) + STBIR_FREE( optr, 0 ); + return 0; + } + + return (optr) ? 
optr : output_pixels; +} + + +STBIRDEF void * stbir_resize( const void *input_pixels , int input_w , int input_h, int input_stride_in_bytes, + void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, + stbir_pixel_layout pixel_layout, stbir_datatype data_type, + stbir_edge edge, stbir_filter filter ) +{ + STBIR_RESIZE resize; + float * optr; + int opitch; + + if ( !stbir__check_output_stuff( (void**)&optr, &opitch, output_pixels, stbir__type_size[data_type], output_w, output_h, output_stride_in_bytes, stbir__pixel_layout_convert_public_to_internal[ pixel_layout ] ) ) + return 0; + + stbir_resize_init( &resize, + input_pixels, input_w, input_h, input_stride_in_bytes, + (optr) ? optr : output_pixels, output_w, output_h, output_stride_in_bytes, + pixel_layout, data_type ); + + resize.horizontal_edge = edge; + resize.vertical_edge = edge; + resize.horizontal_filter = filter; + resize.vertical_filter = filter; + + if ( !stbir_resize_extended( &resize ) ) + { + if ( optr ) + STBIR_FREE( optr, 0 ); + return 0; + } + + return (optr) ? 
optr : output_pixels; +} + +#ifdef STBIR_PROFILE + +STBIRDEF void stbir_resize_build_profile_info( STBIR_PROFILE_INFO * info, STBIR_RESIZE const * resize ) +{ + static char const * bdescriptions[6] = { "Building", "Allocating", "Horizontal sampler", "Vertical sampler", "Coefficient cleanup", "Coefficient piovot" } ; + stbir__info* samp = resize->samplers; + int i; + + typedef int testa[ (STBIR__ARRAY_SIZE( bdescriptions ) == (STBIR__ARRAY_SIZE( samp->profile.array )-1) )?1:-1]; + typedef int testb[ (sizeof( samp->profile.array ) == (sizeof(samp->profile.named)) )?1:-1]; + typedef int testc[ (sizeof( info->clocks ) >= (sizeof(samp->profile.named)) )?1:-1]; + + for( i = 0 ; i < STBIR__ARRAY_SIZE( bdescriptions ) ; i++) + info->clocks[i] = samp->profile.array[i+1]; + + info->total_clocks = samp->profile.named.total; + info->descriptions = bdescriptions; + info->count = STBIR__ARRAY_SIZE( bdescriptions ); +} + +STBIRDEF void stbir_resize_split_profile_info( STBIR_PROFILE_INFO * info, STBIR_RESIZE const * resize, int split_start, int split_count ) +{ + static char const * descriptions[7] = { "Looping", "Vertical sampling", "Horizontal sampling", "Scanline input", "Scanline output", "Alpha weighting", "Alpha unweighting" }; + stbir__per_split_info * split_info; + int s, i; + + typedef int testa[ (STBIR__ARRAY_SIZE( descriptions ) == (STBIR__ARRAY_SIZE( split_info->profile.array )-1) )?1:-1]; + typedef int testb[ (sizeof( split_info->profile.array ) == (sizeof(split_info->profile.named)) )?1:-1]; + typedef int testc[ (sizeof( info->clocks ) >= (sizeof(split_info->profile.named)) )?1:-1]; + + if ( split_start == -1 ) + { + split_start = 0; + split_count = resize->samplers->splits; + } + + if ( ( split_start >= resize->splits ) || ( split_start < 0 ) || ( ( split_start + split_count ) > resize->splits ) || ( split_count <= 0 ) ) + { + info->total_clocks = 0; + info->descriptions = 0; + info->count = 0; + return; + } + + split_info = resize->samplers->split_info + 
split_start; + + // sum up the profile from all the splits + for( i = 0 ; i < STBIR__ARRAY_SIZE( descriptions ) ; i++ ) + { + stbir_uint64 sum = 0; + for( s = 0 ; s < split_count ; s++ ) + sum += split_info[s].profile.array[i+1]; + info->clocks[i] = sum; + } + + info->total_clocks = split_info->profile.named.total; + info->descriptions = descriptions; + info->count = STBIR__ARRAY_SIZE( descriptions ); +} + +STBIRDEF void stbir_resize_extended_profile_info( STBIR_PROFILE_INFO * info, STBIR_RESIZE const * resize ) +{ + stbir_resize_split_profile_info( info, resize, -1, 0 ); +} + +#endif // STBIR_PROFILE + +#undef STBIR_BGR +#undef STBIR_1CHANNEL +#undef STBIR_2CHANNEL +#undef STBIR_RGB +#undef STBIR_RGBA +#undef STBIR_4CHANNEL +#undef STBIR_BGRA +#undef STBIR_ARGB +#undef STBIR_ABGR +#undef STBIR_RA +#undef STBIR_AR +#undef STBIR_RGBA_PM +#undef STBIR_BGRA_PM +#undef STBIR_ARGB_PM +#undef STBIR_ABGR_PM +#undef STBIR_RA_PM +#undef STBIR_AR_PM + +#endif // STB_IMAGE_RESIZE_IMPLEMENTATION + +#else // STB_IMAGE_RESIZE_HORIZONTALS&STB_IMAGE_RESIZE_DO_VERTICALS + +// we reinclude the header file to define all the horizontal functions +// specializing each function for the number of coeffs is 20-40% faster *OVERALL* + +// by including the header file again this way, we can still debug the functions + +#define STBIR_strs_join2( start, mid, end ) start##mid##end +#define STBIR_strs_join1( start, mid, end ) STBIR_strs_join2( start, mid, end ) + +#define STBIR_strs_join24( start, mid1, mid2, end ) start##mid1##mid2##end +#define STBIR_strs_join14( start, mid1, mid2, end ) STBIR_strs_join24( start, mid1, mid2, end ) + +#ifdef STB_IMAGE_RESIZE_DO_CODERS + +#ifdef stbir__decode_suffix +#define STBIR__CODER_NAME( name ) STBIR_strs_join1( name, _, stbir__decode_suffix ) +#else +#define STBIR__CODER_NAME( name ) name +#endif + +#ifdef stbir__decode_swizzle +#define stbir__decode_simdf8_flip(reg) STBIR_strs_join1( STBIR_strs_join1( STBIR_strs_join1( STBIR_strs_join1( 
stbir__simdf8_0123to,stbir__decode_order0,stbir__decode_order1),stbir__decode_order2,stbir__decode_order3),stbir__decode_order0,stbir__decode_order1),stbir__decode_order2,stbir__decode_order3)(reg, reg) +#define stbir__decode_simdf4_flip(reg) STBIR_strs_join1( STBIR_strs_join1( stbir__simdf_0123to,stbir__decode_order0,stbir__decode_order1),stbir__decode_order2,stbir__decode_order3)(reg, reg) +#define stbir__encode_simdf8_unflip(reg) STBIR_strs_join1( STBIR_strs_join1( STBIR_strs_join1( STBIR_strs_join1( stbir__simdf8_0123to,stbir__encode_order0,stbir__encode_order1),stbir__encode_order2,stbir__encode_order3),stbir__encode_order0,stbir__encode_order1),stbir__encode_order2,stbir__encode_order3)(reg, reg) +#define stbir__encode_simdf4_unflip(reg) STBIR_strs_join1( STBIR_strs_join1( stbir__simdf_0123to,stbir__encode_order0,stbir__encode_order1),stbir__encode_order2,stbir__encode_order3)(reg, reg) +#else +#define stbir__decode_order0 0 +#define stbir__decode_order1 1 +#define stbir__decode_order2 2 +#define stbir__decode_order3 3 +#define stbir__encode_order0 0 +#define stbir__encode_order1 1 +#define stbir__encode_order2 2 +#define stbir__encode_order3 3 +#define stbir__decode_simdf8_flip(reg) +#define stbir__decode_simdf4_flip(reg) +#define stbir__encode_simdf8_unflip(reg) +#define stbir__encode_simdf4_unflip(reg) +#endif + +#ifdef STBIR_SIMD8 +#define stbir__encode_simdfX_unflip stbir__encode_simdf8_unflip +#else +#define stbir__encode_simdfX_unflip stbir__encode_simdf4_unflip +#endif + +static float * STBIR__CODER_NAME( stbir__decode_uint8_linear_scaled )( float * decodep, int width_times_channels, void const * inputp ) +{ + float STBIR_STREAMOUT_PTR( * ) decode = decodep; + float * decode_end = (float*) decode + width_times_channels; + unsigned char const * input = (unsigned char const*)inputp; + + #ifdef STBIR_SIMD + unsigned char const * end_input_m16 = input + width_times_channels - 16; + if ( width_times_channels >= 16 ) + { + decode_end -= 16; + 
STBIR_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + #ifdef STBIR_SIMD8 + stbir__simdi i; stbir__simdi8 o0,o1; + stbir__simdf8 of0, of1; + STBIR_NO_UNROLL(decode); + stbir__simdi_load( i, input ); + stbir__simdi8_expand_u8_to_u32( o0, o1, i ); + stbir__simdi8_convert_i32_to_float( of0, o0 ); + stbir__simdi8_convert_i32_to_float( of1, o1 ); + stbir__simdf8_mult( of0, of0, STBIR_max_uint8_as_float_inverted8); + stbir__simdf8_mult( of1, of1, STBIR_max_uint8_as_float_inverted8); + stbir__decode_simdf8_flip( of0 ); + stbir__decode_simdf8_flip( of1 ); + stbir__simdf8_store( decode + 0, of0 ); + stbir__simdf8_store( decode + 8, of1 ); + #else + stbir__simdi i, o0, o1, o2, o3; + stbir__simdf of0, of1, of2, of3; + STBIR_NO_UNROLL(decode); + stbir__simdi_load( i, input ); + stbir__simdi_expand_u8_to_u32( o0,o1,o2,o3,i); + stbir__simdi_convert_i32_to_float( of0, o0 ); + stbir__simdi_convert_i32_to_float( of1, o1 ); + stbir__simdi_convert_i32_to_float( of2, o2 ); + stbir__simdi_convert_i32_to_float( of3, o3 ); + stbir__simdf_mult( of0, of0, STBIR__CONSTF(STBIR_max_uint8_as_float_inverted) ); + stbir__simdf_mult( of1, of1, STBIR__CONSTF(STBIR_max_uint8_as_float_inverted) ); + stbir__simdf_mult( of2, of2, STBIR__CONSTF(STBIR_max_uint8_as_float_inverted) ); + stbir__simdf_mult( of3, of3, STBIR__CONSTF(STBIR_max_uint8_as_float_inverted) ); + stbir__decode_simdf4_flip( of0 ); + stbir__decode_simdf4_flip( of1 ); + stbir__decode_simdf4_flip( of2 ); + stbir__decode_simdf4_flip( of3 ); + stbir__simdf_store( decode + 0, of0 ); + stbir__simdf_store( decode + 4, of1 ); + stbir__simdf_store( decode + 8, of2 ); + stbir__simdf_store( decode + 12, of3 ); + #endif + decode += 16; + input += 16; + if ( decode <= decode_end ) + continue; + if ( decode == ( decode_end + 16 ) ) + break; + decode = decode_end; // backup and do last couple + input = end_input_m16; + } + return decode_end + 16; + } + #endif + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide 
cleanly by four + decode += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START + while( decode <= decode_end ) + { + STBIR_SIMD_NO_UNROLL(decode); + decode[0-4] = ((float)(input[stbir__decode_order0])) * stbir__max_uint8_as_float_inverted; + decode[1-4] = ((float)(input[stbir__decode_order1])) * stbir__max_uint8_as_float_inverted; + decode[2-4] = ((float)(input[stbir__decode_order2])) * stbir__max_uint8_as_float_inverted; + decode[3-4] = ((float)(input[stbir__decode_order3])) * stbir__max_uint8_as_float_inverted; + decode += 4; + input += 4; + } + decode -= 4; + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( decode < decode_end ) + { + STBIR_NO_UNROLL(decode); + decode[0] = ((float)(input[stbir__decode_order0])) * stbir__max_uint8_as_float_inverted; + #if stbir__coder_min_num >= 2 + decode[1] = ((float)(input[stbir__decode_order1])) * stbir__max_uint8_as_float_inverted; + #endif + #if stbir__coder_min_num >= 3 + decode[2] = ((float)(input[stbir__decode_order2])) * stbir__max_uint8_as_float_inverted; + #endif + decode += stbir__coder_min_num; + input += stbir__coder_min_num; + } + #endif + + return decode_end; +} + +static void STBIR__CODER_NAME( stbir__encode_uint8_linear_scaled )( void * outputp, int width_times_channels, float const * encode ) +{ + unsigned char STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned char *) outputp; + unsigned char * end_output = ( (unsigned char *) output ) + width_times_channels; + + #ifdef STBIR_SIMD + if ( width_times_channels >= stbir__simdfX_float_count*2 ) + { + float const * end_encode_m8 = encode + width_times_channels - stbir__simdfX_float_count*2; + end_output -= stbir__simdfX_float_count*2; + STBIR_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + stbir__simdfX e0, e1; + stbir__simdi i; + STBIR_SIMD_NO_UNROLL(encode); + stbir__simdfX_madd_mem( e0, STBIR_simd_point5X, STBIR_max_uint8_as_floatX, encode ); + stbir__simdfX_madd_mem( e1, STBIR_simd_point5X, STBIR_max_uint8_as_floatX, 
encode+stbir__simdfX_float_count ); + stbir__encode_simdfX_unflip( e0 ); + stbir__encode_simdfX_unflip( e1 ); + #ifdef STBIR_SIMD8 + stbir__simdf8_pack_to_16bytes( i, e0, e1 ); + stbir__simdi_store( output, i ); + #else + stbir__simdf_pack_to_8bytes( i, e0, e1 ); + stbir__simdi_store2( output, i ); + #endif + encode += stbir__simdfX_float_count*2; + output += stbir__simdfX_float_count*2; + if ( output <= end_output ) + continue; + if ( output == ( end_output + stbir__simdfX_float_count*2 ) ) + break; + output = end_output; // backup and do last couple + encode = end_encode_m8; + } + return; + } + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + output += 4; + STBIR_NO_UNROLL_LOOP_START + while( output <= end_output ) + { + stbir__simdf e0; + stbir__simdi i0; + STBIR_NO_UNROLL(encode); + stbir__simdf_load( e0, encode ); + stbir__simdf_madd( e0, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint8_as_float), e0 ); + stbir__encode_simdf4_unflip( e0 ); + stbir__simdf_pack_to_8bytes( i0, e0, e0 ); // only use first 4 + *(int*)(output-4) = stbir__simdi_to_int( i0 ); + output += 4; + encode += 4; + } + output -= 4; + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( output < end_output ) + { + stbir__simdf e0; + STBIR_NO_UNROLL(encode); + stbir__simdf_madd1_mem( e0, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint8_as_float), encode+stbir__encode_order0 ); output[0] = stbir__simdf_convert_float_to_uint8( e0 ); + #if stbir__coder_min_num >= 2 + stbir__simdf_madd1_mem( e0, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint8_as_float), encode+stbir__encode_order1 ); output[1] = stbir__simdf_convert_float_to_uint8( e0 ); + #endif + #if stbir__coder_min_num >= 3 + stbir__simdf_madd1_mem( e0, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint8_as_float), encode+stbir__encode_order2 ); output[2] = 
stbir__simdf_convert_float_to_uint8( e0 ); + #endif + output += stbir__coder_min_num; + encode += stbir__coder_min_num; + } + #endif + + #else + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + output += 4; + while( output <= end_output ) + { + float f; + f = encode[stbir__encode_order0] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[0-4] = (unsigned char)f; + f = encode[stbir__encode_order1] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[1-4] = (unsigned char)f; + f = encode[stbir__encode_order2] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[2-4] = (unsigned char)f; + f = encode[stbir__encode_order3] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[3-4] = (unsigned char)f; + output += 4; + encode += 4; + } + output -= 4; + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( output < end_output ) + { + float f; + STBIR_NO_UNROLL(encode); + f = encode[stbir__encode_order0] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[0] = (unsigned char)f; + #if stbir__coder_min_num >= 2 + f = encode[stbir__encode_order1] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[1] = (unsigned char)f; + #endif + #if stbir__coder_min_num >= 3 + f = encode[stbir__encode_order2] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[2] = (unsigned char)f; + #endif + output += stbir__coder_min_num; + encode += stbir__coder_min_num; + } + #endif + #endif +} + +static float * STBIR__CODER_NAME(stbir__decode_uint8_linear)( float * decodep, int width_times_channels, void const * inputp ) +{ + float STBIR_STREAMOUT_PTR( * ) decode = decodep; + float * decode_end = (float*) decode + width_times_channels; + unsigned char const * input = (unsigned char const*)inputp; + + #ifdef STBIR_SIMD + unsigned char const * end_input_m16 = input + width_times_channels - 16; + if ( 
width_times_channels >= 16 ) + { + decode_end -= 16; + STBIR_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + #ifdef STBIR_SIMD8 + stbir__simdi i; stbir__simdi8 o0,o1; + stbir__simdf8 of0, of1; + STBIR_NO_UNROLL(decode); + stbir__simdi_load( i, input ); + stbir__simdi8_expand_u8_to_u32( o0, o1, i ); + stbir__simdi8_convert_i32_to_float( of0, o0 ); + stbir__simdi8_convert_i32_to_float( of1, o1 ); + stbir__decode_simdf8_flip( of0 ); + stbir__decode_simdf8_flip( of1 ); + stbir__simdf8_store( decode + 0, of0 ); + stbir__simdf8_store( decode + 8, of1 ); + #else + stbir__simdi i, o0, o1, o2, o3; + stbir__simdf of0, of1, of2, of3; + STBIR_NO_UNROLL(decode); + stbir__simdi_load( i, input ); + stbir__simdi_expand_u8_to_u32( o0,o1,o2,o3,i); + stbir__simdi_convert_i32_to_float( of0, o0 ); + stbir__simdi_convert_i32_to_float( of1, o1 ); + stbir__simdi_convert_i32_to_float( of2, o2 ); + stbir__simdi_convert_i32_to_float( of3, o3 ); + stbir__decode_simdf4_flip( of0 ); + stbir__decode_simdf4_flip( of1 ); + stbir__decode_simdf4_flip( of2 ); + stbir__decode_simdf4_flip( of3 ); + stbir__simdf_store( decode + 0, of0 ); + stbir__simdf_store( decode + 4, of1 ); + stbir__simdf_store( decode + 8, of2 ); + stbir__simdf_store( decode + 12, of3 ); +#endif + decode += 16; + input += 16; + if ( decode <= decode_end ) + continue; + if ( decode == ( decode_end + 16 ) ) + break; + decode = decode_end; // backup and do last couple + input = end_input_m16; + } + return decode_end + 16; + } + #endif + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + decode += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START + while( decode <= decode_end ) + { + STBIR_SIMD_NO_UNROLL(decode); + decode[0-4] = ((float)(input[stbir__decode_order0])); + decode[1-4] = ((float)(input[stbir__decode_order1])); + decode[2-4] = ((float)(input[stbir__decode_order2])); + decode[3-4] = ((float)(input[stbir__decode_order3])); + decode += 4; + input += 4; + } + decode -= 4; + #endif + + // 
do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( decode < decode_end ) + { + STBIR_NO_UNROLL(decode); + decode[0] = ((float)(input[stbir__decode_order0])); + #if stbir__coder_min_num >= 2 + decode[1] = ((float)(input[stbir__decode_order1])); + #endif + #if stbir__coder_min_num >= 3 + decode[2] = ((float)(input[stbir__decode_order2])); + #endif + decode += stbir__coder_min_num; + input += stbir__coder_min_num; + } + #endif + return decode_end; +} + +static void STBIR__CODER_NAME( stbir__encode_uint8_linear )( void * outputp, int width_times_channels, float const * encode ) +{ + unsigned char STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned char *) outputp; + unsigned char * end_output = ( (unsigned char *) output ) + width_times_channels; + + #ifdef STBIR_SIMD + if ( width_times_channels >= stbir__simdfX_float_count*2 ) + { + float const * end_encode_m8 = encode + width_times_channels - stbir__simdfX_float_count*2; + end_output -= stbir__simdfX_float_count*2; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + stbir__simdfX e0, e1; + stbir__simdi i; + STBIR_SIMD_NO_UNROLL(encode); + stbir__simdfX_add_mem( e0, STBIR_simd_point5X, encode ); + stbir__simdfX_add_mem( e1, STBIR_simd_point5X, encode+stbir__simdfX_float_count ); + stbir__encode_simdfX_unflip( e0 ); + stbir__encode_simdfX_unflip( e1 ); + #ifdef STBIR_SIMD8 + stbir__simdf8_pack_to_16bytes( i, e0, e1 ); + stbir__simdi_store( output, i ); + #else + stbir__simdf_pack_to_8bytes( i, e0, e1 ); + stbir__simdi_store2( output, i ); + #endif + encode += stbir__simdfX_float_count*2; + output += stbir__simdfX_float_count*2; + if ( output <= end_output ) + continue; + if ( output == ( end_output + stbir__simdfX_float_count*2 ) ) + break; + output = end_output; // backup and do last couple + encode = end_encode_m8; + } + return; + } + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + output += 4; + 
STBIR_NO_UNROLL_LOOP_START + while( output <= end_output ) + { + stbir__simdf e0; + stbir__simdi i0; + STBIR_NO_UNROLL(encode); + stbir__simdf_load( e0, encode ); + stbir__simdf_add( e0, STBIR__CONSTF(STBIR_simd_point5), e0 ); + stbir__encode_simdf4_unflip( e0 ); + stbir__simdf_pack_to_8bytes( i0, e0, e0 ); // only use first 4 + *(int*)(output-4) = stbir__simdi_to_int( i0 ); + output += 4; + encode += 4; + } + output -= 4; + #endif + + #else + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + output += 4; + while( output <= end_output ) + { + float f; + f = encode[stbir__encode_order0] + 0.5f; STBIR_CLAMP(f, 0, 255); output[0-4] = (unsigned char)f; + f = encode[stbir__encode_order1] + 0.5f; STBIR_CLAMP(f, 0, 255); output[1-4] = (unsigned char)f; + f = encode[stbir__encode_order2] + 0.5f; STBIR_CLAMP(f, 0, 255); output[2-4] = (unsigned char)f; + f = encode[stbir__encode_order3] + 0.5f; STBIR_CLAMP(f, 0, 255); output[3-4] = (unsigned char)f; + output += 4; + encode += 4; + } + output -= 4; + #endif + + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( output < end_output ) + { + float f; + STBIR_NO_UNROLL(encode); + f = encode[stbir__encode_order0] + 0.5f; STBIR_CLAMP(f, 0, 255); output[0] = (unsigned char)f; + #if stbir__coder_min_num >= 2 + f = encode[stbir__encode_order1] + 0.5f; STBIR_CLAMP(f, 0, 255); output[1] = (unsigned char)f; + #endif + #if stbir__coder_min_num >= 3 + f = encode[stbir__encode_order2] + 0.5f; STBIR_CLAMP(f, 0, 255); output[2] = (unsigned char)f; + #endif + output += stbir__coder_min_num; + encode += stbir__coder_min_num; + } + #endif +} + +static float * STBIR__CODER_NAME(stbir__decode_uint8_srgb)( float * decodep, int width_times_channels, void const * inputp ) +{ + float STBIR_STREAMOUT_PTR( * ) decode = decodep; + float * decode_end = (float*) decode + width_times_channels; + unsigned char const * input = (unsigned char const 
*)inputp; + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + decode += 4; + while( decode <= decode_end ) + { + decode[0-4] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order0 ] ]; + decode[1-4] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order1 ] ]; + decode[2-4] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order2 ] ]; + decode[3-4] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order3 ] ]; + decode += 4; + input += 4; + } + decode -= 4; + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( decode < decode_end ) + { + STBIR_NO_UNROLL(decode); + decode[0] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order0 ] ]; + #if stbir__coder_min_num >= 2 + decode[1] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order1 ] ]; + #endif + #if stbir__coder_min_num >= 3 + decode[2] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order2 ] ]; + #endif + decode += stbir__coder_min_num; + input += stbir__coder_min_num; + } + #endif + return decode_end; +} + +#define stbir__min_max_shift20( i, f ) \ + stbir__simdf_max( f, f, stbir_simdf_casti(STBIR__CONSTI( STBIR_almost_zero )) ); \ + stbir__simdf_min( f, f, stbir_simdf_casti(STBIR__CONSTI( STBIR_almost_one )) ); \ + stbir__simdi_32shr( i, stbir_simdi_castf( f ), 20 ); + +#define stbir__scale_and_convert( i, f ) \ + stbir__simdf_madd( f, STBIR__CONSTF( STBIR_simd_point5 ), STBIR__CONSTF( STBIR_max_uint8_as_float ), f ); \ + stbir__simdf_max( f, f, stbir__simdf_zeroP() ); \ + stbir__simdf_min( f, f, STBIR__CONSTF( STBIR_max_uint8_as_float ) ); \ + stbir__simdf_convert_float_to_i32( i, f ); + +#define stbir__linear_to_srgb_finish( i, f ) \ +{ \ + stbir__simdi temp; \ + stbir__simdi_32shr( temp, stbir_simdi_castf( f ), 12 ) ; \ + stbir__simdi_and( temp, temp, STBIR__CONSTI(STBIR_mastissa_mask) ); \ + stbir__simdi_or( temp, temp, 
STBIR__CONSTI(STBIR_topscale) ); \ + stbir__simdi_16madd( i, i, temp ); \ + stbir__simdi_32shr( i, i, 16 ); \ +} + +#define stbir__simdi_table_lookup2( v0,v1, table ) \ +{ \ + stbir__simdi_u32 temp0,temp1; \ + temp0.m128i_i128 = v0; \ + temp1.m128i_i128 = v1; \ + temp0.m128i_u32[0] = table[temp0.m128i_i32[0]]; temp0.m128i_u32[1] = table[temp0.m128i_i32[1]]; temp0.m128i_u32[2] = table[temp0.m128i_i32[2]]; temp0.m128i_u32[3] = table[temp0.m128i_i32[3]]; \ + temp1.m128i_u32[0] = table[temp1.m128i_i32[0]]; temp1.m128i_u32[1] = table[temp1.m128i_i32[1]]; temp1.m128i_u32[2] = table[temp1.m128i_i32[2]]; temp1.m128i_u32[3] = table[temp1.m128i_i32[3]]; \ + v0 = temp0.m128i_i128; \ + v1 = temp1.m128i_i128; \ +} + +#define stbir__simdi_table_lookup3( v0,v1,v2, table ) \ +{ \ + stbir__simdi_u32 temp0,temp1,temp2; \ + temp0.m128i_i128 = v0; \ + temp1.m128i_i128 = v1; \ + temp2.m128i_i128 = v2; \ + temp0.m128i_u32[0] = table[temp0.m128i_i32[0]]; temp0.m128i_u32[1] = table[temp0.m128i_i32[1]]; temp0.m128i_u32[2] = table[temp0.m128i_i32[2]]; temp0.m128i_u32[3] = table[temp0.m128i_i32[3]]; \ + temp1.m128i_u32[0] = table[temp1.m128i_i32[0]]; temp1.m128i_u32[1] = table[temp1.m128i_i32[1]]; temp1.m128i_u32[2] = table[temp1.m128i_i32[2]]; temp1.m128i_u32[3] = table[temp1.m128i_i32[3]]; \ + temp2.m128i_u32[0] = table[temp2.m128i_i32[0]]; temp2.m128i_u32[1] = table[temp2.m128i_i32[1]]; temp2.m128i_u32[2] = table[temp2.m128i_i32[2]]; temp2.m128i_u32[3] = table[temp2.m128i_i32[3]]; \ + v0 = temp0.m128i_i128; \ + v1 = temp1.m128i_i128; \ + v2 = temp2.m128i_i128; \ +} + +#define stbir__simdi_table_lookup4( v0,v1,v2,v3, table ) \ +{ \ + stbir__simdi_u32 temp0,temp1,temp2,temp3; \ + temp0.m128i_i128 = v0; \ + temp1.m128i_i128 = v1; \ + temp2.m128i_i128 = v2; \ + temp3.m128i_i128 = v3; \ + temp0.m128i_u32[0] = table[temp0.m128i_i32[0]]; temp0.m128i_u32[1] = table[temp0.m128i_i32[1]]; temp0.m128i_u32[2] = table[temp0.m128i_i32[2]]; temp0.m128i_u32[3] = table[temp0.m128i_i32[3]]; \ + 
temp1.m128i_u32[0] = table[temp1.m128i_i32[0]]; temp1.m128i_u32[1] = table[temp1.m128i_i32[1]]; temp1.m128i_u32[2] = table[temp1.m128i_i32[2]]; temp1.m128i_u32[3] = table[temp1.m128i_i32[3]]; \ + temp2.m128i_u32[0] = table[temp2.m128i_i32[0]]; temp2.m128i_u32[1] = table[temp2.m128i_i32[1]]; temp2.m128i_u32[2] = table[temp2.m128i_i32[2]]; temp2.m128i_u32[3] = table[temp2.m128i_i32[3]]; \ + temp3.m128i_u32[0] = table[temp3.m128i_i32[0]]; temp3.m128i_u32[1] = table[temp3.m128i_i32[1]]; temp3.m128i_u32[2] = table[temp3.m128i_i32[2]]; temp3.m128i_u32[3] = table[temp3.m128i_i32[3]]; \ + v0 = temp0.m128i_i128; \ + v1 = temp1.m128i_i128; \ + v2 = temp2.m128i_i128; \ + v3 = temp3.m128i_i128; \ +} + +static void STBIR__CODER_NAME( stbir__encode_uint8_srgb )( void * outputp, int width_times_channels, float const * encode ) +{ + unsigned char STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned char*) outputp; + unsigned char * end_output = ( (unsigned char*) output ) + width_times_channels; + + #ifdef STBIR_SIMD + + if ( width_times_channels >= 16 ) + { + float const * end_encode_m16 = encode + width_times_channels - 16; + end_output -= 16; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + stbir__simdf f0, f1, f2, f3; + stbir__simdi i0, i1, i2, i3; + STBIR_SIMD_NO_UNROLL(encode); + + stbir__simdf_load4_transposed( f0, f1, f2, f3, encode ); + + stbir__min_max_shift20( i0, f0 ); + stbir__min_max_shift20( i1, f1 ); + stbir__min_max_shift20( i2, f2 ); + stbir__min_max_shift20( i3, f3 ); + + stbir__simdi_table_lookup4( i0, i1, i2, i3, ( fp32_to_srgb8_tab4 - (127-13)*8 ) ); + + stbir__linear_to_srgb_finish( i0, f0 ); + stbir__linear_to_srgb_finish( i1, f1 ); + stbir__linear_to_srgb_finish( i2, f2 ); + stbir__linear_to_srgb_finish( i3, f3 ); + + stbir__interleave_pack_and_store_16_u8( output, STBIR_strs_join1(i, ,stbir__encode_order0), STBIR_strs_join1(i, ,stbir__encode_order1), STBIR_strs_join1(i, ,stbir__encode_order2), STBIR_strs_join1(i, ,stbir__encode_order3) ); + + encode 
+= 16; + output += 16; + if ( output <= end_output ) + continue; + if ( output == ( end_output + 16 ) ) + break; + output = end_output; // backup and do last couple + encode = end_encode_m16; + } + return; + } + #endif + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + output += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START + while ( output <= end_output ) + { + STBIR_SIMD_NO_UNROLL(encode); + + output[0-4] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order0] ); + output[1-4] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order1] ); + output[2-4] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order2] ); + output[3-4] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order3] ); + + output += 4; + encode += 4; + } + output -= 4; + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( output < end_output ) + { + STBIR_NO_UNROLL(encode); + output[0] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order0] ); + #if stbir__coder_min_num >= 2 + output[1] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order1] ); + #endif + #if stbir__coder_min_num >= 3 + output[2] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order2] ); + #endif + output += stbir__coder_min_num; + encode += stbir__coder_min_num; + } + #endif +} + +#if ( stbir__coder_min_num == 4 ) || ( ( stbir__coder_min_num == 1 ) && ( !defined(stbir__decode_swizzle) ) ) + +static float * STBIR__CODER_NAME(stbir__decode_uint8_srgb4_linearalpha)( float * decodep, int width_times_channels, void const * inputp ) +{ + float STBIR_STREAMOUT_PTR( * ) decode = decodep; + float * decode_end = (float*) decode + width_times_channels; + unsigned char const * input = (unsigned char const *)inputp; + + do { + decode[0] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order0] ]; + decode[1] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order1] ]; + decode[2] = 
stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order2] ]; + decode[3] = ( (float) input[stbir__decode_order3] ) * stbir__max_uint8_as_float_inverted; + input += 4; + decode += 4; + } while( decode < decode_end ); + return decode_end; +} + + +static void STBIR__CODER_NAME( stbir__encode_uint8_srgb4_linearalpha )( void * outputp, int width_times_channels, float const * encode ) +{ + unsigned char STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned char*) outputp; + unsigned char * end_output = ( (unsigned char*) output ) + width_times_channels; + + #ifdef STBIR_SIMD + + if ( width_times_channels >= 16 ) + { + float const * end_encode_m16 = encode + width_times_channels - 16; + end_output -= 16; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + stbir__simdf f0, f1, f2, f3; + stbir__simdi i0, i1, i2, i3; + + STBIR_SIMD_NO_UNROLL(encode); + stbir__simdf_load4_transposed( f0, f1, f2, f3, encode ); + + stbir__min_max_shift20( i0, f0 ); + stbir__min_max_shift20( i1, f1 ); + stbir__min_max_shift20( i2, f2 ); + stbir__scale_and_convert( i3, f3 ); + + stbir__simdi_table_lookup3( i0, i1, i2, ( fp32_to_srgb8_tab4 - (127-13)*8 ) ); + + stbir__linear_to_srgb_finish( i0, f0 ); + stbir__linear_to_srgb_finish( i1, f1 ); + stbir__linear_to_srgb_finish( i2, f2 ); + + stbir__interleave_pack_and_store_16_u8( output, STBIR_strs_join1(i, ,stbir__encode_order0), STBIR_strs_join1(i, ,stbir__encode_order1), STBIR_strs_join1(i, ,stbir__encode_order2), STBIR_strs_join1(i, ,stbir__encode_order3) ); + + output += 16; + encode += 16; + + if ( output <= end_output ) + continue; + if ( output == ( end_output + 16 ) ) + break; + output = end_output; // backup and do last couple + encode = end_encode_m16; + } + return; + } + #endif + + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float f; + STBIR_SIMD_NO_UNROLL(encode); + + output[stbir__decode_order0] = stbir__linear_to_srgb_uchar( encode[0] ); + output[stbir__decode_order1] = stbir__linear_to_srgb_uchar( encode[1] ); + 
output[stbir__decode_order2] = stbir__linear_to_srgb_uchar( encode[2] ); + + f = encode[3] * stbir__max_uint8_as_float + 0.5f; + STBIR_CLAMP(f, 0, 255); + output[stbir__decode_order3] = (unsigned char) f; + + output += 4; + encode += 4; + } while( output < end_output ); +} + +#endif + +#if ( stbir__coder_min_num == 2 ) || ( ( stbir__coder_min_num == 1 ) && ( !defined(stbir__decode_swizzle) ) ) + +static float * STBIR__CODER_NAME(stbir__decode_uint8_srgb2_linearalpha)( float * decodep, int width_times_channels, void const * inputp ) +{ + float STBIR_STREAMOUT_PTR( * ) decode = decodep; + float * decode_end = (float*) decode + width_times_channels; + unsigned char const * input = (unsigned char const *)inputp; + + decode += 4; + while( decode <= decode_end ) + { + decode[0-4] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order0] ]; + decode[1-4] = ( (float) input[stbir__decode_order1] ) * stbir__max_uint8_as_float_inverted; + decode[2-4] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order0+2] ]; + decode[3-4] = ( (float) input[stbir__decode_order1+2] ) * stbir__max_uint8_as_float_inverted; + input += 4; + decode += 4; + } + decode -= 4; + if( decode < decode_end ) + { + decode[0] = stbir__srgb_uchar_to_linear_float[ stbir__decode_order0 ]; + decode[1] = ( (float) input[stbir__decode_order1] ) * stbir__max_uint8_as_float_inverted; + } + return decode_end; +} + +static void STBIR__CODER_NAME( stbir__encode_uint8_srgb2_linearalpha )( void * outputp, int width_times_channels, float const * encode ) +{ + unsigned char STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned char*) outputp; + unsigned char * end_output = ( (unsigned char*) output ) + width_times_channels; + + #ifdef STBIR_SIMD + + if ( width_times_channels >= 16 ) + { + float const * end_encode_m16 = encode + width_times_channels - 16; + end_output -= 16; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + stbir__simdf f0, f1, f2, f3; + stbir__simdi i0, i1, i2, i3; + + 
STBIR_SIMD_NO_UNROLL(encode); + stbir__simdf_load4_transposed( f0, f1, f2, f3, encode ); + + stbir__min_max_shift20( i0, f0 ); + stbir__scale_and_convert( i1, f1 ); + stbir__min_max_shift20( i2, f2 ); + stbir__scale_and_convert( i3, f3 ); + + stbir__simdi_table_lookup2( i0, i2, ( fp32_to_srgb8_tab4 - (127-13)*8 ) ); + + stbir__linear_to_srgb_finish( i0, f0 ); + stbir__linear_to_srgb_finish( i2, f2 ); + + stbir__interleave_pack_and_store_16_u8( output, STBIR_strs_join1(i, ,stbir__encode_order0), STBIR_strs_join1(i, ,stbir__encode_order1), STBIR_strs_join1(i, ,stbir__encode_order2), STBIR_strs_join1(i, ,stbir__encode_order3) ); + + output += 16; + encode += 16; + if ( output <= end_output ) + continue; + if ( output == ( end_output + 16 ) ) + break; + output = end_output; // backup and do last couple + encode = end_encode_m16; + } + return; + } + #endif + + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float f; + STBIR_SIMD_NO_UNROLL(encode); + + output[stbir__decode_order0] = stbir__linear_to_srgb_uchar( encode[0] ); + + f = encode[1] * stbir__max_uint8_as_float + 0.5f; + STBIR_CLAMP(f, 0, 255); + output[stbir__decode_order1] = (unsigned char) f; + + output += 2; + encode += 2; + } while( output < end_output ); +} + +#endif + +static float * STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)( float * decodep, int width_times_channels, void const * inputp ) +{ + float STBIR_STREAMOUT_PTR( * ) decode = decodep; + float * decode_end = (float*) decode + width_times_channels; + unsigned short const * input = (unsigned short const *)inputp; + + #ifdef STBIR_SIMD + unsigned short const * end_input_m8 = input + width_times_channels - 8; + if ( width_times_channels >= 8 ) + { + decode_end -= 8; + STBIR_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + #ifdef STBIR_SIMD8 + stbir__simdi i; stbir__simdi8 o; + stbir__simdf8 of; + STBIR_NO_UNROLL(decode); + stbir__simdi_load( i, input ); + stbir__simdi8_expand_u16_to_u32( o, i ); + stbir__simdi8_convert_i32_to_float( of, o ); + 
stbir__simdf8_mult( of, of, STBIR_max_uint16_as_float_inverted8); + stbir__decode_simdf8_flip( of ); + stbir__simdf8_store( decode + 0, of ); + #else + stbir__simdi i, o0, o1; + stbir__simdf of0, of1; + STBIR_NO_UNROLL(decode); + stbir__simdi_load( i, input ); + stbir__simdi_expand_u16_to_u32( o0,o1,i ); + stbir__simdi_convert_i32_to_float( of0, o0 ); + stbir__simdi_convert_i32_to_float( of1, o1 ); + stbir__simdf_mult( of0, of0, STBIR__CONSTF(STBIR_max_uint16_as_float_inverted) ); + stbir__simdf_mult( of1, of1, STBIR__CONSTF(STBIR_max_uint16_as_float_inverted)); + stbir__decode_simdf4_flip( of0 ); + stbir__decode_simdf4_flip( of1 ); + stbir__simdf_store( decode + 0, of0 ); + stbir__simdf_store( decode + 4, of1 ); + #endif + decode += 8; + input += 8; + if ( decode <= decode_end ) + continue; + if ( decode == ( decode_end + 8 ) ) + break; + decode = decode_end; // backup and do last couple + input = end_input_m8; + } + return decode_end + 8; + } + #endif + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + decode += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START + while( decode <= decode_end ) + { + STBIR_SIMD_NO_UNROLL(decode); + decode[0-4] = ((float)(input[stbir__decode_order0])) * stbir__max_uint16_as_float_inverted; + decode[1-4] = ((float)(input[stbir__decode_order1])) * stbir__max_uint16_as_float_inverted; + decode[2-4] = ((float)(input[stbir__decode_order2])) * stbir__max_uint16_as_float_inverted; + decode[3-4] = ((float)(input[stbir__decode_order3])) * stbir__max_uint16_as_float_inverted; + decode += 4; + input += 4; + } + decode -= 4; + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( decode < decode_end ) + { + STBIR_NO_UNROLL(decode); + decode[0] = ((float)(input[stbir__decode_order0])) * stbir__max_uint16_as_float_inverted; + #if stbir__coder_min_num >= 2 + decode[1] = ((float)(input[stbir__decode_order1])) * stbir__max_uint16_as_float_inverted; + #endif + 
#if stbir__coder_min_num >= 3 + decode[2] = ((float)(input[stbir__decode_order2])) * stbir__max_uint16_as_float_inverted; + #endif + decode += stbir__coder_min_num; + input += stbir__coder_min_num; + } + #endif + return decode_end; +} + + +static void STBIR__CODER_NAME(stbir__encode_uint16_linear_scaled)( void * outputp, int width_times_channels, float const * encode ) +{ + unsigned short STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned short*) outputp; + unsigned short * end_output = ( (unsigned short*) output ) + width_times_channels; + + #ifdef STBIR_SIMD + { + if ( width_times_channels >= stbir__simdfX_float_count*2 ) + { + float const * end_encode_m8 = encode + width_times_channels - stbir__simdfX_float_count*2; + end_output -= stbir__simdfX_float_count*2; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + stbir__simdfX e0, e1; + stbir__simdiX i; + STBIR_SIMD_NO_UNROLL(encode); + stbir__simdfX_madd_mem( e0, STBIR_simd_point5X, STBIR_max_uint16_as_floatX, encode ); + stbir__simdfX_madd_mem( e1, STBIR_simd_point5X, STBIR_max_uint16_as_floatX, encode+stbir__simdfX_float_count ); + stbir__encode_simdfX_unflip( e0 ); + stbir__encode_simdfX_unflip( e1 ); + stbir__simdfX_pack_to_words( i, e0, e1 ); + stbir__simdiX_store( output, i ); + encode += stbir__simdfX_float_count*2; + output += stbir__simdfX_float_count*2; + if ( output <= end_output ) + continue; + if ( output == ( end_output + stbir__simdfX_float_count*2 ) ) + break; + output = end_output; // backup and do last couple + encode = end_encode_m8; + } + return; + } + } + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + output += 4; + STBIR_NO_UNROLL_LOOP_START + while( output <= end_output ) + { + stbir__simdf e; + stbir__simdi i; + STBIR_NO_UNROLL(encode); + stbir__simdf_load( e, encode ); + stbir__simdf_madd( e, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint16_as_float), e ); + stbir__encode_simdf4_unflip( e ); + 
stbir__simdf_pack_to_8words( i, e, e ); // only use first 4 + stbir__simdi_store2( output-4, i ); + output += 4; + encode += 4; + } + output -= 4; + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( output < end_output ) + { + stbir__simdf e; + STBIR_NO_UNROLL(encode); + stbir__simdf_madd1_mem( e, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint16_as_float), encode+stbir__encode_order0 ); output[0] = stbir__simdf_convert_float_to_short( e ); + #if stbir__coder_min_num >= 2 + stbir__simdf_madd1_mem( e, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint16_as_float), encode+stbir__encode_order1 ); output[1] = stbir__simdf_convert_float_to_short( e ); + #endif + #if stbir__coder_min_num >= 3 + stbir__simdf_madd1_mem( e, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint16_as_float), encode+stbir__encode_order2 ); output[2] = stbir__simdf_convert_float_to_short( e ); + #endif + output += stbir__coder_min_num; + encode += stbir__coder_min_num; + } + #endif + + #else + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + output += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START + while( output <= end_output ) + { + float f; + STBIR_SIMD_NO_UNROLL(encode); + f = encode[stbir__encode_order0] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[0-4] = (unsigned short)f; + f = encode[stbir__encode_order1] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[1-4] = (unsigned short)f; + f = encode[stbir__encode_order2] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[2-4] = (unsigned short)f; + f = encode[stbir__encode_order3] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[3-4] = (unsigned short)f; + output += 4; + encode += 4; + } + output -= 4; + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( output < end_output ) + { + 
float f; + STBIR_NO_UNROLL(encode); + f = encode[stbir__encode_order0] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[0] = (unsigned short)f; + #if stbir__coder_min_num >= 2 + f = encode[stbir__encode_order1] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[1] = (unsigned short)f; + #endif + #if stbir__coder_min_num >= 3 + f = encode[stbir__encode_order2] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[2] = (unsigned short)f; + #endif + output += stbir__coder_min_num; + encode += stbir__coder_min_num; + } + #endif + #endif +} + +static float * STBIR__CODER_NAME(stbir__decode_uint16_linear)( float * decodep, int width_times_channels, void const * inputp ) +{ + float STBIR_STREAMOUT_PTR( * ) decode = decodep; + float * decode_end = (float*) decode + width_times_channels; + unsigned short const * input = (unsigned short const *)inputp; + + #ifdef STBIR_SIMD + unsigned short const * end_input_m8 = input + width_times_channels - 8; + if ( width_times_channels >= 8 ) + { + decode_end -= 8; + STBIR_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + #ifdef STBIR_SIMD8 + stbir__simdi i; stbir__simdi8 o; + stbir__simdf8 of; + STBIR_NO_UNROLL(decode); + stbir__simdi_load( i, input ); + stbir__simdi8_expand_u16_to_u32( o, i ); + stbir__simdi8_convert_i32_to_float( of, o ); + stbir__decode_simdf8_flip( of ); + stbir__simdf8_store( decode + 0, of ); + #else + stbir__simdi i, o0, o1; + stbir__simdf of0, of1; + STBIR_NO_UNROLL(decode); + stbir__simdi_load( i, input ); + stbir__simdi_expand_u16_to_u32( o0, o1, i ); + stbir__simdi_convert_i32_to_float( of0, o0 ); + stbir__simdi_convert_i32_to_float( of1, o1 ); + stbir__decode_simdf4_flip( of0 ); + stbir__decode_simdf4_flip( of1 ); + stbir__simdf_store( decode + 0, of0 ); + stbir__simdf_store( decode + 4, of1 ); + #endif + decode += 8; + input += 8; + if ( decode <= decode_end ) + continue; + if ( decode == ( decode_end + 8 ) ) + break; + decode = decode_end; // backup and 
do last couple + input = end_input_m8; + } + return decode_end + 8; + } + #endif + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + decode += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START + while( decode <= decode_end ) + { + STBIR_SIMD_NO_UNROLL(decode); + decode[0-4] = ((float)(input[stbir__decode_order0])); + decode[1-4] = ((float)(input[stbir__decode_order1])); + decode[2-4] = ((float)(input[stbir__decode_order2])); + decode[3-4] = ((float)(input[stbir__decode_order3])); + decode += 4; + input += 4; + } + decode -= 4; + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( decode < decode_end ) + { + STBIR_NO_UNROLL(decode); + decode[0] = ((float)(input[stbir__decode_order0])); + #if stbir__coder_min_num >= 2 + decode[1] = ((float)(input[stbir__decode_order1])); + #endif + #if stbir__coder_min_num >= 3 + decode[2] = ((float)(input[stbir__decode_order2])); + #endif + decode += stbir__coder_min_num; + input += stbir__coder_min_num; + } + #endif + return decode_end; +} + +static void STBIR__CODER_NAME(stbir__encode_uint16_linear)( void * outputp, int width_times_channels, float const * encode ) +{ + unsigned short STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned short*) outputp; + unsigned short * end_output = ( (unsigned short*) output ) + width_times_channels; + + #ifdef STBIR_SIMD + { + if ( width_times_channels >= stbir__simdfX_float_count*2 ) + { + float const * end_encode_m8 = encode + width_times_channels - stbir__simdfX_float_count*2; + end_output -= stbir__simdfX_float_count*2; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + stbir__simdfX e0, e1; + stbir__simdiX i; + STBIR_SIMD_NO_UNROLL(encode); + stbir__simdfX_add_mem( e0, STBIR_simd_point5X, encode ); + stbir__simdfX_add_mem( e1, STBIR_simd_point5X, encode+stbir__simdfX_float_count ); + stbir__encode_simdfX_unflip( e0 ); + stbir__encode_simdfX_unflip( e1 ); + stbir__simdfX_pack_to_words( i, e0, e1 
); + stbir__simdiX_store( output, i ); + encode += stbir__simdfX_float_count*2; + output += stbir__simdfX_float_count*2; + if ( output <= end_output ) + continue; + if ( output == ( end_output + stbir__simdfX_float_count*2 ) ) + break; + output = end_output; // backup and do last couple + encode = end_encode_m8; + } + return; + } + } + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + output += 4; + STBIR_NO_UNROLL_LOOP_START + while( output <= end_output ) + { + stbir__simdf e; + stbir__simdi i; + STBIR_NO_UNROLL(encode); + stbir__simdf_load( e, encode ); + stbir__simdf_add( e, STBIR__CONSTF(STBIR_simd_point5), e ); + stbir__encode_simdf4_unflip( e ); + stbir__simdf_pack_to_8words( i, e, e ); // only use first 4 + stbir__simdi_store2( output-4, i ); + output += 4; + encode += 4; + } + output -= 4; + #endif + + #else + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + output += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START + while( output <= end_output ) + { + float f; + STBIR_SIMD_NO_UNROLL(encode); + f = encode[stbir__encode_order0] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[0-4] = (unsigned short)f; + f = encode[stbir__encode_order1] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[1-4] = (unsigned short)f; + f = encode[stbir__encode_order2] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[2-4] = (unsigned short)f; + f = encode[stbir__encode_order3] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[3-4] = (unsigned short)f; + output += 4; + encode += 4; + } + output -= 4; + #endif + + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( output < end_output ) + { + float f; + STBIR_NO_UNROLL(encode); + f = encode[stbir__encode_order0] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[0] = (unsigned short)f; + #if stbir__coder_min_num >= 2 + f = encode[stbir__encode_order1] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[1] = (unsigned short)f; + #endif + 
#if stbir__coder_min_num >= 3 + f = encode[stbir__encode_order2] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[2] = (unsigned short)f; + #endif + output += stbir__coder_min_num; + encode += stbir__coder_min_num; + } + #endif +} + +static float * STBIR__CODER_NAME(stbir__decode_half_float_linear)( float * decodep, int width_times_channels, void const * inputp ) +{ + float STBIR_STREAMOUT_PTR( * ) decode = decodep; + float * decode_end = (float*) decode + width_times_channels; + stbir__FP16 const * input = (stbir__FP16 const *)inputp; + + #ifdef STBIR_SIMD + if ( width_times_channels >= 8 ) + { + stbir__FP16 const * end_input_m8 = input + width_times_channels - 8; + decode_end -= 8; + STBIR_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + STBIR_NO_UNROLL(decode); + + stbir__half_to_float_SIMD( decode, input ); + #ifdef stbir__decode_swizzle + #ifdef STBIR_SIMD8 + { + stbir__simdf8 of; + stbir__simdf8_load( of, decode ); + stbir__decode_simdf8_flip( of ); + stbir__simdf8_store( decode, of ); + } + #else + { + stbir__simdf of0,of1; + stbir__simdf_load( of0, decode ); + stbir__simdf_load( of1, decode+4 ); + stbir__decode_simdf4_flip( of0 ); + stbir__decode_simdf4_flip( of1 ); + stbir__simdf_store( decode, of0 ); + stbir__simdf_store( decode+4, of1 ); + } + #endif + #endif + decode += 8; + input += 8; + if ( decode <= decode_end ) + continue; + if ( decode == ( decode_end + 8 ) ) + break; + decode = decode_end; // backup and do last couple + input = end_input_m8; + } + return decode_end + 8; + } + #endif + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + decode += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START + while( decode <= decode_end ) + { + STBIR_SIMD_NO_UNROLL(decode); + decode[0-4] = stbir__half_to_float(input[stbir__decode_order0]); + decode[1-4] = stbir__half_to_float(input[stbir__decode_order1]); + decode[2-4] = stbir__half_to_float(input[stbir__decode_order2]); + decode[3-4] = 
stbir__half_to_float(input[stbir__decode_order3]); + decode += 4; + input += 4; + } + decode -= 4; + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( decode < decode_end ) + { + STBIR_NO_UNROLL(decode); + decode[0] = stbir__half_to_float(input[stbir__decode_order0]); + #if stbir__coder_min_num >= 2 + decode[1] = stbir__half_to_float(input[stbir__decode_order1]); + #endif + #if stbir__coder_min_num >= 3 + decode[2] = stbir__half_to_float(input[stbir__decode_order2]); + #endif + decode += stbir__coder_min_num; + input += stbir__coder_min_num; + } + #endif + return decode_end; +} + +static void STBIR__CODER_NAME( stbir__encode_half_float_linear )( void * outputp, int width_times_channels, float const * encode ) +{ + stbir__FP16 STBIR_SIMD_STREAMOUT_PTR( * ) output = (stbir__FP16*) outputp; + stbir__FP16 * end_output = ( (stbir__FP16*) output ) + width_times_channels; + + #ifdef STBIR_SIMD + if ( width_times_channels >= 8 ) + { + float const * end_encode_m8 = encode + width_times_channels - 8; + end_output -= 8; + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + STBIR_SIMD_NO_UNROLL(encode); + #ifdef stbir__decode_swizzle + #ifdef STBIR_SIMD8 + { + stbir__simdf8 of; + stbir__simdf8_load( of, encode ); + stbir__encode_simdf8_unflip( of ); + stbir__float_to_half_SIMD( output, (float*)&of ); + } + #else + { + stbir__simdf of[2]; + stbir__simdf_load( of[0], encode ); + stbir__simdf_load( of[1], encode+4 ); + stbir__encode_simdf4_unflip( of[0] ); + stbir__encode_simdf4_unflip( of[1] ); + stbir__float_to_half_SIMD( output, (float*)of ); + } + #endif + #else + stbir__float_to_half_SIMD( output, encode ); + #endif + encode += 8; + output += 8; + if ( output <= end_output ) + continue; + if ( output == ( end_output + 8 ) ) + break; + output = end_output; // backup and do last couple + encode = end_encode_m8; + } + return; + } + #endif + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't 
divide cleanly by four + output += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START + while( output <= end_output ) + { + STBIR_SIMD_NO_UNROLL(output); + output[0-4] = stbir__float_to_half(encode[stbir__encode_order0]); + output[1-4] = stbir__float_to_half(encode[stbir__encode_order1]); + output[2-4] = stbir__float_to_half(encode[stbir__encode_order2]); + output[3-4] = stbir__float_to_half(encode[stbir__encode_order3]); + output += 4; + encode += 4; + } + output -= 4; + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( output < end_output ) + { + STBIR_NO_UNROLL(output); + output[0] = stbir__float_to_half(encode[stbir__encode_order0]); + #if stbir__coder_min_num >= 2 + output[1] = stbir__float_to_half(encode[stbir__encode_order1]); + #endif + #if stbir__coder_min_num >= 3 + output[2] = stbir__float_to_half(encode[stbir__encode_order2]); + #endif + output += stbir__coder_min_num; + encode += stbir__coder_min_num; + } + #endif +} + +static float * STBIR__CODER_NAME(stbir__decode_float_linear)( float * decodep, int width_times_channels, void const * inputp ) +{ + #ifdef stbir__decode_swizzle + float STBIR_STREAMOUT_PTR( * ) decode = decodep; + float * decode_end = (float*) decode + width_times_channels; + float const * input = (float const *)inputp; + + #ifdef STBIR_SIMD + if ( width_times_channels >= 16 ) + { + float const * end_input_m16 = input + width_times_channels - 16; + decode_end -= 16; + STBIR_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + STBIR_NO_UNROLL(decode); + #ifdef stbir__decode_swizzle + #ifdef STBIR_SIMD8 + { + stbir__simdf8 of0,of1; + stbir__simdf8_load( of0, input ); + stbir__simdf8_load( of1, input+8 ); + stbir__decode_simdf8_flip( of0 ); + stbir__decode_simdf8_flip( of1 ); + stbir__simdf8_store( decode, of0 ); + stbir__simdf8_store( decode+8, of1 ); + } + #else + { + stbir__simdf of0,of1,of2,of3; + stbir__simdf_load( of0, input ); + stbir__simdf_load( of1, input+4 ); + stbir__simdf_load( of2, input+8 ); + 
stbir__simdf_load( of3, input+12 ); + stbir__decode_simdf4_flip( of0 ); + stbir__decode_simdf4_flip( of1 ); + stbir__decode_simdf4_flip( of2 ); + stbir__decode_simdf4_flip( of3 ); + stbir__simdf_store( decode, of0 ); + stbir__simdf_store( decode+4, of1 ); + stbir__simdf_store( decode+8, of2 ); + stbir__simdf_store( decode+12, of3 ); + } + #endif + #endif + decode += 16; + input += 16; + if ( decode <= decode_end ) + continue; + if ( decode == ( decode_end + 16 ) ) + break; + decode = decode_end; // backup and do last couple + input = end_input_m16; + } + return decode_end + 16; + } + #endif + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + decode += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START + while( decode <= decode_end ) + { + STBIR_SIMD_NO_UNROLL(decode); + decode[0-4] = input[stbir__decode_order0]; + decode[1-4] = input[stbir__decode_order1]; + decode[2-4] = input[stbir__decode_order2]; + decode[3-4] = input[stbir__decode_order3]; + decode += 4; + input += 4; + } + decode -= 4; + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( decode < decode_end ) + { + STBIR_NO_UNROLL(decode); + decode[0] = input[stbir__decode_order0]; + #if stbir__coder_min_num >= 2 + decode[1] = input[stbir__decode_order1]; + #endif + #if stbir__coder_min_num >= 3 + decode[2] = input[stbir__decode_order2]; + #endif + decode += stbir__coder_min_num; + input += stbir__coder_min_num; + } + #endif + return decode_end; + + #else + + if ( (void*)decodep != inputp ) + STBIR_MEMCPY( decodep, inputp, width_times_channels * sizeof( float ) ); + + return decodep + width_times_channels; + + #endif +} + +static void STBIR__CODER_NAME( stbir__encode_float_linear )( void * outputp, int width_times_channels, float const * encode ) +{ + #if !defined( STBIR_FLOAT_HIGH_CLAMP ) && !defined(STBIR_FLOAT_LO_CLAMP) && !defined(stbir__decode_swizzle) + + if ( (void*)outputp != (void*) encode ) + STBIR_MEMCPY( 
outputp, encode, width_times_channels * sizeof( float ) ); + + #else + + float STBIR_SIMD_STREAMOUT_PTR( * ) output = (float*) outputp; + float * end_output = ( (float*) output ) + width_times_channels; + + #ifdef STBIR_FLOAT_HIGH_CLAMP + #define stbir_scalar_hi_clamp( v ) if ( v > STBIR_FLOAT_HIGH_CLAMP ) v = STBIR_FLOAT_HIGH_CLAMP; + #else + #define stbir_scalar_hi_clamp( v ) + #endif + #ifdef STBIR_FLOAT_LOW_CLAMP + #define stbir_scalar_lo_clamp( v ) if ( v < STBIR_FLOAT_LOW_CLAMP ) v = STBIR_FLOAT_LOW_CLAMP; + #else + #define stbir_scalar_lo_clamp( v ) + #endif + + #ifdef STBIR_SIMD + + #ifdef STBIR_FLOAT_HIGH_CLAMP + const stbir__simdfX high_clamp = stbir__simdf_frepX(STBIR_FLOAT_HIGH_CLAMP); + #endif + #ifdef STBIR_FLOAT_LOW_CLAMP + const stbir__simdfX low_clamp = stbir__simdf_frepX(STBIR_FLOAT_LOW_CLAMP); + #endif + + if ( width_times_channels >= ( stbir__simdfX_float_count * 2 ) ) + { + float const * end_encode_m8 = encode + width_times_channels - ( stbir__simdfX_float_count * 2 ); + end_output -= ( stbir__simdfX_float_count * 2 ); + STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR + for(;;) + { + stbir__simdfX e0, e1; + STBIR_SIMD_NO_UNROLL(encode); + stbir__simdfX_load( e0, encode ); + stbir__simdfX_load( e1, encode+stbir__simdfX_float_count ); +#ifdef STBIR_FLOAT_HIGH_CLAMP + stbir__simdfX_min( e0, e0, high_clamp ); + stbir__simdfX_min( e1, e1, high_clamp ); +#endif +#ifdef STBIR_FLOAT_LOW_CLAMP + stbir__simdfX_max( e0, e0, low_clamp ); + stbir__simdfX_max( e1, e1, low_clamp ); +#endif + stbir__encode_simdfX_unflip( e0 ); + stbir__encode_simdfX_unflip( e1 ); + stbir__simdfX_store( output, e0 ); + stbir__simdfX_store( output+stbir__simdfX_float_count, e1 ); + encode += stbir__simdfX_float_count * 2; + output += stbir__simdfX_float_count * 2; + if ( output < end_output ) + continue; + if ( output == ( end_output + ( stbir__simdfX_float_count * 2 ) ) ) + break; + output = end_output; // backup and do last couple + encode = end_encode_m8; + } + return; + } + + // try 
to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + output += 4; + STBIR_NO_UNROLL_LOOP_START + while( output <= end_output ) + { + stbir__simdf e0; + STBIR_NO_UNROLL(encode); + stbir__simdf_load( e0, encode ); +#ifdef STBIR_FLOAT_HIGH_CLAMP + stbir__simdf_min( e0, e0, high_clamp ); +#endif +#ifdef STBIR_FLOAT_LOW_CLAMP + stbir__simdf_max( e0, e0, low_clamp ); +#endif + stbir__encode_simdf4_unflip( e0 ); + stbir__simdf_store( output-4, e0 ); + output += 4; + encode += 4; + } + output -= 4; + #endif + + #else + + // try to do blocks of 4 when you can + #if stbir__coder_min_num != 3 // doesn't divide cleanly by four + output += 4; + STBIR_SIMD_NO_UNROLL_LOOP_START + while( output <= end_output ) + { + float e; + STBIR_SIMD_NO_UNROLL(encode); + e = encode[ stbir__encode_order0 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[0-4] = e; + e = encode[ stbir__encode_order1 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[1-4] = e; + e = encode[ stbir__encode_order2 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[2-4] = e; + e = encode[ stbir__encode_order3 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[3-4] = e; + output += 4; + encode += 4; + } + output -= 4; + + #endif + + #endif + + // do the remnants + #if stbir__coder_min_num < 4 + STBIR_NO_UNROLL_LOOP_START + while( output < end_output ) + { + float e; + STBIR_NO_UNROLL(encode); + e = encode[ stbir__encode_order0 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[0] = e; + #if stbir__coder_min_num >= 2 + e = encode[ stbir__encode_order1 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[1] = e; + #endif + #if stbir__coder_min_num >= 3 + e = encode[ stbir__encode_order2 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[2] = e; + #endif + output += stbir__coder_min_num; + encode += stbir__coder_min_num; + } + #endif + + #endif +} + +#undef 
stbir__decode_suffix +#undef stbir__decode_simdf8_flip +#undef stbir__decode_simdf4_flip +#undef stbir__decode_order0 +#undef stbir__decode_order1 +#undef stbir__decode_order2 +#undef stbir__decode_order3 +#undef stbir__encode_order0 +#undef stbir__encode_order1 +#undef stbir__encode_order2 +#undef stbir__encode_order3 +#undef stbir__encode_simdf8_unflip +#undef stbir__encode_simdf4_unflip +#undef stbir__encode_simdfX_unflip +#undef STBIR__CODER_NAME +#undef stbir__coder_min_num +#undef stbir__decode_swizzle +#undef stbir_scalar_hi_clamp +#undef stbir_scalar_lo_clamp +#undef STB_IMAGE_RESIZE_DO_CODERS + +#elif defined( STB_IMAGE_RESIZE_DO_VERTICALS) + +#ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE +#define STBIR_chans( start, end ) STBIR_strs_join14(start,STBIR__vertical_channels,end,_cont) +#else +#define STBIR_chans( start, end ) STBIR_strs_join1(start,STBIR__vertical_channels,end) +#endif + +#if STBIR__vertical_channels >= 1 +#define stbIF0( code ) code +#else +#define stbIF0( code ) +#endif +#if STBIR__vertical_channels >= 2 +#define stbIF1( code ) code +#else +#define stbIF1( code ) +#endif +#if STBIR__vertical_channels >= 3 +#define stbIF2( code ) code +#else +#define stbIF2( code ) +#endif +#if STBIR__vertical_channels >= 4 +#define stbIF3( code ) code +#else +#define stbIF3( code ) +#endif +#if STBIR__vertical_channels >= 5 +#define stbIF4( code ) code +#else +#define stbIF4( code ) +#endif +#if STBIR__vertical_channels >= 6 +#define stbIF5( code ) code +#else +#define stbIF5( code ) +#endif +#if STBIR__vertical_channels >= 7 +#define stbIF6( code ) code +#else +#define stbIF6( code ) +#endif +#if STBIR__vertical_channels >= 8 +#define stbIF7( code ) code +#else +#define stbIF7( code ) +#endif + +static void STBIR_chans( stbir__vertical_scatter_with_,_coeffs)( float ** outputs, float const * vertical_coefficients, float const * input, float const * input_end ) +{ + stbIF0( float STBIR_SIMD_STREAMOUT_PTR( * ) output0 = outputs[0]; float c0s = 
vertical_coefficients[0]; ) + stbIF1( float STBIR_SIMD_STREAMOUT_PTR( * ) output1 = outputs[1]; float c1s = vertical_coefficients[1]; ) + stbIF2( float STBIR_SIMD_STREAMOUT_PTR( * ) output2 = outputs[2]; float c2s = vertical_coefficients[2]; ) + stbIF3( float STBIR_SIMD_STREAMOUT_PTR( * ) output3 = outputs[3]; float c3s = vertical_coefficients[3]; ) + stbIF4( float STBIR_SIMD_STREAMOUT_PTR( * ) output4 = outputs[4]; float c4s = vertical_coefficients[4]; ) + stbIF5( float STBIR_SIMD_STREAMOUT_PTR( * ) output5 = outputs[5]; float c5s = vertical_coefficients[5]; ) + stbIF6( float STBIR_SIMD_STREAMOUT_PTR( * ) output6 = outputs[6]; float c6s = vertical_coefficients[6]; ) + stbIF7( float STBIR_SIMD_STREAMOUT_PTR( * ) output7 = outputs[7]; float c7s = vertical_coefficients[7]; ) + + #ifdef STBIR_SIMD + { + stbIF0(stbir__simdfX c0 = stbir__simdf_frepX( c0s ); ) + stbIF1(stbir__simdfX c1 = stbir__simdf_frepX( c1s ); ) + stbIF2(stbir__simdfX c2 = stbir__simdf_frepX( c2s ); ) + stbIF3(stbir__simdfX c3 = stbir__simdf_frepX( c3s ); ) + stbIF4(stbir__simdfX c4 = stbir__simdf_frepX( c4s ); ) + stbIF5(stbir__simdfX c5 = stbir__simdf_frepX( c5s ); ) + stbIF6(stbir__simdfX c6 = stbir__simdf_frepX( c6s ); ) + stbIF7(stbir__simdfX c7 = stbir__simdf_frepX( c7s ); ) + STBIR_SIMD_NO_UNROLL_LOOP_START + while ( ( (char*)input_end - (char*) input ) >= (16*stbir__simdfX_float_count) ) + { + stbir__simdfX o0, o1, o2, o3, r0, r1, r2, r3; + STBIR_SIMD_NO_UNROLL(output0); + + stbir__simdfX_load( r0, input ); stbir__simdfX_load( r1, input+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input+(3*stbir__simdfX_float_count) ); + + #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE + stbIF0( stbir__simdfX_load( o0, output0 ); stbir__simdfX_load( o1, output0+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output0+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output0+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, 
o0, r0, c0 ); stbir__simdfX_madd( o1, o1, r1, c0 ); stbir__simdfX_madd( o2, o2, r2, c0 ); stbir__simdfX_madd( o3, o3, r3, c0 ); + stbir__simdfX_store( output0, o0 ); stbir__simdfX_store( output0+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output0+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output0+(3*stbir__simdfX_float_count), o3 ); ) + stbIF1( stbir__simdfX_load( o0, output1 ); stbir__simdfX_load( o1, output1+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output1+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output1+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c1 ); stbir__simdfX_madd( o1, o1, r1, c1 ); stbir__simdfX_madd( o2, o2, r2, c1 ); stbir__simdfX_madd( o3, o3, r3, c1 ); + stbir__simdfX_store( output1, o0 ); stbir__simdfX_store( output1+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output1+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output1+(3*stbir__simdfX_float_count), o3 ); ) + stbIF2( stbir__simdfX_load( o0, output2 ); stbir__simdfX_load( o1, output2+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output2+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output2+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c2 ); stbir__simdfX_madd( o1, o1, r1, c2 ); stbir__simdfX_madd( o2, o2, r2, c2 ); stbir__simdfX_madd( o3, o3, r3, c2 ); + stbir__simdfX_store( output2, o0 ); stbir__simdfX_store( output2+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output2+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output2+(3*stbir__simdfX_float_count), o3 ); ) + stbIF3( stbir__simdfX_load( o0, output3 ); stbir__simdfX_load( o1, output3+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output3+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output3+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c3 ); stbir__simdfX_madd( o1, o1, r1, c3 ); stbir__simdfX_madd( o2, o2, r2, c3 ); stbir__simdfX_madd( o3, o3, r3, c3 ); 
+ stbir__simdfX_store( output3, o0 ); stbir__simdfX_store( output3+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output3+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output3+(3*stbir__simdfX_float_count), o3 ); ) + stbIF4( stbir__simdfX_load( o0, output4 ); stbir__simdfX_load( o1, output4+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output4+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output4+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c4 ); stbir__simdfX_madd( o1, o1, r1, c4 ); stbir__simdfX_madd( o2, o2, r2, c4 ); stbir__simdfX_madd( o3, o3, r3, c4 ); + stbir__simdfX_store( output4, o0 ); stbir__simdfX_store( output4+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output4+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output4+(3*stbir__simdfX_float_count), o3 ); ) + stbIF5( stbir__simdfX_load( o0, output5 ); stbir__simdfX_load( o1, output5+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output5+(2*stbir__simdfX_float_count)); stbir__simdfX_load( o3, output5+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c5 ); stbir__simdfX_madd( o1, o1, r1, c5 ); stbir__simdfX_madd( o2, o2, r2, c5 ); stbir__simdfX_madd( o3, o3, r3, c5 ); + stbir__simdfX_store( output5, o0 ); stbir__simdfX_store( output5+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output5+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output5+(3*stbir__simdfX_float_count), o3 ); ) + stbIF6( stbir__simdfX_load( o0, output6 ); stbir__simdfX_load( o1, output6+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output6+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output6+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c6 ); stbir__simdfX_madd( o1, o1, r1, c6 ); stbir__simdfX_madd( o2, o2, r2, c6 ); stbir__simdfX_madd( o3, o3, r3, c6 ); + stbir__simdfX_store( output6, o0 ); stbir__simdfX_store( output6+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( 
output6+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output6+(3*stbir__simdfX_float_count), o3 ); ) + stbIF7( stbir__simdfX_load( o0, output7 ); stbir__simdfX_load( o1, output7+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output7+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output7+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c7 ); stbir__simdfX_madd( o1, o1, r1, c7 ); stbir__simdfX_madd( o2, o2, r2, c7 ); stbir__simdfX_madd( o3, o3, r3, c7 ); + stbir__simdfX_store( output7, o0 ); stbir__simdfX_store( output7+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output7+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output7+(3*stbir__simdfX_float_count), o3 ); ) + #else + stbIF0( stbir__simdfX_mult( o0, r0, c0 ); stbir__simdfX_mult( o1, r1, c0 ); stbir__simdfX_mult( o2, r2, c0 ); stbir__simdfX_mult( o3, r3, c0 ); + stbir__simdfX_store( output0, o0 ); stbir__simdfX_store( output0+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output0+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output0+(3*stbir__simdfX_float_count), o3 ); ) + stbIF1( stbir__simdfX_mult( o0, r0, c1 ); stbir__simdfX_mult( o1, r1, c1 ); stbir__simdfX_mult( o2, r2, c1 ); stbir__simdfX_mult( o3, r3, c1 ); + stbir__simdfX_store( output1, o0 ); stbir__simdfX_store( output1+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output1+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output1+(3*stbir__simdfX_float_count), o3 ); ) + stbIF2( stbir__simdfX_mult( o0, r0, c2 ); stbir__simdfX_mult( o1, r1, c2 ); stbir__simdfX_mult( o2, r2, c2 ); stbir__simdfX_mult( o3, r3, c2 ); + stbir__simdfX_store( output2, o0 ); stbir__simdfX_store( output2+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output2+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output2+(3*stbir__simdfX_float_count), o3 ); ) + stbIF3( stbir__simdfX_mult( o0, r0, c3 ); stbir__simdfX_mult( o1, r1, c3 ); stbir__simdfX_mult( o2, r2, c3 ); 
stbir__simdfX_mult( o3, r3, c3 ); + stbir__simdfX_store( output3, o0 ); stbir__simdfX_store( output3+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output3+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output3+(3*stbir__simdfX_float_count), o3 ); ) + stbIF4( stbir__simdfX_mult( o0, r0, c4 ); stbir__simdfX_mult( o1, r1, c4 ); stbir__simdfX_mult( o2, r2, c4 ); stbir__simdfX_mult( o3, r3, c4 ); + stbir__simdfX_store( output4, o0 ); stbir__simdfX_store( output4+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output4+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output4+(3*stbir__simdfX_float_count), o3 ); ) + stbIF5( stbir__simdfX_mult( o0, r0, c5 ); stbir__simdfX_mult( o1, r1, c5 ); stbir__simdfX_mult( o2, r2, c5 ); stbir__simdfX_mult( o3, r3, c5 ); + stbir__simdfX_store( output5, o0 ); stbir__simdfX_store( output5+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output5+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output5+(3*stbir__simdfX_float_count), o3 ); ) + stbIF6( stbir__simdfX_mult( o0, r0, c6 ); stbir__simdfX_mult( o1, r1, c6 ); stbir__simdfX_mult( o2, r2, c6 ); stbir__simdfX_mult( o3, r3, c6 ); + stbir__simdfX_store( output6, o0 ); stbir__simdfX_store( output6+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output6+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output6+(3*stbir__simdfX_float_count), o3 ); ) + stbIF7( stbir__simdfX_mult( o0, r0, c7 ); stbir__simdfX_mult( o1, r1, c7 ); stbir__simdfX_mult( o2, r2, c7 ); stbir__simdfX_mult( o3, r3, c7 ); + stbir__simdfX_store( output7, o0 ); stbir__simdfX_store( output7+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output7+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output7+(3*stbir__simdfX_float_count), o3 ); ) + #endif + + input += (4*stbir__simdfX_float_count); + stbIF0( output0 += (4*stbir__simdfX_float_count); ) stbIF1( output1 += (4*stbir__simdfX_float_count); ) stbIF2( output2 += (4*stbir__simdfX_float_count); ) 
stbIF3( output3 += (4*stbir__simdfX_float_count); ) stbIF4( output4 += (4*stbir__simdfX_float_count); ) stbIF5( output5 += (4*stbir__simdfX_float_count); ) stbIF6( output6 += (4*stbir__simdfX_float_count); ) stbIF7( output7 += (4*stbir__simdfX_float_count); ) + } + STBIR_SIMD_NO_UNROLL_LOOP_START + while ( ( (char*)input_end - (char*) input ) >= 16 ) + { + stbir__simdf o0, r0; + STBIR_SIMD_NO_UNROLL(output0); + + stbir__simdf_load( r0, input ); + + #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE + stbIF0( stbir__simdf_load( o0, output0 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c0 ) ); stbir__simdf_store( output0, o0 ); ) + stbIF1( stbir__simdf_load( o0, output1 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c1 ) ); stbir__simdf_store( output1, o0 ); ) + stbIF2( stbir__simdf_load( o0, output2 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c2 ) ); stbir__simdf_store( output2, o0 ); ) + stbIF3( stbir__simdf_load( o0, output3 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c3 ) ); stbir__simdf_store( output3, o0 ); ) + stbIF4( stbir__simdf_load( o0, output4 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c4 ) ); stbir__simdf_store( output4, o0 ); ) + stbIF5( stbir__simdf_load( o0, output5 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c5 ) ); stbir__simdf_store( output5, o0 ); ) + stbIF6( stbir__simdf_load( o0, output6 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c6 ) ); stbir__simdf_store( output6, o0 ); ) + stbIF7( stbir__simdf_load( o0, output7 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c7 ) ); stbir__simdf_store( output7, o0 ); ) + #else + stbIF0( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c0 ) ); stbir__simdf_store( output0, o0 ); ) + stbIF1( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c1 ) ); stbir__simdf_store( output1, o0 ); ) + stbIF2( stbir__simdf_mult( o0, r0, 
stbir__if_simdf8_cast_to_simdf4( c2 ) ); stbir__simdf_store( output2, o0 ); ) + stbIF3( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c3 ) ); stbir__simdf_store( output3, o0 ); ) + stbIF4( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c4 ) ); stbir__simdf_store( output4, o0 ); ) + stbIF5( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c5 ) ); stbir__simdf_store( output5, o0 ); ) + stbIF6( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c6 ) ); stbir__simdf_store( output6, o0 ); ) + stbIF7( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c7 ) ); stbir__simdf_store( output7, o0 ); ) + #endif + + input += 4; + stbIF0( output0 += 4; ) stbIF1( output1 += 4; ) stbIF2( output2 += 4; ) stbIF3( output3 += 4; ) stbIF4( output4 += 4; ) stbIF5( output5 += 4; ) stbIF6( output6 += 4; ) stbIF7( output7 += 4; ) + } + } + #else + STBIR_NO_UNROLL_LOOP_START + while ( ( (char*)input_end - (char*) input ) >= 16 ) + { + float r0, r1, r2, r3; + STBIR_NO_UNROLL(input); + + r0 = input[0], r1 = input[1], r2 = input[2], r3 = input[3]; + + #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE + stbIF0( output0[0] += ( r0 * c0s ); output0[1] += ( r1 * c0s ); output0[2] += ( r2 * c0s ); output0[3] += ( r3 * c0s ); ) + stbIF1( output1[0] += ( r0 * c1s ); output1[1] += ( r1 * c1s ); output1[2] += ( r2 * c1s ); output1[3] += ( r3 * c1s ); ) + stbIF2( output2[0] += ( r0 * c2s ); output2[1] += ( r1 * c2s ); output2[2] += ( r2 * c2s ); output2[3] += ( r3 * c2s ); ) + stbIF3( output3[0] += ( r0 * c3s ); output3[1] += ( r1 * c3s ); output3[2] += ( r2 * c3s ); output3[3] += ( r3 * c3s ); ) + stbIF4( output4[0] += ( r0 * c4s ); output4[1] += ( r1 * c4s ); output4[2] += ( r2 * c4s ); output4[3] += ( r3 * c4s ); ) + stbIF5( output5[0] += ( r0 * c5s ); output5[1] += ( r1 * c5s ); output5[2] += ( r2 * c5s ); output5[3] += ( r3 * c5s ); ) + stbIF6( output6[0] += ( r0 * c6s ); output6[1] += ( r1 * c6s ); output6[2] += ( r2 * c6s ); output6[3] += ( r3 * c6s 
); ) + stbIF7( output7[0] += ( r0 * c7s ); output7[1] += ( r1 * c7s ); output7[2] += ( r2 * c7s ); output7[3] += ( r3 * c7s ); ) + #else + stbIF0( output0[0] = ( r0 * c0s ); output0[1] = ( r1 * c0s ); output0[2] = ( r2 * c0s ); output0[3] = ( r3 * c0s ); ) + stbIF1( output1[0] = ( r0 * c1s ); output1[1] = ( r1 * c1s ); output1[2] = ( r2 * c1s ); output1[3] = ( r3 * c1s ); ) + stbIF2( output2[0] = ( r0 * c2s ); output2[1] = ( r1 * c2s ); output2[2] = ( r2 * c2s ); output2[3] = ( r3 * c2s ); ) + stbIF3( output3[0] = ( r0 * c3s ); output3[1] = ( r1 * c3s ); output3[2] = ( r2 * c3s ); output3[3] = ( r3 * c3s ); ) + stbIF4( output4[0] = ( r0 * c4s ); output4[1] = ( r1 * c4s ); output4[2] = ( r2 * c4s ); output4[3] = ( r3 * c4s ); ) + stbIF5( output5[0] = ( r0 * c5s ); output5[1] = ( r1 * c5s ); output5[2] = ( r2 * c5s ); output5[3] = ( r3 * c5s ); ) + stbIF6( output6[0] = ( r0 * c6s ); output6[1] = ( r1 * c6s ); output6[2] = ( r2 * c6s ); output6[3] = ( r3 * c6s ); ) + stbIF7( output7[0] = ( r0 * c7s ); output7[1] = ( r1 * c7s ); output7[2] = ( r2 * c7s ); output7[3] = ( r3 * c7s ); ) + #endif + + input += 4; + stbIF0( output0 += 4; ) stbIF1( output1 += 4; ) stbIF2( output2 += 4; ) stbIF3( output3 += 4; ) stbIF4( output4 += 4; ) stbIF5( output5 += 4; ) stbIF6( output6 += 4; ) stbIF7( output7 += 4; ) + } + #endif + STBIR_NO_UNROLL_LOOP_START + while ( input < input_end ) + { + float r = input[0]; + STBIR_NO_UNROLL(output0); + + #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE + stbIF0( output0[0] += ( r * c0s ); ) + stbIF1( output1[0] += ( r * c1s ); ) + stbIF2( output2[0] += ( r * c2s ); ) + stbIF3( output3[0] += ( r * c3s ); ) + stbIF4( output4[0] += ( r * c4s ); ) + stbIF5( output5[0] += ( r * c5s ); ) + stbIF6( output6[0] += ( r * c6s ); ) + stbIF7( output7[0] += ( r * c7s ); ) + #else + stbIF0( output0[0] = ( r * c0s ); ) + stbIF1( output1[0] = ( r * c1s ); ) + stbIF2( output2[0] = ( r * c2s ); ) + stbIF3( output3[0] = ( r * c3s ); ) + stbIF4( output4[0] = ( r * c4s ); ) + 
stbIF5( output5[0] = ( r * c5s ); ) + stbIF6( output6[0] = ( r * c6s ); ) + stbIF7( output7[0] = ( r * c7s ); ) + #endif + + ++input; + stbIF0( ++output0; ) stbIF1( ++output1; ) stbIF2( ++output2; ) stbIF3( ++output3; ) stbIF4( ++output4; ) stbIF5( ++output5; ) stbIF6( ++output6; ) stbIF7( ++output7; ) + } +} + +static void STBIR_chans( stbir__vertical_gather_with_,_coeffs)( float * outputp, float const * vertical_coefficients, float const ** inputs, float const * input0_end ) +{ + float STBIR_SIMD_STREAMOUT_PTR( * ) output = outputp; + + stbIF0( float const * input0 = inputs[0]; float c0s = vertical_coefficients[0]; ) + stbIF1( float const * input1 = inputs[1]; float c1s = vertical_coefficients[1]; ) + stbIF2( float const * input2 = inputs[2]; float c2s = vertical_coefficients[2]; ) + stbIF3( float const * input3 = inputs[3]; float c3s = vertical_coefficients[3]; ) + stbIF4( float const * input4 = inputs[4]; float c4s = vertical_coefficients[4]; ) + stbIF5( float const * input5 = inputs[5]; float c5s = vertical_coefficients[5]; ) + stbIF6( float const * input6 = inputs[6]; float c6s = vertical_coefficients[6]; ) + stbIF7( float const * input7 = inputs[7]; float c7s = vertical_coefficients[7]; ) + +#if ( STBIR__vertical_channels == 1 ) && !defined(STB_IMAGE_RESIZE_VERTICAL_CONTINUE) + // check single channel one weight + if ( ( c0s >= (1.0f-0.000001f) ) && ( c0s <= (1.0f+0.000001f) ) ) + { + STBIR_MEMCPY( output, input0, (char*)input0_end - (char*)input0 ); + return; + } +#endif + + #ifdef STBIR_SIMD + { + stbIF0(stbir__simdfX c0 = stbir__simdf_frepX( c0s ); ) + stbIF1(stbir__simdfX c1 = stbir__simdf_frepX( c1s ); ) + stbIF2(stbir__simdfX c2 = stbir__simdf_frepX( c2s ); ) + stbIF3(stbir__simdfX c3 = stbir__simdf_frepX( c3s ); ) + stbIF4(stbir__simdfX c4 = stbir__simdf_frepX( c4s ); ) + stbIF5(stbir__simdfX c5 = stbir__simdf_frepX( c5s ); ) + stbIF6(stbir__simdfX c6 = stbir__simdf_frepX( c6s ); ) + stbIF7(stbir__simdfX c7 = stbir__simdf_frepX( c7s ); ) + + 
STBIR_SIMD_NO_UNROLL_LOOP_START + while ( ( (char*)input0_end - (char*) input0 ) >= (16*stbir__simdfX_float_count) ) + { + stbir__simdfX o0, o1, o2, o3, r0, r1, r2, r3; + STBIR_SIMD_NO_UNROLL(output); + + // prefetch four loop iterations ahead (doesn't affect much for small resizes, but helps with big ones) + stbIF0( stbir__prefetch( input0 + (16*stbir__simdfX_float_count) ); ) + stbIF1( stbir__prefetch( input1 + (16*stbir__simdfX_float_count) ); ) + stbIF2( stbir__prefetch( input2 + (16*stbir__simdfX_float_count) ); ) + stbIF3( stbir__prefetch( input3 + (16*stbir__simdfX_float_count) ); ) + stbIF4( stbir__prefetch( input4 + (16*stbir__simdfX_float_count) ); ) + stbIF5( stbir__prefetch( input5 + (16*stbir__simdfX_float_count) ); ) + stbIF6( stbir__prefetch( input6 + (16*stbir__simdfX_float_count) ); ) + stbIF7( stbir__prefetch( input7 + (16*stbir__simdfX_float_count) ); ) + + #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE + stbIF0( stbir__simdfX_load( o0, output ); stbir__simdfX_load( o1, output+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output+(3*stbir__simdfX_float_count) ); + stbir__simdfX_load( r0, input0 ); stbir__simdfX_load( r1, input0+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input0+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input0+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c0 ); stbir__simdfX_madd( o1, o1, r1, c0 ); stbir__simdfX_madd( o2, o2, r2, c0 ); stbir__simdfX_madd( o3, o3, r3, c0 ); ) + #else + stbIF0( stbir__simdfX_load( r0, input0 ); stbir__simdfX_load( r1, input0+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input0+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input0+(3*stbir__simdfX_float_count) ); + stbir__simdfX_mult( o0, r0, c0 ); stbir__simdfX_mult( o1, r1, c0 ); stbir__simdfX_mult( o2, r2, c0 ); stbir__simdfX_mult( o3, r3, c0 ); ) + #endif + + stbIF1( stbir__simdfX_load( r0, input1 ); stbir__simdfX_load( 
r1, input1+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input1+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input1+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c1 ); stbir__simdfX_madd( o1, o1, r1, c1 ); stbir__simdfX_madd( o2, o2, r2, c1 ); stbir__simdfX_madd( o3, o3, r3, c1 ); ) + stbIF2( stbir__simdfX_load( r0, input2 ); stbir__simdfX_load( r1, input2+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input2+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input2+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c2 ); stbir__simdfX_madd( o1, o1, r1, c2 ); stbir__simdfX_madd( o2, o2, r2, c2 ); stbir__simdfX_madd( o3, o3, r3, c2 ); ) + stbIF3( stbir__simdfX_load( r0, input3 ); stbir__simdfX_load( r1, input3+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input3+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input3+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c3 ); stbir__simdfX_madd( o1, o1, r1, c3 ); stbir__simdfX_madd( o2, o2, r2, c3 ); stbir__simdfX_madd( o3, o3, r3, c3 ); ) + stbIF4( stbir__simdfX_load( r0, input4 ); stbir__simdfX_load( r1, input4+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input4+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input4+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c4 ); stbir__simdfX_madd( o1, o1, r1, c4 ); stbir__simdfX_madd( o2, o2, r2, c4 ); stbir__simdfX_madd( o3, o3, r3, c4 ); ) + stbIF5( stbir__simdfX_load( r0, input5 ); stbir__simdfX_load( r1, input5+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input5+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input5+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c5 ); stbir__simdfX_madd( o1, o1, r1, c5 ); stbir__simdfX_madd( o2, o2, r2, c5 ); stbir__simdfX_madd( o3, o3, r3, c5 ); ) + stbIF6( stbir__simdfX_load( r0, input6 ); stbir__simdfX_load( r1, input6+stbir__simdfX_float_count ); stbir__simdfX_load( 
r2, input6+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input6+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c6 ); stbir__simdfX_madd( o1, o1, r1, c6 ); stbir__simdfX_madd( o2, o2, r2, c6 ); stbir__simdfX_madd( o3, o3, r3, c6 ); ) + stbIF7( stbir__simdfX_load( r0, input7 ); stbir__simdfX_load( r1, input7+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input7+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input7+(3*stbir__simdfX_float_count) ); + stbir__simdfX_madd( o0, o0, r0, c7 ); stbir__simdfX_madd( o1, o1, r1, c7 ); stbir__simdfX_madd( o2, o2, r2, c7 ); stbir__simdfX_madd( o3, o3, r3, c7 ); ) + + stbir__simdfX_store( output, o0 ); stbir__simdfX_store( output+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output+(3*stbir__simdfX_float_count), o3 ); + output += (4*stbir__simdfX_float_count); + stbIF0( input0 += (4*stbir__simdfX_float_count); ) stbIF1( input1 += (4*stbir__simdfX_float_count); ) stbIF2( input2 += (4*stbir__simdfX_float_count); ) stbIF3( input3 += (4*stbir__simdfX_float_count); ) stbIF4( input4 += (4*stbir__simdfX_float_count); ) stbIF5( input5 += (4*stbir__simdfX_float_count); ) stbIF6( input6 += (4*stbir__simdfX_float_count); ) stbIF7( input7 += (4*stbir__simdfX_float_count); ) + } + + STBIR_SIMD_NO_UNROLL_LOOP_START + while ( ( (char*)input0_end - (char*) input0 ) >= 16 ) + { + stbir__simdf o0, r0; + STBIR_SIMD_NO_UNROLL(output); + + #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE + stbIF0( stbir__simdf_load( o0, output ); stbir__simdf_load( r0, input0 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c0 ) ); ) + #else + stbIF0( stbir__simdf_load( r0, input0 ); stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c0 ) ); ) + #endif + stbIF1( stbir__simdf_load( r0, input1 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c1 ) ); ) + stbIF2( stbir__simdf_load( r0, input2 ); stbir__simdf_madd( o0, 
o0, r0, stbir__if_simdf8_cast_to_simdf4( c2 ) ); ) + stbIF3( stbir__simdf_load( r0, input3 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c3 ) ); ) + stbIF4( stbir__simdf_load( r0, input4 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c4 ) ); ) + stbIF5( stbir__simdf_load( r0, input5 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c5 ) ); ) + stbIF6( stbir__simdf_load( r0, input6 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c6 ) ); ) + stbIF7( stbir__simdf_load( r0, input7 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c7 ) ); ) + + stbir__simdf_store( output, o0 ); + output += 4; + stbIF0( input0 += 4; ) stbIF1( input1 += 4; ) stbIF2( input2 += 4; ) stbIF3( input3 += 4; ) stbIF4( input4 += 4; ) stbIF5( input5 += 4; ) stbIF6( input6 += 4; ) stbIF7( input7 += 4; ) + } + } + #else + STBIR_NO_UNROLL_LOOP_START + while ( ( (char*)input0_end - (char*) input0 ) >= 16 ) + { + float o0, o1, o2, o3; + STBIR_NO_UNROLL(output); + #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE + stbIF0( o0 = output[0] + input0[0] * c0s; o1 = output[1] + input0[1] * c0s; o2 = output[2] + input0[2] * c0s; o3 = output[3] + input0[3] * c0s; ) + #else + stbIF0( o0 = input0[0] * c0s; o1 = input0[1] * c0s; o2 = input0[2] * c0s; o3 = input0[3] * c0s; ) + #endif + stbIF1( o0 += input1[0] * c1s; o1 += input1[1] * c1s; o2 += input1[2] * c1s; o3 += input1[3] * c1s; ) + stbIF2( o0 += input2[0] * c2s; o1 += input2[1] * c2s; o2 += input2[2] * c2s; o3 += input2[3] * c2s; ) + stbIF3( o0 += input3[0] * c3s; o1 += input3[1] * c3s; o2 += input3[2] * c3s; o3 += input3[3] * c3s; ) + stbIF4( o0 += input4[0] * c4s; o1 += input4[1] * c4s; o2 += input4[2] * c4s; o3 += input4[3] * c4s; ) + stbIF5( o0 += input5[0] * c5s; o1 += input5[1] * c5s; o2 += input5[2] * c5s; o3 += input5[3] * c5s; ) + stbIF6( o0 += input6[0] * c6s; o1 += input6[1] * c6s; o2 += input6[2] * c6s; o3 += input6[3] * c6s; ) + stbIF7( o0 += input7[0] * c7s; 
o1 += input7[1] * c7s; o2 += input7[2] * c7s; o3 += input7[3] * c7s; ) + output[0] = o0; output[1] = o1; output[2] = o2; output[3] = o3; + output += 4; + stbIF0( input0 += 4; ) stbIF1( input1 += 4; ) stbIF2( input2 += 4; ) stbIF3( input3 += 4; ) stbIF4( input4 += 4; ) stbIF5( input5 += 4; ) stbIF6( input6 += 4; ) stbIF7( input7 += 4; ) + } + #endif + STBIR_NO_UNROLL_LOOP_START + while ( input0 < input0_end ) + { + float o0; + STBIR_NO_UNROLL(output); + #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE + stbIF0( o0 = output[0] + input0[0] * c0s; ) + #else + stbIF0( o0 = input0[0] * c0s; ) + #endif + stbIF1( o0 += input1[0] * c1s; ) + stbIF2( o0 += input2[0] * c2s; ) + stbIF3( o0 += input3[0] * c3s; ) + stbIF4( o0 += input4[0] * c4s; ) + stbIF5( o0 += input5[0] * c5s; ) + stbIF6( o0 += input6[0] * c6s; ) + stbIF7( o0 += input7[0] * c7s; ) + output[0] = o0; + ++output; + stbIF0( ++input0; ) stbIF1( ++input1; ) stbIF2( ++input2; ) stbIF3( ++input3; ) stbIF4( ++input4; ) stbIF5( ++input5; ) stbIF6( ++input6; ) stbIF7( ++input7; ) + } +} + +#undef stbIF0 +#undef stbIF1 +#undef stbIF2 +#undef stbIF3 +#undef stbIF4 +#undef stbIF5 +#undef stbIF6 +#undef stbIF7 +#undef STB_IMAGE_RESIZE_DO_VERTICALS +#undef STBIR__vertical_channels +#undef STB_IMAGE_RESIZE_DO_HORIZONTALS +#undef STBIR_strs_join24 +#undef STBIR_strs_join14 +#undef STBIR_chans +#ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE +#undef STB_IMAGE_RESIZE_VERTICAL_CONTINUE +#endif + +#else // !STB_IMAGE_RESIZE_DO_VERTICALS + +#define STBIR_chans( start, end ) STBIR_strs_join1(start,STBIR__horizontal_channels,end) + +#ifndef stbir__2_coeff_only +#define stbir__2_coeff_only() \ + stbir__1_coeff_only(); \ + stbir__1_coeff_remnant(1); +#endif + +#ifndef stbir__2_coeff_remnant +#define stbir__2_coeff_remnant( ofs ) \ + stbir__1_coeff_remnant(ofs); \ + stbir__1_coeff_remnant((ofs)+1); +#endif + +#ifndef stbir__3_coeff_only +#define stbir__3_coeff_only() \ + stbir__2_coeff_only(); \ + stbir__1_coeff_remnant(2); +#endif + +#ifndef 
stbir__3_coeff_remnant +#define stbir__3_coeff_remnant( ofs ) \ + stbir__2_coeff_remnant(ofs); \ + stbir__1_coeff_remnant((ofs)+2); +#endif + +#ifndef stbir__3_coeff_setup +#define stbir__3_coeff_setup() +#endif + +#ifndef stbir__4_coeff_start +#define stbir__4_coeff_start() \ + stbir__2_coeff_only(); \ + stbir__2_coeff_remnant(2); +#endif + +#ifndef stbir__4_coeff_continue_from_4 +#define stbir__4_coeff_continue_from_4( ofs ) \ + stbir__2_coeff_remnant(ofs); \ + stbir__2_coeff_remnant((ofs)+2); +#endif + +#ifndef stbir__store_output_tiny +#define stbir__store_output_tiny stbir__store_output +#endif + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_1_coeff)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + float const * hc = horizontal_coefficients; + stbir__1_coeff_only(); + stbir__store_output_tiny(); + } while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_2_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + float const * hc = horizontal_coefficients; + stbir__2_coeff_only(); + stbir__store_output_tiny(); + } 
while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_3_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + float const * hc = horizontal_coefficients; + stbir__3_coeff_only(); + stbir__store_output_tiny(); + } while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_4_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + float const * hc = horizontal_coefficients; + stbir__4_coeff_start(); + stbir__store_output(); + } while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_5_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 
* STBIR__horizontal_channels; + float const * hc = horizontal_coefficients; + stbir__4_coeff_start(); + stbir__1_coeff_remnant(4); + stbir__store_output(); + } while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_6_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + float const * hc = horizontal_coefficients; + stbir__4_coeff_start(); + stbir__2_coeff_remnant(4); + stbir__store_output(); + } while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_7_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + stbir__3_coeff_setup(); + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + float const * hc = horizontal_coefficients; + + stbir__4_coeff_start(); + stbir__3_coeff_remnant(4); + stbir__store_output(); + } while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_8_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * 
output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + float const * hc = horizontal_coefficients; + stbir__4_coeff_start(); + stbir__4_coeff_continue_from_4(4); + stbir__store_output(); + } while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_9_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + float const * hc = horizontal_coefficients; + stbir__4_coeff_start(); + stbir__4_coeff_continue_from_4(4); + stbir__1_coeff_remnant(8); + stbir__store_output(); + } while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_10_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + float const * hc = horizontal_coefficients; + stbir__4_coeff_start(); + stbir__4_coeff_continue_from_4(4); + stbir__2_coeff_remnant(8); + stbir__store_output(); + } while ( output < output_end ); +} 
+ +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_11_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + stbir__3_coeff_setup(); + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + float const * hc = horizontal_coefficients; + stbir__4_coeff_start(); + stbir__4_coeff_continue_from_4(4); + stbir__3_coeff_remnant(8); + stbir__store_output(); + } while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_12_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + float const * hc = horizontal_coefficients; + stbir__4_coeff_start(); + stbir__4_coeff_continue_from_4(4); + stbir__4_coeff_continue_from_4(8); + stbir__store_output(); + } while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_n_coeffs_mod0 )( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * 
) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + int n = ( ( horizontal_contributors->n1 - horizontal_contributors->n0 + 1 ) - 4 + 3 ) >> 2; + float const * hc = horizontal_coefficients; + + stbir__4_coeff_start(); + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + hc += 4; + decode += STBIR__horizontal_channels * 4; + stbir__4_coeff_continue_from_4( 0 ); + --n; + } while ( n > 0 ); + stbir__store_output(); + } while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_n_coeffs_mod1 )( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + int n = ( ( horizontal_contributors->n1 - horizontal_contributors->n0 + 1 ) - 5 + 3 ) >> 2; + float const * hc = horizontal_coefficients; + + stbir__4_coeff_start(); + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + hc += 4; + decode += STBIR__horizontal_channels * 4; + stbir__4_coeff_continue_from_4( 0 ); + --n; + } while ( n > 0 ); + stbir__1_coeff_remnant( 4 ); + stbir__store_output(); + } while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_n_coeffs_mod2 )( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = 
output_buffer; + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + int n = ( ( horizontal_contributors->n1 - horizontal_contributors->n0 + 1 ) - 6 + 3 ) >> 2; + float const * hc = horizontal_coefficients; + + stbir__4_coeff_start(); + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + hc += 4; + decode += STBIR__horizontal_channels * 4; + stbir__4_coeff_continue_from_4( 0 ); + --n; + } while ( n > 0 ); + stbir__2_coeff_remnant( 4 ); + + stbir__store_output(); + } while ( output < output_end ); +} + +static void STBIR_chans( stbir__horizontal_gather_,_channels_with_n_coeffs_mod3 )( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width ) +{ + float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels; + float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer; + stbir__3_coeff_setup(); + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels; + int n = ( ( horizontal_contributors->n1 - horizontal_contributors->n0 + 1 ) - 7 + 3 ) >> 2; + float const * hc = horizontal_coefficients; + + stbir__4_coeff_start(); + STBIR_SIMD_NO_UNROLL_LOOP_START + do { + hc += 4; + decode += STBIR__horizontal_channels * 4; + stbir__4_coeff_continue_from_4( 0 ); + --n; + } while ( n > 0 ); + stbir__3_coeff_remnant( 4 ); + + stbir__store_output(); + } while ( output < output_end ); +} + +static stbir__horizontal_gather_channels_func * STBIR_chans(stbir__horizontal_gather_,_channels_with_n_coeffs_funcs)[4]= +{ + STBIR_chans(stbir__horizontal_gather_,_channels_with_n_coeffs_mod0), + STBIR_chans(stbir__horizontal_gather_,_channels_with_n_coeffs_mod1), + STBIR_chans(stbir__horizontal_gather_,_channels_with_n_coeffs_mod2), + 
STBIR_chans(stbir__horizontal_gather_,_channels_with_n_coeffs_mod3), +}; + +static stbir__horizontal_gather_channels_func * STBIR_chans(stbir__horizontal_gather_,_channels_funcs)[12]= +{ + STBIR_chans(stbir__horizontal_gather_,_channels_with_1_coeff), + STBIR_chans(stbir__horizontal_gather_,_channels_with_2_coeffs), + STBIR_chans(stbir__horizontal_gather_,_channels_with_3_coeffs), + STBIR_chans(stbir__horizontal_gather_,_channels_with_4_coeffs), + STBIR_chans(stbir__horizontal_gather_,_channels_with_5_coeffs), + STBIR_chans(stbir__horizontal_gather_,_channels_with_6_coeffs), + STBIR_chans(stbir__horizontal_gather_,_channels_with_7_coeffs), + STBIR_chans(stbir__horizontal_gather_,_channels_with_8_coeffs), + STBIR_chans(stbir__horizontal_gather_,_channels_with_9_coeffs), + STBIR_chans(stbir__horizontal_gather_,_channels_with_10_coeffs), + STBIR_chans(stbir__horizontal_gather_,_channels_with_11_coeffs), + STBIR_chans(stbir__horizontal_gather_,_channels_with_12_coeffs), +}; + +#undef STBIR__horizontal_channels +#undef STB_IMAGE_RESIZE_DO_HORIZONTALS +#undef stbir__1_coeff_only +#undef stbir__1_coeff_remnant +#undef stbir__2_coeff_only +#undef stbir__2_coeff_remnant +#undef stbir__3_coeff_only +#undef stbir__3_coeff_remnant +#undef stbir__3_coeff_setup +#undef stbir__4_coeff_start +#undef stbir__4_coeff_continue_from_4 +#undef stbir__store_output +#undef stbir__store_output_tiny +#undef STBIR_chans + +#endif // HORIZONALS + +#undef STBIR_strs_join2 +#undef STBIR_strs_join1 + +#endif // STB_IMAGE_RESIZE_DO_HORIZONTALS/VERTICALS/CODERS + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/thirdparty/stb/stb_image_write.h b/thirdparty/stb/stb_image_write.h new file mode 100644 index 000000000..e4b32ed1b --- /dev/null +++ b/thirdparty/stb/stb_image_write.h @@ -0,0 +1,1724 @@ +/* stb_image_write - v1.16 - public domain - http://nothings.org/stb + writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 + no warranty implied; use at your own risk + + Before #including, + + #define STB_IMAGE_WRITE_IMPLEMENTATION + + in the file that you want to have the implementation. + + Will probably not work correctly with strict-aliasing optimizations. + +ABOUT: + + This header file is a library for writing images to C stdio or a callback. + + The PNG output is not optimal; it is 20-50% larger than the file + written by a decent optimizing implementation; though providing a custom + zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that. + This library is designed for source code compactness and simplicity, + not optimal image file size or run-time performance. + +BUILDING: + + You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. + You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace + malloc,realloc,free. 
+ You can #define STBIW_MEMMOVE() to replace memmove() + You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function + for PNG compression (instead of the builtin one), it must have the following signature: + unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); + The returned data will be freed with STBIW_FREE() (free() by default), + so it must be heap allocated with STBIW_MALLOC() (malloc() by default), + +UNICODE: + + If compiling for Windows and you wish to use Unicode filenames, compile + with + #define STBIW_WINDOWS_UTF8 + and pass utf8-encoded filenames. Call stbiw_convert_wchar_to_utf8 to convert + Windows wchar_t filenames to utf8. + +USAGE: + + There are five functions, one for each image file format: + + int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); + int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); + int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); + + void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically + + There are also five equivalent functions that use an arbitrary write function. 
You are + expected to open/close your file-equivalent before and after calling these: + + int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); + int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); + int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); + int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + + where the callback is: + void stbi_write_func(void *context, void *data, int size); + + You can configure it with these global variables: + int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE + int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression + int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode + + + You can define STBI_WRITE_NO_STDIO to disable the file variant of these + functions, so the library will not use stdio.h at all. However, this will + also disable HDR writing, because it requires stdio for formatted output. + + Each function returns 0 on failure and non-0 on success. + + The functions create an image file defined by the parameters. The image + is a rectangle of pixels stored from left-to-right, top-to-bottom. + Each pixel contains 'comp' channels of data stored interleaved with 8-bits + per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is + monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. + The *data pointer points to the first byte of the top-left-most pixel. + For PNG, "stride_in_bytes" is the distance in bytes from the first byte of + a row of pixels to the first byte of the next row of pixels. 
+ + PNG creates output files with the same number of components as the input. + The BMP format expands Y to RGB in the file format and does not + output alpha. + + PNG supports writing rectangles of data even when the bytes storing rows of + data are not consecutive in memory (e.g. sub-rectangles of a larger image), + by supplying the stride between the beginning of adjacent rows. The other + formats do not. (Thus you cannot write a native-format BMP through the BMP + writer, both because it is in BGR order and because it may have padding + at the end of the line.) + + PNG allows you to set the deflate compression level by setting the global + variable 'stbi_write_png_compression_level' (it defaults to 8). + + HDR expects linear float data. Since the format is always 32-bit rgb(e) + data, alpha (if provided) is discarded, and for monochrome data it is + replicated across all three channels. + + TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed + data, set the global variable 'stbi_write_tga_with_rle' to 0. + + JPEG does ignore alpha channels in input data; quality is between 1 and 100. + Higher quality looks better but results in a bigger image. + JPEG baseline (no JPEG progressive). + +CREDITS: + + + Sean Barrett - PNG/BMP/TGA + Baldur Karlsson - HDR + Jean-Sebastien Guay - TGA monochrome + Tim Kelsey - misc enhancements + Alan Hickman - TGA RLE + Emmanuel Julien - initial file IO callback implementation + Jon Olick - original jo_jpeg.cpp code + Daniel Gibson - integrate JPEG, allow external zlib + Aarni Koskela - allow choosing PNG filter + + bugfixes: + github:Chribba + Guillaume Chereau + github:jry2 + github:romigrou + Sergio Gonzalez + Jonas Karlsson + Filip Wasil + Thatcher Ulrich + github:poppolopoppo + Patrick Boettcher + github:xeekworx + Cap Petschulat + Simon Rodriguez + Ivan Tikhonov + github:ignotion + Adam Schackart + Andrew Kensler + +LICENSE + + See end of file for license information. 
+ +*/ + +#ifndef INCLUDE_STB_IMAGE_WRITE_H +#define INCLUDE_STB_IMAGE_WRITE_H + +#include + +// if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' +#ifndef STBIWDEF +#ifdef STB_IMAGE_WRITE_STATIC +#define STBIWDEF static +#else +#ifdef __cplusplus +#define STBIWDEF extern "C" +#else +#define STBIWDEF extern +#endif +#endif +#endif + +#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations +STBIWDEF int stbi_write_tga_with_rle; +STBIWDEF int stbi_write_png_compression_level; +STBIWDEF int stbi_write_force_png_filter; +#endif + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); + +#ifdef STBIW_WINDOWS_UTF8 +STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); +#endif +#endif + +typedef void stbi_write_func(void *context, void *data, int size); + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); + +STBIWDEF void 
stbi_flip_vertically_on_write(int flip_boolean); + +#endif//INCLUDE_STB_IMAGE_WRITE_H + +#ifdef STB_IMAGE_WRITE_IMPLEMENTATION + +#ifdef _WIN32 + #ifndef _CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS + #endif + #ifndef _CRT_NONSTDC_NO_DEPRECATE + #define _CRT_NONSTDC_NO_DEPRECATE + #endif +#endif + +#ifndef STBI_WRITE_NO_STDIO +#include +#endif // STBI_WRITE_NO_STDIO + +#include +#include +#include +#include + +#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) +// ok +#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." +#endif + +#ifndef STBIW_MALLOC +#define STBIW_MALLOC(sz) malloc(sz) +#define STBIW_REALLOC(p,newsz) realloc(p,newsz) +#define STBIW_FREE(p) free(p) +#endif + +#ifndef STBIW_REALLOC_SIZED +#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) +#endif + + +#ifndef STBIW_MEMMOVE +#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) +#endif + + +#ifndef STBIW_ASSERT +#include +#define STBIW_ASSERT(x) assert(x) +#endif + +#define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) + +#ifdef STB_IMAGE_WRITE_STATIC +static int stbi_write_png_compression_level = 8; +static int stbi_write_tga_with_rle = 1; +static int stbi_write_force_png_filter = -1; +#else +int stbi_write_png_compression_level = 8; +int stbi_write_tga_with_rle = 1; +int stbi_write_force_png_filter = -1; +#endif + +static int stbi__flip_vertically_on_write = 0; + +STBIWDEF void stbi_flip_vertically_on_write(int flag) +{ + stbi__flip_vertically_on_write = flag; +} + +typedef struct +{ + stbi_write_func *func; + void *context; + unsigned char buffer[64]; + int buf_used; +} stbi__write_context; + +// initialize a callback-based context +static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) +{ + s->func = 
c; + s->context = context; +} + +#ifndef STBI_WRITE_NO_STDIO + +static void stbi__stdio_write(void *context, void *data, int size) +{ + fwrite(data,1,size,(FILE*) context); +} + +#if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8) +#ifdef __cplusplus +#define STBIW_EXTERN extern "C" +#else +#define STBIW_EXTERN extern +#endif +STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); +STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); + +STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) +{ + return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); +} +#endif + +static FILE *stbiw__fopen(char const *filename, char const *mode) +{ + FILE *f; +#if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8) + wchar_t wMode[64]; + wchar_t wFilename[1024]; + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) + return 0; + + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) + return 0; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != _wfopen_s(&f, wFilename, wMode)) + f = 0; +#else + f = _wfopen(wFilename, wMode); +#endif + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != fopen_s(&f, filename, mode)) + f=0; +#else + f = fopen(filename, mode); +#endif + return f; +} + +static int stbi__start_write_file(stbi__write_context *s, const char *filename) +{ + FILE *f = stbiw__fopen(filename, "wb"); + stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); + return f != NULL; +} + +static void stbi__end_write_file(stbi__write_context *s) +{ + fclose((FILE *)s->context); +} + +#endif // !STBI_WRITE_NO_STDIO + +typedef 
unsigned int stbiw_uint32; +typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1]; + +static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) +{ + while (*fmt) { + switch (*fmt++) { + case ' ': break; + case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); + s->func(s->context,&x,1); + break; } + case '2': { int x = va_arg(v,int); + unsigned char b[2]; + b[0] = STBIW_UCHAR(x); + b[1] = STBIW_UCHAR(x>>8); + s->func(s->context,b,2); + break; } + case '4': { stbiw_uint32 x = va_arg(v,int); + unsigned char b[4]; + b[0]=STBIW_UCHAR(x); + b[1]=STBIW_UCHAR(x>>8); + b[2]=STBIW_UCHAR(x>>16); + b[3]=STBIW_UCHAR(x>>24); + s->func(s->context,b,4); + break; } + default: + STBIW_ASSERT(0); + return; + } + } +} + +static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) +{ + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); +} + +static void stbiw__write_flush(stbi__write_context *s) +{ + if (s->buf_used) { + s->func(s->context, &s->buffer, s->buf_used); + s->buf_used = 0; + } +} + +static void stbiw__putc(stbi__write_context *s, unsigned char c) +{ + s->func(s->context, &c, 1); +} + +static void stbiw__write1(stbi__write_context *s, unsigned char a) +{ + if ((size_t)s->buf_used + 1 > sizeof(s->buffer)) + stbiw__write_flush(s); + s->buffer[s->buf_used++] = a; +} + +static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) +{ + int n; + if ((size_t)s->buf_used + 3 > sizeof(s->buffer)) + stbiw__write_flush(s); + n = s->buf_used; + s->buf_used = n+3; + s->buffer[n+0] = a; + s->buffer[n+1] = b; + s->buffer[n+2] = c; +} + +static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) +{ + unsigned char bg[3] = { 255, 0, 255}, px[3]; + int k; + + if (write_alpha < 0) + stbiw__write1(s, d[comp - 1]); + + switch (comp) { + case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 
1-channel case + case 1: + if (expand_mono) + stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp + else + stbiw__write1(s, d[0]); // monochrome TGA + break; + case 4: + if (!write_alpha) { + // composite against pink background + for (k = 0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; + stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); + break; + } + if (write_alpha > 0) + stbiw__write1(s, d[comp - 1]); +} + +static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) +{ + stbiw_uint32 zero = 0; + int i,j, j_end; + + if (y <= 0) + return; + + if (stbi__flip_vertically_on_write) + vdir *= -1; + + if (vdir < 0) { + j_end = -1; j = y-1; + } else { + j_end = y; j = 0; + } + + for (; j != j_end; j += vdir) { + for (i=0; i < x; ++i) { + unsigned char *d = (unsigned char *) data + (j*x+i)*comp; + stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); + } + stbiw__write_flush(s); + s->func(s->context, &zero, scanline_pad); + } +} + +static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) 
+{ + if (y < 0 || x < 0) { + return 0; + } else { + va_list v; + va_start(v, fmt); + stbiw__writefv(s, fmt, v); + va_end(v); + stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); + return 1; + } +} + +static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) +{ + if (comp != 4) { + // write RGB bitmap + int pad = (-x*3) & 3; + return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header + } else { + // RGBA bitmaps need a v4 header + // use BI_BITFIELDS mode with 32bpp and alpha mask + // (straight BI_RGB with alpha mask doesn't work in most readers) + return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *)data,1,0, + "11 4 22 4" "4 44 22 444444 4444 4 444 444 444 444", + 'B', 'M', 14+108+x*y*4, 0, 0, 14+108, // file header + 108, x,y, 1,32, 3,0,0,0,0,0, 0xff0000,0xff00,0xff,0xff000000u, 0, 0,0,0, 0,0,0, 0,0,0, 0,0,0); // bitmap V4 header + } +} + +STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s = { 0 }; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_bmp_core(&s, x, y, comp, data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) +{ + stbi__write_context s = { 0 }; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_bmp_core(&s, x, y, comp, data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif //!STBI_WRITE_NO_STDIO + +static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) +{ + int has_alpha = (comp == 2 || comp == 4); + int colorbytes = has_alpha ? comp-1 : comp; + int format = colorbytes < 2 ? 
3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 + + if (y < 0 || x < 0) + return 0; + + if (!stbi_write_tga_with_rle) { + return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, + "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); + } else { + int i,j,k; + int jend, jdir; + + stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); + + if (stbi__flip_vertically_on_write) { + j = 0; + jend = y; + jdir = 1; + } else { + j = y-1; + jend = -1; + jdir = -1; + } + for (; j != jend; j += jdir) { + unsigned char *row = (unsigned char *) data + j * x * comp; + int len; + + for (i = 0; i < x; i += len) { + unsigned char *begin = row + i * comp; + int diff = 1; + len = 1; + + if (i < x - 1) { + ++len; + diff = memcmp(begin, row + (i + 1) * comp, comp); + if (diff) { + const unsigned char *prev = begin; + for (k = i + 2; k < x && len < 128; ++k) { + if (memcmp(prev, row + k * comp, comp)) { + prev += comp; + ++len; + } else { + --len; + break; + } + } + } else { + for (k = i + 2; k < x && len < 128; ++k) { + if (!memcmp(begin, row + k * comp, comp)) { + ++len; + } else { + break; + } + } + } + } + + if (diff) { + unsigned char header = STBIW_UCHAR(len - 1); + stbiw__write1(s, header); + for (k = 0; k < len; ++k) { + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); + } + } else { + unsigned char header = STBIW_UCHAR(len - 129); + stbiw__write1(s, header); + stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); + } + } + } + stbiw__write_flush(s); + } + return 1; +} + +STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) +{ + stbi__write_context s = { 0 }; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_tga_core(&s, x, y, comp, (void *) data); +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_tga(char const *filename, int x, 
int y, int comp, const void *data) +{ + stbi__write_context s = { 0 }; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_tga_core(&s, x, y, comp, (void *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR writer +// by Baldur Karlsson + +#define stbiw__max(a, b) ((a) > (b) ? (a) : (b)) + +#ifndef STBI_WRITE_NO_STDIO + +static void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) +{ + int exponent; + float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); + + if (maxcomp < 1e-32f) { + rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; + } else { + float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; + + rgbe[0] = (unsigned char)(linear[0] * normalize); + rgbe[1] = (unsigned char)(linear[1] * normalize); + rgbe[2] = (unsigned char)(linear[2] * normalize); + rgbe[3] = (unsigned char)(exponent + 128); + } +} + +static void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) +{ + unsigned char lengthbyte = STBIW_UCHAR(length+128); + STBIW_ASSERT(length+128 <= 255); + s->func(s->context, &lengthbyte, 1); + s->func(s->context, &databyte, 1); +} + +static void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) +{ + unsigned char lengthbyte = STBIW_UCHAR(length); + STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code + s->func(s->context, &lengthbyte, 1); + s->func(s->context, data, length); +} + +static void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) +{ + unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; + unsigned char rgbe[4]; + float linear[3]; + int x; + + scanlineheader[2] = (width&0xff00)>>8; + scanlineheader[3] = (width&0x00ff); + + /* skip RLE for images too small or large */ + if (width < 8 || width 
>= 32768) { + for (x=0; x < width; x++) { + switch (ncomp) { + case 4: /* fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + s->func(s->context, rgbe, 4); + } + } else { + int c,r; + /* encode into scratch buffer */ + for (x=0; x < width; x++) { + switch(ncomp) { + case 4: /* fallthrough */ + case 3: linear[2] = scanline[x*ncomp + 2]; + linear[1] = scanline[x*ncomp + 1]; + linear[0] = scanline[x*ncomp + 0]; + break; + default: + linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; + break; + } + stbiw__linear_to_rgbe(rgbe, linear); + scratch[x + width*0] = rgbe[0]; + scratch[x + width*1] = rgbe[1]; + scratch[x + width*2] = rgbe[2]; + scratch[x + width*3] = rgbe[3]; + } + + s->func(s->context, scanlineheader, 4); + + /* RLE each component separately */ + for (c=0; c < 4; c++) { + unsigned char *comp = &scratch[width*c]; + + x = 0; + while (x < width) { + // find first run + r = x; + while (r+2 < width) { + if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) + break; + ++r; + } + if (r+2 >= width) + r = width; + // dump up to first run + while (x < r) { + int len = r-x; + if (len > 128) len = 128; + stbiw__write_dump_data(s, len, &comp[x]); + x += len; + } + // if there's a run, output it + if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd + // find next byte after run + while (r < width && comp[r] == comp[x]) + ++r; + // output run up to r + while (x < r) { + int len = r-x; + if (len > 127) len = 127; + stbiw__write_run_data(s, len, comp[x]); + x += len; + } + } + } + } + } +} + +static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) +{ + if (y <= 0 || x <= 0 || data == NULL) + return 0; + else { + // Each component is stored separately. 
Allocate scratch space for full output scanline. + unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); + int i, len; + char buffer[128]; + char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; + s->func(s->context, header, sizeof(header)-1); + +#ifdef __STDC_LIB_EXT1__ + len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#else + len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); +#endif + s->func(s->context, buffer, len); + + for(i=0; i < y; i++) + stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)); + STBIW_FREE(scratch); + return 1; + } +} + +STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) +{ + stbi__write_context s = { 0 }; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_hdr_core(&s, x, y, comp, (float *) data); +} + +STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) +{ + stbi__write_context s = { 0 }; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif // STBI_WRITE_NO_STDIO + + +////////////////////////////////////////////////////////////////////////////// +// +// PNG writer +// + +#ifndef STBIW_ZLIB_COMPRESS +// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() +#define stbiw__sbraw(a) ((int *) (void *) (a) - 2) +#define stbiw__sbm(a) stbiw__sbraw(a)[0] +#define stbiw__sbn(a) stbiw__sbraw(a)[1] + +#define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) +#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? 
stbiw__sbgrow(a,n) : 0) +#define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) + +#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) +#define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0) +#define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) + +static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) +{ + int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; + void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2); + STBIW_ASSERT(p); + if (p) { + if (!*arr) ((int *) p)[1] = 0; + *arr = (void *) ((int *) p + 2); + stbiw__sbm(*arr) = m; + } + return *arr; +} + +static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) +{ + while (*bitcount >= 8) { + stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); + *bitbuffer >>= 8; + *bitcount -= 8; + } + return data; +} + +static int stbiw__zlib_bitrev(int code, int codebits) +{ + int res=0; + while (codebits--) { + res = (res << 1) | (code & 1); + code >>= 1; + } + return res; +} + +static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) +{ + int i; + for (i=0; i < limit && i < 258; ++i) + if (a[i] != b[i]) break; + return i; +} + +static unsigned int stbiw__zhash(unsigned char *data) +{ + stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + return hash; +} + +#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) +#define stbiw__zlib_add(code,codebits) \ + (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) +#define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) +// default huffman tables +#define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) +#define stbiw__zlib_huff2(n) 
stbiw__zlib_huffa(0x190 + (n)-144, 9) +#define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) +#define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) +#define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) +#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) + +#define stbiw__ZHASH 16384 + +#endif // STBIW_ZLIB_COMPRESS + +STBIWDEF unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) +{ +#ifdef STBIW_ZLIB_COMPRESS + // user provided a zlib compress implementation, use that + return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); +#else // use builtin + static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; + static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; + static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; + static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; + unsigned int bitbuf=0; + int i,j, bitcount=0; + unsigned char *out = NULL; + unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(unsigned char**)); + if (hash_table == NULL) + return NULL; + if (quality < 5) quality = 5; + + stbiw__sbpush(out, 0x78); // DEFLATE 32K window + stbiw__sbpush(out, 0x5e); // FLEVEL = 1 + stbiw__zlib_add(1,1); // BFINAL = 1 + stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman + + for (i=0; i < stbiw__ZHASH; ++i) + hash_table[i] = NULL; + + i=0; + while (i < data_len-3) { + // hash next 3 bytes of data to be compressed + int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; + unsigned char *bestloc = 0; + unsigned char **hlist = hash_table[h]; + 
int n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32768) { // if entry lies within window + int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); + if (d >= best) { best=d; bestloc=hlist[j]; } + } + } + // when hash table entry is too long, delete half the entries + if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { + STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); + stbiw__sbn(hash_table[h]) = quality; + } + stbiw__sbpush(hash_table[h],data+i); + + if (bestloc) { + // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal + h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); + hlist = hash_table[h]; + n = stbiw__sbcount(hlist); + for (j=0; j < n; ++j) { + if (hlist[j]-data > i-32767) { + int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); + if (e > best) { // if next match is better, bail on current match + bestloc = NULL; + break; + } + } + } + } + + if (bestloc) { + int d = (int) (data+i - bestloc); // distance back + STBIW_ASSERT(d <= 32767 && best <= 258); + for (j=0; best > lengthc[j+1]-1; ++j); + stbiw__zlib_huff(j+257); + if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); + for (j=0; d > distc[j+1]-1; ++j); + stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); + if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); + i += best; + } else { + stbiw__zlib_huffb(data[i]); + ++i; + } + } + // write out final bytes + for (;i < data_len; ++i) + stbiw__zlib_huffb(data[i]); + stbiw__zlib_huff(256); // end of block + // pad with 0 bits to byte boundary + while (bitcount) + stbiw__zlib_add(0,1); + + for (i=0; i < stbiw__ZHASH; ++i) + (void) stbiw__sbfree(hash_table[i]); + STBIW_FREE(hash_table); + + // store uncompressed instead if compression was worse + if (stbiw__sbn(out) > data_len + 2 + ((data_len+32766)/32767)*5) { + stbiw__sbn(out) = 2; // truncate to DEFLATE 32K window and FLEVEL = 1 + for (j = 0; j < data_len;) { + int blocklen 
= data_len - j; + if (blocklen > 32767) blocklen = 32767; + stbiw__sbpush(out, data_len - j == blocklen); // BFINAL = ?, BTYPE = 0 -- no compression + stbiw__sbpush(out, STBIW_UCHAR(blocklen)); // LEN + stbiw__sbpush(out, STBIW_UCHAR(blocklen >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(~blocklen)); // NLEN + stbiw__sbpush(out, STBIW_UCHAR(~blocklen >> 8)); + memcpy(out+stbiw__sbn(out), data+j, blocklen); + stbiw__sbn(out) += blocklen; + j += blocklen; + } + } + + { + // compute adler32 on input + unsigned int s1=1, s2=0; + int blocklen = (int) (data_len % 5552); + j=0; + while (j < data_len) { + for (i=0; i < blocklen; ++i) { s1 += data[j+i]; s2 += s1; } + s1 %= 65521; s2 %= 65521; + j += blocklen; + blocklen = 5552; + } + stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s2)); + stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); + stbiw__sbpush(out, STBIW_UCHAR(s1)); + } + *out_len = stbiw__sbn(out); + // make returned pointer freeable + STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); + return (unsigned char *) stbiw__sbraw(out); +#endif // STBIW_ZLIB_COMPRESS +} + +static unsigned int stbiw__crc32(unsigned char *buffer, int len) +{ +#ifdef STBIW_CRC32 + return STBIW_CRC32(buffer, len); +#else + static unsigned int crc_table[256] = + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 
0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 
0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }; + + unsigned int crc = ~0u; + int i; + for (i=0; i < len; ++i) + crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; + return ~crc; +#endif +} + +#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) +#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); +#define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) + +static void stbiw__wpcrc(unsigned char **data, int len) +{ + unsigned int crc = stbiw__crc32(*data - len - 4, len+4); + stbiw__wp32(*data, crc); +} + +static unsigned char stbiw__paeth(int a, int b, int c) +{ + int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); + if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); + if (pb <= pc) return STBIW_UCHAR(b); + return STBIW_UCHAR(c); +} + +// @OPTIMIZE: provide an option that always forces left-predict or paeth predict +static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) +{ + static int mapping[] = { 0,1,2,3,4 }; + static int firstmap[] = { 0,1,0,5,6 }; + int *mymap = (y != 0) ? mapping : firstmap; + int i; + int type = mymap[filter_type]; + unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); + int signed_stride = stbi__flip_vertically_on_write ? 
-stride_bytes : stride_bytes; + + if (type==0) { + memcpy(line_buffer, z, width*n); + return; + } + + // first loop isn't optimized since it's just one pixel + for (i = 0; i < n; ++i) { + switch (type) { + case 1: line_buffer[i] = z[i]; break; + case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; + case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; + case 5: line_buffer[i] = z[i]; break; + case 6: line_buffer[i] = z[i]; break; + } + } + switch (type) { + case 1: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-n]; break; + case 2: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-signed_stride]; break; + case 3: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; + case 4: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; + case 5: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - (z[i-n]>>1); break; + case 6: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; + } +} + +STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) +{ + int force_filter = stbi_write_force_png_filter; + int ctype[5] = { -1, 0, 4, 2, 6 }; + unsigned char sig[8] = { 137,80,78,71,13,10,26,10 }; + unsigned char *out,*o, *filt, *zlib; + signed char *line_buffer; + int j,zlen; + + if (stride_bytes == 0) + stride_bytes = x * n; + + if (force_filter >= 5) { + force_filter = -1; + } + + filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; + line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } + for (j=0; j < y; ++j) { + int filter_type; + if (force_filter > -1) { + filter_type = force_filter; + stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, 
line_buffer); + } else { // Estimate the best filter by running through all of them: + int best_filter = 0, best_filter_val = 0x7fffffff, est, i; + for (filter_type = 0; filter_type < 5; filter_type++) { + stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer); + + // Estimate the entropy of the line using this filter; the less, the better. + est = 0; + for (i = 0; i < x*n; ++i) { + est += abs((signed char) line_buffer[i]); + } + if (est < best_filter_val) { + best_filter_val = est; + best_filter = filter_type; + } + } + if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it + stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer); + filter_type = best_filter; + } + } + // when we get here, filter_type contains the filter type, and line_buffer contains the data + filt[j*(x*n+1)] = (unsigned char) filter_type; + STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); + } + STBIW_FREE(line_buffer); + zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); + STBIW_FREE(filt); + if (!zlib) return 0; + + // each tag requires 12 bytes of overhead + out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); + if (!out) return 0; + *out_len = 8 + 12+13 + 12+zlen + 12; + + o=out; + STBIW_MEMMOVE(o,sig,8); o+= 8; + stbiw__wp32(o, 13); // header length + stbiw__wptag(o, "IHDR"); + stbiw__wp32(o, x); + stbiw__wp32(o, y); + *o++ = 8; + *o++ = STBIW_UCHAR(ctype[n]); + *o++ = 0; + *o++ = 0; + *o++ = 0; + stbiw__wpcrc(&o,13); + + stbiw__wp32(o, zlen); + stbiw__wptag(o, "IDAT"); + STBIW_MEMMOVE(o, zlib, zlen); + o += zlen; + STBIW_FREE(zlib); + stbiw__wpcrc(&o, zlen); + + stbiw__wp32(o,0); + stbiw__wptag(o, "IEND"); + stbiw__wpcrc(&o,0); + + STBIW_ASSERT(o == out + *out_len); + + return out; +} + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void 
*data, int stride_bytes) +{ + FILE *f; + int len; + unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; + + f = stbiw__fopen(filename, "wb"); + if (!f) { STBIW_FREE(png); return 0; } + fwrite(png, 1, len, f); + fclose(f); + STBIW_FREE(png); + return 1; +} +#endif + +STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) +{ + int len; + unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len); + if (png == NULL) return 0; + func(context, png, len); + STBIW_FREE(png); + return 1; +} + + +/* *************************************************************************** + * + * JPEG writer + * + * This is based on Jon Olick's jo_jpeg.cpp: + * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html + */ + +static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, + 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; + +static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { + int bitBuf = *bitBufP, bitCnt = *bitCntP; + bitCnt += bs[1]; + bitBuf |= bs[0] << (24 - bitCnt); + while(bitCnt >= 8) { + unsigned char c = (bitBuf >> 16) & 255; + stbiw__putc(s, c); + if(c == 255) { + stbiw__putc(s, 0); + } + bitBuf <<= 8; + bitCnt -= 8; + } + *bitBufP = bitBuf; + *bitCntP = bitCnt; +} + +static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { + float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p; + float z1, z2, z3, z4, z5, z11, z13; + + float tmp0 = d0 + d7; + float tmp7 = d0 - d7; + float tmp1 = d1 + d6; + float tmp6 = d1 - d6; + float tmp2 = d2 + d5; + float 
tmp5 = d2 - d5; + float tmp3 = d3 + d4; + float tmp4 = d3 - d4; + + // Even part + float tmp10 = tmp0 + tmp3; // phase 2 + float tmp13 = tmp0 - tmp3; + float tmp11 = tmp1 + tmp2; + float tmp12 = tmp1 - tmp2; + + d0 = tmp10 + tmp11; // phase 3 + d4 = tmp10 - tmp11; + + z1 = (tmp12 + tmp13) * 0.707106781f; // c4 + d2 = tmp13 + z1; // phase 5 + d6 = tmp13 - z1; + + // Odd part + tmp10 = tmp4 + tmp5; // phase 2 + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + // The rotator is modified from fig 4-8 to avoid extra negations. + z5 = (tmp10 - tmp12) * 0.382683433f; // c6 + z2 = tmp10 * 0.541196100f + z5; // c2-c6 + z4 = tmp12 * 1.306562965f + z5; // c2+c6 + z3 = tmp11 * 0.707106781f; // c4 + + z11 = tmp7 + z3; // phase 5 + z13 = tmp7 - z3; + + *d5p = z13 + z2; // phase 6 + *d3p = z13 - z2; + *d1p = z11 + z4; + *d7p = z11 - z4; + + *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6; +} + +static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { + int tmp1 = val < 0 ? -val : val; + val = val < 0 ? 
val-1 : val; + bits[1] = 1; + while(tmp1 >>= 1) { + ++bits[1]; + } + bits[0] = val & ((1<0)&&(DU[end0pos]==0); --end0pos) { + } + // end0pos = first element in reverse order !=0 + if(end0pos == 0) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + return DU[0]; + } + for(i = 1; i <= end0pos; ++i) { + int startpos = i; + int nrzeroes; + unsigned short bits[2]; + for (; DU[i]==0 && i<=end0pos; ++i) { + } + nrzeroes = i-startpos; + if ( nrzeroes >= 16 ) { + int lng = nrzeroes>>4; + int nrmarker; + for (nrmarker=1; nrmarker <= lng; ++nrmarker) + stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); + nrzeroes &= 15; + } + stbiw__jpg_calcBits(DU[i], bits); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); + stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); + } + if(end0pos != 63) { + stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); + } + return DU[0]; +} + +static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { + // Constants that don't pollute global namespace + static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; + static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; + static const unsigned char std_ac_luminance_values[] = { + 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, + 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, + 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, + 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, + 
0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, + 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, + 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; + static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; + static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; + static const unsigned char std_ac_chrominance_values[] = { + 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, + 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, + 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, + 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, + 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, + 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa + }; + // Huffman tables + static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; + static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; + static const unsigned short YAC_HT[256][2] = { + {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const unsigned short UVAC_HT[256][2] = { + {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + 
{505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, + {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, + {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} + }; + static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, + 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; + static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, + 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; + static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, + 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; + + int row, col, i, k, subsample; + float fdtbl_Y[64], fdtbl_UV[64]; + unsigned char YTable[64], UVTable[64]; + + if(!data || !width || !height || comp > 4 || comp < 1) { + return 0; + } + + quality = quality ? quality : 90; + subsample = quality <= 90 ? 1 : 0; + quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; + quality = quality < 50 ? 5000 / quality : 200 - quality * 2; + + for(i = 0; i < 64; ++i) { + int uvti, yti = (YQT[i]*quality+50)/100; + YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 
255 : yti); + uvti = (UVQT[i]*quality+50)/100; + UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); + } + + for(row = 0, k = 0; row < 8; ++row) { + for(col = 0; col < 8; ++col, ++k) { + fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); + } + } + + // Write Headers + { + static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; + static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; + const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), + 3,1,(unsigned char)(subsample?0x22:0x11),0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; + s->func(s->context, (void*)head0, sizeof(head0)); + s->func(s->context, (void*)YTable, sizeof(YTable)); + stbiw__putc(s, 1); + s->func(s->context, UVTable, sizeof(UVTable)); + s->func(s->context, (void*)head1, sizeof(head1)); + s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); + s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); + stbiw__putc(s, 0x10); // HTYACinfo + s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); + s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); + stbiw__putc(s, 1); // HTUDCinfo + s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); + stbiw__putc(s, 0x11); // HTUACinfo + s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); + s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); + s->func(s->context, (void*)head2, sizeof(head2)); + 
} + + // Encode 8x8 macroblocks + { + static const unsigned short fillBits[] = {0x7F, 7}; + int DCY=0, DCU=0, DCV=0; + int bitBuf=0, bitCnt=0; + // comp == 2 is grey+alpha (alpha is ignored) + int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; + const unsigned char *dataR = (const unsigned char *)data; + const unsigned char *dataG = dataR + ofsG; + const unsigned char *dataB = dataR + ofsB; + int x, y, pos; + if(subsample) { + for(y = 0; y < height; y += 16) { + for(x = 0; x < width; x += 16) { + float Y[256], U[256], V[256]; + for(row = y, pos = 0; row < y+16; ++row) { + // row >= height => use last input row + int clamped_row = (row < height) ? row : height - 1; + int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp; + for(col = x; col < x+16; ++col, ++pos) { + // if col >= width => use pixel from last input column + int p = base_p + ((col < width) ? col : (width-1))*comp; + float r = dataR[p], g = dataG[p], b = dataB[p]; + Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128; + U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b; + V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b; + } + } + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+0, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+8, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+128, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+136, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); + + // subsample U,V + { + float subU[64], subV[64]; + int yy, xx; + for(yy = 0, pos = 0; yy < 8; ++yy) { + for(xx = 0; xx < 8; ++xx, ++pos) { + int j = yy*32+xx*2; + subU[pos] = (U[j+0] + U[j+1] + U[j+16] + U[j+17]) * 0.25f; + subV[pos] = (V[j+0] + V[j+1] + V[j+16] + V[j+17]) * 0.25f; + } + } + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subU, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subV, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); + } + } + 
} + } else { + for(y = 0; y < height; y += 8) { + for(x = 0; x < width; x += 8) { + float Y[64], U[64], V[64]; + for(row = y, pos = 0; row < y+8; ++row) { + // row >= height => use last input row + int clamped_row = (row < height) ? row : height - 1; + int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp; + for(col = x; col < x+8; ++col, ++pos) { + // if col >= width => use pixel from last input column + int p = base_p + ((col < width) ? col : (width-1))*comp; + float r = dataR[p], g = dataG[p], b = dataB[p]; + Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128; + U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b; + V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b; + } + } + + DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y, 8, fdtbl_Y, DCY, YDC_HT, YAC_HT); + DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, U, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); + DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, V, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); + } + } + } + + // Do the bit alignment of the EOI marker + stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); + } + + // EOI + stbiw__putc(s, 0xFF); + stbiw__putc(s, 0xD9); + + return 1; +} + +STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s = { 0 }; + stbi__start_write_callbacks(&s, func, context); + return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); +} + + +#ifndef STBI_WRITE_NO_STDIO +STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) +{ + stbi__write_context s = { 0 }; + if (stbi__start_write_file(&s,filename)) { + int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); + stbi__end_write_file(&s); + return r; + } else + return 0; +} +#endif + +#endif // STB_IMAGE_WRITE_IMPLEMENTATION + +/* Revision history + 1.16 (2021-07-11) + make Deflate code emit uncompressed blocks when it would otherwise expand + support 
writing BMPs with alpha channel + 1.15 (2020-07-13) unknown + 1.14 (2020-02-02) updated JPEG writer to downsample chroma channels + 1.13 + 1.12 + 1.11 (2019-08-11) + + 1.10 (2019-02-07) + support utf8 filenames in Windows; fix warnings and platform ifdefs + 1.09 (2018-02-11) + fix typo in zlib quality API, improve STB_I_W_STATIC in C++ + 1.08 (2018-01-29) + add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter + 1.07 (2017-07-24) + doc fix + 1.06 (2017-07-23) + writing JPEG (using Jon Olick's code) + 1.05 ??? + 1.04 (2017-03-03) + monochrome BMP expansion + 1.03 ??? + 1.02 (2016-04-02) + avoid allocating large structures on the stack + 1.01 (2016-01-16) + STBIW_REALLOC_SIZED: support allocators with no realloc support + avoid race-condition in crc initialization + minor compile issues + 1.00 (2015-09-14) + installable file IO function + 0.99 (2015-09-13) + warning fixes; TGA rle support + 0.98 (2015-04-08) + added STBIW_MALLOC, STBIW_ASSERT etc + 0.97 (2015-01-18) + fixed HDR asserts, rewrote HDR rle logic + 0.96 (2015-01-17) + add HDR output + fix monochrome BMP + 0.95 (2014-08-17) + add monochrome TGA output + 0.94 (2014-05-31) + rename private functions to avoid conflicts with stb_image.h + 0.93 (2014-05-27) + warning fixes + 0.92 (2010-08-01) + casts to unsigned char to fix warnings + 0.91 (2010-07-17) + first public release + 0.90 first internal release +*/ + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/thirdparty/thirdparty.qbs b/thirdparty/thirdparty.qbs index 6ab07ebe5..54bd43cd8 100644 --- a/thirdparty/thirdparty.qbs +++ b/thirdparty/thirdparty.qbs @@ -5,6 +5,7 @@ Project { references: [ "angelscript/angelscript.qbs", + "basisu/basisu.qbs", "bullet/bullet3.qbs", "assimp/assimp.qbs", "next/next.qbs",